From 794400eaa0475d0b1afeb28c826f274933d32c31 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Mon, 29 Apr 2019 17:17:44 +0200 Subject: [PATCH 01/63] DixonRNSSolver base --- linbox/solutions/methods.h | 8 +++ linbox/solutions/solve.h | 6 +++ tests/test-solve-full.C | 101 +++++++++++++++++++------------------ 3 files changed, 66 insertions(+), 49 deletions(-) diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 56fb077b9..072eba56e 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -218,6 +218,9 @@ namespace LinBox { // @fixme SingularSolutionType::Deterministic fails with Dense Dixon SingularSolutionType singularSolutionType = SingularSolutionType::Random; + // ----- For DixonRNS method. + uint32_t primeBaseLength = 16u; //!< How many primes to use lifting will be done over p = p1p2...pl. + // ----- For random-based systems. size_t trialsBeforeFailure = LINBOX_DEFAULT_TRIALS_BEFORE_FAILURE; //!< Maximum number of trials before giving up. bool certifyInconsistency = false; //!< Whether the solver should attempt to find a certificate of inconsistency if @@ -263,6 +266,11 @@ namespace LinBox { // (Numerische Mathematik - Dixon 1982) DEFINE_METHOD(Dixon, RingCategories::IntegerTag); + // Method::DixonRNS uses RNS features over Dixon's p-adic lifting. + // (A BLAS Based C Library for Exact Linear Algebra on Integer Matrices - Chen, Storjohann ISSAC 2005) + // https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf + DEFINE_METHOD(DixonRNS, RingCategories::IntegerTag); + // Method::ChineseRemainder uses the chinese remainder algorithm // to solve the problem on multiple modular domains, // and finally reconstruct the solution. 
diff --git a/linbox/solutions/solve.h b/linbox/solutions/solve.h index 224215e38..2e91f58ca 100644 --- a/linbox/solutions/solve.h +++ b/linbox/solutions/solve.h @@ -84,6 +84,11 @@ namespace LinBox { * | - SparseMatrix > `RationalSolver<..., Method::SparseElimination>` * | - Otherwise > Error * - Otherwise > Error + * - Method::DixonRNS + * - IntegerTag + * | - DenseMatrix > `DixonRNSSolver` + * | - Otherwise > Error + * - Otherwise > Error * - Method::Blackbox > Method::Wiedemann * - Method::Wiedemann * - ModularTag > `WiedemannSolver` @@ -337,6 +342,7 @@ namespace LinBox { // Integer-based #include "./solve/solve-cra.h" #include "./solve/solve-dixon.h" +#include "./solve/solve-dixon-rns.h" #include "./solve/solve-numeric-symbolic.h" // Blackbox diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index d40bfb941..4caf811bf 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -260,31 +260,31 @@ int main(int argc, char** argv) bool ok = true; do { - // ----- Rational Auto - ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - // @fixme Dixon does not compile - // ok = ok && test_blackbox_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - - ok = ok && test_dense_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - ok = ok && test_sparse_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - // ok = ok && test_blackbox_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - - // ----- Rational CRA - // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails - ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, 
seed, verbose); - // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - - ok = ok && test_dense_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - ok = ok && test_sparse_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - - // ----- Rational Dixon - ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - // @fixme Dixon does not compile - // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // // ----- Rational Auto + // ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // // @fixme Dixon does not compile + // // ok = ok && test_blackbox_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + + // ok = ok && test_dense_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // ok = ok && test_sparse_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // // ok = ok && test_blackbox_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + + // // ----- Rational CRA + // // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails + // ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, 
bitSize, vectorBitSize, seed, verbose); + + // ok = ok && test_dense_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // ok = ok && test_sparse_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + + // // ----- Rational Dixon + // ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // // @fixme Dixon does not compile + // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ----- Rational SymbolicNumeric // @note SymbolicNumeric methods are only implemented on DenseMatrix @@ -295,30 +295,33 @@ int main(int argc, char** argv) // ok = ok && test_sparse_solve(Method::SymbolicNumericNorm(method), ZZ, QQ, m, n, bitSize, vectorBitSize, // seed, verbose); - // ----- Modular Auto - ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); - - // ----- Modular Blackbox - ok = ok && test_dense_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); - - // ----- Modular DenseElimination - ok = ok && test_dense_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); 
- - // ----- Modular SparseElimination - ok = ok && test_dense_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); - - // ----- Modular Wiedemann - ok = ok && test_dense_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); + // ----- Rational DixonRNS + ok = ok && test_dense_solve(Method::DixonRNS(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + + // // ----- Modular Auto + // ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); + + // // ----- Modular Blackbox + // ok = ok && test_dense_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); + + // // ----- Modular DenseElimination + // ok = ok && test_dense_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); + + // // ----- Modular SparseElimination + // ok = ok && test_dense_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); + 
// ok = ok && test_blackbox_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); + + // // ----- Modular Wiedemann + // ok = ok && test_dense_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); // ----- Modular Lanczos // @fixme Dense is segfaulting From 5ff5d81b20d5b21cdf42f2ade5117670fc03b80e Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 3 May 2019 11:37:51 +0200 Subject: [PATCH 02/63] Dixon RNS base (again) --- linbox/algorithms/dixon-rns-solver.h | 82 ++++++++++++++++++++++++ linbox/algorithms/dixon-rns-solver.inl | 44 +++++++++++++ linbox/solutions/solve/solve-dixon-rns.h | 53 +++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 linbox/algorithms/dixon-rns-solver.h create mode 100644 linbox/algorithms/dixon-rns-solver.inl create mode 100644 linbox/solutions/solve/solve-dixon-rns.h diff --git a/linbox/algorithms/dixon-rns-solver.h b/linbox/algorithms/dixon-rns-solver.h new file mode 100644 index 000000000..0d0036a69 --- /dev/null +++ b/linbox/algorithms/dixon-rns-solver.h @@ -0,0 +1,82 @@ +/* + * Copyright(C) LinBox + * + * ========LICENCE======== + * This file is part of the library LinBox. + * + * LinBox is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * ========LICENCE======== + */ + +#pragma once + +#include + +namespace LinBox { + /** + * @fixme Should this just be a different LiftingContainer? + * + * Chen/Storjohann RNS-based p-adic lifting. + * The algorithm solves Ax = b over the integers. + * + * Based on https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf + * A BLAS Based C Library for Exact Linear Algebra on Integer Matrices (ISSAC 2009) + * + * Dixon algorithm goes this way: + * (i) Compute B := A^{-1} mod p + * (with p a random number which is hopefully orthogonal to det(A)) + * (ii) Compute (ci) such that A^{-1} b = c0 + c1 p + ... + ci p^i mod p^{i+1} + * Which means: r = b + * for i = 0 .. k-1: + * | ci = B r mod p + * | r = (r - A ci) / p + * (stop when p^k > 2ND given by Hadamard bound) + * (iii) Rational reconstruct with c = c0 + c1 p + ... + ck p^{k-1} (over the integers) + * + * The RNS part: + * (i) Use p = p1p2...pl with an arbitrary l + * (ii) We can compute the residues for each pj by having ci expressed in an RNS system. + * r = b + * for i = 0 .. k-1: + * | for j = 0 .. l-1: + * | | ci[j] = B r mod pj + * | (Q, R) = such that r = pQ + R with |R| < p + * | r = Q + (R - A ci) / p < Matrix-vector multiplication done in RNS domain + * | and final addition over ZZ + * /!\ @fixme I do not get it, how can 1 / p be computed in the RNS system, or is it just R - A ci? + * /!\ @fixme The paper does not talk about matrix-matrix multiplication, + * but instead about exploiting RNS. + * (iii) Having solved the system for each pj, we first RNS-reconstruct the solution mod p + * before rational reconstruction. + * + * One can configure how many primes are used with `Method::DixonRNS.primeBaseLength`. 
+ * According to the paper, a value of l = 2 (ln(n) + log2(||A||)) or without the factor 2 + * can be used, but it depends on the problem, really. + */ + template + class DixonRNSSolver { + public: + DixonRNSSolver(const Ring& ring, PrimeGenerator primeGenerator); + + /** + * Dense solving. + */ + template + void solve(IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix& A, + const Vector& b, const Method::DixonRNS& m); + }; +} + +#include "./dixon-rns-solver.inl" \ No newline at end of file diff --git a/linbox/algorithms/dixon-rns-solver.inl b/linbox/algorithms/dixon-rns-solver.inl new file mode 100644 index 000000000..6752598d3 --- /dev/null +++ b/linbox/algorithms/dixon-rns-solver.inl @@ -0,0 +1,44 @@ +/* + * Copyright(C) LinBox + * + * ========LICENCE======== + * This file is part of the library LinBox. + * + * LinBox is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * ========LICENCE======== + */ + +#pragma once + +#include + +namespace LinBox { + template + inline DixonRNSSolver::DixonRNSSolver( + const Ring& ring, PrimeGenerator primeGenerator) + { + } + + /** + * Dense solving. 
+ */ + template + template + inline void DixonRNSSolver::solve( + IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix& A, + const Vector& b, const Method::DixonRNS& m) + { + } +} \ No newline at end of file diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h new file mode 100644 index 000000000..1da9dce2a --- /dev/null +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -0,0 +1,53 @@ +/* + * Copyright(C) LinBox + * + * ========LICENCE======== + * This file is part of the library LinBox. + * + * LinBox is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * ========LICENCE======== + */ + +#pragma once + +#include + +namespace LinBox { + /** + * \brief Solve specialisation for DixonRNS on dense matrices. 
+ */ + template + void solve(IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix& A, const Vector& b, + const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) + { + commentator().start("solve.dixon.integer.dense"); + + using Field = Givaro::ModularBalanced; + using PrimeGenerator = PrimeIterator; + PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); + + DixonRNSSolver solver(A.field(), primeGenerator); + solver.solve(xNum, xDen, A, b, m); + + commentator().stop("solve.dixon.integer.dense"); + + // @fixme Implement something like that + // if (status == SS_INCONSISTENT) { + // throw LinboxMathInconsistentSystem("From DixonRNS method."); + // } else if (status == SS_FAILED || status == SS_BAD_PRECONDITIONER) { + // throw LinboxError("From DixonRNS method."); + // } + } +} \ No newline at end of file From 92ab216c2db13c297fa66dcffce3e776c3743f5e Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 3 May 2019 16:01:22 +0200 Subject: [PATCH 03/63] More doc before implem --- linbox/algorithms/dixon-rns-solver.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/linbox/algorithms/dixon-rns-solver.h b/linbox/algorithms/dixon-rns-solver.h index 0d0036a69..161a2d088 100644 --- a/linbox/algorithms/dixon-rns-solver.h +++ b/linbox/algorithms/dixon-rns-solver.h @@ -46,23 +46,27 @@ namespace LinBox { * (iii) Rational reconstruct with c = c0 + c1 p + ... + ck p^{k-1} (over the integers) * * The RNS part: - * (i) Use p = p1p2...pl with an arbitrary l - * (ii) We can compute the residues for each pj by having ci expressed in an RNS system. + * (i) Use p = p0p1...p{lp-1} with an arbitrary lp and (q0, q1, ..., q{lq-1}) also primes. + * (ii) We now do our computation in a RNS system (p0, ..., p{lp-1}, q0, ..., q{lq-1}): * r = b * for i = 0 .. k-1: - * | for j = 0 .. l-1: - * | | ci[j] = B r mod pj + * | for j = 0 .. 
lq-1: + * | | ci[qj] = Bj r mod qj * | (Q, R) = such that r = pQ + R with |R| < p * | r = Q + (R - A ci) / p < Matrix-vector multiplication done in RNS domain * | and final addition over ZZ - * /!\ @fixme I do not get it, how can 1 / p be computed in the RNS system, or is it just R - A ci? + * @note (R - A ci) / p can be computed in a RNS system. + * We know that (R - A ci) is divisible by p, + * so its representation is 0 on all lp first terms of the representation, meaning + * we just need representation of ci mod (q0, ..., q{lq-1}). + * For the division part, we just have to multiply the RNS representation of (R - A ci) by + * (1/p) mod (q0, ..., q{lq-1}). * /!\ @fixme The paper does not talk about matrix-matrix multiplication, * but instead about exploiting RNS. - * (iii) Having solved the system for each pj, we first RNS-reconstruct the solution mod p - * before rational reconstruction. + * (iii) We first RNS-reconstruct the solution before rational reconstruction. * * One can configure how many primes are used with `Method::DixonRNS.primeBaseLength`. - * According to the paper, a value of l = 2 (ln(n) + log2(||A||)) or without the factor 2 + * According to the paper, a value of lp = 2 (ln(n) + log2(||A||)) or without the factor 2 * can be used, but it depends on the problem, really. */ template From 264cdc89b24fd46b9c5288bf862e72c0dd2341ad Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Mon, 6 May 2019 15:51:31 +0200 Subject: [PATCH 04/63] Updated dixon RNS solver algorithm description --- linbox/algorithms/dixon-rns-solver.h | 56 ++++++++++++---------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/linbox/algorithms/dixon-rns-solver.h b/linbox/algorithms/dixon-rns-solver.h index 161a2d088..60264e455 100644 --- a/linbox/algorithms/dixon-rns-solver.h +++ b/linbox/algorithms/dixon-rns-solver.h @@ -26,44 +26,34 @@ namespace LinBox { /** - * @fixme Should this just be a different LiftingContainer? 
+ * @fixme This should just be a different LiftingContainer! * - * Chen/Storjohann RNS-based p-adic lifting. * The algorithm solves Ax = b over the integers. - * + * It is based on Chen/Storjohann RNS-based p-adic lifting. * Based on https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf * A BLAS Based C Library for Exact Linear Algebra on Integer Matrices (ISSAC 2009) + * But it has been slightly modified in order to use BLAS3 multiplication within the main loop. * - * Dixon algorithm goes this way: - * (i) Compute B := A^{-1} mod p - * (with p a random number which is hopefully orthogonal to det(A)) - * (ii) Compute (ci) such that A^{-1} b = c0 + c1 p + ... + ci p^i mod p^{i+1} - * Which means: r = b - * for i = 0 .. k-1: - * | ci = B r mod p - * | r = (r - A ci) / p - * (stop when p^k > 2ND given by Hadamard bound) - * (iii) Rational reconstruct with c = c0 + c1 p + ... + ck p^{k-1} (over the integers) - * - * The RNS part: - * (i) Use p = p0p1...p{lp-1} with an arbitrary lp and (q0, q1, ..., q{lq-1}) also primes. - * (ii) We now do our computation in a RNS system (p0, ..., p{lp-1}, q0, ..., q{lq-1}): - * r = b - * for i = 0 .. k-1: - * | for j = 0 .. lq-1: - * | | ci[qj] = Bj r mod qj - * | (Q, R) = such that r = pQ + R with |R| < p - * | r = Q + (R - A ci) / p < Matrix-vector multiplication done in RNS domain - * | and final addition over ZZ - * @note (R - A ci) / p can be computed in a RNS system. - * We know that (R - A ci) is divisible by p, - * so its representation is 0 on all lp first terms of the representation, meaning - * we just need representation of ci mod (q0, ..., q{lq-1}). - * For the division part, we just have to multiply the RNS representation of (R - A ci) by - * (1/p) mod (q0, ..., q{lq-1}). - * /!\ @fixme The paper does not talk about matrix-matrix multiplication, - * but instead about exploiting RNS. - * (iii) We first RNS-reconstruct the solution before rational reconstruction. 
+ * RNS Dixon algorithm goes this way: + * (i) Use (p1, ..., pl) primes with an arbitrary l. + * (ii) Algorithm goes: + * for i = 1 .. l: + * | Bi = A^{-1} mod pi < Pre-computing + * [r1|...|rl] = [b|...|b] + * [y1|...|yl] = [0|...|0] + * for j = 1 .. k: + * | for i = 1 .. l: + * | | ci = Bi ri mod pi < Matrix-vector in Z/pZ + * | | yi = (yi * pi) + ci < Done over ZZ + * | | (Qi, Ri) = such that r = pi Qi + Ri with |Ri| < pi + * | V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ + * | for i = 1 .. l: + * | | ri = Qi + (Vi / pi) + * @note The computation of V can be done in a RNS system such that each RNS base-prime + * is bigger than each (p1, ..., pl). This way, [R1|...|Rl] and [c1|...|cl] are zero-cost + * to get in the RNS system. + * (iii) y = CRT_Reconstruct(y1, ..., yl) + * (iv) x = Rational_Reconstruct(y) * * One can configure how many primes are used with `Method::DixonRNS.primeBaseLength`. * According to the paper, a value of lp = 2 (ln(n) + log2(||A||)) or without the factor 2 From a7155bf6b758ca52f180dc2b29614dd949145d75 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 14 May 2019 15:55:39 +0200 Subject: [PATCH 05/63] Base for MultiModLiftingContainer --- linbox/algorithms/dixon-rns-solver.inl | 44 ----------------- ...solver.h => multi-mod-lifting-container.h} | 48 ++++++++++++------- linbox/solutions/solve/solve-dixon-rns.h | 44 +++++++++++++++-- 3 files changed, 70 insertions(+), 66 deletions(-) delete mode 100644 linbox/algorithms/dixon-rns-solver.inl rename linbox/algorithms/{dixon-rns-solver.h => multi-mod-lifting-container.h} (69%) diff --git a/linbox/algorithms/dixon-rns-solver.inl b/linbox/algorithms/dixon-rns-solver.inl deleted file mode 100644 index 6752598d3..000000000 --- a/linbox/algorithms/dixon-rns-solver.inl +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright(C) LinBox - * - * ========LICENCE======== - * This file is part of the library LinBox. 
- * - * LinBox is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * ========LICENCE======== - */ - -#pragma once - -#include - -namespace LinBox { - template - inline DixonRNSSolver::DixonRNSSolver( - const Ring& ring, PrimeGenerator primeGenerator) - { - } - - /** - * Dense solving. - */ - template - template - inline void DixonRNSSolver::solve( - IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix& A, - const Vector& b, const Method::DixonRNS& m) - { - } -} \ No newline at end of file diff --git a/linbox/algorithms/dixon-rns-solver.h b/linbox/algorithms/multi-mod-lifting-container.h similarity index 69% rename from linbox/algorithms/dixon-rns-solver.h rename to linbox/algorithms/multi-mod-lifting-container.h index 60264e455..7072eb866 100644 --- a/linbox/algorithms/dixon-rns-solver.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -26,8 +26,6 @@ namespace LinBox { /** - * @fixme This should just be a different LiftingContainer! - * * The algorithm solves Ax = b over the integers. * It is based on Chen/Storjohann RNS-based p-adic lifting. * Based on https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf @@ -43,15 +41,15 @@ namespace LinBox { * [y1|...|yl] = [0|...|0] * for j = 1 .. k: * | for i = 1 .. 
l: - * | | ci = Bi ri mod pi < Matrix-vector in Z/pZ - * | | yi = (yi * pi) + ci < Done over ZZ * | | (Qi, Ri) = such that r = pi Qi + Ri with |Ri| < pi + * | | ci = Bi ri mod pi < Matrix-vector in Z/pZ + * | | yi = yi + ci * pi^(i-1) < Done over ZZ * | V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ * | for i = 1 .. l: * | | ri = Qi + (Vi / pi) - * @note The computation of V can be done in a RNS system such that each RNS base-prime - * is bigger than each (p1, ..., pl). This way, [R1|...|Rl] and [c1|...|cl] are zero-cost - * to get in the RNS system. + * @note The computation of V can be done in a RNS system such that each RNS + * base-prime is bigger than each (p1, ..., pl). This way, [R1|...|Rl] and [c1|...|cl] are + * zero-cost to get in the RNS system. * (iii) y = CRT_Reconstruct(y1, ..., yl) * (iv) x = Rational_Reconstruct(y) * @@ -59,18 +57,32 @@ namespace LinBox { * According to the paper, a value of lp = 2 (ln(n) + log2(||A||)) or without the factor 2 * can be used, but it depends on the problem, really. */ - template - class DixonRNSSolver { + template + class MultiModLiftingContainer final : public LiftingContainerBase<_Ring, DenseMatrix<_Ring>> { + using BaseClass = LiftingContainerBase<_Ring, DenseMatrix<_Ring>>; + + public: + using typename BaseClass::Ring; + using typename BaseClass::IMatrix; + using typename BaseClass::IVector; + + using Field = _Field; + using PrimeGenerator = _PrimeGenerator; + public: - DixonRNSSolver(const Ring& ring, PrimeGenerator primeGenerator); + // @fixme + const std::vector primes = {97, 101}; + + // @fixme Split to inline file + MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, + const IMatrix& A, const IVector& b, + const Method::DixonRNS& m) + : BaseClass(ring, A, b, 97 * 101) + { + } - /** - * Dense solving. 
- */ - template - void solve(IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix& A, - const Vector& b, const Method::DixonRNS& m); + IVector& nextdigit (IVector& , const IVector&) const final { + + } }; } - -#include "./dixon-rns-solver.inl" \ No newline at end of file diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 1da9dce2a..c62e1252c 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -22,15 +22,51 @@ #pragma once -#include +#include namespace LinBox { + // @fixme Move that to a file - and make it be a RationalSolver + template + class DixonRNSSolver { + public: + DixonRNSSolver(const Ring& ring, PrimeGenerator& primeGenerator) + : _ring(ring) + , _primeGenerator(primeGenerator) + { + /* @todo */ + } + + /** + * Dense solving. + */ + template + void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, + const Vector& b, const Method::DixonRNS& m) + { + // @fixme We should use some code from DixonSolver... + // But that's hard so we just assume that A is square and invertible. + linbox_check(A.rowdim() == A.coldim()); + + using LiftingContainer = MultiModLiftingContainer; + LiftingContainer lc(_ring, _primeGenerator, A, b, m); + RationalReconstruction re(lc); + + if (!re.getRational(xNum, xDen, 0)) { + std::cerr << "OUCH!" << std::endl; + } + } + + private: + const Ring& _ring; + PrimeGenerator& _primeGenerator; + }; + /** * \brief Solve specialisation for DixonRNS on dense matrices. 
*/ - template - void solve(IntVector& xNum, typename IntVector::Element& xDen, const DenseMatrix& A, const Vector& b, - const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) + template + void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, + const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) { commentator().start("solve.dixon.integer.dense"); From 8e85814a6d9f0e427595757af1f50971ae83279d Mon Sep 17 00:00:00 2001 From: "A. Breust" Date: Fri, 17 May 2019 07:59:33 +0200 Subject: [PATCH 06/63] More on lifting - r fill up --- .../algorithms/multi-mod-lifting-container.h | 45 +++++++++++++++---- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 7072eb866..7ba60aa05 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -41,7 +41,7 @@ namespace LinBox { * [y1|...|yl] = [0|...|0] * for j = 1 .. k: * | for i = 1 .. 
l: - * | | (Qi, Ri) = such that r = pi Qi + Ri with |Ri| < pi + * | | (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi * | | ci = Bi ri mod pi < Matrix-vector in Z/pZ * | | yi = yi + ci * pi^(i-1) < Done over ZZ * | V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ @@ -62,27 +62,56 @@ namespace LinBox { using BaseClass = LiftingContainerBase<_Ring, DenseMatrix<_Ring>>; public: - using typename BaseClass::Ring; using typename BaseClass::IMatrix; using typename BaseClass::IVector; + using typename BaseClass::Ring; using Field = _Field; using PrimeGenerator = _PrimeGenerator; public: - // @fixme - const std::vector primes = {97, 101}; + // @fixme Have dynamic random ones + const std::vector p = {97, 101}; // @fixme Split to inline file - MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, - const IMatrix& A, const IVector& b, + MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, const IVector& b, const Method::DixonRNS& m) + // @fixme Am forces to set the prime here? Why? : BaseClass(ring, A, b, 97 * 101) + , _ring(ring) { - } + // @note From baseClass, we have _length = log2(2 * N * D) + + // @fixme Have l = log(||A||) + log(n) or so + uint32_t l = p.size(); + + // Ap[0] = A mod p[0] + // Ap[1] = A mod p[1] - IVector& nextdigit (IVector& , const IVector&) const final { + // B[0] = inv(Ap[0]) mod p[0] @fixme How? + // B[1] = inv(Ap[1]) mod p[1] + // @note As _r is row major, we store each ri on each row. + // So that r[i] = current residue for p[i]. + _r = std::make_unique>(_ring, l, b.size()); + for (auto i = 0u; i < l; ++i) { + // @fixme Is there a vector domain to copy to a matrix? + for (auto j = 0u; j < b.size(); ++j) { + _ring.assign(_r[i][j], b[j]); + } + } } + + IVector& nextdigit(IVector&, const IVector&) const final + { + // @fixme With this design, are we forces to CRT_Reconstruct each ci? + // Is this bad? 
+ } + + private: + Ring& _ring; + + // @note r is a big matrix in ZZ holding all residues + std::unique_ptr> _r; }; } From 6b6deb40319151f040a7cfc10f0987117e3c0445 Mon Sep 17 00:00:00 2001 From: "A. Breust" Date: Fri, 17 May 2019 08:34:29 +0200 Subject: [PATCH 07/63] Multi mod lifting incoming! --- .../algorithms/multi-mod-lifting-container.h | 58 +++++++++++++++++-- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 7ba60aa05..8c5883936 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -85,33 +85,81 @@ namespace LinBox { // @fixme Have l = log(||A||) + log(n) or so uint32_t l = p.size(); + // @fixme Initialize fields _F[i] + // Ap[0] = A mod p[0] // Ap[1] = A mod p[1] - // B[0] = inv(Ap[0]) mod p[0] @fixme How? + // B[0] = inv(Ap[0]) mod p[0] // B[1] = inv(Ap[1]) mod p[1] + // @fixme How? // @note As _r is row major, we store each ri on each row. // So that r[i] = current residue for p[i]. - _r = std::make_unique>(_ring, l, b.size()); + _r.init(_ring, l, b.size()); for (auto i = 0u; i < l; ++i) { // @fixme Is there a vector domain to copy to a matrix? for (auto j = 0u; j < b.size(); ++j) { _ring.assign(_r[i][j], b[j]); } } + + // @fixme Allocate Q and R + // @fixme Allocate c + + // @todo Set up an RNS system } - IVector& nextdigit(IVector&, const IVector&) const final + IVector& nextdigit(IVector& digit, const IVector& residu) const final { - // @fixme With this design, are we forces to CRT_Reconstruct each ci? + // @fixme The residu can't be r, here! + // So the overall does a lot more job than it needs. + // See below for the solution. + + // @fixme With this design, are we forced to CRT_Reconstruct each ci? // Is this bad? + // If we don't want that, we need to not extent LiftingContainerBase, + // and reimplement some of the behavior. 
+ // Because the only thing needed to user API (rational reconstruction) + // is bool next (IVector& digit) from iterator. + + /* for i = 1 .. l: + * | (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi + * | ci = Bi Ri mod pi < Matrix-vector in Z/pZ + * V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ + * for i = 1 .. l: + * | ri = Qi + (Vi / pi) + */ + + // @fixme Could be parallel! + for (auto i = 0u; i < l; ++i) { + Hom hom(_ring, _F[i]); + + // @fixme How to do euclidian division? + // ri = pi Qi + Ri + + // @todo If R might already be a field element + _B[i]->apply(_c[i], hom.convert(_R[i])); + + // @todo Convert _c[i] to RNS + } + + // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields? + + // @fixme Compute the next residue! + + return digit; } private: Ring& _ring; // @note r is a big matrix in ZZ holding all residues - std::unique_ptr> _r; + IMatrix _r; + FMatrix _c; + std::vector _B; // Inverses of A mod p[i] + std::vector _Q; + std::vector _R; + std::vector _F; }; } From d56184ecc00405437cd00ab632a58251215d7476 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 22 May 2019 14:21:12 +0200 Subject: [PATCH 08/63] Not using LiftingContainerBase anymore --- linbox/algorithms/lifting-container.h | 28 --- .../algorithms/multi-mod-lifting-container.h | 170 ++++++++++++++---- 2 files changed, 138 insertions(+), 60 deletions(-) diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h index 6a9180092..7735c9f89 100644 --- a/linbox/algorithms/lifting-container.h +++ b/linbox/algorithms/lifting-container.h @@ -283,34 +283,6 @@ namespace LinBox }; - /*- @brief Bit manipulation function for possible use in optimization. 
- * efficiently pulls out continuous blocks of bits, from lsb to msb inclusive - * least significant bits start at index 0, so msb >= lsb - * if any bits with index >= 8*numBytes are asked for they will be zeroes - */ -#if 0 - static long long bytesToBits(unsigned char * byteArray, size_t numBytes, size_t lsb, size_t msb) { - linbox_check(msb >= lsb); - size_t lsbi = lsb >> 3; - size_t msbi = msb >> 3; - if (msbi == lsbi) - if (msbi >= numBytes) - return 0; - else - return (byteArray[lsbi] >> (lsb & 7)) & ((1 << (msb - lsb + 1)) - 1); - - long long result = (msbi < numBytes) ? (byteArray[msbi] & ((1 << ((msb & 7)+1)) - 1)) : 0; - for (size_t i=msbi-1; i>lsbi; i--) { - result <<= 8; - result |= (i < numBytes) ? byteArray[i] : 0; - } - result <<= 8 - (lsb & 7); - result |= (lsbi < numBytes) ? (byteArray[lsbi] >> (lsb & 7)) : 0; - - return result; - } -#endif - const_iterator begin() const { return const_iterator(*this); diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 8c5883936..21ececa27 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -26,7 +26,10 @@ namespace LinBox { /** - * The algorithm solves Ax = b over the integers. + * The algorithm find out the p-adic writing of A^{-1} * b. + * So that A^{-1} * b = c0 + c1 * p + c2 * p^2 + ... + c{k-1} * p^{k-1}. + * The chosen p is multi-modular. + * * It is based on Chen/Storjohann RNS-based p-adic lifting. * Based on https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf * A BLAS Based C Library for Exact Linear Algebra on Integer Matrices (ISSAC 2009) @@ -58,32 +61,36 @@ namespace LinBox { * can be used, but it depends on the problem, really. 
*/ template - class MultiModLiftingContainer final : public LiftingContainerBase<_Ring, DenseMatrix<_Ring>> { - using BaseClass = LiftingContainerBase<_Ring, DenseMatrix<_Ring>>; + class MultiModLiftingContainer final : public LiftingContainer<_Ring> { + using BaseClass = LiftingContainer<_Ring>; public: - using typename BaseClass::IMatrix; - using typename BaseClass::IVector; - using typename BaseClass::Ring; - + using Ring = _Ring; using Field = _Field; using PrimeGenerator = _PrimeGenerator; + using IElement = typename _Ring::Element; + using IMatrix = DenseMatrix<_Ring>; + using IVector = DenseVector<_Ring>; + using FMatrix = DenseMatrix<_Field>; + using FVector = DenseVector<_Field>; + public: - // @fixme Have dynamic random ones - const std::vector p = {97, 101}; + // ------------------- + // ----- Main behavior // @fixme Split to inline file - MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, const IVector& b, - const Method::DixonRNS& m) - // @fixme Am forces to set the prime here? Why? - : BaseClass(ring, A, b, 97 * 101) - , _ring(ring) + MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, + const IVector& b, const Method::DixonRNS& m) + : _ring(ring) + , _r(_ring) + , _c(_field) { + // @fixme Compute hadamard and such + // @note From baseClass, we have _length = log2(2 * N * D) // @fixme Have l = log(||A||) + log(n) or so - uint32_t l = p.size(); // @fixme Initialize fields _F[i] @@ -96,13 +103,13 @@ namespace LinBox { // @note As _r is row major, we store each ri on each row. // So that r[i] = current residue for p[i]. - _r.init(_ring, l, b.size()); - for (auto i = 0u; i < l; ++i) { - // @fixme Is there a vector domain to copy to a matrix? - for (auto j = 0u; j < b.size(); ++j) { - _ring.assign(_r[i][j], b[j]); - } - } + // _r.init(_ring, l, b.size()); + // for (auto i = 0u; i < l; ++i) { + // // @fixme Is there a vector domain to copy to a matrix? 
+ // for (auto j = 0u; j < b.size(); ++j) { + // _ring.assign(_r[i][j], b[j]); + // } + // } // @fixme Allocate Q and R // @fixme Allocate c @@ -110,7 +117,8 @@ namespace LinBox { // @todo Set up an RNS system } - IVector& nextdigit(IVector& digit, const IVector& residu) const final + // @fixme USELESS? + IVector& nextdigit(IVector& digit, const IVector& residu) const { // @fixme The residu can't be r, here! // So the overall does a lot more job than it needs. @@ -132,17 +140,17 @@ namespace LinBox { */ // @fixme Could be parallel! - for (auto i = 0u; i < l; ++i) { - Hom hom(_ring, _F[i]); + // for (auto i = 0u; i < l; ++i) { + // Hom hom(_ring, _F[i]); - // @fixme How to do euclidian division? - // ri = pi Qi + Ri + // // @fixme How to do euclidian division? + // // ri = pi Qi + Ri - // @todo If R might already be a field element - _B[i]->apply(_c[i], hom.convert(_R[i])); + // // @todo If R might already be a field element + // _B[i]->apply(_c[i], hom.convert(_R[i])); - // @todo Convert _c[i] to RNS - } + // // @todo Convert _c[i] to RNS + // } // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields? @@ -151,8 +159,106 @@ namespace LinBox { return digit; } + // -------------------------- + // ----- LiftingContainer API + + const Ring& ring() const final { return _ring; } + + /// The length of the container. + size_t length() const final { return _k; } + + /// The dimension of the problem/solution. + size_t size() const final { return _n; } + + /** + * We are compliant to the interface even though + * p is multi-modular and thus not a prime. + */ + const IElement& prime() const final { return _p; } + + // ------------------------------ + // ----- NOT LiftingContainer API + // ----- but still needed + + const IElement numbound() const + { + return _numbound; + } + + const IElement denbound() const + { + return _denbound; + } + + // -------------- + // ----- Iterator + + /** + * Needed API for rational reconstruction. 
+ * Each call to next() will update + */ + class const_iterator { + private: + BlasVector _res; + const MultiModLiftingContainer& _lc; + size_t _position; + + public: + const_iterator(const MultiModLiftingContainer& lc, size_t end = 0) + : _lc(lc) + , _position(end) + { + // @fixme Initialize _residue + } + + /** + * Returns false if the next digit cannot be computed (bad modulus). + */ + bool next(IVector& digit) + { + // compute v2 = _matA * digit + IVector v2(_lc.ring(), _lc.size()); + // @fixme _lc._MAD.applyV(v2, digit, _res); + + // update _res -= v2 + // @fixme _lc._VDR.subin(_res, v2); + typename BlasVector::iterator p0; + + // update _res = _res / p + int index = 0; + for (p0 = _res.begin(); p0 != _res.end(); ++p0, ++index) { + _lc.ring().divin(*p0, _lc._p); + } + + // increase position of the iterator + ++_position; + return true; + } + + bool operator!=(const const_iterator& iterator) const + { + return _position != iterator._position; + } + + bool operator==(const const_iterator& iterator) const + { + return _position == iterator._position; + } + }; + + const_iterator begin() const { return const_iterator(*this); } + const_iterator end() const { return const_iterator(*this, _k); } + private: - Ring& _ring; + const Ring& _ring; + Field _field; + + IElement _numbound; + IElement _denbound; + + IElement _p; + size_t _k; //< Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound) + size_t _n; //< Row/column dimension of A. 
// @note r is a big matrix in ZZ holding all residues IMatrix _r; From 5bb101fee136bd264fb75f4232bad3285aa0a327 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 22 May 2019 15:29:09 +0200 Subject: [PATCH 09/63] Initializing up to inverse of A mod pi --- .../algorithms/multi-mod-lifting-container.h | 212 ++++++++++-------- 1 file changed, 121 insertions(+), 91 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 21ececa27..a0c984179 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -69,9 +69,10 @@ namespace LinBox { using Field = _Field; using PrimeGenerator = _PrimeGenerator; - using IElement = typename _Ring::Element; + using IElement = typename Ring::Element; using IMatrix = DenseMatrix<_Ring>; using IVector = DenseVector<_Ring>; + using FElement = typename Field::Element; using FMatrix = DenseMatrix<_Field>; using FVector = DenseVector<_Field>; @@ -83,23 +84,70 @@ namespace LinBox { MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, const IVector& b, const Method::DixonRNS& m) : _ring(ring) - , _r(_ring) - , _c(_field) + , _n(A.rowdim()) { - // @fixme Compute hadamard and such + linbox_check(A.rowdim() == A.coldim()); - // @note From baseClass, we have _length = log2(2 * N * D) + A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl; + std::cout << "b: " << b << std::endl; // @fixme Have l = log(||A||) + log(n) or so + _l = 2; + std::cout << "l: " << _l << std::endl; + + // Generating primes + IElement iTmp; + _ring.assign(_p, _ring.one); + for (auto i = 0u; i < _l; ++i) { + // @fixme Ensure that all primes are different + // @fixme Take into account bestBitSize! 
+ _primes.emplace_back(*primeGenerator); + _fields.emplace_back(_primes.back()); + _ring.init(iTmp, _primes.back()); + _ring.mulin(_p, iTmp); + + std::cout << "primes[" << i << "]: " << Integer(_primes.back()) << std::endl; + + ++primeGenerator; + } - // @fixme Initialize fields _F[i] - - // Ap[0] = A mod p[0] - // Ap[1] = A mod p[1] - - // B[0] = inv(Ap[0]) mod p[0] - // B[1] = inv(Ap[1]) mod p[1] - // @fixme How? + std::cout << "p: " << _p << std::endl; + + // Compute how many iterations are needed + auto hb = RationalSolveHadamardBound(A, b); + double pLog = Givaro::logtwo(_p); + _k = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) + / pLog); // log2(2 * N * D) / log2(p) + std::cout << "k: " << _k << std::endl; + + // @fixme Fact is RationalReconstruction which needs numbound and denbound + // expects them to be in non-log... + _ring.init(_numbound, Integer(1) << static_cast(std::ceil(hb.numLogBound))); + _ring.init(_denbound, Integer(1) << static_cast(std::ceil(hb.denLogBound))); + + // Initialize all inverses + // @fixme Somehow, the inverse mod p within DixonSolver was already computed, + // and pass through to the lifting container. Here, we can't do that, because p is + // bigger than what DixonSolver thought about it. So there might be a lot of + // computation done there that is completely useless when using this container. Meaning + // that we need a RNSDixonSolver. + { + for (const auto& F : _fields) { + BlasMatrixDomain bmd(F); + auto Bpi = std::make_unique(F, _n, _n); + + // @fixme Taken for rational-solver.inl. BETTER USE REBIND!!! + for (size_t i = 0; i < _n; ++i) { + for (size_t j = 0; j < _n; ++j) { + F.init(Bpi->refEntry(i, j), A.getEntry(i, j)); + } + } + + bmd.invin(*Bpi); + Bpi->write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", Tag::FileFormat::Maple) << std::endl; + _B.emplace_back(std::move(Bpi)); + } + } // @note As _r is row major, we store each ri on each row. // So that r[i] = current residue for p[i]. 
@@ -117,48 +165,6 @@ namespace LinBox { // @todo Set up an RNS system } - // @fixme USELESS? - IVector& nextdigit(IVector& digit, const IVector& residu) const - { - // @fixme The residu can't be r, here! - // So the overall does a lot more job than it needs. - // See below for the solution. - - // @fixme With this design, are we forced to CRT_Reconstruct each ci? - // Is this bad? - // If we don't want that, we need to not extent LiftingContainerBase, - // and reimplement some of the behavior. - // Because the only thing needed to user API (rational reconstruction) - // is bool next (IVector& digit) from iterator. - - /* for i = 1 .. l: - * | (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi - * | ci = Bi Ri mod pi < Matrix-vector in Z/pZ - * V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ - * for i = 1 .. l: - * | ri = Qi + (Vi / pi) - */ - - // @fixme Could be parallel! - // for (auto i = 0u; i < l; ++i) { - // Hom hom(_ring, _F[i]); - - // // @fixme How to do euclidian division? - // // ri = pi Qi + Ri - - // // @todo If R might already be a field element - // _B[i]->apply(_c[i], hom.convert(_R[i])); - - // // @todo Convert _c[i] to RNS - // } - - // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields? - - // @fixme Compute the next residue! 
- - return digit; - } - // -------------------------- // ----- LiftingContainer API @@ -180,15 +186,9 @@ namespace LinBox { // ----- NOT LiftingContainer API // ----- but still needed - const IElement numbound() const - { - return _numbound; - } + const IElement numbound() const { return _numbound; } - const IElement denbound() const - { - return _denbound; - } + const IElement denbound() const { return _denbound; } // -------------- // ----- Iterator @@ -199,36 +199,65 @@ namespace LinBox { */ class const_iterator { private: - BlasVector _res; const MultiModLiftingContainer& _lc; size_t _position; public: - const_iterator(const MultiModLiftingContainer& lc, size_t end = 0) + const_iterator(const MultiModLiftingContainer& lc, size_t position = 0) : _lc(lc) - , _position(end) + , _position(position) { - // @fixme Initialize _residue + // @fixme Initialize reisdue _r } /** * Returns false if the next digit cannot be computed (bad modulus). + * ci is a vector of integers but all element are below p = p1 * ... * pl */ - bool next(IVector& digit) + bool next(IVector& ci) { - // compute v2 = _matA * digit - IVector v2(_lc.ring(), _lc.size()); - // @fixme _lc._MAD.applyV(v2, digit, _res); - - // update _res -= v2 - // @fixme _lc._VDR.subin(_res, v2); - typename BlasVector::iterator p0; - - // update _res = _res / p - int index = 0; - for (p0 = _res.begin(); p0 != _res.end(); ++p0, ++index) { - _lc.ring().divin(*p0, _lc._p); - } + /* for i = 1 .. l: + * | (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi + * | ci = Bi Ri mod pi < Matrix-vector in Z/pZ + * V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ + * for i = 1 .. l: + * | ri = Qi + (Vi / pi) + */ + + std::cout << "ci: " << ci << std::endl; + + // @fixme Could be parallel! + // for (auto i = 0u; i < l; ++i) { + // Hom hom(_ring, _F[i]); + + // // @fixme How to do euclidian division? 
+ // // ri = pi Qi + Ri + + // // @todo If R might already be a field element + // _B[i]->apply(_c[i], hom.convert(_R[i])); + + // // @todo Convert _c[i] to RNS + // } + + // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields? + + // @fixme Compute the next residue! + + // @fixme @note For us, Aci is a matrix! + + // // compute Aci = _matA * ci + // IVector Aci(_lc.ring(), _lc.size()); + // // @fixme _lc._MAD.applyV(Aci, ci, _res); + + // // update _res -= Aci + // // @fixme _lc._VDR.subin(_res, Aci); + // typename BlasVector::iterator p0; + + // // update _res = _res / p + // int index = 0; + // for (p0 = _res.begin(); p0 != _res.end(); ++p0, ++index) { + // _lc.ring().divin(*p0, _lc._p); + // } // increase position of the iterator ++_position; @@ -251,21 +280,22 @@ namespace LinBox { private: const Ring& _ring; - Field _field; IElement _numbound; IElement _denbound; - IElement _p; - size_t _k; //< Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound) - size_t _n; //< Row/column dimension of A. + IElement _p; // The global modulus for lifting: a multiple of all _primes. + std::vector _primes; // @fixme We might want something else as a type! + size_t _k; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). + size_t _n; // Row/column dimension of A. + size_t _l; // How many primes. Equal to _primes.size(). 
// @note r is a big matrix in ZZ holding all residues - IMatrix _r; - FMatrix _c; - std::vector _B; // Inverses of A mod p[i] - std::vector _Q; - std::vector _R; - std::vector _F; + // IMatrix _r; + // FMatrix _c; + std::vector> _B; // Inverses of A mod p[i] + // std::vector _Q; + // std::vector _R; + std::vector _fields; }; } From 134c3cf5a3bf7a462b27c38fb7b8a6ab4f09628b Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 22 May 2019 16:30:29 +0200 Subject: [PATCH 10/63] More RNS dixon --- .../algorithms/multi-mod-lifting-container.h | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index a0c984179..4e2cd37f1 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -98,7 +98,7 @@ namespace LinBox { // Generating primes IElement iTmp; _ring.assign(_p, _ring.one); - for (auto i = 0u; i < _l; ++i) { + for (auto j = 0u; j < _l; ++j) { // @fixme Ensure that all primes are different // @fixme Take into account bestBitSize! _primes.emplace_back(*primeGenerator); @@ -224,20 +224,24 @@ namespace LinBox { * | ri = Qi + (Vi / pi) */ - std::cout << "ci: " << ci << std::endl; - // @fixme Could be parallel! - // for (auto i = 0u; i < l; ++i) { - // Hom hom(_ring, _F[i]); - - // // @fixme How to do euclidian division? - // // ri = pi Qi + Ri + for (auto j = 0u; j < _l; ++j) { + // @fixme How to do euclidian division? + // ri = pi Qi + Ri + + // @todo If R might already be a field element + // @cpernet!!! 
+ // @fixme We will probably need a low-level API + // so that we can say that the j-th row of _ci takes + // the result of B * R mod pj + // _B[j]->apply(*_ci[j], *_R[j]); + + // @todo Convert _c[i] to RNS + } - // // @todo If R might already be a field element - // _B[i]->apply(_c[i], hom.convert(_R[i])); + // @fixme CRT reconstruct ci from (cij) - // // @todo Convert _c[i] to RNS - // } + std::cout << "ci: " << ci << std::endl; // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields? @@ -292,7 +296,7 @@ namespace LinBox { // @note r is a big matrix in ZZ holding all residues // IMatrix _r; - // FMatrix _c; + FMatrix _ci; // Contains [ci mod p0 | ... | ci mod p{l-1}] on each row. std::vector> _B; // Inverses of A mod p[i] // std::vector _Q; // std::vector _R; From 7644d6d6073e7357d77f291d720693b85a9f882f Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 22 May 2019 18:00:43 +0200 Subject: [PATCH 11/63] Fixed compilation of MultiModLiftingContainer --- .../algorithms/multi-mod-lifting-container.h | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 4e2cd37f1..8f52f6ed9 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -106,7 +106,7 @@ namespace LinBox { _ring.init(iTmp, _primes.back()); _ring.mulin(_p, iTmp); - std::cout << "primes[" << i << "]: " << Integer(_primes.back()) << std::endl; + std::cout << "primes[" << j << "]: " << Integer(_primes.back()) << std::endl; ++primeGenerator; } @@ -116,8 +116,8 @@ namespace LinBox { // Compute how many iterations are needed auto hb = RationalSolveHadamardBound(A, b); double pLog = Givaro::logtwo(_p); - _k = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) - / pLog); // log2(2 * N * D) / log2(p) + // _k = log2(2 * N * D) / log2(p) + _k = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) / pLog); 
std::cout << "k: " << _k << std::endl; // @fixme Fact is RationalReconstruction which needs numbound and denbound @@ -132,20 +132,22 @@ namespace LinBox { // computation done there that is completely useless when using this container. Meaning // that we need a RNSDixonSolver. { + _B.reserve(_l); + for (const auto& F : _fields) { BlasMatrixDomain bmd(F); - auto Bpi = std::make_unique(F, _n, _n); + _B.emplace_back(F, _n, _n); + auto& Bpi = _B.back(); // @fixme Taken for rational-solver.inl. BETTER USE REBIND!!! for (size_t i = 0; i < _n; ++i) { for (size_t j = 0; j < _n; ++j) { - F.init(Bpi->refEntry(i, j), A.getEntry(i, j)); + F.init(Bpi.refEntry(i, j), A.getEntry(i, j)); } } - bmd.invin(*Bpi); - Bpi->write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", Tag::FileFormat::Maple) << std::endl; - _B.emplace_back(std::move(Bpi)); + bmd.invin(Bpi); // @fixme Use FFLAS directly, so that we can have a REAL in place inv. + Bpi.write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", Tag::FileFormat::Maple) << std::endl; } } @@ -225,7 +227,7 @@ namespace LinBox { */ // @fixme Could be parallel! - for (auto j = 0u; j < _l; ++j) { + for (auto j = 0u; j < _lc._l; ++j) { // @fixme How to do euclidian division? // ri = pi Qi + Ri @@ -296,8 +298,8 @@ namespace LinBox { // @note r is a big matrix in ZZ holding all residues // IMatrix _r; - FMatrix _ci; // Contains [ci mod p0 | ... | ci mod p{l-1}] on each row. - std::vector> _B; // Inverses of A mod p[i] + // FMatrix _ci; // Contains [ci mod p0 | ... | ci mod p{l-1}] on each row. 
+ std::vector _B; // Inverses of A mod p[i] // std::vector _Q; // std::vector _R; std::vector _fields; From 1ad2b15782fdd626f74bae0db61d73be652499e7 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Thu, 23 May 2019 14:10:37 +0200 Subject: [PATCH 12/63] RNSDixon euclidian division and so --- .../algorithms/multi-mod-lifting-container.h | 119 ++++++++++++------ linbox/algorithms/rns.h | 3 + linbox/algorithms/rns.inl | 16 ++- linbox/solutions/solve/solve-dixon-rns.h | 5 +- tests/test-solve-full.C | 8 +- 5 files changed, 110 insertions(+), 41 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 8f52f6ed9..d225be731 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -22,6 +22,7 @@ #pragma once +#include #include namespace LinBox { @@ -84,6 +85,8 @@ namespace LinBox { MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, const IVector& b, const Method::DixonRNS& m) : _ring(ring) + , _A(A) + , _b(b) , _n(A.rowdim()) { linbox_check(A.rowdim() == A.coldim()); @@ -91,7 +94,7 @@ namespace LinBox { A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl; std::cout << "b: " << b << std::endl; - // @fixme Have l = log(||A||) + log(n) or so + // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?) _l = 2; std::cout << "l: " << _l << std::endl; @@ -111,6 +114,7 @@ namespace LinBox { ++primeGenerator; } + _pRns.init(_primes); std::cout << "p: " << _p << std::endl; // Compute how many iterations are needed @@ -126,11 +130,10 @@ namespace LinBox { _ring.init(_denbound, Integer(1) << static_cast(std::ceil(hb.denLogBound))); // Initialize all inverses - // @fixme Somehow, the inverse mod p within DixonSolver was already computed, - // and pass through to the lifting container. Here, we can't do that, because p is - // bigger than what DixonSolver thought about it. 
So there might be a lot of - // computation done there that is completely useless when using this container. Meaning - // that we need a RNSDixonSolver. + // @note An inverse mod some p within DixonSolver was already computed, + // and pass through to the lifting container. Here, we could use that, but we have + // to keep control of generated primes, so that the RNS base has bigger primes + // than the . { _B.reserve(_l); @@ -146,25 +149,14 @@ namespace LinBox { } } - bmd.invin(Bpi); // @fixme Use FFLAS directly, so that we can have a REAL in place inv. - Bpi.write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", Tag::FileFormat::Maple) << std::endl; + // @fixme @cpernet Use FFLAS directly, so that we can have a REAL in place inv. + bmd.invin(Bpi); + + Bpi.write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", + Tag::FileFormat::Maple) + << std::endl; } } - - // @note As _r is row major, we store each ri on each row. - // So that r[i] = current residue for p[i]. - // _r.init(_ring, l, b.size()); - // for (auto i = 0u; i < l; ++i) { - // // @fixme Is there a vector domain to copy to a matrix? - // for (auto j = 0u; j < b.size(); ++j) { - // _ring.assign(_r[i][j], b[j]); - // } - // } - - // @fixme Allocate Q and R - // @fixme Allocate c - - // @todo Set up an RNS system } // -------------------------- @@ -180,7 +172,7 @@ namespace LinBox { /** * We are compliant to the interface even though - * p is multi-modular and thus not a prime. + * p is multi-modular and thus not a prime per se. */ const IElement& prime() const final { return _p; } @@ -202,6 +194,13 @@ namespace LinBox { class const_iterator { private: const MultiModLiftingContainer& _lc; + std::vector _r; // @todo Could be a matrix? Might not be useful, as it is never + // used directly in computations. + std::vector _Q; + std::vector _R; // @fixme This one should be expressed in a RNS system q, and + // HAS TO BE A MATRIX for gemm. 
+ std::vector + _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later. size_t _position; public: @@ -209,7 +208,27 @@ namespace LinBox { : _lc(lc) , _position(position) { - // @fixme Initialize reisdue _r + VectorDomain VD(_lc._ring); + + _r.reserve(_lc._l); + _Q.reserve(_lc._l); + _R.reserve(_lc._l); + _Fc.reserve(_lc._l); + for (auto j = 0u; j < _lc._l; ++j) { + auto& F = _lc._fields[j]; + + _r.emplace_back(_lc._ring, _lc._n); + _Q.emplace_back(_lc._ring, _lc._n); + _R.emplace_back(_lc._ring, _lc._n); + _Fc.emplace_back(F, _lc._n); + + // Initialize all residues to b + _r.back() = _lc._b; // Copying data + } + + // @fixme Allocate c + + // @todo Set up an RNS system } /** @@ -218,6 +237,8 @@ namespace LinBox { */ bool next(IVector& ci) { + std::cout << "----- NEXT" << std::endl; + /* for i = 1 .. l: * | (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi * | ci = Bi Ri mod pi < Matrix-vector in Z/pZ @@ -226,22 +247,45 @@ namespace LinBox { * | ri = Qi + (Vi / pi) */ - // @fixme Could be parallel! + // @fixme Should be done in parallel! for (auto j = 0u; j < _lc._l; ++j) { - // @fixme How to do euclidian division? - // ri = pi Qi + Ri + auto pj = _lc._primes[j]; + auto& r = _r[j]; + auto& Q = _Q[j]; + auto& R = _R[j]; + + // @todo @cpernet Is there a VectorDomain::divmod somewhere? + // Euclidian division so that rj = pj Qj + Rj + for (auto i = 0u; i < _lc._n; ++i) { + // @fixme @cpernet Is this OK for any Ring or should we be sure we are using + // Integers? + _lc._ring.quoRem(Q[i], R[i], r[i], pj); + } - // @todo If R might already be a field element - // @cpernet!!! 
- // @fixme We will probably need a low-level API - // so that we can say that the j-th row of _ci takes - // the result of B * R mod pj - // _B[j]->apply(*_ci[j], *_R[j]); + std::cout << "--- FOR " << Integer(pj) << std::endl; + std::cout << "r: " << r << std::endl; + std::cout << "Q: " << Q << std::endl; + std::cout << "R: " << R << std::endl; + + // Convert R to the field + // @fixme @cpernet Could this step be ignored? + // If not, put that in already allocated memory, and not use a temporary here. + auto& F = _lc._fields[j]; + FVector FR(F, R); // rebind + + auto& B = _lc._B[j]; + auto& Fc = _Fc[j]; + B.apply(Fc, FR); + + std::cout << "Fc: " << Fc << std::endl; // @todo Convert _c[i] to RNS } // @fixme CRT reconstruct ci from (cij) + // @cpernet Is that what I should use? I tweaked it so that I can use it. + // _lc._pRns.cra(ci, _Fc); // @fixme This cra function should be called reconstruct or such. + // @fixme Better use Givaro::RNSSystem? std::cout << "ci: " << ci << std::endl; @@ -287,11 +331,16 @@ namespace LinBox { private: const Ring& _ring; + // The problem: A^{-1} * b + const IMatrix& _A; + const IVector& _b; + IElement _numbound; IElement _denbound; IElement _p; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! + RNS _pRns; // RNS system for primes size_t _k; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). size_t _n; // Row/column dimension of A. size_t _l; // How many primes. Equal to _primes.size(). 
@@ -302,6 +351,6 @@ namespace LinBox { std::vector _B; // Inverses of A mod p[i] // std::vector _Q; // std::vector _R; - std::vector _fields; + std::vector _fields; // All fields Modular }; } diff --git a/linbox/algorithms/rns.h b/linbox/algorithms/rns.h index 2a7fffa16..c2214abc4 100644 --- a/linbox/algorithms/rns.h +++ b/linbox/algorithms/rns.h @@ -78,6 +78,7 @@ namespace LinBox * @param l max recoverable bits * @param ps bitsize of the primes (defaulting to 21 because...) */ + RNS() {} RNS(size_t l, size_t ps=21) ; /*x Create a RNS with given primes. * @param primes given basis of primes @@ -97,6 +98,8 @@ namespace LinBox /*! Inits cra. */ void initCRA() ; + template + void init(const std::vector& primes); /*! Computes \c result corresponding to the \c residues. * */ diff --git a/linbox/algorithms/rns.inl b/linbox/algorithms/rns.inl index b38a227eb..5a8ce69af 100644 --- a/linbox/algorithms/rns.inl +++ b/linbox/algorithms/rns.inl @@ -56,7 +56,7 @@ namespace LinBox if (curint>maxint) break; PrimeIterator genprimes( (unsigned int) (_ps_+penalty) ); - size_t p = genprimes.randomPrime() ; + size_t p = *genprimes ; ++genprimes; primeset.insert(p); if (lg < primeset.size()) { @@ -104,6 +104,18 @@ namespace LinBox return ; } + template + template + void RNS::init(const std::vector& primes) + { + _primes_.resize(primes.size()); + _PrimeDoms_.resize(primes.size()); + for (auto i = 0u; i < primes.size(); ++i) { + _primes_[i] = size_t(primes[i]); + _PrimeDoms_[i] = Field(primes[i]); + } + } + template void RNS::cra(integer & result, const std::vector & residues) @@ -183,7 +195,7 @@ namespace LinBox if (curint>maxint) break; PrimeIterator genprimes((unsigned int) (_ps_+penalty) ); - size_t p = genprimes.randomPrime() ; + size_t p = *genprimes ; ++genprimes; primeset.insert(p); if (lg < primeset.size()) { diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index c62e1252c..1c8c6c306 100644 --- 
a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -70,7 +70,10 @@ namespace LinBox { { commentator().start("solve.dixon.integer.dense"); - using Field = Givaro::ModularBalanced; + // @fixme We don't know if we can use ModularBalanced, + // because of the rational reconstruction which might be + // implicitly requiring 0-{p-1} representation of the p-adic sequence elements. + using Field = Givaro::Modular; using PrimeGenerator = PrimeIterator; PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 4caf811bf..e9ad202ca 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -139,10 +139,12 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R bool ok = true; try { solve(x, A, b, method); - ok = ok && check_result(x, A, b, RA, Rb); + ok = check_result(x, A, b, RA, Rb); - solveInPlace(x, A, b, method); - ok = ok && check_result(x, A, b, RA, Rb); + if (ok) { + solveInPlace(x, A, b, method); + ok = check_result(x, A, b, RA, Rb); + } } catch (...) 
{ print_error(x, A, b, "throws error"); return false; From a081d4cee095659fc9443fdc51e3cb45392c3227 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Thu, 23 May 2019 14:59:14 +0200 Subject: [PATCH 13/63] Working RNS Dixon on very small bitsize --- linbox/algorithms/lifting-container.h | 2 +- .../algorithms/multi-mod-lifting-container.h | 90 +++++++++++-------- linbox/algorithms/rns.h | 11 +-- linbox/algorithms/rns.inl | 9 +- linbox/solutions/solve/solve-dixon-rns.h | 3 + 5 files changed, 63 insertions(+), 52 deletions(-) diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h index 7735c9f89..81c992bc5 100644 --- a/linbox/algorithms/lifting-container.h +++ b/linbox/algorithms/lifting-container.h @@ -224,7 +224,7 @@ namespace LinBox // compute v2 = _matA * digit IVector v2 (_lc.ring(),_lc._matA.rowdim()); - _lc._MAD.applyV(v2,digit, _res); + _lc._MAD.applyV(v2,digit, _res); // @fixme This third parameter makes no sense! #ifdef DEBUG_LC diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index d225be731..c67a290f8 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -114,7 +114,6 @@ namespace LinBox { ++primeGenerator; } - _pRns.init(_primes); std::cout << "p: " << _p << std::endl; // Compute how many iterations are needed @@ -203,12 +202,17 @@ namespace LinBox { _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later. size_t _position; + // @fixme Better use Givaro::RNSSystem? + RNS _pRns; // RNS system for primes + public: const_iterator(const MultiModLiftingContainer& lc, size_t position = 0) : _lc(lc) , _position(position) { - VectorDomain VD(_lc._ring); + VectorDomain IVD(_lc._ring); + + _pRns.init(_lc._primes); _r.reserve(_lc._l); _Q.reserve(_lc._l); @@ -233,19 +237,13 @@ namespace LinBox { /** * Returns false if the next digit cannot be computed (bad modulus). 
- * ci is a vector of integers but all element are below p = p1 * ... * pl + * c is a vector of integers but all element are below p = p1 * ... * pl */ - bool next(IVector& ci) + bool next(IVector& c) { std::cout << "----- NEXT" << std::endl; - /* for i = 1 .. l: - * | (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi - * | ci = Bi Ri mod pi < Matrix-vector in Z/pZ - * V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ - * for i = 1 .. l: - * | ri = Qi + (Vi / pi) - */ + VectorDomain IVD(_lc._ring); // @fixme Should be done in parallel! for (auto j = 0u; j < _lc._l; ++j) { @@ -282,34 +280,56 @@ namespace LinBox { // @todo Convert _c[i] to RNS } - // @fixme CRT reconstruct ci from (cij) - // @cpernet Is that what I should use? I tweaked it so that I can use it. - // _lc._pRns.cra(ci, _Fc); // @fixme This cra function should be called reconstruct or such. - // @fixme Better use Givaro::RNSSystem? + // ----- CRT reconstruct c from (cj) - std::cout << "ci: " << ci << std::endl; + std::cout << "--- CRT reconstruction" << std::endl; - // @fixme How can we do A [c1|...|cl] in ZZ if the ci are in the fields? + // @cpernet Is that RNS system what I should use? I tweaked it so that I can use it. + std::vector fElements(_lc._l); + for (auto i = 0u; i < _lc._n; ++i) { + for (auto j = 0u; j < _lc._l; ++j) { + fElements[j] = _Fc[j][i]; + } + // @fixme This cra function should be called reconstruct or such. + _pRns.cra(c[i], fElements); + } - // @fixme Compute the next residue! + std::cout << "c: " << c << std::endl; - // @fixme @note For us, Aci is a matrix! + // ----- Compute the next residue! - // // compute Aci = _matA * ci - // IVector Aci(_lc.ring(), _lc.size()); - // // @fixme _lc._MAD.applyV(Aci, ci, _res); + std::cout << "--- Residue update" << std::endl; - // // update _res -= Aci - // // @fixme _lc._VDR.subin(_res, Aci); - // typename BlasVector::iterator p0; + // @note This is a dummy implementation, for now. 
- // // update _res = _res / p - // int index = 0; - // for (p0 = _res.begin(); p0 != _res.end(); ++p0, ++index) { - // _lc.ring().divin(*p0, _lc._p); - // } + // r <= (rj - A c) / pj + for (auto j = 0u; j < _lc._l; ++j) { + auto pj = _lc._primes[j]; + auto& r = _r[j]; + auto& Q = _Q[j]; + auto& R = _R[j]; + + auto& Fc = _Fc[j]; + // @fixme For now, we convert cj to integer, + // but it should be converted into a RNS system, on pre-allocated memory. + IVector Ic(_lc._ring, Fc); + + // @fixme Should become a matrix-matrix multiplication! + // @fixme Should be able to do a gemv + _lc._A.apply(r, Ic); // r = A c + IVD.negin(r); // r = - A c + IVD.addin(r, R); // r = R - A c + + // r = (R - A c) / pj + IElement Ipj; + _lc._ring.init(Ipj, pj); + for (auto i = 0u; i < _lc._n; ++i) { + _lc._ring.divin(r[i], Ipj); // @fixme Is there a divin in VectorDomain? + } + + IVD.addin(r, Q); // r = Q + (R - A c) / pj + } - // increase position of the iterator ++_position; return true; } @@ -340,17 +360,11 @@ namespace LinBox { IElement _p; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! - RNS _pRns; // RNS system for primes size_t _k; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). size_t _n; // Row/column dimension of A. size_t _l; // How many primes. Equal to _primes.size(). - // @note r is a big matrix in ZZ holding all residues - // IMatrix _r; - // FMatrix _ci; // Contains [ci mod p0 | ... | ci mod p{l-1}] on each row. - std::vector _B; // Inverses of A mod p[i] - // std::vector _Q; - // std::vector _R; + std::vector _B; // Inverses of A mod p[i] std::vector _fields; // All fields Modular }; } diff --git a/linbox/algorithms/rns.h b/linbox/algorithms/rns.h index c2214abc4..0a966acf1 100644 --- a/linbox/algorithms/rns.h +++ b/linbox/algorithms/rns.h @@ -100,14 +100,11 @@ namespace LinBox void initCRA() ; template void init(const std::vector& primes); + /*! 
Computes \c result corresponding to the \c residues. * */ void cra(integer & result, const std::vector & residues); - /*! Computes \c result corresponding to the \c residues. - * - */ - void cra(std::vector & result, const std::vector > & residues); /*! Computes \c result corresponding to the iteration. * @@ -115,12 +112,6 @@ namespace LinBox template void cra(Ivect & result, Iteration & iter) ; - template - void cra(Tinteger & result, Tresidue & residues); - - template - void convert(Tinteger & result, Tresidue & residues) ; - // mixed radix }; diff --git a/linbox/algorithms/rns.inl b/linbox/algorithms/rns.inl index 5a8ce69af..1723f94c9 100644 --- a/linbox/algorithms/rns.inl +++ b/linbox/algorithms/rns.inl @@ -108,12 +108,15 @@ namespace LinBox template void RNS::init(const std::vector& primes) { - _primes_.resize(primes.size()); - _PrimeDoms_.resize(primes.size()); - for (auto i = 0u; i < primes.size(); ++i) { + _size_ = primes.size(); + _primes_.resize(_size_); + _PrimeDoms_.resize(_size_); + for (auto i = 0u; i < _size_; ++i) { _primes_[i] = size_t(primes[i]); _PrimeDoms_[i] = Field(primes[i]); } + + _CRT_ = CRTSystem(_PrimeDoms_); } template diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 1c8c6c306..e056ffbbd 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -80,6 +80,9 @@ namespace LinBox { DixonRNSSolver solver(A.field(), primeGenerator); solver.solve(xNum, xDen, A, b, m); + std::cout << "FOUND xNum: " << xNum << std::endl; + std::cout << "FOUND xDen: " << xDen << std::endl; + commentator().stop("solve.dixon.integer.dense"); // @fixme Implement something like that From 3b200440eb23a1b201b41b6e8ff0f7cdb44bb341 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 28 May 2019 10:15:17 +0200 Subject: [PATCH 14/63] Quality of life for debugging --- .../algorithms/multi-mod-lifting-container.h | 27 ++++++++++--------- 
linbox/solutions/solve/solve-dixon-rns.h | 4 ++- tests/test-solve-full.C | 2 +- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index c67a290f8..b70ff7fdb 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -102,16 +102,22 @@ namespace LinBox { IElement iTmp; _ring.assign(_p, _ring.one); for (auto j = 0u; j < _l; ++j) { - // @fixme Ensure that all primes are different - // @fixme Take into account bestBitSize! - _primes.emplace_back(*primeGenerator); - _fields.emplace_back(_primes.back()); - _ring.init(iTmp, _primes.back()); + auto pj = *primeGenerator; + ++primeGenerator; + + // Ensure that all primes are different + if (std::find(_primes.begin(), _primes.end(), pj) != _primes.end()) { + j -= 1; + continue; + } + + _primes.emplace_back(pj); + _fields.emplace_back(pj); + _ring.init(iTmp, pj); _ring.mulin(_p, iTmp); - std::cout << "primes[" << j << "]: " << Integer(_primes.back()) << std::endl; + std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl; - ++primeGenerator; } std::cout << "p: " << _p << std::endl; @@ -150,10 +156,6 @@ namespace LinBox { // @fixme @cpernet Use FFLAS directly, so that we can have a REAL in place inv. bmd.invin(Bpi); - - Bpi.write(std::cout << "B mod " << Integer(F.characteristic()) << ": ", - Tag::FileFormat::Maple) - << std::endl; } } } @@ -252,6 +254,8 @@ namespace LinBox { auto& Q = _Q[j]; auto& R = _R[j]; + std::cout << "--- FOR " << Integer(pj) << std::endl; + // @todo @cpernet Is there a VectorDomain::divmod somewhere? 
// Euclidian division so that rj = pj Qj + Rj for (auto i = 0u; i < _lc._n; ++i) { @@ -260,7 +264,6 @@ namespace LinBox { _lc._ring.quoRem(Q[i], R[i], r[i], pj); } - std::cout << "--- FOR " << Integer(pj) << std::endl; std::cout << "r: " << r << std::endl; std::cout << "Q: " << Q << std::endl; std::cout << "R: " << R << std::endl; diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index e056ffbbd..0c998eb8b 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -75,7 +75,9 @@ namespace LinBox { // implicitly requiring 0-{p-1} representation of the p-adic sequence elements. using Field = Givaro::Modular; using PrimeGenerator = PrimeIterator; - PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); + // PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); + // @fixme This is for debug! + PrimeGenerator primeGenerator(3); DixonRNSSolver solver(A.field(), primeGenerator); solver.solve(xNum, xDen, A, b, m); diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index e9ad202ca..54ea8c083 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -263,7 +263,7 @@ int main(int argc, char** argv) bool ok = true; do { // // ----- Rational Auto - // ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // @fixme Dixon does not compile // // ok = ok && test_blackbox_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); From 505f896ae132cd94b39deb340fdfb7398a08f6a3 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 28 May 2019 16:17:52 +0200 Subject: [PATCH 15/63] Started MultiModRationalReconstruction --- 
.../algorithms/multi-mod-lifting-container.h | 256 +++++++----------- linbox/solutions/solve/solve-dixon-rns.h | 61 ++++- 2 files changed, 164 insertions(+), 153 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index b70ff7fdb..4b348cf1b 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -95,13 +95,13 @@ namespace LinBox { std::cout << "b: " << b << std::endl; // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?) - _l = 2; - std::cout << "l: " << _l << std::endl; + _primesCount = 2; + std::cout << "l: " << _primesCount << std::endl; // Generating primes IElement iTmp; _ring.assign(_p, _ring.one); - for (auto j = 0u; j < _l; ++j) { + for (auto j = 0u; j < _primesCount; ++j) { auto pj = *primeGenerator; ++primeGenerator; @@ -117,7 +117,6 @@ namespace LinBox { _ring.mulin(_p, iTmp); std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl; - } std::cout << "p: " << _p << std::endl; @@ -125,9 +124,9 @@ namespace LinBox { // Compute how many iterations are needed auto hb = RationalSolveHadamardBound(A, b); double pLog = Givaro::logtwo(_p); - // _k = log2(2 * N * D) / log2(p) - _k = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) / pLog); - std::cout << "k: " << _k << std::endl; + // _iterationsCount = log2(2 * N * D) / log2(p) + _iterationsCount = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) / pLog); + std::cout << "k: " << _iterationsCount << std::endl; // @fixme Fact is RationalReconstruction which needs numbound and denbound // expects them to be in non-log... @@ -140,7 +139,7 @@ namespace LinBox { // to keep control of generated primes, so that the RNS base has bigger primes // than the . 
{ - _B.reserve(_l); + _B.reserve(_primesCount); for (const auto& F : _fields) { BlasMatrixDomain bmd(F); @@ -158,6 +157,24 @@ namespace LinBox { bmd.invin(Bpi); } } + + //----- Iteration + + _r.reserve(_primesCount); + _Q.reserve(_primesCount); + _R.reserve(_primesCount); + _Fc.reserve(_primesCount); + for (auto j = 0u; j < _primesCount; ++j) { + auto& F = _fields[j]; + + _r.emplace_back(_ring, _n); + _Q.emplace_back(_ring, _n); + _R.emplace_back(_ring, _n); + _Fc.emplace_back(F, _n); + + // Initialize all residues to b + _r.back() = _b; // Copying data + } } // -------------------------- @@ -166,7 +183,7 @@ namespace LinBox { const Ring& ring() const final { return _ring; } /// The length of the container. - size_t length() const final { return _k; } + size_t length() const final { return _iterationsCount; } /// The dimension of the problem/solution. size_t size() const final { return _n; } @@ -185,171 +202,97 @@ namespace LinBox { const IElement denbound() const { return _denbound; } + uint32_t primesCount() const { return _primesCount; } + + const FElement& prime(uint32_t index) const { return _primes.at(index); } + // -------------- // ----- Iterator /** - * Needed API for rational reconstruction. - * Each call to next() will update + * Returns false if the next digit cannot be computed (bad modulus). + * c is a vector of integers but all element are below p = p1 * ... * pl */ - class const_iterator { - private: - const MultiModLiftingContainer& _lc; - std::vector _r; // @todo Could be a matrix? Might not be useful, as it is never - // used directly in computations. - std::vector _Q; - std::vector _R; // @fixme This one should be expressed in a RNS system q, and - // HAS TO BE A MATRIX for gemm. - std::vector - _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later. - size_t _position; - - // @fixme Better use Givaro::RNSSystem? 
- RNS _pRns; // RNS system for primes - - public: - const_iterator(const MultiModLiftingContainer& lc, size_t position = 0) - : _lc(lc) - , _position(position) - { - VectorDomain IVD(_lc._ring); - - _pRns.init(_lc._primes); - - _r.reserve(_lc._l); - _Q.reserve(_lc._l); - _R.reserve(_lc._l); - _Fc.reserve(_lc._l); - for (auto j = 0u; j < _lc._l; ++j) { - auto& F = _lc._fields[j]; - - _r.emplace_back(_lc._ring, _lc._n); - _Q.emplace_back(_lc._ring, _lc._n); - _R.emplace_back(_lc._ring, _lc._n); - _Fc.emplace_back(F, _lc._n); - - // Initialize all residues to b - _r.back() = _lc._b; // Copying data - } - - // @fixme Allocate c - - // @todo Set up an RNS system - } - - /** - * Returns false if the next digit cannot be computed (bad modulus). - * c is a vector of integers but all element are below p = p1 * ... * pl - */ - bool next(IVector& c) - { - std::cout << "----- NEXT" << std::endl; - - VectorDomain IVD(_lc._ring); - - // @fixme Should be done in parallel! - for (auto j = 0u; j < _lc._l; ++j) { - auto pj = _lc._primes[j]; - auto& r = _r[j]; - auto& Q = _Q[j]; - auto& R = _R[j]; - - std::cout << "--- FOR " << Integer(pj) << std::endl; - - // @todo @cpernet Is there a VectorDomain::divmod somewhere? - // Euclidian division so that rj = pj Qj + Rj - for (auto i = 0u; i < _lc._n; ++i) { - // @fixme @cpernet Is this OK for any Ring or should we be sure we are using - // Integers? - _lc._ring.quoRem(Q[i], R[i], r[i], pj); - } - - std::cout << "r: " << r << std::endl; - std::cout << "Q: " << Q << std::endl; - std::cout << "R: " << R << std::endl; + bool next(std::vector& digits) + { + std::cout << "----- NEXT" << std::endl; - // Convert R to the field - // @fixme @cpernet Could this step be ignored? - // If not, put that in already allocated memory, and not use a temporary here. - auto& F = _lc._fields[j]; - FVector FR(F, R); // rebind + VectorDomain IVD(_ring); - auto& B = _lc._B[j]; - auto& Fc = _Fc[j]; - B.apply(Fc, FR); + // @fixme Should be done in parallel! 
+ for (auto j = 0u; j < _primesCount; ++j) { + auto pj = _primes[j]; + auto& r = _r[j]; + auto& Q = _Q[j]; + auto& R = _R[j]; - std::cout << "Fc: " << Fc << std::endl; + std::cout << "--- FOR " << Integer(pj) << std::endl; - // @todo Convert _c[i] to RNS + // @todo @cpernet Is there a VectorDomain::divmod somewhere? + // Euclidian division so that rj = pj Qj + Rj + for (auto i = 0u; i < _n; ++i) { + // @fixme @cpernet Is this OK for any Ring or should we be sure we are using + // Integers? + _ring.quoRem(Q[i], R[i], r[i], pj); } - // ----- CRT reconstruct c from (cj) + std::cout << "r: " << r << std::endl; + std::cout << "Q: " << Q << std::endl; + std::cout << "R: " << R << std::endl; - std::cout << "--- CRT reconstruction" << std::endl; + // Convert R to the field + // @fixme @cpernet Could this step be ignored? + // If not, put that in already allocated memory, and not use a temporary here. + auto& F = _fields[j]; + FVector FR(F, R); // rebind - // @cpernet Is that RNS system what I should use? I tweaked it so that I can use it. - std::vector fElements(_lc._l); - for (auto i = 0u; i < _lc._n; ++i) { - for (auto j = 0u; j < _lc._l; ++j) { - fElements[j] = _Fc[j][i]; - } - // @fixme This cra function should be called reconstruct or such. - _pRns.cra(c[i], fElements); - } + auto& B = _B[j]; + auto& Fc = _Fc[j]; + B.apply(Fc, FR); - std::cout << "c: " << c << std::endl; + std::cout << "Fc: " << Fc << std::endl; - // ----- Compute the next residue! + // @todo Convert _c[i] to RNS + digits[j] = IVector(_ring, Fc); + } - std::cout << "--- Residue update" << std::endl; + // ----- Compute the next residue! - // @note This is a dummy implementation, for now. + std::cout << "--- Residue update" << std::endl; - // r <= (rj - A c) / pj - for (auto j = 0u; j < _lc._l; ++j) { - auto pj = _lc._primes[j]; - auto& r = _r[j]; - auto& Q = _Q[j]; - auto& R = _R[j]; + // @note This is a dummy implementation, for now. 
- auto& Fc = _Fc[j]; - // @fixme For now, we convert cj to integer, - // but it should be converted into a RNS system, on pre-allocated memory. - IVector Ic(_lc._ring, Fc); + // r <= (r - A c) / p + for (auto j = 0u; j < _primesCount; ++j) { + auto pj = _primes[j]; + auto& r = _r[j]; // @fixme THEY HOLD ALL THE VERY SAME VALUE! + auto& Q = _Q[j]; + auto& R = _R[j]; - // @fixme Should become a matrix-matrix multiplication! - // @fixme Should be able to do a gemv - _lc._A.apply(r, Ic); // r = A c - IVD.negin(r); // r = - A c - IVD.addin(r, R); // r = R - A c + auto& Fc = _Fc[j]; + // @fixme For now, we convert cj to integer, + // but it should be converted into a RNS system, on pre-allocated memory. + IVector Ic(_ring, Fc); - // r = (R - A c) / pj - IElement Ipj; - _lc._ring.init(Ipj, pj); - for (auto i = 0u; i < _lc._n; ++i) { - _lc._ring.divin(r[i], Ipj); // @fixme Is there a divin in VectorDomain? - } + // @fixme Should become a matrix-matrix multiplication! + // @fixme Should be able to do a gemv + _A.apply(r, Ic); // r = A c + IVD.negin(r); // r = - A c + IVD.addin(r, R); // r = R - A c - IVD.addin(r, Q); // r = Q + (R - A c) / pj + // r = (R - A c) / p + IElement Ipj; + _ring.init(Ipj, pj); + for (auto i = 0u; i < _n; ++i) { + _ring.divin(r[i], Ipj); // @fixme Is there a divin in VectorDomain? } - ++_position; - return true; + IVD.addin(r, Q); // r = Q + (R - A c) / p } - bool operator!=(const const_iterator& iterator) const - { - return _position != iterator._position; - } - - bool operator==(const const_iterator& iterator) const - { - return _position == iterator._position; - } - }; - - const_iterator begin() const { return const_iterator(*this); } - const_iterator end() const { return const_iterator(*this, _k); } + ++_position; + return true; + } private: const Ring& _ring; @@ -363,11 +306,22 @@ namespace LinBox { IElement _p; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! 
- size_t _k; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). - size_t _n; // Row/column dimension of A. - size_t _l; // How many primes. Equal to _primes.size(). + size_t + _iterationsCount; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). + size_t _n; // Row/column dimension of A. + size_t _primesCount; // How many primes. Equal to _primes.size(). std::vector _B; // Inverses of A mod p[i] std::vector _fields; // All fields Modular + + //----- Iteration + std::vector _r; // @todo Could be a matrix? Might not be useful, as it is never + // used directly in computations. + std::vector _Q; + std::vector _R; // @fixme This one should be expressed in a RNS system q, and + // HAS TO BE A MATRIX for gemm. + std::vector + _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later. + size_t _position; }; } diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 0c998eb8b..85140dc44 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -25,6 +25,63 @@ #include namespace LinBox { + /** + * From a MultiModLiftingContainer, will build + * the solution on each prime, then will do a CRT reconstruction, + * before reconstructing the rational. + * + * This does not do early termination. + */ + template + class MultiModRationalReconstruction { + using Ring = typename LiftingContainer::Ring; + using IElement = typename LiftingContainer::IElement; + using IVector = typename LiftingContainer::IVector; + + public: + MultiModRationalReconstruction(LiftingContainer& lc) + : _lc(lc) + { + } + + bool getRational(IVector& xNum, IElement& xDen) { + VectorDomain IVD(_lc.ring()); + + // Stores each c0 + c1 pj + ... 
+ ck pj^k for each pj + std::vector padicAccumulations(_lc.primesCount(), _lc.ring()); + // Temporary structure to store a ci for each pj + std::vector digits(_lc.primesCount(), _lc.ring()); // @fixme Could be a Field Element? + // The pj^i for each pj + std::vector radices(_lc.primesCount(), 1); + + for (auto j = 0u; j < _lc.primesCount(); ++j) { + padicAccumulations[j].resize(_lc.size()); + digits[j].resize(_lc.size()); + } + + // @fixme IMPLEMENT Rat Recon + for (auto i = 0u; i < _lc.length(); ++i) { + _lc.next(digits); + + // @fixme Better use PolEval (except memory explosion?) + for (auto j = 0u; j < _lc.primesCount(); ++j) { + std::cout << "STEP " << i << " DIGITS " << digits[j] << std::endl; + IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci + _lc.ring().mulin(radices[j], _lc.prime(j)); + std::cout << "STEP " << i << " ACCUMULATION " << padicAccumulations[j] << std::endl; + } + } + + // @fixme From here padicAccumulations are all right, we should CRT reconstruct that + + + return true; + } + + private: + LiftingContainer& _lc; + }; + // @fixme Move that to a file - and make it be a RationalSolver template class DixonRNSSolver { @@ -49,9 +106,9 @@ namespace LinBox { using LiftingContainer = MultiModLiftingContainer; LiftingContainer lc(_ring, _primeGenerator, A, b, m); - RationalReconstruction re(lc); + MultiModRationalReconstruction re(lc); - if (!re.getRational(xNum, xDen, 0)) { + if (!re.getRational(xNum, xDen)) { std::cerr << "OUCH!" << std::endl; } } From e0c0feb65ab3dfc1b1102a547b692820096e768c Mon Sep 17 00:00:00 2001 From: "A. 
Breust" Date: Wed, 29 May 2019 10:35:52 +0200 Subject: [PATCH 16/63] Quick WIP commit --- linbox/solutions/solve/solve-dixon-rns.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 85140dc44..da1719f13 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -44,7 +44,8 @@ namespace LinBox { { } - bool getRational(IVector& xNum, IElement& xDen) { + bool getRational(IVector& xNum, IElement& xDen) + { VectorDomain IVD(_lc.ring()); // Stores each c0 + c1 pj + ... + ck pj^k for each pj @@ -59,7 +60,6 @@ namespace LinBox { digits[j].resize(_lc.size()); } - // @fixme IMPLEMENT Rat Recon for (auto i = 0u; i < _lc.length(); ++i) { _lc.next(digits); @@ -73,7 +73,10 @@ namespace LinBox { } // @fixme From here padicAccumulations are all right, we should CRT reconstruct that + using CRAField = Modular; + ChineseRemainder> cra(); + // @fixme Rat Recon return true; } @@ -97,8 +100,8 @@ namespace LinBox { * Dense solving. */ template - void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, - const Vector& b, const Method::DixonRNS& m) + void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, + const Method::DixonRNS& m) { // @fixme We should use some code from DixonSolver... // But that's hard so we just assume that A is square and invertible. @@ -122,8 +125,8 @@ namespace LinBox { * \brief Solve specialisation for DixonRNS on dense matrices. 
*/ template - void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, - const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) + void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, + const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) { commentator().start("solve.dixon.integer.dense"); From d2857368805d13ce4734edbc013fb3121882a8b2 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 29 May 2019 11:57:41 +0200 Subject: [PATCH 17/63] Sometimes working, sometimes failing --- .../algorithms/multi-mod-lifting-container.h | 12 ++++--- linbox/solutions/solve/solve-dixon-rns.h | 34 +++++++++++++------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 4b348cf1b..fec26fa58 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -123,9 +123,10 @@ namespace LinBox { // Compute how many iterations are needed auto hb = RationalSolveHadamardBound(A, b); - double pLog = Givaro::logtwo(_p); + double log2P = Givaro::logtwo(_p); // _iterationsCount = log2(2 * N * D) / log2(p) - _iterationsCount = std::ceil((1.0 + hb.numLogBound + hb.denLogBound) / pLog); + _log2Bound = 1.0 + hb.numLogBound + hb.denLogBound; + _iterationsCount = std::ceil(_log2Bound / log2P); std::cout << "k: " << _iterationsCount << std::endl; // @fixme Fact is RationalReconstruction which needs numbound and denbound @@ -198,9 +199,11 @@ namespace LinBox { // ----- NOT LiftingContainer API // ----- but still needed - const IElement numbound() const { return _numbound; } + const IElement& numbound() const { return _numbound; } - const IElement denbound() const { return _denbound; } + const IElement& denbound() const { return _denbound; } + + double log2Bound() const { return _log2Bound; } uint32_t primesCount() const { return 
_primesCount; } @@ -303,6 +306,7 @@ namespace LinBox { IElement _numbound; IElement _denbound; + double _log2Bound; IElement _p; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index da1719f13..af192b232 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -51,7 +51,8 @@ namespace LinBox { // Stores each c0 + c1 pj + ... + ck pj^k for each pj std::vector padicAccumulations(_lc.primesCount(), _lc.ring()); // Temporary structure to store a ci for each pj - std::vector digits(_lc.primesCount(), _lc.ring()); // @fixme Could be a Field Element? + std::vector digits(_lc.primesCount(), + _lc.ring()); // @fixme Could be a Field Element? // The pj^i for each pj std::vector radices(_lc.primesCount(), 1); @@ -68,15 +69,28 @@ namespace LinBox { std::cout << "STEP " << i << " DIGITS " << digits[j] << std::endl; IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci _lc.ring().mulin(radices[j], _lc.prime(j)); - std::cout << "STEP " << i << " ACCUMULATION " << padicAccumulations[j] << std::endl; + std::cout << "STEP " << i << " ACCUMULATION " << padicAccumulations[j] + << std::endl; } } - // @fixme From here padicAccumulations are all right, we should CRT reconstruct that - using CRAField = Modular; - ChineseRemainder> cra(); + // CRT reconstruction from paddicAccumulations + using CRAField = Givaro::Modular; + RationalCRABuilderFullMultip craBuilder( + _lc.log2Bound() * 1.4427); // 1.4427 = 1 / log(2) - // @fixme Rat Recon + { + CRAField field(radices[0]); + craBuilder.initialize(field, padicAccumulations[0]); + } + + for (auto j = 1u; j < _lc.primesCount(); ++j) { + CRAField field(radices[j]); + craBuilder.progress(field, padicAccumulations[j]); + } + + // Rational reconstruction + craBuilder.result(xNum, xDen); return true; 
} @@ -100,8 +114,8 @@ namespace LinBox { * Dense solving. */ template - void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, - const Method::DixonRNS& m) + void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, + const Vector& b, const Method::DixonRNS& m) { // @fixme We should use some code from DixonSolver... // But that's hard so we just assume that A is square and invertible. @@ -125,8 +139,8 @@ namespace LinBox { * \brief Solve specialisation for DixonRNS on dense matrices. */ template - void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, - const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) + void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, + const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) { commentator().start("solve.dixon.integer.dense"); From bfa08ed9342f9dba366edf064699080ef02786e6 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 4 Jun 2019 10:51:36 +0200 Subject: [PATCH 18/63] Detecting wrong primes using nullity --- .../algorithms/multi-mod-lifting-container.h | 30 +++++-------------- .../matrix/matrixdomain/blas-matrix-domain.h | 21 ++++++------- linbox/solutions/solve/solve-dixon-rns.h | 11 ++----- 3 files changed, 21 insertions(+), 41 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index fec26fa58..75fd12f9e 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -143,19 +143,15 @@ namespace LinBox { _B.reserve(_primesCount); for (const auto& F : _fields) { + _B.emplace_back(A, F); // Rebind into the field + + int nullity = 0; BlasMatrixDomain bmd(F); - _B.emplace_back(F, _n, _n); - auto& Bpi = _B.back(); - - // @fixme Taken for rational-solver.inl. BETTER USE REBIND!!! 
- for (size_t i = 0; i < _n; ++i) { - for (size_t j = 0; j < _n; ++j) { - F.init(Bpi.refEntry(i, j), A.getEntry(i, j)); - } + bmd.invin(_B.back(), nullity); + if (nullity > 0) { + // @fixme Should redraw another prime! + throw LinBoxError("Wrong prime, sorry."); } - - // @fixme @cpernet Use FFLAS directly, so that we can have a REAL in place inv. - bmd.invin(Bpi); } } @@ -218,8 +214,6 @@ namespace LinBox { */ bool next(std::vector& digits) { - std::cout << "----- NEXT" << std::endl; - VectorDomain IVD(_ring); // @fixme Should be done in parallel! @@ -229,8 +223,6 @@ namespace LinBox { auto& Q = _Q[j]; auto& R = _R[j]; - std::cout << "--- FOR " << Integer(pj) << std::endl; - // @todo @cpernet Is there a VectorDomain::divmod somewhere? // Euclidian division so that rj = pj Qj + Rj for (auto i = 0u; i < _n; ++i) { @@ -239,10 +231,6 @@ namespace LinBox { _ring.quoRem(Q[i], R[i], r[i], pj); } - std::cout << "r: " << r << std::endl; - std::cout << "Q: " << Q << std::endl; - std::cout << "R: " << R << std::endl; - // Convert R to the field // @fixme @cpernet Could this step be ignored? // If not, put that in already allocated memory, and not use a temporary here. @@ -253,16 +241,12 @@ namespace LinBox { auto& Fc = _Fc[j]; B.apply(Fc, FR); - std::cout << "Fc: " << Fc << std::endl; - // @todo Convert _c[i] to RNS digits[j] = IVector(_ring, Fc); } // ----- Compute the next residue! - std::cout << "--- Residue update" << std::endl; - // @note This is a dummy implementation, for now. 
// r <= (r - A c) / p diff --git a/linbox/matrix/matrixdomain/blas-matrix-domain.h b/linbox/matrix/matrixdomain/blas-matrix-domain.h index 224ede0d6..c9fbe796c 100644 --- a/linbox/matrix/matrixdomain/blas-matrix-domain.h +++ b/linbox/matrix/matrixdomain/blas-matrix-domain.h @@ -631,15 +631,6 @@ namespace LinBox return B.swap(A); } - - //- Inversion w singular check - // template - // Matrix& inv( Matrix &Ainv, const Matrix &A, int& nullity) const - // { - // nullity = BlasMatrixDomainInv()(field(),Ainv,A); - // return Ainv; - // } - //! Inversion w singular check template Matrix1& inv( Matrix1 &Ainv, const Matrix2 &A, int& nullity) const @@ -648,7 +639,6 @@ namespace LinBox return Ainv; } - //! Inversion (the matrix A is modified) w singular check template Matrix1& invin( Matrix1 &Ainv, Matrix2 &A, int& nullity) const @@ -657,6 +647,17 @@ namespace LinBox return Ainv; } + //! Inversion (the matrix A is modified) w singular check + template + Matrix& invin(Matrix& A, int& nullity) const + { + // @fixme @cpernet Apparently FFLAS has a new method that does + // inversion really in place, we should update this code. + Matrix tmp(A); + nullity = BlasMatrixDomainInv()(field(),A,tmp); + return A; + } + //! Rank template unsigned int rank(const Matrix &A) const diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index af192b232..4cd79529c 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -66,18 +66,15 @@ namespace LinBox { // @fixme Better use PolEval (except memory explosion?) 
for (auto j = 0u; j < _lc.primesCount(); ++j) { - std::cout << "STEP " << i << " DIGITS " << digits[j] << std::endl; IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci _lc.ring().mulin(radices[j], _lc.prime(j)); - std::cout << "STEP " << i << " ACCUMULATION " << padicAccumulations[j] - << std::endl; } } // CRT reconstruction from paddicAccumulations using CRAField = Givaro::Modular; - RationalCRABuilderFullMultip craBuilder( - _lc.log2Bound() * 1.4427); // 1.4427 = 1 / log(2) + RationalCRABuilderFullMultip craBuilder(_lc.log2Bound() + / 1.4427); // 1.4427 = 1 / log(2) { CRAField field(radices[0]); @@ -149,9 +146,7 @@ namespace LinBox { // implicitly requiring 0-{p-1} representation of the p-adic sequence elements. using Field = Givaro::Modular; using PrimeGenerator = PrimeIterator; - // PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); - // @fixme This is for debug! - PrimeGenerator primeGenerator(3); + PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); DixonRNSSolver solver(A.field(), primeGenerator); solver.solve(xNum, xDen, A, b, m); From 5c5aea72dd3e4b85275aa98a3d3227fe145b1d5f Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 5 Jun 2019 16:05:13 +0200 Subject: [PATCH 19/63] Getting A into an RNS system --- .../algorithms/multi-mod-lifting-container.h | 80 ++++++++++++------- linbox/solutions/hadamard-bound.h | 7 +- 2 files changed, 56 insertions(+), 31 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 75fd12f9e..00d4a1a9b 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -99,40 +99,62 @@ namespace LinBox { std::cout << "l: " << _primesCount << std::endl; // Generating primes - IElement iTmp; - _ring.assign(_p, _ring.one); - for (auto j = 0u; j < _primesCount; ++j) { - auto pj = *primeGenerator; - ++primeGenerator; + { + IElement iTmp; + 
_ring.assign(_p, _ring.one); + for (auto j = 0u; j < _primesCount; ++j) { + auto pj = *primeGenerator; + ++primeGenerator; + + // Ensure that all primes are different + if (std::find(_primes.begin(), _primes.end(), pj) != _primes.end()) { + j -= 1; + continue; + } - // Ensure that all primes are different - if (std::find(_primes.begin(), _primes.end(), pj) != _primes.end()) { - j -= 1; - continue; - } + _primes.emplace_back(pj); + _fields.emplace_back(pj); + _ring.init(iTmp, pj); + _ring.mulin(_p, iTmp); - _primes.emplace_back(pj); - _fields.emplace_back(pj); - _ring.init(iTmp, pj); - _ring.mulin(_p, iTmp); + std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl; + } - std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl; + std::cout << "p: " << _p << std::endl; } - std::cout << "p: " << _p << std::endl; - // Compute how many iterations are needed - auto hb = RationalSolveHadamardBound(A, b); - double log2P = Givaro::logtwo(_p); - // _iterationsCount = log2(2 * N * D) / log2(p) - _log2Bound = 1.0 + hb.numLogBound + hb.denLogBound; - _iterationsCount = std::ceil(_log2Bound / log2P); - std::cout << "k: " << _iterationsCount << std::endl; - - // @fixme Fact is RationalReconstruction which needs numbound and denbound - // expects them to be in non-log... - _ring.init(_numbound, Integer(1) << static_cast(std::ceil(hb.numLogBound))); - _ring.init(_denbound, Integer(1) << static_cast(std::ceil(hb.denLogBound))); + { + auto hb = RationalSolveHadamardBound(A, b); + double log2P = Givaro::logtwo(_p); + // _iterationsCount = log2(2 * N * D) / log2(p) + _log2Bound = hb.solutionLogBound; + _iterationsCount = std::ceil(_log2Bound / log2P); + std::cout << "k: " << _iterationsCount << std::endl; + + // @fixme Fact is RationalReconstruction which needs numbound and denbound + // expects them to be in non-log... @fixme Still needed? 
+ _ring.init(_numbound, Integer(1) + << static_cast(std::ceil(hb.numLogBound))); + _ring.init(_denbound, Integer(1) + << static_cast(std::ceil(hb.denLogBound))); + } + + // Making A into a RNS domain + { + // @fixme Really provide the primes, with the correct bound + FFPACK::rns_double rnsSystem(std::vector({59059367, 57648973})); + FFPACK::RNSInteger rnsDomain(rnsSystem); + auto rnsA = FFLAS::fflas_new(rnsDomain, A.rowdim(), A.coldim()); + + Integer max; + InfinityNorm(max, A); + double logMax = Givaro::logtwo(max) / 16.; // @note So that 2^(16*k) is the max. + FFLAS::finit_rns(rnsDomain, A.rowdim(), A.coldim(), logMax, A.getPointer(), A.stride(), + rnsA); + + std::cout << "rnsA: " << rnsA[0]._ptr[0] << " " << rnsA[0]._ptr[1] << std::endl; + } // Initialize all inverses // @note An inverse mod some p within DixonSolver was already computed, @@ -155,7 +177,7 @@ namespace LinBox { } } - //----- Iteration + //----- Locals setup _r.reserve(_primesCount); _Q.reserve(_primesCount); diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h index 8216994e2..48b9bcf55 100644 --- a/linbox/solutions/hadamard-bound.h +++ b/linbox/solutions/hadamard-bound.h @@ -282,7 +282,11 @@ namespace LinBox { // ----- Fast Hadamard bound - + template + inline Integer& InfinityNorm(Integer& max, const IMatrix& A) { + typename MatrixTraits::MatrixCategory tag; + return InfinityNorm(max, A, tag); + } /** * Returns the maximal absolute value. 
@@ -294,7 +298,6 @@ namespace LinBox { return InfinityNorm(max, ACopy, MatrixCategories::RowColMatrixTag()); } - template inline Integer& InfinityNorm(Integer& max, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) { From 82433463881b7e906977285fe17d7b148ad80cec Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Thu, 6 Jun 2019 15:15:18 +0200 Subject: [PATCH 20/63] Creating the RNS basis, sorting primes --- .../algorithms/multi-mod-lifting-container.h | 120 ++++++++++++------ linbox/solutions/solve/solve-dixon-rns.h | 3 - tests/test-solve-full.C | 8 +- 3 files changed, 82 insertions(+), 49 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 00d4a1a9b..c996d6c17 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -94,66 +94,72 @@ namespace LinBox { A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl; std::cout << "b: " << b << std::endl; + // This will contain the primes or our MultiMod basis // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?) _primesCount = 2; + _primes.resize(_primesCount); std::cout << "l: " << _primesCount << std::endl; - // Generating primes + // Some preparation work + Integer infinityNormA; + InfinityNorm(infinityNormA, A); + double logInfinityNormA = Givaro::logtwo(infinityNormA); + { - IElement iTmp; - _ring.assign(_p, _ring.one); - for (auto j = 0u; j < _primesCount; ++j) { - auto pj = *primeGenerator; + // Based on Chen-Storjohann's paper, this is the bit size + // of the needed RNS basis for the residue computation + double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)) * 16; // @fixme @cpernet Does this factor 16 makes sense? 
+ uint32_t rnsBasisPrimesCount = + std::ceil(rnsBasisBitSize / primeGenerator.getBits()); + _rnsPrimes.resize(rnsBasisPrimesCount); + std::cout << "RNS basis: " << rnsBasisPrimesCount << " estimated primes." << std::endl; + + std::vector primes; + for (auto j = 0u; j < _primesCount + rnsBasisPrimesCount; ++j) { + auto p = *primeGenerator; ++primeGenerator; - // Ensure that all primes are different - if (std::find(_primes.begin(), _primes.end(), pj) != _primes.end()) { - j -= 1; + auto lb = std::lower_bound(primes.begin(), primes.end(), p); + if (lb != primes.end() && *lb == p) { + --j; continue; } - _primes.emplace_back(pj); - _fields.emplace_back(pj); - _ring.init(iTmp, pj); - _ring.mulin(_p, iTmp); - - std::cout << "primes[" << j << "]: " << Integer(pj) << std::endl; + // Inserting the primes at the right place to keep the array sorted + primes.insert(lb, p); } - std::cout << "p: " << _p << std::endl; - } + // We take the smallest primes for our MultiMod basis + std::copy(primes.begin(), primes.begin() + _primesCount, _primes.begin()); - // Compute how many iterations are needed - { - auto hb = RationalSolveHadamardBound(A, b); - double log2P = Givaro::logtwo(_p); - // _iterationsCount = log2(2 * N * D) / log2(p) - _log2Bound = hb.solutionLogBound; - _iterationsCount = std::ceil(_log2Bound / log2P); - std::cout << "k: " << _iterationsCount << std::endl; + // And the others for our RNS basis + std::copy(primes.begin() + _primesCount, primes.end(), _rnsPrimes.begin()); - // @fixme Fact is RationalReconstruction which needs numbound and denbound - // expects them to be in non-log... @fixme Still needed? - _ring.init(_numbound, Integer(1) - << static_cast(std::ceil(hb.numLogBound))); - _ring.init(_denbound, Integer(1) - << static_cast(std::ceil(hb.denLogBound))); + // We check that we really need all the primes within the RNS basis, + // as the first count was just an upper estimation. 
+ double bitSize = 0.0; + for (int i = _rnsPrimes.size() - 1; i >= 0; --i) { + bitSize += Givaro::logtwo(primes[i]); + + if (bitSize > rnsBasisBitSize && i > 0) { + _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (i - 1)); + std::cout << "RNS basis: Erasing extra " << i << "primes." << std::endl; + break; + } + } } - // Making A into a RNS domain + // Generating primes { - // @fixme Really provide the primes, with the correct bound - FFPACK::rns_double rnsSystem(std::vector({59059367, 57648973})); - FFPACK::RNSInteger rnsDomain(rnsSystem); - auto rnsA = FFLAS::fflas_new(rnsDomain, A.rowdim(), A.coldim()); - - Integer max; - InfinityNorm(max, A); - double logMax = Givaro::logtwo(max) / 16.; // @note So that 2^(16*k) is the max. - FFLAS::finit_rns(rnsDomain, A.rowdim(), A.coldim(), logMax, A.getPointer(), A.stride(), - rnsA); + IElement iTmp; + _ring.assign(_p, _ring.one); + for (auto& pj : _primes) { + _fields.emplace_back(pj); + _ring.init(iTmp, pj); + _ring.mulin(_p, iTmp); + } - std::cout << "rnsA: " << rnsA[0]._ptr[0] << " " << rnsA[0]._ptr[1] << std::endl; + std::cout << "p: " << _p << std::endl; } // Initialize all inverses @@ -177,6 +183,35 @@ namespace LinBox { } } + // Making A into the RNS domain + { + FFPACK::rns_double rnsSystem(_rnsPrimes); + FFPACK::RNSInteger rnsDomain(rnsSystem); + auto rnsA = FFLAS::fflas_new(rnsDomain, A.rowdim(), A.coldim()); + + double cmax = + logInfinityNormA / 16.; // @note So that 2^(16*cmax) is the max element of A. 
+ FFLAS::finit_rns(rnsDomain, A.rowdim(), A.coldim(), cmax, A.getPointer(), + A.stride(), rnsA); + } + + // Compute how many iterations are needed + { + auto hb = RationalSolveHadamardBound(A, b); + double log2P = Givaro::logtwo(_p); + // _iterationsCount = log2(2 * N * D) / log2(p) + _log2Bound = hb.solutionLogBound; + _iterationsCount = std::ceil(_log2Bound / log2P); + std::cout << "k: " << _iterationsCount << std::endl; + + // @fixme Fact is RationalReconstruction which needs numbound and denbound + // expects them to be in non-log... @fixme Still needed? + _ring.init(_numbound, Integer(1) + << static_cast(std::ceil(hb.numLogBound))); + _ring.init(_denbound, Integer(1) + << static_cast(std::ceil(hb.denLogBound))); + } + //----- Locals setup _r.reserve(_primesCount); @@ -316,6 +351,7 @@ namespace LinBox { IElement _p; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! + std::vector _rnsPrimes; size_t _iterationsCount; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). size_t _n; // Row/column dimension of A. 
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 4cd79529c..630ac235d 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -151,9 +151,6 @@ namespace LinBox { DixonRNSSolver solver(A.field(), primeGenerator); solver.solve(xNum, xDen, A, b, m); - std::cout << "FOUND xNum: " << xNum << std::endl; - std::cout << "FOUND xDen: " << xDen << std::endl; - commentator().stop("solve.dixon.integer.dense"); // @fixme Implement something like that diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 54ea8c083..5b9e5acec 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -141,10 +141,10 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R solve(x, A, b, method); ok = check_result(x, A, b, RA, Rb); - if (ok) { - solveInPlace(x, A, b, method); - ok = check_result(x, A, b, RA, Rb); - } + // if (ok) { + // solveInPlace(x, A, b, method); + // ok = check_result(x, A, b, RA, Rb); + // } } catch (...) 
{ print_error(x, A, b, "throws error"); return false; From c682b211d871b258d6c68725ea18433af0c17ab1 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 11 Jun 2019 17:52:39 +0200 Subject: [PATCH 21/63] Failed to understand how to write directly to an rns_element_ptr --- .../algorithms/multi-mod-lifting-container.h | 75 ++++++++++++++----- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index c996d6c17..b3a9863c8 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -70,6 +70,11 @@ namespace LinBox { using Field = _Field; using PrimeGenerator = _PrimeGenerator; + using RNSSystem = FFPACK::rns_double; + using RNSDomain = FFPACK::RNSInteger; + using RNSElement = typename RNSDomain::Element; + using RNSElementPtr = typename RNSDomain::Element_ptr; + using IElement = typename Ring::Element; using IMatrix = DenseMatrix<_Ring>; using IVector = DenseVector<_Ring>; @@ -108,14 +113,15 @@ namespace LinBox { { // Based on Chen-Storjohann's paper, this is the bit size // of the needed RNS basis for the residue computation - double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)) * 16; // @fixme @cpernet Does this factor 16 makes sense? - uint32_t rnsBasisPrimesCount = - std::ceil(rnsBasisBitSize / primeGenerator.getBits()); - _rnsPrimes.resize(rnsBasisPrimesCount); - std::cout << "RNS basis: " << rnsBasisPrimesCount << " estimated primes." << std::endl; + double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)) + * 16; // @fixme @cpernet Does this factor 16 makes sense? + _rnsBasisPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits()); + _rnsPrimes.resize(_rnsBasisPrimesCount); + std::cout << "RNS basis: " << _rnsBasisPrimesCount << " estimated primes." 
+ << std::endl; std::vector primes; - for (auto j = 0u; j < _primesCount + rnsBasisPrimesCount; ++j) { + for (auto j = 0u; j < _primesCount + _rnsBasisPrimesCount; ++j) { auto p = *primeGenerator; ++primeGenerator; @@ -185,14 +191,17 @@ namespace LinBox { // Making A into the RNS domain { - FFPACK::rns_double rnsSystem(_rnsPrimes); - FFPACK::RNSInteger rnsDomain(rnsSystem); - auto rnsA = FFLAS::fflas_new(rnsDomain, A.rowdim(), A.coldim()); + RNSSystem rnsSystem(_rnsPrimes); + _rnsDomain = new RNSDomain(rnsSystem); + _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n); + + // @fixme @cpernet Just it be transpose for better memory access between threads? + // Each column is the current digit c[j] mod pj + _rnsc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); double cmax = logInfinityNormA / 16.; // @note So that 2^(16*cmax) is the max element of A. - FFLAS::finit_rns(rnsDomain, A.rowdim(), A.coldim(), cmax, A.getPointer(), - A.stride(), rnsA); + FFLAS::finit_rns(*_rnsDomain, _n, _n, cmax, A.getPointer(), A.stride(), _rnsA); } // Compute how many iterations are needed @@ -231,6 +240,12 @@ namespace LinBox { } } + ~MultiModLiftingContainer() + { + FFLAS::fflas_delete(_rnsA); // @fixme Does it knows the size? + delete _rnsDomain; + } + // -------------------------- // ----- LiftingContainer API @@ -298,22 +313,38 @@ namespace LinBox { auto& Fc = _Fc[j]; B.apply(Fc, FR); - // @todo Convert _c[i] to RNS digits[j] = IVector(_ring, Fc); + + // Store the very same result in an RNS system, + // but fact is all the primes of the RNS system are bigger + // than the modulus used to compute _Fc, we just copy the result for everybody. 
+ std::cout << "FOR " << pj << std::endl; + for (auto i = 0u; i < _n; ++i) { + // std::cout << _rnsc[i * _n + j]._ptr << std::endl; + double cij = _Fc[j][i]; + std::cout << "stride " << _rnsc[i * _n + j]._stride << std::endl; + auto stride = _rnsc[i * _n + j]._stride; + for (auto h = 0u; h < _rnsBasisPrimesCount; ++h) { + _rnsc[i * _n + j]._ptr[h + stride] = cij; + } + _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j]); + std::cout << std::endl; + } } // ----- Compute the next residue! - // @note This is a dummy implementation, for now. - // r <= (r - A c) / p for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; - auto& r = _r[j]; // @fixme THEY HOLD ALL THE VERY SAME VALUE! + auto& r = _r[j]; auto& Q = _Q[j]; auto& R = _R[j]; auto& Fc = _Fc[j]; + + // @note We know that _Fc @fixme @todo XXXX + // @fixme For now, we convert cj to integer, // but it should be converted into a RNS system, on pre-allocated memory. IVector Ic(_ring, Fc); @@ -349,13 +380,19 @@ namespace LinBox { IElement _denbound; double _log2Bound; + RNSDomain* _rnsDomain = nullptr; + RNSElementPtr _rnsA; // The matrix A, but in the RNS system + // A matrix of digits c[j], being the current digits mod pj, in the RNS system + RNSElementPtr _rnsc; + size_t _rnsBasisPrimesCount = 0u; + IElement _p; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! std::vector _rnsPrimes; - size_t - _iterationsCount; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). - size_t _n; // Row/column dimension of A. - size_t _primesCount; // How many primes. Equal to _primes.size(). + // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). + size_t _iterationsCount = 0u; + size_t _n = 0u; // Row/column dimension of A. + size_t _primesCount = 0u; // How many primes. Equal to _primes.size(). 
std::vector _B; // Inverses of A mod p[i] std::vector _fields; // All fields Modular From 2f49d175aaccff8c63f5cbf0855839e1a5a9dea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Pernet?= Date: Wed, 12 Jun 2019 10:23:53 +0200 Subject: [PATCH 22/63] fix *16 hacks and *stride --- linbox/algorithms/multi-mod-lifting-container.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index b3a9863c8..13ef04c25 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -113,8 +113,7 @@ namespace LinBox { { // Based on Chen-Storjohann's paper, this is the bit size // of the needed RNS basis for the residue computation - double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)) - * 16; // @fixme @cpernet Does this factor 16 makes sense? + double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)); _rnsBasisPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits()); _rnsPrimes.resize(_rnsBasisPrimesCount); std::cout << "RNS basis: " << _rnsBasisPrimesCount << " estimated primes." @@ -201,7 +200,7 @@ namespace LinBox { double cmax = logInfinityNormA / 16.; // @note So that 2^(16*cmax) is the max element of A. 
- FFLAS::finit_rns(*_rnsDomain, _n, _n, cmax, A.getPointer(), A.stride(), _rnsA); + FFLAS::finit_rns(*_rnsDomain, _n, _n, std::ceil(cmax), A.getPointer(), A.stride(), _rnsA); } // Compute how many iterations are needed @@ -325,7 +324,7 @@ namespace LinBox { std::cout << "stride " << _rnsc[i * _n + j]._stride << std::endl; auto stride = _rnsc[i * _n + j]._stride; for (auto h = 0u; h < _rnsBasisPrimesCount; ++h) { - _rnsc[i * _n + j]._ptr[h + stride] = cij; + _rnsc[i * _n + j]._ptr[h * stride] = cij; } _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j]); std::cout << std::endl; From b01f4875a9c0b89738ea232f6f2b1d042b69a49e Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 12 Jun 2019 14:35:05 +0200 Subject: [PATCH 23/63] Better names --- .../algorithms/multi-mod-lifting-container.h | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 13ef04c25..885a8d429 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -103,7 +103,7 @@ namespace LinBox { // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?) _primesCount = 2; _primes.resize(_primesCount); - std::cout << "l: " << _primesCount << std::endl; + std::cout << "primesCount: " << _primesCount << std::endl; // Some preparation work Integer infinityNormA; @@ -116,14 +116,15 @@ namespace LinBox { double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)); _rnsBasisPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits()); _rnsPrimes.resize(_rnsBasisPrimesCount); - std::cout << "RNS basis: " << _rnsBasisPrimesCount << " estimated primes." 
- << std::endl; + std::cout << "rnsBasisPrimesCount: " << _rnsBasisPrimesCount << std::endl; std::vector primes; for (auto j = 0u; j < _primesCount + _rnsBasisPrimesCount; ++j) { auto p = *primeGenerator; ++primeGenerator; + // @note std::lower_bound finds the iterator where to put p in the sorted container. + // The name of the routine might be strange, but, hey, that's not my fault. auto lb = std::lower_bound(primes.begin(), primes.end(), p); if (lb != primes.end() && *lb == p) { --j; @@ -157,14 +158,14 @@ namespace LinBox { // Generating primes { IElement iTmp; - _ring.assign(_p, _ring.one); + _ring.assign(_primesProduct, _ring.one); for (auto& pj : _primes) { _fields.emplace_back(pj); _ring.init(iTmp, pj); - _ring.mulin(_p, iTmp); + _ring.mulin(_primesProduct, iTmp); } - std::cout << "p: " << _p << std::endl; + std::cout << "primesProduct: " << _primesProduct << std::endl; } // Initialize all inverses @@ -206,11 +207,11 @@ namespace LinBox { // Compute how many iterations are needed { auto hb = RationalSolveHadamardBound(A, b); - double log2P = Givaro::logtwo(_p); + double log2P = Givaro::logtwo(_primesProduct); // _iterationsCount = log2(2 * N * D) / log2(p) _log2Bound = hb.solutionLogBound; _iterationsCount = std::ceil(_log2Bound / log2P); - std::cout << "k: " << _iterationsCount << std::endl; + std::cout << "iterationsCount: " << _iterationsCount << std::endl; // @fixme Fact is RationalReconstruction which needs numbound and denbound // expects them to be in non-log... @fixme Still needed? @@ -260,7 +261,7 @@ namespace LinBox { * We are compliant to the interface even though * p is multi-modular and thus not a prime per se. */ - const IElement& prime() const final { return _p; } + const IElement& prime() const final { return _primesProduct; } // ------------------------------ // ----- NOT LiftingContainer API @@ -294,7 +295,7 @@ namespace LinBox { auto& Q = _Q[j]; auto& R = _R[j]; - // @todo @cpernet Is there a VectorDomain::divmod somewhere? 
+ // @note There is no VectorDomain::divmod yet. // Euclidian division so that rj = pj Qj + Rj for (auto i = 0u; i < _n; ++i) { // @fixme @cpernet Is this OK for any Ring or should we be sure we are using @@ -312,22 +313,19 @@ namespace LinBox { auto& Fc = _Fc[j]; B.apply(Fc, FR); + // @fixme We might not need to store digits into IVectors, and returning _Fc + // would do the trick digits[j] = IVector(_ring, Fc); // Store the very same result in an RNS system, // but fact is all the primes of the RNS system are bigger // than the modulus used to compute _Fc, we just copy the result for everybody. - std::cout << "FOR " << pj << std::endl; for (auto i = 0u; i < _n; ++i) { - // std::cout << _rnsc[i * _n + j]._ptr << std::endl; - double cij = _Fc[j][i]; - std::cout << "stride " << _rnsc[i * _n + j]._stride << std::endl; + double cij = Fc[i]; auto stride = _rnsc[i * _n + j]._stride; for (auto h = 0u; h < _rnsBasisPrimesCount; ++h) { _rnsc[i * _n + j]._ptr[h * stride] = cij; } - _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j]); - std::cout << std::endl; } } @@ -385,7 +383,7 @@ namespace LinBox { RNSElementPtr _rnsc; size_t _rnsBasisPrimesCount = 0u; - IElement _p; // The global modulus for lifting: a multiple of all _primes. + IElement _primesProduct; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! std::vector _rnsPrimes; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). 
From 8a0343ea5e4d6073acf3ba76a42940a697979794 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 12 Jun 2019 14:54:19 +0200 Subject: [PATCH 24/63] Fixed segfaulting because of RNSSystem not being copied --- .../algorithms/multi-mod-lifting-container.h | 42 +++++++++++++------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 885a8d429..5197d10aa 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -123,8 +123,9 @@ namespace LinBox { auto p = *primeGenerator; ++primeGenerator; - // @note std::lower_bound finds the iterator where to put p in the sorted container. - // The name of the routine might be strange, but, hey, that's not my fault. + // @note std::lower_bound finds the iterator where to put p in the sorted + // container. The name of the routine might be strange, but, hey, that's not my + // fault. auto lb = std::lower_bound(primes.begin(), primes.end(), p); if (lb != primes.end() && *lb == p) { --j; @@ -191,17 +192,16 @@ namespace LinBox { // Making A into the RNS domain { - RNSSystem rnsSystem(_rnsPrimes); - _rnsDomain = new RNSDomain(rnsSystem); + _rnsSystem = new RNSSystem(_rnsPrimes); + _rnsDomain = new RNSDomain(*_rnsSystem); _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n); - - // @fixme @cpernet Just it be transpose for better memory access between threads? - // Each column is the current digit c[j] mod pj _rnsc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); + _rnsAc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); - double cmax = - logInfinityNormA / 16.; // @note So that 2^(16*cmax) is the max element of A. - FFLAS::finit_rns(*_rnsDomain, _n, _n, std::ceil(cmax), A.getPointer(), A.stride(), _rnsA); + // @note So that 2^(16*cmax) is the max element of A. 
+ double cmax = logInfinityNormA / 16.; + FFLAS::finit_rns(*_rnsDomain, _n, _n, std::ceil(cmax), A.getPointer(), A.stride(), + _rnsA); } // Compute how many iterations are needed @@ -244,6 +244,7 @@ namespace LinBox { { FFLAS::fflas_delete(_rnsA); // @fixme Does it knows the size? delete _rnsDomain; + delete _rnsSystem; } // -------------------------- @@ -331,6 +332,20 @@ namespace LinBox { // ----- Compute the next residue! + // @note The compute the next residu r <= (r - A c) / p + // By first doing A c as a fgemm within the RNS domain. + FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _n, + _primesCount, _rnsDomain->one, _rnsA, _n, _rnsc, _n, _rnsDomain->zero, + _rnsAc, _n); + + std::cout << "---------" << std::endl; + for (auto i = 0u; i < _n; ++i) { + for (auto j = 0u; j < _primesCount; ++j) { + _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j]) + << std::endl; + } + } + // r <= (r - A c) / p for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; @@ -340,8 +355,6 @@ namespace LinBox { auto& Fc = _Fc[j]; - // @note We know that _Fc @fixme @todo XXXX - // @fixme For now, we convert cj to integer, // but it should be converted into a RNS system, on pre-allocated memory. IVector Ic(_ring, Fc); @@ -377,13 +390,16 @@ namespace LinBox { IElement _denbound; double _log2Bound; + RNSSystem* _rnsSystem = nullptr; RNSDomain* _rnsDomain = nullptr; RNSElementPtr _rnsA; // The matrix A, but in the RNS system // A matrix of digits c[j], being the current digits mod pj, in the RNS system RNSElementPtr _rnsc; + // The result matrix of the fgemm _rnsA * _rnsc. + RNSElementPtr _rnsAc; size_t _rnsBasisPrimesCount = 0u; - IElement _primesProduct; // The global modulus for lifting: a multiple of all _primes. + IElement _primesProduct; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! std::vector _rnsPrimes; // Length of the ci sequence. 
So that p^{k-1} > 2ND (Hadamard bound). From 9f50edf253a4aad6067f657bff48358364097533 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 12 Jun 2019 16:48:36 +0200 Subject: [PATCH 25/63] RNS-based dixon working only for matrix size = 2 --- .../algorithms/multi-mod-lifting-container.h | 146 ++++++++++++------ 1 file changed, 98 insertions(+), 48 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 5197d10aa..09497582a 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -114,12 +114,12 @@ namespace LinBox { // Based on Chen-Storjohann's paper, this is the bit size // of the needed RNS basis for the residue computation double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)); - _rnsBasisPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits()); - _rnsPrimes.resize(_rnsBasisPrimesCount); - std::cout << "rnsBasisPrimesCount: " << _rnsBasisPrimesCount << std::endl; + _rnsPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits()); + _rnsPrimes.resize(_rnsPrimesCount); + std::cout << "rnsBasisPrimesCount: " << _rnsPrimesCount << std::endl; std::vector primes; - for (auto j = 0u; j < _primesCount + _rnsBasisPrimesCount; ++j) { + for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) { auto p = *primeGenerator; ++primeGenerator; @@ -145,18 +145,20 @@ namespace LinBox { // We check that we really need all the primes within the RNS basis, // as the first count was just an upper estimation. double bitSize = 0.0; - for (int i = _rnsPrimes.size() - 1; i >= 0; --i) { - bitSize += Givaro::logtwo(primes[i]); + for (int h = _rnsPrimes.size() - 1; h >= 0; --h) { + bitSize += Givaro::logtwo(primes[h]); - if (bitSize > rnsBasisBitSize && i > 0) { - _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (i - 1)); - std::cout << "RNS basis: Erasing extra " << i << "primes." 
<< std::endl; + if (bitSize > rnsBasisBitSize && h > 0) { + _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (h - 1)); + _rnsPrimesCount -= h; + std::cout << "RNS basis: Erasing extra " << h << "primes." << std::endl; break; } } } // Generating primes + // @fixme Cleanup, might not be needed { IElement iTmp; _ring.assign(_primesProduct, _ring.one); @@ -196,7 +198,7 @@ namespace LinBox { _rnsDomain = new RNSDomain(*_rnsSystem); _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n); _rnsc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); - _rnsAc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); + _rnsR = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); // @note So that 2^(16*cmax) is the max element of A. double cmax = logInfinityNormA / 16.; @@ -204,6 +206,20 @@ namespace LinBox { _rnsA); } + // Compute the inverses of pj for each RNS prime + { + _primesRNSInverses.resize(_primesCount); + for (auto j = 0u; j < _primesCount; ++j) { + auto prime = _primes[j]; + _primesRNSInverses[j].resize(_rnsPrimesCount); + for (auto h = 0u; h < _rnsPrimesCount; ++h) { + auto& rnsF = _rnsSystem->_field_rns[h]; + auto& primeInverse = _primesRNSInverses[j][h]; + rnsF.inv(primeInverse, prime); + } + } + } + // Compute how many iterations are needed { auto hb = RationalSolveHadamardBound(A, b); @@ -242,6 +258,8 @@ namespace LinBox { ~MultiModLiftingContainer() { + FFLAS::fflas_delete(_rnsR); // @fixme Does it knows the size? + FFLAS::fflas_delete(_rnsc); // @fixme Does it knows the size? FFLAS::fflas_delete(_rnsA); // @fixme Does it knows the size? delete _rnsDomain; delete _rnsSystem; @@ -322,63 +340,94 @@ namespace LinBox { // but fact is all the primes of the RNS system are bigger // than the modulus used to compute _Fc, we just copy the result for everybody. 
for (auto i = 0u; i < _n; ++i) { - double cij = Fc[i]; - auto stride = _rnsc[i * _n + j]._stride; - for (auto h = 0u; h < _rnsBasisPrimesCount; ++h) { - _rnsc[i * _n + j]._ptr[h * stride] = cij; - } + setRNSMatrixElementAllResidues(_rnsR, _n, i, j, FR[i]); + setRNSMatrixElementAllResidues(_rnsc, _n, i, j, Fc[i]); } } - // ----- Compute the next residue! + // ----- Compute the next residues! - // @note The compute the next residu r <= (r - A c) / p - // By first doing A c as a fgemm within the RNS domain. - FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _n, - _primesCount, _rnsDomain->one, _rnsA, _n, _rnsc, _n, _rnsDomain->zero, - _rnsAc, _n); + // r <= Q + (R - A c) / p - std::cout << "---------" << std::endl; - for (auto i = 0u; i < _n; ++i) { - for (auto j = 0u; j < _primesCount; ++j) { - _rnsDomain->write(std::cout << i << " " << j << " ", _rnsc[i * _n + j]) - << std::endl; + std::cout << "A" << std::endl; + for (auto j = 0u; j < _n; ++j) { + for (auto i = 0u; i < _n; ++i) { + logRNSMatrixElement(_rnsA, _n, i, j); } } - // r <= (r - A c) / p + std::cout << "c" << std::endl; for (auto j = 0u; j < _primesCount; ++j) { - auto pj = _primes[j]; - auto& r = _r[j]; - auto& Q = _Q[j]; - auto& R = _R[j]; + for (auto i = 0u; i < _n; ++i) { + logRNSMatrixElement(_rnsc, _n, i, j); + } + } - auto& Fc = _Fc[j]; + // By first computing R <= R - A c as a fgemm within the RNS domain. + FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n, + _rnsDomain->mOne, _rnsA, _n, _rnsc, _n, _rnsDomain->one, + _rnsR, _n); + + std::cout << "R = Ac" << std::endl; + for (auto j = 0u; j < _primesCount; ++j) { + for (auto i = 0u; i < _n; ++i) { + logRNSMatrixElement(_rnsR, _n, i, j); + } + } - // @fixme For now, we convert cj to integer, - // but it should be converted into a RNS system, on pre-allocated memory. - IVector Ic(_ring, Fc); + // We divide each residues by the according pj, which is done by multiplying. 
+ // @fixme Could be done in parallel! + for (auto j = 0u; j < _primesCount; ++j) { + for (auto i = 0u; i < _n; ++i) { + auto& rnsElement = _rnsR[i * _n + j]; + auto stride = rnsElement._stride; + for (auto h = 0u; h < _rnsPrimesCount; ++h) { + auto& rnsF = _rnsSystem->_field_rns[h]; + rnsF.mulin(rnsElement._ptr[h * stride], _primesRNSInverses[j][h]); + } + } + } - // @fixme Should become a matrix-matrix multiplication! - // @fixme Should be able to do a gemv - _A.apply(r, Ic); // r = A c - IVD.negin(r); // r = - A c - IVD.addin(r, R); // r = R - A c + // @fixme Could be done in parallel! + for (auto j = 0u; j < _primesCount; ++j) { + auto& r = _r[j]; + auto& Q = _Q[j]; - // r = (R - A c) / p - IElement Ipj; - _ring.init(Ipj, pj); + // r <- (R - Ac) / p + // @fixme @cpernet Don't know how to do that with one fconvert_rns! for (auto i = 0u; i < _n; ++i) { - _ring.divin(r[i], Ipj); // @fixme Is there a divin in VectorDomain? + FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _n + j)); } - IVD.addin(r, Q); // r = Q + (R - A c) / p + // r <- Q + (R - Ac) / p + IVD.addin(r, Q); } ++_position; return true; } + private: + // Helper function, setting all residues of a matrix element to the very same value. + // This doesn't check the moduli. 
+ void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j, + double value) + { + auto stride = A[i * lda + j]._stride; + for (auto h = 0u; h < _rnsPrimesCount; ++h) { + A[i * lda + j]._ptr[h * stride] = value; + } + } + + void logRNSMatrixElement(RNSElementPtr& A, size_t lda, size_t i, size_t j) + { + Integer reconstructedInteger; + FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &reconstructedInteger, 1, A + (i * lda + j)); + std::cout << i << " " << j << " "; + _rnsDomain->write(std::cout, A[i * lda + j]); + std::cout << " -> " << reconstructedInteger << std::endl; + } + private: const Ring& _ring; @@ -395,9 +444,10 @@ namespace LinBox { RNSElementPtr _rnsA; // The matrix A, but in the RNS system // A matrix of digits c[j], being the current digits mod pj, in the RNS system RNSElementPtr _rnsc; - // The result matrix of the fgemm _rnsA * _rnsc. - RNSElementPtr _rnsAc; - size_t _rnsBasisPrimesCount = 0u; + RNSElementPtr _rnsR; + size_t _rnsPrimesCount = 0u; + // Stores the inverse of pj of the i-th RNS prime into _primesRNSInverses[j][i] + std::vector> _primesRNSInverses; IElement _primesProduct; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! From db3b78f1aa3133cdee82062c4e844f6d3260c1cf Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Thu, 13 Jun 2019 14:55:05 +0200 Subject: [PATCH 26/63] Fixed DixonRNS solver for dimension != 2 --- .../algorithms/multi-mod-lifting-container.h | 40 +++++-------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 09497582a..d350aec16 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -258,9 +258,9 @@ namespace LinBox { ~MultiModLiftingContainer() { - FFLAS::fflas_delete(_rnsR); // @fixme Does it knows the size? 
- FFLAS::fflas_delete(_rnsc); // @fixme Does it knows the size? - FFLAS::fflas_delete(_rnsA); // @fixme Does it knows the size? + FFLAS::fflas_delete(_rnsR); + FFLAS::fflas_delete(_rnsc); + FFLAS::fflas_delete(_rnsA); delete _rnsDomain; delete _rnsSystem; } @@ -340,8 +340,8 @@ namespace LinBox { // but fact is all the primes of the RNS system are bigger // than the modulus used to compute _Fc, we just copy the result for everybody. for (auto i = 0u; i < _n; ++i) { - setRNSMatrixElementAllResidues(_rnsR, _n, i, j, FR[i]); - setRNSMatrixElementAllResidues(_rnsc, _n, i, j, Fc[i]); + setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); + setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, Fc[i]); } } @@ -349,37 +349,17 @@ namespace LinBox { // r <= Q + (R - A c) / p - std::cout << "A" << std::endl; - for (auto j = 0u; j < _n; ++j) { - for (auto i = 0u; i < _n; ++i) { - logRNSMatrixElement(_rnsA, _n, i, j); - } - } - - std::cout << "c" << std::endl; - for (auto j = 0u; j < _primesCount; ++j) { - for (auto i = 0u; i < _n; ++i) { - logRNSMatrixElement(_rnsc, _n, i, j); - } - } - // By first computing R <= R - A c as a fgemm within the RNS domain. - FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n, - _rnsDomain->mOne, _rnsA, _n, _rnsc, _n, _rnsDomain->one, - _rnsR, _n); - - std::cout << "R = Ac" << std::endl; - for (auto j = 0u; j < _primesCount; ++j) { - for (auto i = 0u; i < _n; ++i) { - logRNSMatrixElement(_rnsR, _n, i, j); - } - } + // @fixme Use parallel helper! + FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, + _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, + _rnsR, _primesCount); // We divide each residues by the according pj, which is done by multiplying. // @fixme Could be done in parallel! 
for (auto j = 0u; j < _primesCount; ++j) { for (auto i = 0u; i < _n; ++i) { - auto& rnsElement = _rnsR[i * _n + j]; + auto& rnsElement = _rnsR[i * _primesCount + j]; auto stride = rnsElement._stride; for (auto h = 0u; h < _rnsPrimesCount; ++h) { auto& rnsF = _rnsSystem->_field_rns[h]; From c415b719c6545dcc3aa93efaa7c888d56cebb9bc Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Thu, 13 Jun 2019 16:14:14 +0200 Subject: [PATCH 27/63] Fixed wrong leading dimension for accessing residue element --- linbox/algorithms/multi-mod-lifting-container.h | 12 +++++++++--- linbox/solutions/solve/solve-dixon-rns.h | 4 ++-- tests/test-solve-full.C | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index d350aec16..cf6269ea3 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -96,11 +96,12 @@ namespace LinBox { { linbox_check(A.rowdim() == A.coldim()); + std::cout << "----------" << std::endl; A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl; std::cout << "b: " << b << std::endl; // This will contain the primes or our MultiMod basis - // @fixme Pass it through Method::DixonRNS (and rename it Method::DixonMultiMod?) + // @fixme Pass the count through Method::DixonRNS (and rename it Method::DixonMultiMod?) 
_primesCount = 2; _primes.resize(_primesCount); std::cout << "primesCount: " << _primesCount << std::endl; @@ -116,6 +117,7 @@ namespace LinBox { double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)); _rnsPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits()); _rnsPrimes.resize(_rnsPrimesCount); + std::cout << "primeGenerator.getBits(): " << primeGenerator.getBits() << std::endl; std::cout << "rnsBasisPrimesCount: " << _rnsPrimesCount << std::endl; std::vector primes; @@ -226,7 +228,11 @@ namespace LinBox { double log2P = Givaro::logtwo(_primesProduct); // _iterationsCount = log2(2 * N * D) / log2(p) _log2Bound = hb.solutionLogBound; - _iterationsCount = std::ceil(_log2Bound / log2P); + + // @fixme @cpernet @jgdumas Is this computation wrong? + // I have to increase the number of iterations when the bitsize of the vector + // is big, maybe there is something wrong with the Hadamard bound. + _iterationsCount = std::ceil(_log2Bound / log2P) + 2; std::cout << "iterationsCount: " << _iterationsCount << std::endl; // @fixme Fact is RationalReconstruction which needs numbound and denbound @@ -376,7 +382,7 @@ namespace LinBox { // r <- (R - Ac) / p // @fixme @cpernet Don't know how to do that with one fconvert_rns! 
for (auto i = 0u; i < _n; ++i) { - FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _n + j)); + FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _primesCount + j)); } // r <- Q + (R - Ac) / p diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 630ac235d..71faf0627 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -139,7 +139,7 @@ namespace LinBox { void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) { - commentator().start("solve.dixon.integer.dense"); + commentator().start("solve.dixon-rns.integer.dense"); // @fixme We don't know if we can use ModularBalanced, // because of the rational reconstruction which might be @@ -151,7 +151,7 @@ namespace LinBox { DixonRNSSolver solver(A.field(), primeGenerator); solver.solve(xNum, xDen, A, b, m); - commentator().stop("solve.dixon.integer.dense"); + commentator().stop("solve.dixon-rns.integer.dense"); // @fixme Implement something like that // if (status == SS_INCONSISTENT) { diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 5b9e5acec..363e0a781 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -263,7 +263,7 @@ int main(int argc, char** argv) bool ok = true; do { // // ----- Rational Auto - ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // @fixme Dixon does not compile // // ok = ok && test_blackbox_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); From 87dbe8b7caca7bebfb2a65fefe13dafb4a1f4b39 Mon Sep 17 00:00:00 2001 From: Alexis Breust 
Date: Fri, 14 Jun 2019 16:23:59 +0200 Subject: [PATCH 28/63] Thanks to @jgdumas, now handling rational reconstruction with own num bound --- .../algorithms/multi-mod-lifting-container.h | 32 ++++++--------- .../rational-cra-builder-full-multip.h | 23 ++++++++++- linbox/solutions/hadamard-bound.h | 6 +-- linbox/solutions/solve/solve-dixon-rns.h | 40 ++++++++++++++++++- 4 files changed, 76 insertions(+), 25 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index cf6269ea3..e2a4876ca 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -228,19 +228,15 @@ namespace LinBox { double log2P = Givaro::logtwo(_primesProduct); // _iterationsCount = log2(2 * N * D) / log2(p) _log2Bound = hb.solutionLogBound; - - // @fixme @cpernet @jgdumas Is this computation wrong? - // I have to increase the number of iterations when the bitsize of the vector - // is big, maybe there is something wrong with the Hadamard bound. - _iterationsCount = std::ceil(_log2Bound / log2P) + 2; + _log2NumBound = hb.numLogBound; + _log2DenBound = hb.denLogBound; + std::cout << "_log2Bound: " << _log2Bound << std::endl; + std::cout << "_log2NumBound: " << _log2NumBound << std::endl; + std::cout << "_log2DenBound: " << hb.denLogBound << std::endl; + std::cout << "log2P: " << log2P << std::endl; + + _iterationsCount = std::ceil(_log2Bound / log2P); std::cout << "iterationsCount: " << _iterationsCount << std::endl; - - // @fixme Fact is RationalReconstruction which needs numbound and denbound - // expects them to be in non-log... @fixme Still needed? 
- _ring.init(_numbound, Integer(1) - << static_cast(std::ceil(hb.numLogBound))); - _ring.init(_denbound, Integer(1) - << static_cast(std::ceil(hb.denLogBound))); } //----- Locals setup @@ -292,11 +288,9 @@ namespace LinBox { // ----- NOT LiftingContainer API // ----- but still needed - const IElement& numbound() const { return _numbound; } - - const IElement& denbound() const { return _denbound; } - double log2Bound() const { return _log2Bound; } + double log2NumBound() const { return _log2NumBound; } + double log2DenBound() const { return _log2DenBound; } uint32_t primesCount() const { return _primesCount; } @@ -414,16 +408,16 @@ namespace LinBox { std::cout << " -> " << reconstructedInteger << std::endl; } - private: + public: // @fixme BACK TO PRIVATE! const Ring& _ring; // The problem: A^{-1} * b const IMatrix& _A; const IVector& _b; - IElement _numbound; - IElement _denbound; double _log2Bound; + double _log2NumBound; + double _log2DenBound; RNSSystem* _rnsSystem = nullptr; RNSDomain* _rnsDomain = nullptr; diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h index e1df25d35..30b38a412 100644 --- a/linbox/algorithms/rational-cra-builder-full-multip.h +++ b/linbox/algorithms/rational-cra-builder-full-multip.h @@ -65,11 +65,30 @@ namespace LinBox return num; } + template + Vect& result (Vect &num, Integer& den, const Integer& numBound, const Integer& denBound) + { + Father_t::result(num, false); + den = 1; + const auto& mod = Father_t::getModulus(); + Integer nd; + for (auto num_it = num.begin(); num_it != num.end(); ++num_it) { + iterativeratrecon(*num_it, nd, den, mod, numBound, denBound); + + if (nd > 1) { + for (auto t02 = num.begin(); t02 != num_it; ++t02) + *t02 *= nd; + den *= nd; + } + } + return num; + } + protected: - Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& s) + Integer& iterativeratrecon(Integer& u1, Integer& 
new_den, const Integer& old_den, const Integer& m1, const Integer& sn, const Integer& sd) { Integer a; - _ZZ.reconstructRational(a, new_den, u1*=old_den, m1, s); + _ZZ.reconstructRational(a, new_den, u1*=old_den, m1, sn, sd); return u1=a; } }; diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h index 48b9bcf55..a003aaf22 100644 --- a/linbox/solutions/hadamard-bound.h +++ b/linbox/solutions/hadamard-bound.h @@ -51,7 +51,7 @@ namespace LinBox { } #ifdef DEBUG_HADAMARD_BOUND std::clog << "normSquared:=" << normSquared << ';' << std::endl; - std::clog << "vectorLogNorm:=" << (Givaro::logtwo(normSquared) / 2.0) << ';' << std::endl; + std::clog << "vectorLogNorm:=" << Givaro::logtwo(normSquared) / 2.0 << ';' << std::endl; #endif logNorm = Givaro::logtwo(normSquared) / 2.0; return true; @@ -423,9 +423,9 @@ namespace LinBox { double bLogNorm; vectorLogNorm(bLogNorm, b.begin(), b.end()); - data.numLogBound = hadamardBound.logBoundOverMinNorm + bLogNorm + 1.0; + data.numLogBound = hadamardBound.logBoundOverMinNorm + bLogNorm; data.denLogBound = hadamardBound.logBound; - data.solutionLogBound = data.numLogBound + data.denLogBound + 1.0; + data.solutionLogBound = 1.0 + data.numLogBound + data.denLogBound; // log2(2 * N * D) #ifdef DEBUG_HADAMARD_BOUND std::clog << "numLogBound:=" << data.numLogBound << ';' << std::endl; diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 71faf0627..34c336db0 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -86,8 +86,41 @@ namespace LinBox { craBuilder.progress(field, padicAccumulations[j]); } + + for (auto j = 0u; j < _lc.primesCount(); ++j) { + auto Cj = padicAccumulations[j]; + auto xxx = (_lc._A.getEntry(0, 0) * Cj[0] - _lc._b[0]) % radices[j]; + std::cout << "xxx " << j << " " << xxx << std::endl; + } + // Rational reconstruction - craBuilder.result(xNum, xDen); + Integer numBound = (Integer(1) << 
size_t(std::ceil(_lc.log2NumBound()))); + Integer denBound = (Integer(1) << size_t(std::ceil(_lc.log2DenBound()))); + + // @todo @cleanup Do the same for denBound ? + // The following finds the closest Integer that satisfies 2 ^ exponent. + // This is done by dichotomy, going from floor to ceil. + + Integer minNumBound = (Integer(1) << size_t(std::floor(_lc.log2NumBound()))); + Integer maxNumBound = (Integer(1) << size_t(std::ceil(_lc.log2NumBound()))); + auto middleNumBound = (minNumBound + maxNumBound); + double l = _lc.log2NumBound(); + double lm = Givaro::logtwo(middleNumBound) - 1; + while (minNumBound < maxNumBound) { + if (lm > l) { + maxNumBound = middleNumBound / 2; + } + else if (lm < l) { + minNumBound = middleNumBound / 2; + } + else { + break; + } + middleNumBound = (minNumBound + maxNumBound); + lm = Givaro::logtwo(middleNumBound) - 1; + } + + craBuilder.result(xNum, xDen, middleNumBound / 2, denBound); return true; } @@ -125,6 +158,11 @@ namespace LinBox { if (!re.getRational(xNum, xDen)) { std::cerr << "OUCH!" << std::endl; } + +// #ifdef DEBUG_HADAMARD_BOUND + std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << ';' << std::endl; + std::clog << "denLog " << Givaro::logtwo(xDen) << ';' << std::endl; +// #endif } private: From 0e66c33c014e07f4de3577252f8b0615464429f3 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 18 Jun 2019 10:33:51 +0200 Subject: [PATCH 29/63] Fixed a bunch of cases with b very different of A --- linbox/solutions/solve/solve-dixon-rns.h | 93 ++++++++++++++++-------- 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 34c336db0..f4a4837cb 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -25,6 +25,65 @@ #include namespace LinBox { + // @todo @cleanup Move that somewhere inside Givaro? + // Find the closest upper bound Integer that satisfies 2 ^ exponent. 
+ // This is done by dichotomy, going from floor to ceil. + Integer twoPower(double exponent) + { + // @note Is the exponent is small, we will be extra precise, + // otherwise, we over estimate the exponent a bit, + // so that results are all right with rational reconstruction. + // The reason being that RR does has to be very precise for small + // values so that it does not go too far. + // And, RR also need to go far enough, the exponent not being very precise + // for big values. + // @fixme This is hard-coded... That's sad. What does this mean really? + if (exponent > 20.) { + exponent *= 1.0001; + } + + Integer min = (Integer(1) << uint64_t(std::floor(exponent))); + Integer max = (Integer(1) << uint64_t(std::ceil(exponent))); + + // To keep full precision, we do not divide by two here, + // but just the computed exponent. + Integer target = min + max; + Integer lastKnownTarget = target; + double targetExponent = 0.0; + + while (min < max) { + targetExponent = Givaro::logtwo(target) - 1; + if (targetExponent > exponent) { + max = (target + 1) / 2; + } + else if (targetExponent < exponent) { + min = target / 2; + } + else { + break; + } + + target = min + max; + + // Get out if we're lock in an infinite loop + if (lastKnownTarget == target) { + break; + } + lastKnownTarget = target; + } + + // Find the smallest value that satisfies the upper + // evaluation of the exponent. 
+ if (Givaro::logtwo(min) >= exponent) { + return min; + } else if (Givaro::logtwo(target / 2) >= exponent) { + return target / 2; + } + else { + return max; + } + } + /** * From a MultiModLiftingContainer, will build * the solution on each prime, then will do a CRT reconstruction, @@ -86,7 +145,6 @@ namespace LinBox { craBuilder.progress(field, padicAccumulations[j]); } - for (auto j = 0u; j < _lc.primesCount(); ++j) { auto Cj = padicAccumulations[j]; auto xxx = (_lc._A.getEntry(0, 0) * Cj[0] - _lc._b[0]) % radices[j]; @@ -94,33 +152,10 @@ namespace LinBox { } // Rational reconstruction - Integer numBound = (Integer(1) << size_t(std::ceil(_lc.log2NumBound()))); - Integer denBound = (Integer(1) << size_t(std::ceil(_lc.log2DenBound()))); - - // @todo @cleanup Do the same for denBound ? - // The following finds the closest Integer that satisfies 2 ^ exponent. - // This is done by dichotomy, going from floor to ceil. - - Integer minNumBound = (Integer(1) << size_t(std::floor(_lc.log2NumBound()))); - Integer maxNumBound = (Integer(1) << size_t(std::ceil(_lc.log2NumBound()))); - auto middleNumBound = (minNumBound + maxNumBound); - double l = _lc.log2NumBound(); - double lm = Givaro::logtwo(middleNumBound) - 1; - while (minNumBound < maxNumBound) { - if (lm > l) { - maxNumBound = middleNumBound / 2; - } - else if (lm < l) { - minNumBound = middleNumBound / 2; - } - else { - break; - } - middleNumBound = (minNumBound + maxNumBound); - lm = Givaro::logtwo(middleNumBound) - 1; - } + Integer numBound = twoPower(_lc.log2NumBound()); + Integer denBound = twoPower(_lc.log2DenBound()); - craBuilder.result(xNum, xDen, middleNumBound / 2, denBound); + craBuilder.result(xNum, xDen, numBound, denBound); return true; } @@ -159,10 +194,10 @@ namespace LinBox { std::cerr << "OUCH!" 
<< std::endl; } -// #ifdef DEBUG_HADAMARD_BOUND + // #ifdef DEBUG_HADAMARD_BOUND std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << ';' << std::endl; std::clog << "denLog " << Givaro::logtwo(xDen) << ';' << std::endl; -// #endif + // #endif } private: From 5f1b54a4ec55d571966492d39a1483411806b5aa Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 18 Jun 2019 15:00:17 +0200 Subject: [PATCH 30/63] Switched to exact value for HadamardBound, simplifying the rat recon step --- .../algorithms/multi-mod-lifting-container.h | 31 +- .../rational-cra-builder-full-multip.h | 12 +- linbox/solutions/hadamard-bound.h | 321 +++++++++--------- linbox/solutions/solve/solve-dixon-rns.h | 89 +---- 4 files changed, 215 insertions(+), 238 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index e2a4876ca..910110d03 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -153,7 +153,7 @@ namespace LinBox { if (bitSize > rnsBasisBitSize && h > 0) { _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (h - 1)); _rnsPrimesCount -= h; - std::cout << "RNS basis: Erasing extra " << h << "primes." << std::endl; + std::cout << "RNS basis: Erasing extra " << h << " primes." 
<< std::endl; break; } } @@ -228,11 +228,11 @@ namespace LinBox { double log2P = Givaro::logtwo(_primesProduct); // _iterationsCount = log2(2 * N * D) / log2(p) _log2Bound = hb.solutionLogBound; - _log2NumBound = hb.numLogBound; - _log2DenBound = hb.denLogBound; + _numBound = hb.numBound; + _denBound = hb.denBound; std::cout << "_log2Bound: " << _log2Bound << std::endl; - std::cout << "_log2NumBound: " << _log2NumBound << std::endl; - std::cout << "_log2DenBound: " << hb.denLogBound << std::endl; + std::cout << "_numBound: " << _numBound << std::endl; + std::cout << "_denBound: " << _denBound << std::endl; std::cout << "log2P: " << log2P << std::endl; _iterationsCount = std::ceil(_log2Bound / log2P); @@ -289,8 +289,8 @@ namespace LinBox { // ----- but still needed double log2Bound() const { return _log2Bound; } - double log2NumBound() const { return _log2NumBound; } - double log2DenBound() const { return _log2DenBound; } + Integer numBound() const { return _numBound; } + Integer denBound() const { return _denBound; } uint32_t primesCount() const { return _primesCount; } @@ -320,6 +320,8 @@ namespace LinBox { // @fixme @cpernet Is this OK for any Ring or should we be sure we are using // Integers? _ring.quoRem(Q[i], R[i], r[i], pj); + // std::cout << "Q" << j << " " << Q[i] << std::endl; + // std::cout << "R" << j << " " << R[i] << std::endl; } // Convert R to the field @@ -336,6 +338,9 @@ namespace LinBox { // would do the trick digits[j] = IVector(_ring, Fc); + // auto ooo = (_A.getEntry(0, 0) * Integer(digits[j][0]) - r[0]) % Integer(pj); + // std::cout << "ooo " << j << " " << ooo << std::endl; + // Store the very same result in an RNS system, // but fact is all the primes of the RNS system are bigger // than the modulus used to compute _Fc, we just copy the result for everybody. 
@@ -373,6 +378,9 @@ namespace LinBox { auto& r = _r[j]; auto& Q = _Q[j]; + // std::cout << "old r" << j << " " << r[0] << std::endl; + // std::cout << "r" << j << " " << (r[0] - _A.getEntry(0, 0) * Integer(_Fc[j][0])) / Integer(_primes[j]) << " expected" << std::endl; + // r <- (R - Ac) / p // @fixme @cpernet Don't know how to do that with one fconvert_rns! for (auto i = 0u; i < _n; ++i) { @@ -380,7 +388,12 @@ namespace LinBox { } // r <- Q + (R - Ac) / p + // std::cout << "p" << j << " " << Integer(_primes[j]) << std::endl; + // std::cout << "c" << j << " " << Integer(_Fc[j][0]) << std::endl; + IVD.addin(r, Q); + + // std::cout << "r" << j << " " << r[0] << std::endl; } ++_position; @@ -416,8 +429,8 @@ namespace LinBox { const IVector& _b; double _log2Bound; - double _log2NumBound; - double _log2DenBound; + Integer _numBound; + Integer _denBound; RNSSystem* _rnsSystem = nullptr; RNSDomain* _rnsDomain = nullptr; diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h index 30b38a412..fb62b3941 100644 --- a/linbox/algorithms/rational-cra-builder-full-multip.h +++ b/linbox/algorithms/rational-cra-builder-full-multip.h @@ -68,6 +68,9 @@ namespace LinBox template Vect& result (Vect &num, Integer& den, const Integer& numBound, const Integer& denBound) { + // std::cout << "numBound " << numBound << std::endl; + // std::cout << "denBound " << denBound << std::endl; + Father_t::result(num, false); den = 1; const auto& mod = Father_t::getModulus(); @@ -87,9 +90,12 @@ namespace LinBox protected: Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn, const Integer& sd) { - Integer a; - _ZZ.reconstructRational(a, new_den, u1*=old_den, m1, sn, sd); - return u1=a; + // @note This interface of the rational does the RatRecon. 
+ Givaro::Rational myRational(u1 *= old_den, m1, sn, false); + + u1 = myRational.nume(); + new_den = myRational.deno(); + return u1; } }; } diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h index a003aaf22..891dbcd5f 100644 --- a/linbox/solutions/hadamard-bound.h +++ b/linbox/solutions/hadamard-bound.h @@ -33,120 +33,114 @@ namespace LinBox { // ----- Vector norm - // Returns false if the vector is null, true otherwise template - bool vectorLogNorm(double& logNorm, const ConstIterator& begin, const ConstIterator& end) + void vectorNormSquared(Integer& normSquared, const ConstIterator& begin, + const ConstIterator& end) { - Integer normSquared = 0; + normSquared = 0; for (ConstIterator it = begin; it != end; ++it) { // Whatever field element it is, // it should be able to store the square without // loss of information. normSquared += (*it) * (*it); } - - if (normSquared == 0) { - logNorm = 0.0; - return false; // Vector is zero - } -#ifdef DEBUG_HADAMARD_BOUND - std::clog << "normSquared:=" << normSquared << ';' << std::endl; - std::clog << "vectorLogNorm:=" << Givaro::logtwo(normSquared) / 2.0 << ';' << std::endl; -#endif - logNorm = Givaro::logtwo(normSquared) / 2.0; - return true; } // ----- Detailed Hadamard bound - struct HadamardLogBoundDetails { + struct HadamarBoundDetails { /** - * Bit size of the minimal hadamard bound + * The minimal hadamard bound * between the row-wise and the col-wise ones. * * min { HadamardRow(A), HadamardCol(A) } */ - double logBound; + Integer bound; + /** - * Bit size of the minimal hadamard bound + * The minimal hadamard bound * divided by the min of the norm vectors * between the row-wise and the col-wise ones. * * min { HadamardRow(A) / min || Ai,* ||, * HadamardCol(A) / min || A*,j || } */ - double logBoundOverMinNorm; + Integer boundOverMinNorm; }; /** * Precise Hadamard bound (bound on determinant) by taking * the row-wise euclidean norm. - * - * The result is expressed as bit size. 
*/ template - void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A) + void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - HadamardRowLogBound(logBound, minLogNorm, A, tag); + HadamardRowBound(bound, minNormSquared, A, tag); } template - void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) + void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::RowColMatrixTag& tag) { - logBound = 0.0; - minLogNorm = std::numeric_limits::infinity(); + bound = 1; + minNormSquared = -1; for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) { - double rowLogNorm; - if (vectorLogNorm(rowLogNorm, rowIt->begin(), rowIt->end())) { - if (rowLogNorm < minLogNorm) { - minLogNorm = rowLogNorm; - } - } - else { - logBound = 0.0; - minLogNorm = 0.0; + Integer rowNormSquared; + vectorNormSquared(rowNormSquared, rowIt->begin(), rowIt->end()); + + if (rowNormSquared == 0) { + bound = 0; + minNormSquared = 0; return; } - logBound += rowLogNorm; + + if (minNormSquared < 0 || rowNormSquared < minNormSquared) { + minNormSquared = rowNormSquared; + } + + bound *= rowNormSquared; } + + bound = Givaro::sqrt(bound); } template - void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowMatrixTag& tag) + void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::RowMatrixTag& tag) { - logBound = 0.0; - minLogNorm = std::numeric_limits::infinity(); + bound = 1; + minNormSquared = -1; for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) { Integer normSquared = 0; for (const auto& pair : *rowIt) { normSquared += (pair.second) * (pair.second); } + if (normSquared == 0) { - logBound = 0.0; - minLogNorm = 0.0; + bound = 0; + minNormSquared = 0; return; } - double 
logNormSquared = Givaro::logtwo(normSquared); - if (logNormSquared < minLogNorm) { - minLogNorm = logNormSquared; + if (minNormSquared < 0 || normSquared < minNormSquared) { + minNormSquared = normSquared; } - logBound += logNormSquared; + + bound *= normSquared; } - // Square-root - logBound /= 2.0; - minLogNorm /= 2.0; + bound = Givaro::sqrt(bound); } template - void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::BlackboxTag& tag) + void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::BlackboxTag& tag) { DenseMatrix ACopy(A); - HadamardRowLogBound(logBound, minLogNorm, ACopy); + HadamardRowBound(bound, minNormSquared, ACopy); } /** @@ -156,40 +150,46 @@ namespace LinBox { * The result is expressed as bit size. */ template - void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A) + void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - HadamardColLogBound(logBound, minLogNorm, A, tag); + HadamardColBound(bound, minNormSquared, A, tag); } template - void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) + void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::RowColMatrixTag& tag) { - logBound = 0.0; - minLogNorm = std::numeric_limits::infinity(); + bound = 1; + minNormSquared = -1; typename IMatrix::ConstColIterator colIt; for (colIt = A.colBegin(); colIt != A.colEnd(); ++colIt) { - double colLogNorm; - if (vectorLogNorm(colLogNorm, colIt->begin(), colIt->end())) { - if (colLogNorm < minLogNorm) { - minLogNorm = colLogNorm; - } - } - else { - logBound = 0.0; - minLogNorm = 0.0; + Integer colNormSquared; + vectorNormSquared(colNormSquared, colIt->begin(), colIt->end()); + + if (colNormSquared == 0) { + bound = 0; + minNormSquared = 0; return; 
} - logBound += colLogNorm; + + if (minNormSquared < 0 || colNormSquared < minNormSquared) { + minNormSquared = colNormSquared; + } + + bound *= colNormSquared; } + + bound = Givaro::sqrt(bound); } template - void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowMatrixTag& tag) + void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::RowMatrixTag& tag) { - logBound = 0.0; - minLogNorm = std::numeric_limits::infinity(); + bound = 1; + minNormSquared = -1; // This vector contains the norm squared for each columns. std::vector columnsNormsSquared(A.coldim()); @@ -200,30 +200,31 @@ namespace LinBox { } // All the norms have been computed, we check which one is the smallest - // and compute the product (aka sum bitsize-wise) of them to make the logBound. + // and compute the product (aka sum bitsize-wise) of them to make the bound. for (const Integer& normSquared : columnsNormsSquared) { if (normSquared == 0) { - logBound = 0.0; - minLogNorm = 0.0; + bound = 0; + minNormSquared = 0; return; } - double logNormSquared = Givaro::logtwo(normSquared); - if (logNormSquared < minLogNorm) { - minLogNorm = logNormSquared; + + if (minNormSquared < 0 || normSquared < minNormSquared) { + minNormSquared = normSquared; } - logBound += logNormSquared; + + bound *= normSquared; } // Square-root - logBound /= 2.0; - minLogNorm /= 2.0; + bound = Givaro::sqrt(bound); } template - void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::BlackboxTag& tag) + void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::BlackboxTag& tag) { DenseMatrix ACopy(A); - HadamardColLogBound(logBound, minLogNorm, ACopy); + HadamardColBound(bound, minNormSquared, ACopy); } /** @@ -233,34 +234,35 @@ namespace LinBox { * The results are expressed as bit size. 
*/ template - HadamardLogBoundDetails DetailedHadamardBound(const IMatrix& A) + HadamarBoundDetails DetailedHadamardBound(const IMatrix& A) { - double rowLogBound = 0.0; - double rowMinLogNorm = 0.0; - HadamardRowLogBound(rowLogBound, rowMinLogNorm, A); - double rowLogBoundOverMinNorm = rowLogBound - rowMinLogNorm; + Integer rowBound; + Integer rowMinNormSquared; + HadamardRowBound(rowBound, rowMinNormSquared, A); + Integer rowBoundOverMinNorm = rowBound / Givaro::sqrt(rowMinNormSquared); #ifdef DEBUG_HADAMARD_BOUND - std::clog << "rowLogBound:=" << rowLogBound << ';' << std::endl; - std::clog << "rowMinLogNorm:=" << rowMinLogNorm << ';' << std::endl; - std::clog << "rowLogBoundOverMinNorm:=" << rowLogBoundOverMinNorm << ';' << std::endl; + std::clog << "rowBound:=" << rowBound << ';' << std::endl; + std::clog << "rowMinNormSquared:=" << rowMinNormSquared << ';' << std::endl; + std::clog << "rowBoundOverMinNorm:=" << rowBoundOverMinNorm << ';' << std::endl; #endif - double colLogBound = 0.0; - double colMinLogNorm = 0.0; - HadamardColLogBound(colLogBound, colMinLogNorm, A); - double colLogBoundOverMinNorm = colLogBound - colMinLogNorm; + Integer colBound; + Integer colMinNormSquared; + HadamardColBound(colBound, colMinNormSquared, A); + Integer colBoundOverMinNorm = colBound / Givaro::sqrt(colMinNormSquared); #ifdef DEBUG_HADAMARD_BOUND - std::clog << "colLogBound:=" << colLogBound << ';' << std::endl; - std::clog << "colMinLogNorm:=" << colMinLogNorm << ';' << std::endl; - std::clog << "colLogBoundOverMinNorm:=" << colLogBoundOverMinNorm << ';' << std::endl; + std::clog << "colBound:=" << colBound << ';' << std::endl; + std::clog << "colMinNormSquared:=" << colMinNormSquared << ';' << std::endl; + std::clog << "colBoundOverMinNorm:=" << colBoundOverMinNorm << ';' << std::endl; #endif - HadamardLogBoundDetails data; - data.logBound = std::min(rowLogBound, colLogBound); - data.logBoundOverMinNorm = std::min(rowLogBoundOverMinNorm, colLogBoundOverMinNorm); + 
HadamarBoundDetails data; + data.bound = (rowBound < colBound) ? rowBound : colBound; + data.boundOverMinNorm = + (rowBoundOverMinNorm < colBoundOverMinNorm) ? rowBoundOverMinNorm : colBoundOverMinNorm; #ifdef DEBUG_HADAMARD_BOUND - std::clog << "logBound:=" << data.logBound << ';' << std::endl; - std::clog << "logBoundOverMinNorm:=" << data.logBoundOverMinNorm << ';' << std::endl; + std::clog << "bound:=" << data.bound << ';' << std::endl; + std::clog << "boundOverMinNorm:=" << data.boundOverMinNorm << ';' << std::endl; #endif return data; @@ -277,13 +279,14 @@ namespace LinBox { template double HadamardBound(const IMatrix& A) { - return DetailedHadamardBound(A).logBound; + return DetailedHadamardBound(A).bound; } // ----- Fast Hadamard bound template - inline Integer& InfinityNorm(Integer& max, const IMatrix& A) { + inline Integer& InfinityNorm(Integer& max, const IMatrix& A) + { typename MatrixTraits::MatrixCategory tag; return InfinityNorm(max, A, tag); } @@ -299,7 +302,8 @@ namespace LinBox { } template - inline Integer& InfinityNorm(Integer& max, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) + inline Integer& InfinityNorm(Integer& max, const IMatrix& A, + const MatrixCategories::RowColMatrixTag& tag) { max = 0; for (auto it = A.Begin(); it != A.End(); ++it) { @@ -313,95 +317,96 @@ namespace LinBox { return max; } - /** - * Returns the bit size of the Hadamard bound. - * This is a larger estimation but faster to compute. - */ + /** + * Returns the bit size of the Hadamard bound. + * This is a larger estimation but faster to compute. 
+ */ template - inline double FastHadamardBound(const IMatrix& A, const Integer& infnorm) + inline double FastHadamardLogBound(const IMatrix& A, const Integer& infinityNorm) { - if (infnorm == 0) { + if (infinityNorm == 0) { return 0.0; } uint64_t n = std::max(A.rowdim(), A.coldim()); - double logBound = static_cast(n) * (Givaro::logtwo(n) / 2.0 + Givaro::logtwo(infnorm)); - return logBound; + double bound = + static_cast(n) * (Givaro::logtwo(n) / 2.0 + Givaro::logtwo(infinityNorm)); + return bound; } template - inline double FastHadamardBound(const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) + inline double FastHadamardLogBound(const IMatrix& A, + const MatrixCategories::RowColMatrixTag& tag) { - Integer infnorm; - InfinityNorm(infnorm, A, tag); - return FastHadamardBound(A, infnorm); + Integer infinityNorm; + InfinityNorm(infinityNorm, A, tag); + return FastHadamardLogBound(A, infinityNorm); } template - inline double FastHadamardBound(const IMatrix& A, const MatrixCategories::BlackboxTag& tag) + inline double FastHadamardLogBound(const IMatrix& A, const MatrixCategories::BlackboxTag& tag) { DenseMatrix ACopy(A); - return FastHadamardBound(ACopy); + return FastHadamardLogBound(ACopy); } template - inline double FastHadamardBound(const IMatrix& A) + inline double FastHadamardLogBound(const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - return FastHadamardBound(A, tag); + return FastHadamardLogBound(A, tag); } - /** - * Bound on the coefficients of the characteristic polynomial - * @bib "Efficient Computation of the Characteristic Polynomial". Dumas Pernet Wan ISSAC'05. - * - */ + /** + * Bound on the coefficients of the characteristic polynomial + * @bib "Efficient Computation of the Characteristic Polynomial". Dumas Pernet Wan ISSAC'05. 
+ * + */ template - inline double FastCharPolyDumasPernetWanBound(const IMatrix& A, const Integer& infnorm) + inline double FastCharPolyDumasPernetWanBound(const IMatrix& A, const Integer& infinityNorm) { - // .105815875 = 0.21163275 / 2 - return FastHadamardBound(A, infnorm) + A.coldim()*.105815875; + // .105815875 = 0.21163275 / 2 + return FastHadamardLogBound(A, infinityNorm) + A.coldim() * .105815875; } - /** - * A.J. Goldstein et R.L. Graham. - * A Hadamard-type bound on the coefficients of - * a determinant of polynomials. - * SIAM Review, volume 15, 1973, pages 657-658. - * - */ + /** + * A.J. Goldstein et R.L. Graham. + * A Hadamard-type bound on the coefficients of + * a determinant of polynomials. + * SIAM Review, volume 15, 1973, pages 657-658. + * + */ template - inline double FastCharPolyGoldsteinGrahamBound(const IMatrix& A, const Integer& infnorm) + inline double FastCharPolyGoldsteinGrahamBound(const IMatrix& A, const Integer& infinityNorm) { - Integer ggb(infnorm); + Integer ggb(infinityNorm); ggb *= static_cast(A.coldim()); ggb += 2; - ggb *= infnorm; + ggb *= infinityNorm; ++ggb; - return Givaro::logtwo(ggb)*A.coldim()/2.0; + return Givaro::logtwo(ggb) * A.coldim() / 2.0; } template inline double FastCharPolyHadamardBound(const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - Integer infnorm; - InfinityNorm(infnorm, A, tag); - const double DPWbound = FastCharPolyDumasPernetWanBound(A, infnorm); - const double GGbound = FastCharPolyGoldsteinGrahamBound(A, infnorm); + Integer infinityNorm; + InfinityNorm(infinityNorm, A, tag); + const double DPWbound = FastCharPolyDumasPernetWanBound(A, infinityNorm); + const double GGbound = FastCharPolyGoldsteinGrahamBound(A, infinityNorm); #ifdef DEBUG_HADAMARD_BOUND std::clog << "DPWbound: " << DPWbound << std::endl; std::clog << "GGbound : " << GGbound << std::endl; #endif - return std::min(DPWbound,GGbound); + return std::min(DPWbound, GGbound); } - // ----- Rational solve bound struct 
RationalSolveHadamardBoundData { - double numLogBound; // log2(N) - double denLogBound; // log2(D) + Integer numBound; // N + Integer denBound; // D double solutionLogBound; // log2(2 * N * D) }; @@ -413,30 +418,38 @@ namespace LinBox { * @note Matrix and Vector should be over Integer. */ template - typename std::enable_if::categoryTag, RingCategories::IntegerTag>::value, + typename std::enable_if::categoryTag, + RingCategories::IntegerTag>::value, RationalSolveHadamardBoundData>::type RationalSolveHadamardBound(const Matrix& A, const Vector& b) { RationalSolveHadamardBoundData data; auto hadamardBound = DetailedHadamardBound(A); - double bLogNorm; - vectorLogNorm(bLogNorm, b.begin(), b.end()); + Integer bNormSquared; + vectorNormSquared(bNormSquared, b.begin(), b.end()); - data.numLogBound = hadamardBound.logBoundOverMinNorm + bLogNorm; - data.denLogBound = hadamardBound.logBound; - data.solutionLogBound = 1.0 + data.numLogBound + data.denLogBound; // log2(2 * N * D) + data.denBound = hadamardBound.bound; + data.numBound = hadamardBound.boundOverMinNorm * Givaro::sqrt(bNormSquared); + if (data.denBound == 0 || data.numBound == 0) { + data.solutionLogBound = 0.0; + } + else { + data.solutionLogBound = 1.0 + Givaro::logtwo(data.numBound) + + Givaro::logtwo(data.denBound); // log2(2 * N * D) + } #ifdef DEBUG_HADAMARD_BOUND - std::clog << "numLogBound:=" << data.numLogBound << ';' << std::endl; - std::clog << "denLogBound:=" << data.denLogBound << ';' << std::endl; + std::clog << "numBound:=" << data.numBound << ';' << std::endl; + std::clog << "denBound:=" << data.denBound << ';' << std::endl; #endif return data; } /// @fixme Needed to solve-cra.h, but can't be used yet. 
template - typename std::enable_if::categoryTag, RingCategories::RationalTag>::value, + typename std::enable_if::categoryTag, + RingCategories::RationalTag>::value, RationalSolveHadamardBoundData>::type RationalSolveHadamardBound(const Matrix& A, const Vector& b) { diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index f4a4837cb..3154c1915 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -25,65 +25,6 @@ #include namespace LinBox { - // @todo @cleanup Move that somewhere inside Givaro? - // Find the closest upper bound Integer that satisfies 2 ^ exponent. - // This is done by dichotomy, going from floor to ceil. - Integer twoPower(double exponent) - { - // @note Is the exponent is small, we will be extra precise, - // otherwise, we over estimate the exponent a bit, - // so that results are all right with rational reconstruction. - // The reason being that RR does has to be very precise for small - // values so that it does not go too far. - // And, RR also need to go far enough, the exponent not being very precise - // for big values. - // @fixme This is hard-coded... That's sad. What does this mean really? - if (exponent > 20.) { - exponent *= 1.0001; - } - - Integer min = (Integer(1) << uint64_t(std::floor(exponent))); - Integer max = (Integer(1) << uint64_t(std::ceil(exponent))); - - // To keep full precision, we do not divide by two here, - // but just the computed exponent. 
- Integer target = min + max; - Integer lastKnownTarget = target; - double targetExponent = 0.0; - - while (min < max) { - targetExponent = Givaro::logtwo(target) - 1; - if (targetExponent > exponent) { - max = (target + 1) / 2; - } - else if (targetExponent < exponent) { - min = target / 2; - } - else { - break; - } - - target = min + max; - - // Get out if we're lock in an infinite loop - if (lastKnownTarget == target) { - break; - } - lastKnownTarget = target; - } - - // Find the smallest value that satisfies the upper - // evaluation of the exponent. - if (Givaro::logtwo(min) >= exponent) { - return min; - } else if (Givaro::logtwo(target / 2) >= exponent) { - return target / 2; - } - else { - return max; - } - } - /** * From a MultiModLiftingContainer, will build * the solution on each prime, then will do a CRT reconstruction, @@ -105,6 +46,15 @@ namespace LinBox { bool getRational(IVector& xNum, IElement& xDen) { + // Early out when the numerator is bounded by zero. + if (_lc.numBound() == 0) { + for (auto i = 0u; i < _lc.length(); ++i) { + _lc.ring().assign(xNum[i], _lc.ring().zero); + } + _lc.ring().assign(xDen, _lc.ring().one); + return true; + } + VectorDomain IVD(_lc.ring()); // Stores each c0 + c1 pj + ... + ck pj^k for each pj @@ -127,6 +77,9 @@ namespace LinBox { for (auto j = 0u; j < _lc.primesCount(); ++j) { IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci _lc.ring().mulin(radices[j], _lc.prime(j)); + auto xxx = (_lc._A.getEntry(0, 0) * padicAccumulations[j][0] - _lc._b[0]) % radices[j]; + // std::cout << "xxx " << j << "." << i << " " << _lc._A.getEntry(0, 0) << " * " << padicAccumulations[j][0] << " - " << _lc._b[0] << " mod " << radices[j] << std::endl; + std::cout << "xxx " << j << "." 
<< i << " " << xxx << std::endl; } } @@ -145,17 +98,9 @@ namespace LinBox { craBuilder.progress(field, padicAccumulations[j]); } - for (auto j = 0u; j < _lc.primesCount(); ++j) { - auto Cj = padicAccumulations[j]; - auto xxx = (_lc._A.getEntry(0, 0) * Cj[0] - _lc._b[0]) % radices[j]; - std::cout << "xxx " << j << " " << xxx << std::endl; - } - // Rational reconstruction - Integer numBound = twoPower(_lc.log2NumBound()); - Integer denBound = twoPower(_lc.log2DenBound()); - - craBuilder.result(xNum, xDen, numBound, denBound); + // @note RR expects the bounds to be strict, this is why we add a + 1 + craBuilder.result(xNum, xDen, _lc.numBound() + 1, _lc.denBound() + 1); return true; } @@ -195,8 +140,8 @@ namespace LinBox { } // #ifdef DEBUG_HADAMARD_BOUND - std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << ';' << std::endl; - std::clog << "denLog " << Givaro::logtwo(xDen) << ';' << std::endl; + std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << " " << xNum[0] << ';' << std::endl; + std::clog << "denLog " << Givaro::logtwo(xDen) << " " << xDen << ';' << std::endl; // #endif } @@ -219,7 +164,7 @@ namespace LinBox { // implicitly requiring 0-{p-1} representation of the p-adic sequence elements. using Field = Givaro::Modular; using PrimeGenerator = PrimeIterator; - PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); + PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim()), 12); // @fixme REMOVE SEED DixonRNSSolver solver(A.field(), primeGenerator); solver.solve(xNum, xDen, A, b, m); From d614db18e40550a1345ed577267804b04e018471 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 19 Jun 2019 15:52:26 +0200 Subject: [PATCH 31/63] Fixed upstream problem by adding more primes to the RNS base. Fixed Hadamard bound. 
--- .../algorithms/multi-mod-lifting-container.h | 41 ++++------ .../rational-cra-builder-full-multip.h | 12 +-- linbox/solutions/hadamard-bound.h | 81 +++++++++++-------- linbox/solutions/solve/solve-dixon-rns.h | 11 +-- 4 files changed, 73 insertions(+), 72 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 910110d03..4fb588f3d 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -110,15 +110,18 @@ namespace LinBox { Integer infinityNormA; InfinityNorm(infinityNormA, A); double logInfinityNormA = Givaro::logtwo(infinityNormA); + std::cout << "infinityNormA: " << infinityNormA << std::endl; + std::cout << "logInfinityNormA: " << logInfinityNormA << std::endl; { // Based on Chen-Storjohann's paper, this is the bit size // of the needed RNS basis for the residue computation - double rnsBasisBitSize = (logInfinityNormA + Givaro::logtwo(_n)); - _rnsPrimesCount = std::ceil(rnsBasisBitSize / primeGenerator.getBits()); + double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n)); // @fixme @jgdumas Is this OK, then? + _rnsPrimesCount = std::ceil(rnsBasisBitSize / (primeGenerator.getBits() - 1)); _rnsPrimes.resize(_rnsPrimesCount); std::cout << "primeGenerator.getBits(): " << primeGenerator.getBits() << std::endl; - std::cout << "rnsBasisPrimesCount: " << _rnsPrimesCount << std::endl; + std::cout << "rnsBasisBitSize: " << rnsBasisBitSize << std::endl; + std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl; std::vector primes; for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) { @@ -148,12 +151,13 @@ namespace LinBox { // as the first count was just an upper estimation. 
double bitSize = 0.0; for (int h = _rnsPrimes.size() - 1; h >= 0; --h) { - bitSize += Givaro::logtwo(primes[h]); + bitSize += Givaro::logtwo(_rnsPrimes[h]); if (bitSize > rnsBasisBitSize && h > 0) { - _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + (h - 1)); + _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + h); _rnsPrimesCount -= h; std::cout << "RNS basis: Erasing extra " << h << " primes." << std::endl; + std::cout << _rnsPrimes.size() << std::endl; break; } } @@ -325,8 +329,7 @@ namespace LinBox { } // Convert R to the field - // @fixme @cpernet Could this step be ignored? - // If not, put that in already allocated memory, and not use a temporary here. + // @fixme Put that FVector in already allocated memory, and not use a temporary here. auto& F = _fields[j]; FVector FR(F, R); // rebind @@ -338,9 +341,6 @@ namespace LinBox { // would do the trick digits[j] = IVector(_ring, Fc); - // auto ooo = (_A.getEntry(0, 0) * Integer(digits[j][0]) - r[0]) % Integer(pj); - // std::cout << "ooo " << j << " " << ooo << std::endl; - // Store the very same result in an RNS system, // but fact is all the primes of the RNS system are bigger // than the modulus used to compute _Fc, we just copy the result for everybody. @@ -378,22 +378,13 @@ namespace LinBox { auto& r = _r[j]; auto& Q = _Q[j]; - // std::cout << "old r" << j << " " << r[0] << std::endl; - // std::cout << "r" << j << " " << (r[0] - _A.getEntry(0, 0) * Integer(_Fc[j][0])) / Integer(_primes[j]) << " expected" << std::endl; - // r <- (R - Ac) / p // @fixme @cpernet Don't know how to do that with one fconvert_rns! 
for (auto i = 0u; i < _n; ++i) { FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _primesCount + j)); } - // r <- Q + (R - Ac) / p - // std::cout << "p" << j << " " << Integer(_primes[j]) << std::endl; - // std::cout << "c" << j << " " << Integer(_Fc[j][0]) << std::endl; - IVD.addin(r, Q); - - // std::cout << "r" << j << " " << r[0] << std::endl; } ++_position; @@ -403,21 +394,23 @@ namespace LinBox { private: // Helper function, setting all residues of a matrix element to the very same value. // This doesn't check the moduli. - void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j, + inline void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j, double value) { - auto stride = A[i * lda + j]._stride; + auto& Aij = A[i * lda + j]; + auto stride = Aij._stride; for (auto h = 0u; h < _rnsPrimesCount; ++h) { - A[i * lda + j]._ptr[h * stride] = value; + Aij._ptr[h * stride] = value; } } - void logRNSMatrixElement(RNSElementPtr& A, size_t lda, size_t i, size_t j) + inline void logRNSMatrixElement(RNSElementPtr& A, size_t lda, size_t i, size_t j) { + auto& Aij = A[i * lda + j]; Integer reconstructedInteger; FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &reconstructedInteger, 1, A + (i * lda + j)); std::cout << i << " " << j << " "; - _rnsDomain->write(std::cout, A[i * lda + j]); + _rnsDomain->write(std::cout, Aij); std::cout << " -> " << reconstructedInteger << std::endl; } diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h index fb62b3941..6359c694e 100644 --- a/linbox/algorithms/rational-cra-builder-full-multip.h +++ b/linbox/algorithms/rational-cra-builder-full-multip.h @@ -66,17 +66,14 @@ namespace LinBox } template - Vect& result (Vect &num, Integer& den, const Integer& numBound, const Integer& denBound) + Vect& result (Vect &num, Integer& den, const Integer& numBound) { - // std::cout << "numBound " << numBound << std::endl; 
- // std::cout << "denBound " << denBound << std::endl; - Father_t::result(num, false); den = 1; const auto& mod = Father_t::getModulus(); Integer nd; for (auto num_it = num.begin(); num_it != num.end(); ++num_it) { - iterativeratrecon(*num_it, nd, den, mod, numBound, denBound); + iterativeratrecon(*num_it, nd, den, mod, numBound); if (nd > 1) { for (auto t02 = num.begin(); t02 != num_it; ++t02) @@ -88,11 +85,10 @@ namespace LinBox } protected: - Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn, const Integer& sd) + Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn) { // @note This interface of the rational does the RatRecon. - Givaro::Rational myRational(u1 *= old_den, m1, sn, false); - + Givaro::Rational myRational(Integer::modin(u1 *= old_den, m1), m1, sn); u1 = myRational.nume(); new_den = myRational.deno(); return u1; diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h index 891dbcd5f..00fe6e92e 100644 --- a/linbox/solutions/hadamard-bound.h +++ b/linbox/solutions/hadamard-bound.h @@ -73,18 +73,17 @@ namespace LinBox { * the row-wise euclidean norm. 
*/ template - void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A) + void HadamardRowBound(Integer& bound, const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - HadamardRowBound(bound, minNormSquared, A, tag); + HadamardRowBound(bound, A, tag); } template - void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + void HadamardRowBound(Integer& bound, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) { bound = 1; - minNormSquared = -1; for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) { Integer rowNormSquared; @@ -92,26 +91,25 @@ namespace LinBox { if (rowNormSquared == 0) { bound = 0; - minNormSquared = 0; return; } - if (minNormSquared < 0 || rowNormSquared < minNormSquared) { - minNormSquared = rowNormSquared; - } - bound *= rowNormSquared; } - bound = Givaro::sqrt(bound); + // Square-root (upper bound) + Integer rem; + bound = Givaro::sqrtrem(bound, rem); + if (rem != 0) { + bound += 1; + } } template - void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + void HadamardRowBound(Integer& bound, const IMatrix& A, const MatrixCategories::RowMatrixTag& tag) { bound = 1; - minNormSquared = -1; for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) { Integer normSquared = 0; @@ -121,26 +119,26 @@ namespace LinBox { if (normSquared == 0) { bound = 0; - minNormSquared = 0; return; } - if (minNormSquared < 0 || normSquared < minNormSquared) { - minNormSquared = normSquared; - } - bound *= normSquared; } - bound = Givaro::sqrt(bound); + // Square-root (upper bound) + Integer rem; + bound = Givaro::sqrtrem(bound, rem); + if (rem != 0) { + bound += 1; + } } template - void HadamardRowBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + void HadamardRowBound(Integer& bound, const IMatrix& A, const MatrixCategories::BlackboxTag& tag) { DenseMatrix ACopy(A); - HadamardRowBound(bound, minNormSquared, ACopy); + HadamardRowBound(bound, 
ACopy); } /** @@ -181,7 +179,12 @@ namespace LinBox { bound *= colNormSquared; } - bound = Givaro::sqrt(bound); + // Square-root (upper bound) + Integer rem; + bound = Givaro::sqrtrem(bound, rem); + if (rem != 0) { + bound += 1; + } } template @@ -215,8 +218,12 @@ namespace LinBox { bound *= normSquared; } - // Square-root - bound = Givaro::sqrt(bound); + // Square-root (upper bound) + Integer rem; + bound = Givaro::sqrtrem(bound, rem); + if (rem != 0) { + bound += 1; + } } template @@ -236,20 +243,24 @@ namespace LinBox { template HadamarBoundDetails DetailedHadamardBound(const IMatrix& A) { + // @note We can't use the rowBoundOverMinNorm because + // the rational solve Hadamard bound uses it for the numerator bound. + Integer rowBound; - Integer rowMinNormSquared; - HadamardRowBound(rowBound, rowMinNormSquared, A); - Integer rowBoundOverMinNorm = rowBound / Givaro::sqrt(rowMinNormSquared); + HadamardRowBound(rowBound, A); #ifdef DEBUG_HADAMARD_BOUND std::clog << "rowBound:=" << rowBound << ';' << std::endl; - std::clog << "rowMinNormSquared:=" << rowMinNormSquared << ';' << std::endl; - std::clog << "rowBoundOverMinNorm:=" << rowBoundOverMinNorm << ';' << std::endl; #endif + Integer rem; Integer colBound; Integer colMinNormSquared; HadamardColBound(colBound, colMinNormSquared, A); - Integer colBoundOverMinNorm = colBound / Givaro::sqrt(colMinNormSquared); + Integer colBoundOverMinNorm; + Integer::divmod(colBoundOverMinNorm, rem, colBound, Givaro::sqrt(colMinNormSquared)); + if (rem != 0) { + colBoundOverMinNorm += 1; + } #ifdef DEBUG_HADAMARD_BOUND std::clog << "colBound:=" << colBound << ';' << std::endl; std::clog << "colMinNormSquared:=" << colMinNormSquared << ';' << std::endl; @@ -258,8 +269,7 @@ namespace LinBox { HadamarBoundDetails data; data.bound = (rowBound < colBound) ? rowBound : colBound; - data.boundOverMinNorm = - (rowBoundOverMinNorm < colBoundOverMinNorm) ? 
rowBoundOverMinNorm : colBoundOverMinNorm; + data.boundOverMinNorm = colBoundOverMinNorm; #ifdef DEBUG_HADAMARD_BOUND std::clog << "bound:=" << data.bound << ';' << std::endl; std::clog << "boundOverMinNorm:=" << data.boundOverMinNorm << ';' << std::endl; @@ -429,8 +439,15 @@ namespace LinBox { Integer bNormSquared; vectorNormSquared(bNormSquared, b.begin(), b.end()); + // Square-root of bNormSquared (upper bound) + Integer rem; + Integer bNorm = Givaro::sqrtrem(bNormSquared, rem); + if (rem != 0) { + bNorm += 1; + } + data.denBound = hadamardBound.bound; - data.numBound = hadamardBound.boundOverMinNorm * Givaro::sqrt(bNormSquared); + data.numBound = hadamardBound.boundOverMinNorm * bNorm; if (data.denBound == 0 || data.numBound == 0) { data.solutionLogBound = 0.0; } diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 3154c1915..a3130dacb 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -77,9 +77,6 @@ namespace LinBox { for (auto j = 0u; j < _lc.primesCount(); ++j) { IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci _lc.ring().mulin(radices[j], _lc.prime(j)); - auto xxx = (_lc._A.getEntry(0, 0) * padicAccumulations[j][0] - _lc._b[0]) % radices[j]; - // std::cout << "xxx " << j << "." << i << " " << _lc._A.getEntry(0, 0) << " * " << padicAccumulations[j][0] << " - " << _lc._b[0] << " mod " << radices[j] << std::endl; - std::cout << "xxx " << j << "." << i << " " << xxx << std::endl; } } @@ -100,7 +97,7 @@ namespace LinBox { // Rational reconstruction // @note RR expects the bounds to be strict, this is why we add a + 1 - craBuilder.result(xNum, xDen, _lc.numBound() + 1, _lc.denBound() + 1); + craBuilder.result(xNum, xDen, _lc.numBound() + 1); return true; } @@ -139,10 +136,8 @@ namespace LinBox { std::cerr << "OUCH!" 
<< std::endl; } - // #ifdef DEBUG_HADAMARD_BOUND - std::clog << "numLog " << Givaro::logtwo(Givaro::abs(xNum[0])) << " " << xNum[0] << ';' << std::endl; - std::clog << "denLog " << Givaro::logtwo(xDen) << " " << xDen << ';' << std::endl; - // #endif + std::cout << "numLog " << xNum << std::endl; + std::cout << "denLog " << xDen << std::endl; } private: From 179f5776815c7a8ab5dbf78dec299095f63831bb Mon Sep 17 00:00:00 2001 From: "A. Breust" Date: Thu, 20 Jun 2019 11:02:43 +0200 Subject: [PATCH 32/63] Added DixonRNS to benchmark-dense-solve --- benchmarks/benchmark-dense-solve.C | 1 + linbox/algorithms/lifting-container.h | 6 +- .../algorithms/multi-mod-lifting-container.h | 55 ++++++------------- linbox/solutions/methods.h | 2 +- linbox/solutions/solve/solve-dixon-rns.h | 3 - tests/test-solve-full.C | 6 ++ 6 files changed, 28 insertions(+), 45 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 504cb69e0..a678c52f0 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -114,6 +114,7 @@ void benchmark(std::pair& timebits, Arguments& args, MethodBase& else if (args.methodString == "DenseElimination") solve(X, A, B, Method::DenseElimination(method)); else if (args.methodString == "SparseElimination") solve(X, A, B, Method::SparseElimination(method)); else if (args.methodString == "Dixon") solve(X, A, B, Method::Dixon(method)); + else if (args.methodString == "DixonRNS") solve(X, A, B, Method::DixonRNS(method)); else if (args.methodString == "CRA") solve(X, A, B, Method::CRAAuto(method)); else if (args.methodString == "SymbolicNumericOverlap") solve(X, A, B, Method::SymbolicNumericOverlap(method)); else if (args.methodString == "SymbolicNumericNorm") solve(X, A, B, Method::SymbolicNumericNorm(method)); diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h index 81c992bc5..01d7d5260 100644 --- a/linbox/algorithms/lifting-container.h +++ 
b/linbox/algorithms/lifting-container.h @@ -153,13 +153,13 @@ namespace LinBox this->_intRing.convert(Prime,_p); auto hb = RationalSolveHadamardBound(A, b); - N = Integer(1) << static_cast(std::ceil(hb.numLogBound)); - D = Integer(1) << static_cast(std::ceil(hb.denLogBound)); + N = hb.numBound; + D = hb.denBound; // L = N * D * 2 // _length = logp(L, Prime) = log2(L) * ln(2) / ln(Prime) double primeLog2 = Givaro::logtwo(Prime); - _length = std::ceil((1 + hb.numLogBound + hb.denLogBound) / primeLog2); // round up instead of down + _length = std::ceil(hb.solutionLogBound / primeLog2); // round up instead of down #ifdef DEBUG_LC std::cout<<" norms computed, p = "<<_p<<"\n"; std::cout<<" N = "< primes; for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) { @@ -156,25 +150,15 @@ namespace LinBox { if (bitSize > rnsBasisBitSize && h > 0) { _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + h); _rnsPrimesCount -= h; - std::cout << "RNS basis: Erasing extra " << h << " primes." << std::endl; std::cout << _rnsPrimes.size() << std::endl; break; } } } - // Generating primes - // @fixme Cleanup, might not be needed - { - IElement iTmp; - _ring.assign(_primesProduct, _ring.one); - for (auto& pj : _primes) { - _fields.emplace_back(pj); - _ring.init(iTmp, pj); - _ring.mulin(_primesProduct, iTmp); - } - - std::cout << "primesProduct: " << _primesProduct << std::endl; + // Setting fields up + for (auto& pj : _primes) { + _fields.emplace_back(pj); } // Initialize all inverses @@ -228,19 +212,18 @@ namespace LinBox { // Compute how many iterations are needed { + double log2PrimesProduct = 0.0; + for (auto& pj : _primes) { + log2PrimesProduct += Givaro::logtwo(Integer(pj)); + } + auto hb = RationalSolveHadamardBound(A, b); - double log2P = Givaro::logtwo(_primesProduct); - // _iterationsCount = log2(2 * N * D) / log2(p) _log2Bound = hb.solutionLogBound; _numBound = hb.numBound; _denBound = hb.denBound; - std::cout << "_log2Bound: " << _log2Bound << std::endl; - std::cout 
<< "_numBound: " << _numBound << std::endl; - std::cout << "_denBound: " << _denBound << std::endl; - std::cout << "log2P: " << log2P << std::endl; - _iterationsCount = std::ceil(_log2Bound / log2P); - std::cout << "iterationsCount: " << _iterationsCount << std::endl; + // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...) + _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct); } //----- Locals setup @@ -282,11 +265,8 @@ namespace LinBox { /// The dimension of the problem/solution. size_t size() const final { return _n; } - /** - * We are compliant to the interface even though - * p is multi-modular and thus not a prime per se. - */ - const IElement& prime() const final { return _primesProduct; } + /// @note Useless, but in the API. + const IElement& prime() const final { return _ring.one; } // ------------------------------ // ----- NOT LiftingContainer API @@ -435,7 +415,6 @@ namespace LinBox { // Stores the inverse of pj of the i-th RNS prime into _primesRNSInverses[j][i] std::vector> _primesRNSInverses; - IElement _primesProduct; // The global modulus for lifting: a multiple of all _primes. std::vector _primes; // @fixme We might want something else as a type! std::vector _rnsPrimes; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 0fe110c8e..1de057ec2 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -221,7 +221,7 @@ namespace LinBox { SingularSolutionType singularSolutionType = SingularSolutionType::Random; // ----- For DixonRNS method. - uint32_t primeBaseLength = 16u; //!< How many primes to use lifting will be done over p = p1p2...pl. + uint32_t primesCount = 16u; //!< How many primes to use lifting will be done over p = p1p2...pl. // ----- For random-based systems. size_t trialsBeforeFailure = LINBOX_DEFAULT_TRIALS_BEFORE_FAILURE; //!< Maximum number of trials before giving up. 
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index a3130dacb..58da58468 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -135,9 +135,6 @@ namespace LinBox { if (!re.getRational(xNum, xDen)) { std::cerr << "OUCH!" << std::endl; } - - std::cout << "numLog " << xNum << std::endl; - std::cout << "denLog " << xDen << std::endl; } private: diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 363e0a781..e1d993223 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -205,6 +205,7 @@ int main(int argc, char** argv) Integer q = 131071; bool verbose = false; bool loop = false; + int primesCount = -1; int seed = -1; int bitSize = 10; int vectorBitSize = -1; @@ -214,6 +215,7 @@ int main(int argc, char** argv) static Argument args[] = { {'q', "-q", "Field characteristic.", TYPE_INTEGER, &q}, + {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &primesCount}, {'v', "-v", "Enable verbose mode.", TYPE_BOOL, &verbose}, {'l', "-l", "Infinite loop of tests.", TYPE_BOOL, &loop}, {'s', "-s", "Seed for randomness.", TYPE_INT, &seed}, @@ -244,6 +246,10 @@ int main(int argc, char** argv) return EXIT_FAILURE; } + if (primesCount > 0) { + method.primesCount = primesCount; + } + if (vectorBitSize < 0) { vectorBitSize = bitSize; } From 790f82da6e97bd52fa226fbc81b6e4d1f24baff4 Mon Sep 17 00:00:00 2001 From: "A. 
Breust" Date: Thu, 20 Jun 2019 17:49:39 +0200 Subject: [PATCH 33/63] Instrumented for precise timings --- linbox/algorithms/multi-mod-lifting-container.h | 15 ++++++++++++--- linbox/solutions/solve/solve-dixon-rns.h | 9 +++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 0469baada..b6080ab0d 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -150,7 +150,6 @@ namespace LinBox { if (bitSize > rnsBasisBitSize && h > 0) { _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + h); _rnsPrimesCount -= h; - std::cout << _rnsPrimes.size() << std::endl; break; } } @@ -291,7 +290,10 @@ namespace LinBox { { VectorDomain IVD(_ring); + commentator().start("[MultiModLifting] nextDigit"); + // @fixme Should be done in parallel! + commentator().start("[MultiModLifting] Computing c"); for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; auto& r = _r[j]; @@ -329,6 +331,7 @@ namespace LinBox { setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, Fc[i]); } } + commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); // ----- Compute the next residues! @@ -336,12 +339,15 @@ namespace LinBox { // By first computing R <= R - A c as a fgemm within the RNS domain. // @fixme Use parallel helper! + commentator().start("[MultiModLifting] FGEMM R <= R - Ac"); FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, _rnsR, _primesCount); + commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); // We divide each residues by the according pj, which is done by multiplying. // @fixme Could be done in parallel! 
+ commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); for (auto j = 0u; j < _primesCount; ++j) { for (auto i = 0u; i < _n; ++i) { auto& rnsElement = _rnsR[i * _primesCount + j]; @@ -352,8 +358,10 @@ namespace LinBox { } } } + commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); // @fixme Could be done in parallel! + commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); for (auto j = 0u; j < _primesCount; ++j) { auto& r = _r[j]; auto& Q = _Q[j]; @@ -366,8 +374,10 @@ namespace LinBox { IVD.addin(r, Q); } + commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + + commentator().stop("[MultiModLifting] nextDigit"); - ++_position; return true; } @@ -433,6 +443,5 @@ namespace LinBox { // HAS TO BE A MATRIX for gemm. std::vector _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later. - size_t _position; }; } diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 58da58468..541cec859 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -55,6 +55,7 @@ namespace LinBox { return true; } + commentator().start("[MultiModLifting] Lifting"); VectorDomain IVD(_lc.ring()); // Stores each c0 + c1 pj + ... 
+ ck pj^k for each pj @@ -79,8 +80,10 @@ namespace LinBox { _lc.ring().mulin(radices[j], _lc.prime(j)); } } + commentator().stop("[MultiModLifting] Lifting"); // CRT reconstruction from paddicAccumulations + commentator().start("[MultiModLifting] CRT Reconstruction"); using CRAField = Givaro::Modular; RationalCRABuilderFullMultip craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2) @@ -94,10 +97,13 @@ namespace LinBox { CRAField field(radices[j]); craBuilder.progress(field, padicAccumulations[j]); } + commentator().stop("[MultiModLifting] CRT Reconstruction"); // Rational reconstruction // @note RR expects the bounds to be strict, this is why we add a + 1 + commentator().start("[MultiModLifting] Rational Reconstruction"); craBuilder.result(xNum, xDen, _lc.numBound() + 1); + commentator().stop("[MultiModLifting] Rational Reconstruction"); return true; } @@ -129,8 +135,11 @@ namespace LinBox { linbox_check(A.rowdim() == A.coldim()); using LiftingContainer = MultiModLiftingContainer; + + commentator().start("[MultiModLifting] Init"); LiftingContainer lc(_ring, _primeGenerator, A, b, m); MultiModRationalReconstruction re(lc); + commentator().stop("[MultiModLifting] Init"); if (!re.getRational(xNum, xDen)) { std::cerr << "OUCH!" 
<< std::endl; From 7d161c7f73c98832b35219916e3c4087773e30b2 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 21 Jun 2019 15:08:14 +0200 Subject: [PATCH 34/63] Parallel convert + fgemm --- linbox/algorithms/lifting-container.h | 1 + .../algorithms/multi-mod-lifting-container.h | 73 ++++++++++--------- .../rational-cra-builder-full-multip.h | 8 +- linbox/solutions/solve/solve-dixon-rns.h | 8 +- tests/test-solve-full.C | 6 +- 5 files changed, 55 insertions(+), 41 deletions(-) diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h index 19370890d..9664f75f4 100644 --- a/linbox/algorithms/lifting-container.h +++ b/linbox/algorithms/lifting-container.h @@ -162,6 +162,7 @@ namespace LinBox // _length = logp(L, Prime) = log2(L) * ln(2) / ln(Prime) double primeLog2 = Givaro::logtwo(Prime); _length = std::ceil(hb.solutionLogBound / primeLog2); // round up instead of down + std::cout << "_length "<< _length << std::endl; #ifdef DEBUG_LC std::cout<<" norms computed, p = "<<_p<<"\n"; std::cout<<" N = "< IVD(_ring); - commentator().start("[MultiModLifting] nextDigit"); - - // @fixme Should be done in parallel! - commentator().start("[MultiModLifting] Computing c"); + // commentator().start("[MultiModLifting] Computing c"); + #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; auto& r = _r[j]; @@ -303,25 +304,22 @@ namespace LinBox { // @note There is no VectorDomain::divmod yet. // Euclidian division so that rj = pj Qj + Rj for (auto i = 0u; i < _n; ++i) { - // @fixme @cpernet Is this OK for any Ring or should we be sure we are using - // Integers? _ring.quoRem(Q[i], R[i], r[i], pj); - // std::cout << "Q" << j << " " << Q[i] << std::endl; - // std::cout << "R" << j << " " << R[i] << std::endl; } // Convert R to the field - // @fixme Put that FVector in already allocated memory, and not use a temporary here. 
auto& F = _fields[j]; - FVector FR(F, R); // rebind - + auto& FR = _FR[j]; + auto& digit = digits[j]; auto& B = _B[j]; auto& Fc = _Fc[j]; + // @fixme Am I copying the data an extra time? + FR = FVector(F, R); // rebind B.apply(Fc, FR); // @fixme We might not need to store digits into IVectors, and returning _Fc // would do the trick - digits[j] = IVector(_ring, Fc); + digit = IVector(_ring, Fc); // Store the very same result in an RNS system, // but fact is all the primes of the RNS system are bigger @@ -331,23 +329,32 @@ namespace LinBox { setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, Fc[i]); } } - commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); + // commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); // ----- Compute the next residues! // r <= Q + (R - A c) / p // By first computing R <= R - A c as a fgemm within the RNS domain. - // @fixme Use parallel helper! - commentator().start("[MultiModLifting] FGEMM R <= R - Ac"); - FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, - _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, - _rnsR, _primesCount); - commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); + PAR_BLOCK + { + using RNSParallel = FFLAS::ParSeqHelper::Parallel; + using FGEMMSequential = FFLAS::ParSeqHelper::Sequential; + using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; + using MMHelper = FFLAS::MMHelper; + ComposedParSeqHelper composedParSeqHelper(4, 4); + MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); + + FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, + _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, + _rnsR, _primesCount, mmHelper); + } + // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); // We divide each residues by the according pj, which is done by multiplying. // @fixme Could be done in parallel! 
- commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); + // @fixme @cpernet Don't know why, can't make it parallel! + // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); for (auto j = 0u; j < _primesCount; ++j) { for (auto i = 0u; i < _n; ++i) { auto& rnsElement = _rnsR[i * _primesCount + j]; @@ -358,10 +365,10 @@ namespace LinBox { } } } - commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); + // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); - // @fixme Could be done in parallel! - commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto& r = _r[j]; auto& Q = _Q[j]; @@ -369,14 +376,13 @@ namespace LinBox { // r <- (R - Ac) / p // @fixme @cpernet Don't know how to do that with one fconvert_rns! for (auto i = 0u; i < _n; ++i) { - FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, _rnsR + (i * _primesCount + j)); + FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, + _rnsR + (i * _primesCount + j)); } IVD.addin(r, Q); } - commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); - - commentator().stop("[MultiModLifting] nextDigit"); + // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); return true; } @@ -385,7 +391,7 @@ namespace LinBox { // Helper function, setting all residues of a matrix element to the very same value. // This doesn't check the moduli. inline void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j, - double value) + double value) { auto& Aij = A[i * lda + j]; auto stride = Aij._stride; @@ -439,9 +445,8 @@ namespace LinBox { std::vector _r; // @todo Could be a matrix? Might not be useful, as it is never // used directly in computations. std::vector _Q; - std::vector _R; // @fixme This one should be expressed in a RNS system q, and - // HAS TO BE A MATRIX for gemm. 
- std::vector - _Fc; // @note No need to be a matrix, as we will embed it into an RNS system later. + std::vector _R; // Will be inited to RNS within _rnsR + std::vector _Fc; + std::vector _FR; }; } diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h index 6359c694e..6a191bf6f 100644 --- a/linbox/algorithms/rational-cra-builder-full-multip.h +++ b/linbox/algorithms/rational-cra-builder-full-multip.h @@ -68,7 +68,11 @@ namespace LinBox template Vect& result (Vect &num, Integer& den, const Integer& numBound) { + commentator().start("[RationalCRABuilderFullMultip] CRT Reconstruction"); Father_t::result(num, false); + commentator().stop("[RationalCRABuilderFullMultip] CRT Reconstruction"); + + commentator().start("[RationalCRABuilderFullMultip] Rational Reconstruction"); den = 1; const auto& mod = Father_t::getModulus(); Integer nd; @@ -81,6 +85,7 @@ namespace LinBox den *= nd; } } + commentator().stop("[RationalCRABuilderFullMultip] Rational Reconstruction"); return num; } @@ -88,7 +93,8 @@ namespace LinBox Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn) { // @note This interface of the rational does the RatRecon. 
- Givaro::Rational myRational(Integer::modin(u1 *= old_den, m1), m1, sn); + Integer::modin(u1 *= old_den, m1); + Givaro::Rational myRational(u1, m1, sn); u1 = myRational.nume(); new_den = myRational.deno(); return u1; diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 541cec859..08fa84355 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -83,7 +83,7 @@ namespace LinBox { commentator().stop("[MultiModLifting] Lifting"); // CRT reconstruction from paddicAccumulations - commentator().start("[MultiModLifting] CRT Reconstruction"); + commentator().start("[MultiModLifting] CRT Reconstruction Progress"); using CRAField = Givaro::Modular; RationalCRABuilderFullMultip craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2) @@ -97,13 +97,11 @@ namespace LinBox { CRAField field(radices[j]); craBuilder.progress(field, padicAccumulations[j]); } - commentator().stop("[MultiModLifting] CRT Reconstruction"); + commentator().stop("[MultiModLifting] CRT Reconstruction Progress"); // Rational reconstruction // @note RR expects the bounds to be strict, this is why we add a + 1 - commentator().start("[MultiModLifting] Rational Reconstruction"); craBuilder.result(xNum, xDen, _lc.numBound() + 1); - commentator().stop("[MultiModLifting] Rational Reconstruction"); return true; } @@ -165,7 +163,7 @@ namespace LinBox { // implicitly requiring 0-{p-1} representation of the p-adic sequence elements. 
using Field = Givaro::Modular; using PrimeGenerator = PrimeIterator; - PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim()), 12); // @fixme REMOVE SEED + PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); DixonRNSSolver solver(A.field(), primeGenerator); solver.solve(xNum, xDen, A, b, m); diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 39bf409a0..a30783798 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -95,6 +95,8 @@ namespace { template bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, ResultVector& Rb) { + std::cout << "Checking result..." << std::endl; + ResultVector RAx(RA.field(), Rb.size()); RA.apply(RAx, x); @@ -104,6 +106,8 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul return false; } + std::cout << "Result OK !" << std::endl; + return true; } @@ -291,7 +295,7 @@ int main(int argc, char** argv) // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // ----- Rational Dixon - // ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // @fixme Dixon does not compile // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); From 430278f8bdc0e6f552b77d1215b9be0375054fc3 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 21 Jun 2019 15:43:21 +0200 Subject: [PATCH 35/63] Speed up thanks to fconvert on matrix --- .../algorithms/multi-mod-lifting-container.h | 44 ++++++++----------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h 
index ca2649df5..2baff2fae 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -93,6 +93,8 @@ namespace LinBox { , _A(A) , _b(b) , _n(A.rowdim()) + , _rMatrix(_ring) + , _qMatrix(_ring) { linbox_check(A.rowdim() == A.coldim()); @@ -228,22 +230,23 @@ namespace LinBox { //----- Locals setup - _r.reserve(_primesCount); - _Q.reserve(_primesCount); + _rMatrix = IMatrix(_ring, _n, _primesCount); + _qMatrix = IMatrix(_ring, _n, _primesCount); + _R.reserve(_primesCount); _Fc.reserve(_primesCount); _FR.reserve(_primesCount); for (auto j = 0u; j < _primesCount; ++j) { auto& F = _fields[j]; - _r.emplace_back(_ring, _n); - _Q.emplace_back(_ring, _n); _R.emplace_back(_ring, _n); _Fc.emplace_back(F, _n); _FR.emplace_back(F, _n); // Initialize all residues to b - _r.back() = _b; // Copying data + for (auto i = 0u; i < _n; ++i) { + _rMatrix.refEntry(i, j) = _b[i]; + } } } @@ -292,19 +295,18 @@ namespace LinBox { bool next(std::vector& digits) { VectorDomain IVD(_ring); + BlasMatrixDomain IMD(_ring); // commentator().start("[MultiModLifting] Computing c"); #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; - auto& r = _r[j]; - auto& Q = _Q[j]; auto& R = _R[j]; // @note There is no VectorDomain::divmod yet. // Euclidian division so that rj = pj Qj + Rj for (auto i = 0u; i < _n; ++i) { - _ring.quoRem(Q[i], R[i], r[i], pj); + _ring.quoRem(_qMatrix.refEntry(i, j), R[i], _rMatrix.getEntry(i, j), pj); } // Convert R to the field @@ -336,13 +338,14 @@ namespace LinBox { // r <= Q + (R - A c) / p // By first computing R <= R - A c as a fgemm within the RNS domain. 
+ // commentator().start("[MultiModLifting] FGEMM R <= R - Ac"); PAR_BLOCK { using RNSParallel = FFLAS::ParSeqHelper::Parallel; using FGEMMSequential = FFLAS::ParSeqHelper::Sequential; using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; using MMHelper = FFLAS::MMHelper; - ComposedParSeqHelper composedParSeqHelper(4, 4); + ComposedParSeqHelper composedParSeqHelper(4, 4); // @fixme REPLACE THESE 444! MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, @@ -368,20 +371,10 @@ namespace LinBox { // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); - #pragma omp parallel for - for (auto j = 0u; j < _primesCount; ++j) { - auto& r = _r[j]; - auto& Q = _Q[j]; - - // r <- (R - Ac) / p - // @fixme @cpernet Don't know how to do that with one fconvert_rns! - for (auto i = 0u; i < _n; ++i) { - FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &r[i], 1, - _rnsR + (i * _primesCount + j)); - } - - IVD.addin(r, Q); - } + // @fixme @cpernet Is this parallel? + FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount, + _rnsR + 0); + IMD.addin(_rMatrix, _qMatrix); // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); return true; @@ -442,11 +435,10 @@ namespace LinBox { std::vector _fields; // All fields Modular //----- Iteration - std::vector _r; // @todo Could be a matrix? Might not be useful, as it is never - // used directly in computations. 
- std::vector _Q; std::vector _R; // Will be inited to RNS within _rnsR std::vector _Fc; std::vector _FR; + IMatrix _rMatrix; + IMatrix _qMatrix; }; } From eb7c3dd012e18a0fcef62f7eeb1663d4a54edee8 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 21 Jun 2019 16:44:40 +0200 Subject: [PATCH 36/63] Working on INV mul --- .../dixon-solver/dixon-solver-dense.inl | 4 +++ .../algorithms/multi-mod-lifting-container.h | 35 +++++++++++++------ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl index f1ab9266e..ed0978683 100644 --- a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl +++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl @@ -129,12 +129,14 @@ namespace LinBox { } while (notfr); typedef DixonLiftingContainer> LiftingContainer; + commentator().start("CLASSIC DIXON LIFTING"); LiftingContainer lc(_ring, *F, A, *FMP, b, _prime); RationalReconstruction re(lc); if (!re.getRational(num, den, 0)) { delete FMP; return SS_FAILED; } + commentator().stop("CLASSIC DIXON LIFTING"); #ifdef RSTIMING ttNonsingularSolve.update(re, lc); #endif @@ -703,6 +705,7 @@ namespace LinBox { // ----- Do lifting on sub matrix BlasMatrix BBA_minor(A_minor); + commentator().start("CLASSIC DIXON LIFTING"); LiftingContainer lc(_ring, _field, BBA_minor, *Ap_minor_inv, newb, _prime); // ----- Reconstruct rational @@ -713,6 +716,7 @@ namespace LinBox { // dirty, but should not be called return SS_FAILED; } + commentator().stop("CLASSIC DIXON LIFTING"); #ifdef RSTIMING ttSystemSolve.update(re, lc); diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 2baff2fae..4fbf7c864 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -190,6 +190,7 @@ namespace LinBox { _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n); _rnsc = 
FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); _rnsR = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); + _rnsPrimesInverses = FFLAS::fflas_new(*_rnsDomain, _primesCount); // @note So that 2^(16*cmax) is the max element of A. double cmax = logInfinityNormA / 16.; @@ -202,11 +203,17 @@ namespace LinBox { _primesRNSInverses.resize(_primesCount); for (auto j = 0u; j < _primesCount; ++j) { auto prime = _primes[j]; - _primesRNSInverses[j].resize(_rnsPrimesCount); + + auto& rnsPrimeInverse = _rnsPrimesInverses[j]; + auto stride = rnsPrimeInverse._stride; + + _primesRNSInverses[j].resize(_rnsPrimesCount); // @fixme TBR + for (auto h = 0u; h < _rnsPrimesCount; ++h) { auto& rnsF = _rnsSystem->_field_rns[h]; auto& primeInverse = _primesRNSInverses[j][h]; rnsF.inv(primeInverse, prime); + rnsPrimeInverse._ptr[h * stride] = primeInverse; } } } @@ -357,25 +364,30 @@ namespace LinBox { // We divide each residues by the according pj, which is done by multiplying. // @fixme Could be done in parallel! // @fixme @cpernet Don't know why, can't make it parallel! 
- // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); - for (auto j = 0u; j < _primesCount; ++j) { - for (auto i = 0u; i < _n; ++i) { - auto& rnsElement = _rnsR[i * _primesCount + j]; - auto stride = rnsElement._stride; + commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); + for (auto i = 0u; i < _n; ++i) { + for (auto j = 0u; j < _primesCount; ++j) { + auto& rnsPrimeInverse = _rnsPrimesInverses[j]; + auto& rnsR = _rnsR[i * _primesCount + j]; + + // @fixme @cpernet Just doing _rnsDomain->mulin(rnsR, _rnsPrimesInverses[j]); + // But mulin doesn't exist on that domain, and fgemm on 1x1 is much slower + auto stridePrimeInverse = rnsPrimeInverse._stride; + auto strideR = rnsR._stride; for (auto h = 0u; h < _rnsPrimesCount; ++h) { auto& rnsF = _rnsSystem->_field_rns[h]; - rnsF.mulin(rnsElement._ptr[h * stride], _primesRNSInverses[j][h]); + rnsF.mulin(rnsR._ptr[h * strideR], rnsPrimeInverse._ptr[h * stridePrimeInverse]); } } } - // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); + commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); - // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); // @fixme @cpernet Is this parallel? FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount, _rnsR + 0); IMD.addin(_rMatrix, _qMatrix); - // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); return true; } @@ -423,8 +435,9 @@ namespace LinBox { size_t _rnsPrimesCount = 0u; // Stores the inverse of pj of the i-th RNS prime into _primesRNSInverses[j][i] std::vector> _primesRNSInverses; + RNSElementPtr _rnsPrimesInverses; - std::vector _primes; // @fixme We might want something else as a type! + std::vector _primes; std::vector _rnsPrimes; // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). 
size_t _iterationsCount = 0u; From 7957f03616194e20e3d3539f02fe7dd17d28f3e0 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 26 Jun 2019 17:00:02 +0200 Subject: [PATCH 37/63] Parallel init for MultiModLiftingContainer --- .../algorithms/multi-mod-lifting-container.h | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 4fbf7c864..bfe2e2382 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -167,21 +167,27 @@ namespace LinBox { // and pass through to the lifting container. Here, we could use that, but we have // to keep control of generated primes, so that the RNS base has bigger primes // than the . + commentator().start("[MMLifting][Init] A^{-1} mod pj precomputations"); { _B.reserve(_primesCount); + for (auto& F : _fields) { + _B.emplace_back(A, F); + } - for (const auto& F : _fields) { - _B.emplace_back(A, F); // Rebind into the field - + // @fixme To be replaced with Paladin + #pragma omp parallel for + for (auto j = 0u; j < _primesCount; ++j) { int nullity = 0; + auto& F = _fields[j]; BlasMatrixDomain bmd(F); - bmd.invin(_B.back(), nullity); + bmd.invin(_B[j], nullity); if (nullity > 0) { // @fixme Should redraw another prime! 
throw LinBoxError("Wrong prime, sorry."); } } } + commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations"); // Making A into the RNS domain { @@ -200,20 +206,16 @@ namespace LinBox { // Compute the inverses of pj for each RNS prime { - _primesRNSInverses.resize(_primesCount); for (auto j = 0u; j < _primesCount; ++j) { auto prime = _primes[j]; auto& rnsPrimeInverse = _rnsPrimesInverses[j]; auto stride = rnsPrimeInverse._stride; - _primesRNSInverses[j].resize(_rnsPrimesCount); // @fixme TBR - for (auto h = 0u; h < _rnsPrimesCount; ++h) { auto& rnsF = _rnsSystem->_field_rns[h]; - auto& primeInverse = _primesRNSInverses[j][h]; + auto& primeInverse = rnsPrimeInverse._ptr[h * stride]; rnsF.inv(primeInverse, prime); - rnsPrimeInverse._ptr[h * stride] = primeInverse; } } } @@ -352,7 +354,7 @@ namespace LinBox { using FGEMMSequential = FFLAS::ParSeqHelper::Sequential; using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; using MMHelper = FFLAS::MMHelper; - ComposedParSeqHelper composedParSeqHelper(4, 4); // @fixme REPLACE THESE 444! + ComposedParSeqHelper composedParSeqHelper(_primes.size(), _primes.size()); MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, @@ -364,7 +366,7 @@ namespace LinBox { // We divide each residues by the according pj, which is done by multiplying. // @fixme Could be done in parallel! // @fixme @cpernet Don't know why, can't make it parallel! 
- commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); + // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); for (auto i = 0u; i < _n; ++i) { for (auto j = 0u; j < _primesCount; ++j) { auto& rnsPrimeInverse = _rnsPrimesInverses[j]; @@ -380,14 +382,14 @@ namespace LinBox { } } } - commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); + // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); - commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); // @fixme @cpernet Is this parallel? FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount, _rnsR + 0); IMD.addin(_rMatrix, _qMatrix); - commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); return true; } @@ -433,8 +435,7 @@ namespace LinBox { RNSElementPtr _rnsc; RNSElementPtr _rnsR; size_t _rnsPrimesCount = 0u; - // Stores the inverse of pj of the i-th RNS prime into _primesRNSInverses[j][i] - std::vector> _primesRNSInverses; + // Stores the inverse of pj within the RNS base prime into _rnsPrimesInverses[j] RNSElementPtr _rnsPrimesInverses; std::vector _primes; From 161eb3f05e6f80901a0ef944ce1bfb619aca5f85 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 26 Jun 2019 17:22:00 +0200 Subject: [PATCH 38/63] Added move assignment operator to blas-vector. 
--- .../algorithms/multi-mod-lifting-container.h | 3 ++- linbox/vector/blas-vector.h | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index bfe2e2382..6e56fccb1 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -324,7 +324,8 @@ namespace LinBox { auto& digit = digits[j]; auto& B = _B[j]; auto& Fc = _Fc[j]; - // @fixme Am I copying the data an extra time? + + // @note The assignment will call the move one, not copying data twice. FR = FVector(F, R); // rebind B.apply(Fc, FR); diff --git a/linbox/vector/blas-vector.h b/linbox/vector/blas-vector.h index 71faff05c..1a6807850 100644 --- a/linbox/vector/blas-vector.h +++ b/linbox/vector/blas-vector.h @@ -320,6 +320,24 @@ namespace LinBox { /* BlasVector */ return *this; } + + BlasVector<_Field,_blasRep>& operator= (BlasVector<_Field,_blasRep>&& V) + { + if ( &V == this) + return *this; + + _size = V._size; + _1stride = V._1stride; + _rep = std::move(V._rep); + _ptr = _rep.data(); + _field = V._field; + + // Father_t is garbage until then: + setIterators(); + + return *this; + } + //! this should not exist. BlasVector<_Field,_blasRep>& operator= (const std::vector& V) { From cd14524cea889f39ea712e7ed2df407c0b890901 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 26 Jun 2019 18:04:11 +0200 Subject: [PATCH 39/63] Lifting container now returns a vector of field vector instead of integer ones. 
--- .../algorithms/multi-mod-lifting-container.h | 24 +++++++------------ linbox/solutions/solve/solve-dixon-rns.h | 22 +++++++++++------ 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 6e56fccb1..4d5d160d5 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -243,13 +243,11 @@ namespace LinBox { _qMatrix = IMatrix(_ring, _n, _primesCount); _R.reserve(_primesCount); - _Fc.reserve(_primesCount); _FR.reserve(_primesCount); for (auto j = 0u; j < _primesCount; ++j) { auto& F = _fields[j]; _R.emplace_back(_ring, _n); - _Fc.emplace_back(F, _n); _FR.emplace_back(F, _n); // Initialize all residues to b @@ -291,8 +289,8 @@ namespace LinBox { Integer denBound() const { return _denBound; } uint32_t primesCount() const { return _primesCount; } - const FElement& prime(uint32_t index) const { return _primes.at(index); } + const std::vector& primesFields() const { return _fields; } // -------------- // ----- Iterator @@ -301,12 +299,12 @@ namespace LinBox { * Returns false if the next digit cannot be computed (bad modulus). * c is a vector of integers but all element are below p = p1 * ... * pl */ - bool next(std::vector& digits) + bool next(std::vector& digits) { VectorDomain IVD(_ring); BlasMatrixDomain IMD(_ring); - // commentator().start("[MultiModLifting] Computing c"); + commentator().start("[MultiModLifting] c = A^{-1} r mod p"); #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; @@ -314,6 +312,8 @@ namespace LinBox { // @note There is no VectorDomain::divmod yet. // Euclidian division so that rj = pj Qj + Rj + // @fixme Should use quoRem on unsigned int, making R an uint vector, + // because it will be converted anyway. 
for (auto i = 0u; i < _n; ++i) { _ring.quoRem(_qMatrix.refEntry(i, j), R[i], _rMatrix.getEntry(i, j), pj); } @@ -323,25 +323,20 @@ namespace LinBox { auto& FR = _FR[j]; auto& digit = digits[j]; auto& B = _B[j]; - auto& Fc = _Fc[j]; // @note The assignment will call the move one, not copying data twice. FR = FVector(F, R); // rebind - B.apply(Fc, FR); - - // @fixme We might not need to store digits into IVectors, and returning _Fc - // would do the trick - digit = IVector(_ring, Fc); + B.apply(digit, FR); // Store the very same result in an RNS system, // but fact is all the primes of the RNS system are bigger - // than the modulus used to compute _Fc, we just copy the result for everybody. + // than the modulus used to compute the digit, we just copy the result for everybody. for (auto i = 0u; i < _n; ++i) { setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); - setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, Fc[i]); + setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); } } - // commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); + commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); // ----- Compute the next residues! 
@@ -451,7 +446,6 @@ namespace LinBox { //----- Iteration std::vector _R; // Will be inited to RNS within _rnsR - std::vector _Fc; std::vector _FR; IMatrix _rMatrix; IMatrix _qMatrix; diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 08fa84355..2ae9b3aa9 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -37,6 +37,8 @@ namespace LinBox { using Ring = typename LiftingContainer::Ring; using IElement = typename LiftingContainer::IElement; using IVector = typename LiftingContainer::IVector; + using FElement = typename LiftingContainer::FElement; + using FVector = typename LiftingContainer::FVector; public: MultiModRationalReconstruction(LiftingContainer& lc) @@ -56,26 +58,32 @@ namespace LinBox { } commentator().start("[MultiModLifting] Lifting"); - VectorDomain IVD(_lc.ring()); - // Stores each c0 + c1 pj + ... + ck pj^k for each pj - std::vector padicAccumulations(_lc.primesCount(), _lc.ring()); // Temporary structure to store a ci for each pj - std::vector digits(_lc.primesCount(), - _lc.ring()); // @fixme Could be a Field Element? + std::vector digits; + digits.reserve(_lc.primesCount()); + for (auto& F : _lc.primesFields()) { + digits.emplace_back(F, _lc.size()); + } + // The pj^i for each pj std::vector radices(_lc.primesCount(), 1); + // Stores each c0 + c1 pj + ... + ck pj^k for each pj + std::vector padicAccumulations(_lc.primesCount(), _lc.ring()); for (auto j = 0u; j < _lc.primesCount(); ++j) { padicAccumulations[j].resize(_lc.size()); - digits[j].resize(_lc.size()); } + // @fixme Better use PolEval (or will it cause memory explosion?) + VectorDomain IVD(_lc.ring()); for (auto i = 0u; i < _lc.length(); ++i) { _lc.next(digits); - // @fixme Better use PolEval (except memory explosion?) 
for (auto j = 0u; j < _lc.primesCount(); ++j) { + // @fixme @cpernet digits being a field vector, this will implicitly cast + // each of its elements to a Integer, is there something better? + // Or else, we just need an overload of Givaro::ZRing().axpyin() with a double as last parameter IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci _lc.ring().mulin(radices[j], _lc.prime(j)); } From 50d12c368bd79411197486704d32bd760ceae496 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 28 Jun 2019 10:42:29 +0200 Subject: [PATCH 40/63] Parallelized the padic accumulation --- linbox/algorithms/multi-mod-lifting-container.h | 8 ++++---- linbox/solutions/solve/solve-dixon-rns.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 4d5d160d5..97708fdc1 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -167,7 +167,7 @@ namespace LinBox { // and pass through to the lifting container. Here, we could use that, but we have // to keep control of generated primes, so that the RNS base has bigger primes // than the . 
- commentator().start("[MMLifting][Init] A^{-1} mod pj precomputations"); + // commentator().start("[MMLifting][Init] A^{-1} mod pj precomputations"); { _B.reserve(_primesCount); for (auto& F : _fields) { @@ -187,7 +187,7 @@ namespace LinBox { } } } - commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations"); + // commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations"); // Making A into the RNS domain { @@ -304,7 +304,7 @@ namespace LinBox { VectorDomain IVD(_ring); BlasMatrixDomain IMD(_ring); - commentator().start("[MultiModLifting] c = A^{-1} r mod p"); + // commentator().start("[MultiModLifting] c = A^{-1} r mod p"); #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; @@ -336,7 +336,7 @@ namespace LinBox { setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); } } - commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); + // commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); // ----- Compute the next residues! diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 2ae9b3aa9..388fb5e78 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -80,6 +80,7 @@ namespace LinBox { for (auto i = 0u; i < _lc.length(); ++i) { _lc.next(digits); + #pragma omp parallel for for (auto j = 0u; j < _lc.primesCount(); ++j) { // @fixme @cpernet digits being a field vector, this will implicitly cast // each of its elements to a Integer, is there something better? 
From 9331b2db7ec84dca63feb9608cae53d9e378a1a5 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 28 Jun 2019 10:51:56 +0200 Subject: [PATCH 41/63] Using correct NUM_THREADS for fgemm --- linbox/algorithms/multi-mod-lifting-container.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 97708fdc1..3da315da2 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -350,7 +350,7 @@ namespace LinBox { using FGEMMSequential = FFLAS::ParSeqHelper::Sequential; using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; using MMHelper = FFLAS::MMHelper; - ComposedParSeqHelper composedParSeqHelper(_primes.size(), _primes.size()); + ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS); MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, From 2764b64ffbbccaeea59ce4ea76e041de91804d96 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 28 Jun 2019 14:20:35 +0200 Subject: [PATCH 42/63] Computing / pj is now parallel and cache friendly. --- .../algorithms/multi-mod-lifting-container.h | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 3da315da2..f081b20b8 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -360,28 +360,30 @@ namespace LinBox { // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); // We divide each residues by the according pj, which is done by multiplying. - // @fixme Could be done in parallel! - // @fixme @cpernet Don't know why, can't make it parallel! + // @note The matrix _rnsR is RNS-major, meaning that it is stored + // as [R mod q0][R mod q1][...] 
where [R mod qh] represents a full matrix. + // We use this fact to keep better cache coherency. // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); - for (auto i = 0u; i < _n; ++i) { + auto rnsStride = 0u; + for (auto h = 0u; h < _rnsPrimesCount; ++h) { + auto& rnsF = _rnsSystem->_field_rns[h]; + + #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto& rnsPrimeInverse = _rnsPrimesInverses[j]; - auto& rnsR = _rnsR[i * _primesCount + j]; - - // @fixme @cpernet Just doing _rnsDomain->mulin(rnsR, _rnsPrimesInverses[j]); - // But mulin doesn't exist on that domain, and fgemm on 1x1 is much slower auto stridePrimeInverse = rnsPrimeInverse._stride; - auto strideR = rnsR._stride; - for (auto h = 0u; h < _rnsPrimesCount; ++h) { - auto& rnsF = _rnsSystem->_field_rns[h]; - rnsF.mulin(rnsR._ptr[h * strideR], rnsPrimeInverse._ptr[h * stridePrimeInverse]); + auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse]; + + for (auto i = 0u; i < _n; ++i) { + rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)], rnsPrimeInverseForRnsPrimeH); } } + + rnsStride += _rnsR._stride; } // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); - // @fixme @cpernet Is this parallel? FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount, _rnsR + 0); IMD.addin(_rMatrix, _qMatrix); From e126397227683f8f02c9161734273b753fbbd9f5 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 28 Jun 2019 16:09:55 +0200 Subject: [PATCH 43/63] Now computing the division on uint. 
--- .../algorithms/multi-mod-lifting-container.h | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index f081b20b8..72015389c 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -242,12 +242,10 @@ namespace LinBox { _rMatrix = IMatrix(_ring, _n, _primesCount); _qMatrix = IMatrix(_ring, _n, _primesCount); - _R.reserve(_primesCount); _FR.reserve(_primesCount); for (auto j = 0u; j < _primesCount; ++j) { auto& F = _fields[j]; - _R.emplace_back(_ring, _n); _FR.emplace_back(F, _n); // Initialize all residues to b @@ -308,24 +306,22 @@ namespace LinBox { #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; - auto& R = _R[j]; + auto& FR = _FR[j]; + uint64_t upj = pj; // @note There is no VectorDomain::divmod yet. // Euclidian division so that rj = pj Qj + Rj - // @fixme Should use quoRem on unsigned int, making R an uint vector, - // because it will be converted anyway. + uint64_t uR; for (auto i = 0u; i < _n; ++i) { - _ring.quoRem(_qMatrix.refEntry(i, j), R[i], _rMatrix.getEntry(i, j), pj); + Integer::divmod(_qMatrix.refEntry(i, j), uR, _rMatrix.getEntry(i, j), upj); + // @note No need to init, because we know that uR < pj, + // so that would do an extra check. + FR[i] = static_cast(uR); } - // Convert R to the field - auto& F = _fields[j]; - auto& FR = _FR[j]; + // digit = A^{-1} * R mod pj auto& digit = digits[j]; auto& B = _B[j]; - - // @note The assignment will call the move one, not copying data twice. 
- FR = FVector(F, R); // rebind B.apply(digit, FR); // Store the very same result in an RNS system, @@ -447,7 +443,7 @@ namespace LinBox { std::vector _fields; // All fields Modular //----- Iteration - std::vector _R; // Will be inited to RNS within _rnsR + std::vector _FR; IMatrix _rMatrix; IMatrix _qMatrix; From a4a479d6ae6bf0a986df63de49f68cb649003baa Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Mon, 1 Jul 2019 14:57:16 +0200 Subject: [PATCH 44/63] Working on benchmarks --- benchmarks/benchmark-dense-solve.C | 40 ++++++++---- .../algorithms/multi-mod-lifting-container.h | 64 +++++++++++++------ linbox/solutions/methods.h | 14 +++- tests/test-solve-full.C | 29 +++++++-- 4 files changed, 105 insertions(+), 42 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index a678c52f0..887a73667 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -55,6 +55,7 @@ namespace { int bits = 10; std::string dispatchString = "Auto"; std::string methodString = "Auto"; + std::string rnsFgemmString = "ParallelRnsOnly"; }; template @@ -71,10 +72,10 @@ namespace { } template > -void benchmark(std::pair& timebits, Arguments& args, MethodBase& method) +void benchmark(std::array& timebits, Arguments& args, MethodBase& method) { - Field F(args.q); // q is ignored for Integers - typename Field::RandIter randIter(F, args.bits); // bits is ignored for ModularRandIter + Field F(args.q); // q is ignored for Integers + typename Field::RandIter randIter(F, 0, args.bits); // bits is ignored for ModularRandIter #ifdef _BENCHMARKS_DEBUG_ std::clog << "Setting A ... 
" << std::endl; @@ -128,12 +129,9 @@ void benchmark(std::pair& timebits, Arguments& args, MethodBase& if (method.master()) { chrono.stop(); -#ifdef _BENCHMARKS_DEBUG_ - printVector(std::clog << "(DenseElimination) Solution is ", F, X) << std::endl; -#endif - - setBitsize(timebits.second, args.q, X); - timebits.first = chrono.usertime(); + timebits[0] = chrono.usertime(); + timebits[1] = chrono.realtime(); + setBitsize(timebits[2], args.q, X); } } @@ -145,14 +143,17 @@ int main(int argc, char** argv) {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n}, {'b', "-b", "bit size", TYPE_INT, &args.bits}, {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, + {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString}, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " - "Dixon, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, " + "Dixon, DixonRNS, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, " "Blackbox, Wiedemann, Lanczos).", TYPE_STR, &args.methodString}, END_OF_ARGUMENTS}; LinBox::parseArguments(argc, argv, as); + commentator().setReportStream(std::cout); + // Setting up context Communicator communicator(&argc, &argv); @@ -167,12 +168,21 @@ int main(int argc, char** argv) else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; else method.dispatch = Dispatch::Auto; + if (args.rnsFgemmString == "BothParallel") method.rnsFgemmType = RnsFgemmType::BothParallel; + else if (args.rnsFgemmString == "BothSequential") method.rnsFgemmType = RnsFgemmType::BothSequential; + else if (args.rnsFgemmString == "ParallelRnsOnly") method.rnsFgemmType = RnsFgemmType::ParallelRnsOnly; + else if (args.rnsFgemmString == "ParallelFgemmOnly") method.rnsFgemmType = RnsFgemmType::ParallelFgemmOnly; + else { + std::cerr << "-r RNS-FGEMM type should be either 
BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly" << std::endl; + return EXIT_FAILURE; + } + // Real benchmark bool isModular = false; if (args.q > 0) isModular = true; - using Timing = std::pair; + using Timing = std::array; std::vector timebits(args.nbiter); for (int iter = 0; iter < args.nbiter; ++iter) { if (isModular) { @@ -185,13 +195,15 @@ int main(int argc, char** argv) } #ifdef _BENCHMARKS_DEBUG_ - for (const auto& it : timebits) std::clog << it.first << "s, " << it.second << " bits" << std::endl; + for (const auto& it : timebits) std::clog << it[0] << "s, " << it[2] << " bits" << std::endl; #endif if (method.master()) { - std::sort(timebits.begin(), timebits.end(), [](const Timing& a, const Timing& b) -> bool { return a.first > b.first; }); + std::sort(timebits.begin(), timebits.end(), [](const Timing& a, const Timing& b) -> bool { return a[0] > b[0]; }); - std::cout << "Time: " << timebits[args.nbiter / 2].first << " Bitsize: " << timebits[args.nbiter / 2].second; + std::cout << "UserTime: " << timebits[args.nbiter / 2][0]; + std::cout << " RealTime: " << timebits[args.nbiter / 2][1]; + std::cout << " Bitsize: " << timebits[args.nbiter / 2][2]; FFLAS::writeCommandString(std::cout, as) << std::endl; } diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 72015389c..d5a7b7a73 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -90,6 +90,7 @@ namespace LinBox { MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, const IVector& b, const Method::DixonRNS& m) : _ring(ring) + , _method(m) , _A(A) , _b(b) , _n(A.rowdim()) @@ -174,8 +175,8 @@ namespace LinBox { _B.emplace_back(A, F); } - // @fixme To be replaced with Paladin - #pragma omp parallel for +// @fixme To be replaced with Paladin +#pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { int nullity = 0; auto& 
F = _fields[j]; @@ -302,8 +303,8 @@ namespace LinBox { VectorDomain IVD(_ring); BlasMatrixDomain IMD(_ring); - // commentator().start("[MultiModLifting] c = A^{-1} r mod p"); - #pragma omp parallel for +// commentator().start("[MultiModLifting] c = A^{-1} r mod p"); +#pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; auto& FR = _FR[j]; @@ -326,7 +327,8 @@ namespace LinBox { // Store the very same result in an RNS system, // but fact is all the primes of the RNS system are bigger - // than the modulus used to compute the digit, we just copy the result for everybody. + // than the modulus used to compute the digit, we just copy the result for + // everybody. for (auto i = 0u; i < _n; ++i) { setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); @@ -338,20 +340,40 @@ namespace LinBox { // r <= Q + (R - A c) / p - // By first computing R <= R - A c as a fgemm within the RNS domain. +#define rns_fgemm(RnsParSeq, FgemmParSeq) \ + using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; \ + using MMHelper = FFLAS::MMHelper; \ + ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS); \ + MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); \ + \ + FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n, \ + _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, _rnsR, \ + _primesCount, mmHelper); + + using RNSParallel = FFLAS::ParSeqHelper::Parallel; + using FGEMMParallel = FFLAS::ParSeqHelper::Parallel; + + // @fixme @cpernet @jgdumas Should we move that PAR_BLOCK outside of the function + // and let the user do it? 
// commentator().start("[MultiModLifting] FGEMM R <= R - Ac"); PAR_BLOCK { - using RNSParallel = FFLAS::ParSeqHelper::Parallel; - using FGEMMSequential = FFLAS::ParSeqHelper::Sequential; - using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; - using MMHelper = FFLAS::MMHelper; - ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS); - MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); - - FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, - _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, - _rnsR, _primesCount, mmHelper); + // Firstly compute R <= R - A c as a fgemm within the RNS domain. + if (_method.rnsFgemmType == RnsFgemmType::BothSequential) { + rns_fgemm(FFLAS::ParSeqHelper::Sequential, FFLAS::ParSeqHelper::Sequential) + } + else if (_method.rnsFgemmType == RnsFgemmType::BothParallel) { + rns_fgemm(RNSParallel, FGEMMParallel) + } + else if (_method.rnsFgemmType == RnsFgemmType::ParallelFgemmOnly) { + rns_fgemm(FFLAS::ParSeqHelper::Sequential, FGEMMParallel) + } + else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) { + rns_fgemm(RNSParallel, FFLAS::ParSeqHelper::Sequential) + } } // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); @@ -364,14 +386,15 @@ namespace LinBox { for (auto h = 0u; h < _rnsPrimesCount; ++h) { auto& rnsF = _rnsSystem->_field_rns[h]; - #pragma omp parallel for +#pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto& rnsPrimeInverse = _rnsPrimesInverses[j]; auto stridePrimeInverse = rnsPrimeInverse._stride; auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse]; for (auto i = 0u; i < _n; ++i) { - rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)], rnsPrimeInverseForRnsPrimeH); + rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)], + rnsPrimeInverseForRnsPrimeH); } } @@ -380,8 +403,8 @@ namespace LinBox { // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); // 
commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); - FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), _primesCount, - _rnsR + 0); + FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), + _primesCount, _rnsR + 0); IMD.addin(_rMatrix, _qMatrix); // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); @@ -413,6 +436,7 @@ namespace LinBox { public: // @fixme BACK TO PRIVATE! const Ring& _ring; + Method::DixonRNS _method; // A copy of the user-provided method. // The problem: A^{-1} * b const IMatrix& _A; diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index fb73844a1..5222050b6 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -178,6 +178,17 @@ namespace LinBox { Linear, }; + /** + * When running FFLAS's fgemm on an RNS structure, + * how the composed ParSeqHelper should be configured. + */ + enum class RnsFgemmType { + BothParallel, + BothSequential, + ParallelRnsOnly, + ParallelFgemmOnly, + }; + /** * Holds everything a method needs to know about the problem. * @@ -223,7 +234,8 @@ namespace LinBox { //! that the provided denominator is minimal. // ----- For DixonRNS method. - uint32_t primesCount = 16u; //!< How many primes to use lifting will be done over p = p1p2...pl. + uint32_t primesCount = 8u; //!< How many primes to use lifting will be done over p = p1p2...pl. + RnsFgemmType rnsFgemmType = RnsFgemmType::ParallelRnsOnly; // ----- For random-based systems. size_t trialsBeforeFailure = LINBOX_DEFAULT_TRIALS_BEFORE_FAILURE; //!< Maximum number of trials before giving up. diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index a30783798..862779717 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -97,14 +97,14 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul { std::cout << "Checking result..." 
<< std::endl; - ResultVector RAx(RA.field(), Rb.size()); - RA.apply(RAx, x); + // ResultVector RAx(RA.field(), Rb.size()); + // RA.apply(RAx, x); - VectorDomain VD(RA.field()); - if (!VD.areEqual(RAx, Rb)) { - print_error(x, A, b, "Ax != b"); - return false; - } + // VectorDomain VD(RA.field()); + // if (!VD.areEqual(RAx, Rb)) { + // print_error(x, A, b, "Ax != b"); + // return false; + // } std::cout << "Result OK !" << std::endl; @@ -218,6 +218,7 @@ int main(int argc, char** argv) int m = 32; int n = 24; std::string dispatchString = "Auto"; + std::string rnsFgemmString = "ParallelRnsOnly"; static Argument args[] = { {'q', "-q", "Field characteristic.", TYPE_INTEGER, &q}, @@ -230,6 +231,7 @@ int main(int argc, char** argv) {'m', "-m", "Row dimension of matrices.", TYPE_INT, &m}, {'n', "-n", "Column dimension of matrices.", TYPE_INT, &n}, {'d', "-d", "Dispatch mode (either Auto, Sequential, SMP or Distributed).", TYPE_STR, &dispatchString}, + {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &rnsFgemmString}, END_OF_ARGUMENTS}; parseArguments(argc, argv, args); @@ -252,6 +254,19 @@ int main(int argc, char** argv) return EXIT_FAILURE; } + if (rnsFgemmString == "BothParallel") + method.rnsFgemmType = RnsFgemmType::BothParallel; + else if (rnsFgemmString == "BothSequential") + method.rnsFgemmType = RnsFgemmType::BothSequential; + else if (rnsFgemmString == "ParallelRnsOnly") + method.rnsFgemmType = RnsFgemmType::ParallelRnsOnly; + else if (rnsFgemmString == "ParallelFgemmOnly") + method.rnsFgemmType = RnsFgemmType::ParallelFgemmOnly; + else { + std::cerr << "-r RNS-FGEMM type should be either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly" << std::endl; + return EXIT_FAILURE; + } + if (primesCount > 0) { method.primesCount = primesCount; } From 62ce5b7af013cc57f4fa36574df3a1922f47b2eb Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 2 Jul 2019 11:02:26 +0200 Subject: [PATCH 
45/63] Added arguments to benchmark-dense-solve. --- benchmarks/benchmark-dense-solve.C | 8 +++ .../algorithms/multi-mod-lifting-container.h | 62 +++++++++---------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 887a73667..d0f7c9015 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -53,6 +53,7 @@ namespace { int nbiter = 3; int n = 500; int bits = 10; + int primesCount = 8; std::string dispatchString = "Auto"; std::string methodString = "Auto"; std::string rnsFgemmString = "ParallelRnsOnly"; @@ -137,6 +138,8 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met int main(int argc, char** argv) { + int numThreads = 1; + Arguments args; Argument as[] = {{'i', "-i", "Set number of repetitions.", TYPE_INT, &args.nbiter}, {'q', "-q", "Set the field characteristic (-1 for rationals).", TYPE_INTEGER, &args.q}, @@ -144,6 +147,8 @@ int main(int argc, char** argv) {'b', "-b", "bit size", TYPE_INT, &args.bits}, {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString}, + {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &args.primesCount}, + {'t', "-t", "Number of threads.", TYPE_INT, &numThreads }, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " "Dixon, DixonRNS, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, " @@ -154,6 +159,8 @@ int main(int argc, char** argv) commentator().setReportStream(std::cout); + omp_set_num_threads(numThreads); + // Setting up context Communicator communicator(&argc, &argv); @@ -163,6 +170,7 @@ int main(int argc, char** argv) MethodBase method; method.pCommunicator = &communicator; + method.primesCount = args.primesCount; if 
(args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index d5a7b7a73..966c25c5b 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -340,17 +340,6 @@ namespace LinBox { // r <= Q + (R - A c) / p -#define rns_fgemm(RnsParSeq, FgemmParSeq) \ - using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; \ - using MMHelper = FFLAS::MMHelper; \ - ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS); \ - MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); \ - \ - FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, _primesCount, _n, \ - _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, _rnsDomain->one, _rnsR, \ - _primesCount, mmHelper); - using RNSParallel = FFLAS::ParSeqHelper::Parallel; using FGEMMParallel = FFLAS::ParSeqHelper::Parallel(); + } + else if (_method.rnsFgemmType == RnsFgemmType::BothParallel) { + rns_fgemm(); } + else if (_method.rnsFgemmType == RnsFgemmType::ParallelFgemmOnly) { + rns_fgemm(); + } + else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) { + rns_fgemm(); + } + // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); // We divide each residues by the according pj, which is done by multiplying. @@ -424,14 +411,23 @@ namespace LinBox { } } - inline void logRNSMatrixElement(RNSElementPtr& A, size_t lda, size_t i, size_t j) + // @note This allows us to factor out some of the rns fgemm variants common code. 
+ template + inline void rns_fgemm() { - auto& Aij = A[i * lda + j]; - Integer reconstructedInteger; - FFLAS::fconvert_rns(*_rnsDomain, 1, 1, 0, &reconstructedInteger, 1, A + (i * lda + j)); - std::cout << i << " " << j << " "; - _rnsDomain->write(std::cout, Aij); - std::cout << " -> " << reconstructedInteger << std::endl; + PAR_BLOCK + { + using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; + using MMHelper = + FFLAS::MMHelper; + ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS); + MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); + + FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, + _primesCount, _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, + _rnsDomain->one, _rnsR, _primesCount, mmHelper); + } } public: // @fixme BACK TO PRIVATE! From 50b510d90858c9ab2393c1a006e9d17fe71f8959 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Wed, 3 Jul 2019 11:16:19 +0200 Subject: [PATCH 46/63] Added seed to benchmark-dense-solve --- benchmarks/benchmark-dense-solve.C | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index d0f7c9015..56c329de9 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -53,6 +53,7 @@ namespace { int nbiter = 3; int n = 500; int bits = 10; + int seed = -1; int primesCount = 8; std::string dispatchString = "Auto"; std::string methodString = "Auto"; @@ -75,8 +76,8 @@ namespace { template > void benchmark(std::array& timebits, Arguments& args, MethodBase& method) { - Field F(args.q); // q is ignored for Integers - typename Field::RandIter randIter(F, 0, args.bits); // bits is ignored for ModularRandIter + Field F(args.q); // q is ignored for Integers + typename Field::RandIter randIter(F, args.seed, args.bits); // bits is ignored for ModularRandIter #ifdef _BENCHMARKS_DEBUG_ std::clog << "Setting A ... 
" << std::endl; @@ -145,6 +146,7 @@ int main(int argc, char** argv) {'q', "-q", "Set the field characteristic (-1 for rationals).", TYPE_INTEGER, &args.q}, {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n}, {'b', "-b", "bit size", TYPE_INT, &args.bits}, + {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed}, {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString}, {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &args.primesCount}, @@ -157,10 +159,12 @@ int main(int argc, char** argv) END_OF_ARGUMENTS}; LinBox::parseArguments(argc, argv, as); - commentator().setReportStream(std::cout); - omp_set_num_threads(numThreads); + if (args.seed < 0) { + args.seed = time(nullptr); + } + // Setting up context Communicator communicator(&argc, &argv); From 24c3b14aa403b9b7edcd12403df13a42ea966752 Mon Sep 17 00:00:00 2001 From: ZHG Date: Wed, 3 Jul 2019 16:48:56 +0200 Subject: [PATCH 47/63] Rewrite omp with paladin for multi-mod-lifting-container --- .../algorithms/multi-mod-lifting-container.h | 102 +++++++++++++++++- 1 file changed, 98 insertions(+), 4 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 966c25c5b..19c74dd43 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -174,19 +174,50 @@ namespace LinBox { for (auto& F : _fields) { _B.emplace_back(A, F); } - +#if 0 // @fixme To be replaced with Paladin #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { - int nullity = 0; + int nullity = 0; //TODO: it may be necessary to replace nullity with a vector auto& F = _fields[j]; BlasMatrixDomain bmd(F); bmd.invin(_B[j], nullity); + if (nullity > 0) {//TODO: it may be easier to move this 
condition check outside the parallel region => loop through the whole vector to add up all values as final value for the condition test which could be parallelized further more + // @fixme Should redraw another prime! + throw LinBoxError("Wrong prime, sorry."); + } + } +#else + + PAR_BLOCK{ + std::vector vnullity;vnullity.reserve(_primesCount); + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + int M = _primesCount; + SYNCH_GROUP({ + FORBLOCK1D(iter, M, sp, + TASK(MODE(CONSTREFERENCE(vnullity) ), + for(auto j=iter.begin(); j!=iter.end(); ++j) + { + auto& F = _fields[j]; + BlasMatrixDomain bmd(F); + bmd.invin(_B[j], vnullity[j]); + } + ) + ) + }); + + int nullity = 0; + for (size_t i=0; i<_primesCount; ++i){ + nullity += vnullity[i]; + } if (nullity > 0) { // @fixme Should redraw another prime! throw LinBoxError("Wrong prime, sorry."); } + } +#endif + } // commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations"); @@ -304,6 +335,7 @@ namespace LinBox { BlasMatrixDomain IMD(_ring); // commentator().start("[MultiModLifting] c = A^{-1} r mod p"); +#if 0 #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto pj = _primes[j]; @@ -334,6 +366,47 @@ namespace LinBox { setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); } } +#else + PAR_BLOCK{ + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + int M = _primesCount; + //SYNCH_GROUP({ + FORBLOCK1D(iter, M, sp, + TASK(MODE(CONSTREFERENCE(digits) ),{ + for(auto j=iter.begin(); j!=iter.end(); ++j) { + auto pj = _primes[j]; + auto& FR = _FR[j]; + uint64_t upj = pj; + + // @note There is no VectorDomain::divmod yet. + // Euclidian division so that rj = pj Qj + Rj + uint64_t uR; + for (auto i = 0u; i < _n; ++i) { + Integer::divmod(_qMatrix.refEntry(i, j), uR, _rMatrix.getEntry(i, j), upj); + // @note No need to init, because we know that uR < pj, + // so that would do an extra check. 
+ FR[i] = static_cast(uR); + } + + // digit = A^{-1} * R mod pj + auto& digit = digits[j]; + auto& B = _B[j]; + B.apply(digit, FR); + + // Store the very same result in an RNS system, + // but fact is all the primes of the RNS system are bigger + // than the modulus used to compute the digit, we just copy the result for + // everybody. + for (auto i = 0u; i < _n; ++i) { + setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); + setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); + } + } + }) + ) + //}); + } +#endif // commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); // ----- Compute the next residues! @@ -372,7 +445,7 @@ namespace LinBox { auto rnsStride = 0u; for (auto h = 0u; h < _rnsPrimesCount; ++h) { auto& rnsF = _rnsSystem->_field_rns[h]; - +#if 0 #pragma omp parallel for for (auto j = 0u; j < _primesCount; ++j) { auto& rnsPrimeInverse = _rnsPrimesInverses[j]; @@ -384,7 +457,28 @@ namespace LinBox { rnsPrimeInverseForRnsPrimeH); } } - +#else + PAR_BLOCK{ + + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + int M = _primesCount; + //SYNCH_GROUP({ + FORBLOCK1D(iter, M, sp, + TASK(MODE(CONSTREFERENCE(digits) ),{ + for(auto j=iter.begin(); j!=iter.end(); ++j) { + auto& rnsPrimeInverse = _rnsPrimesInverses[j]; + auto stridePrimeInverse = rnsPrimeInverse._stride; + auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse]; + + for (auto i = 0u; i < _n; ++i) { + rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)], + rnsPrimeInverseForRnsPrimeH); + } + } + }) + ); + } +#endif rnsStride += _rnsR._stride; } // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); From 287fb66ff355beb292b1ac5c6511a1d9f9c00d79 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 12 Jul 2019 10:28:59 +0200 Subject: [PATCH 48/63] OK --- tests/test-solve-full.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-solve-full.C 
b/tests/test-solve-full.C index 862779717..3ebe444ae 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -310,7 +310,7 @@ int main(int argc, char** argv) // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // ----- Rational Dixon - ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // @fixme Dixon does not compile // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); From df893214ccbf8fac541f5af4cc31b5df8c52a7e6 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Thu, 25 Jul 2019 18:06:24 +0200 Subject: [PATCH 49/63] FIxed some wrong result in DixonRNS --- .../algorithms/multi-mod-lifting-container.h | 166 ++++++------------ .../rational-cra-builder-full-multip.h | 3 + linbox/solutions/hadamard-bound.h | 5 +- linbox/solutions/methods.h | 4 +- linbox/solutions/solve/solve-dixon-rns.h | 8 +- tests/test-solve-full.C | 16 +- 6 files changed, 76 insertions(+), 126 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 19c74dd43..0b39457d9 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -105,6 +105,11 @@ namespace LinBox { // This will contain the primes or our MultiMod basis _primesCount = m.primesCount; + if (_primesCount == -1u) { + PAR_BLOCK { _primesCount = 6 * NUM_THREADS; } + } + std::cout << _primesCount << std::endl; + _primes.resize(_primesCount); // Some preparation work @@ -174,50 +179,27 @@ namespace LinBox { for (auto& F : _fields) { _B.emplace_back(A, F); } -#if 0 -// @fixme To be replaced with Paladin 
-#pragma omp parallel for - for (auto j = 0u; j < _primesCount; ++j) { - int nullity = 0; //TODO: it may be necessary to replace nullity with a vector - auto& F = _fields[j]; - BlasMatrixDomain bmd(F); - bmd.invin(_B[j], nullity); - if (nullity > 0) {//TODO: it may be easier to move this condition check outside the parallel region => loop through the whole vector to add up all values as final value for the condition test which could be parallelized further more - // @fixme Should redraw another prime! - throw LinBoxError("Wrong prime, sorry."); - } - } -#else - PAR_BLOCK{ - std::vector vnullity;vnullity.reserve(_primesCount); - auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + PAR_BLOCK + { + std::vector nullities(_primesCount); + auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, + FFLAS::StrategyParameter::Threads); int M = _primesCount; - SYNCH_GROUP({ - FORBLOCK1D(iter, M, sp, - TASK(MODE(CONSTREFERENCE(vnullity) ), - for(auto j=iter.begin(); j!=iter.end(); ++j) - { - auto& F = _fields[j]; - BlasMatrixDomain bmd(F); - bmd.invin(_B[j], vnullity[j]); - } - ) - ) + FOR1D(j, M, sp, MODE(CONSTREFERENCE(nullities)), { + auto& F = _fields[j]; + BlasMatrixDomain bmd(F); + bmd.invin(_B[j], nullities[j]); }); - int nullity = 0; - for (size_t i=0; i<_primesCount; ++i){ - nullity += vnullity[i]; - } - if (nullity > 0) { - // @fixme Should redraw another prime! - throw LinBoxError("Wrong prime, sorry."); + for (auto nullity : nullities) { + if (nullity > 0) { + // @fixme Should redraw another prime! 
+ std::cout << "----------------------------- NULLITY" << std::endl; + throw LinBoxError("Wrong prime, sorry."); + } } - } -#endif - } // commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations"); @@ -335,45 +317,15 @@ namespace LinBox { BlasMatrixDomain IMD(_ring); // commentator().start("[MultiModLifting] c = A^{-1} r mod p"); -#if 0 -#pragma omp parallel for - for (auto j = 0u; j < _primesCount; ++j) { - auto pj = _primes[j]; - auto& FR = _FR[j]; - uint64_t upj = pj; - - // @note There is no VectorDomain::divmod yet. - // Euclidian division so that rj = pj Qj + Rj - uint64_t uR; - for (auto i = 0u; i < _n; ++i) { - Integer::divmod(_qMatrix.refEntry(i, j), uR, _rMatrix.getEntry(i, j), upj); - // @note No need to init, because we know that uR < pj, - // so that would do an extra check. - FR[i] = static_cast(uR); - } - - // digit = A^{-1} * R mod pj - auto& digit = digits[j]; - auto& B = _B[j]; - B.apply(digit, FR); - - // Store the very same result in an RNS system, - // but fact is all the primes of the RNS system are bigger - // than the modulus used to compute the digit, we just copy the result for - // everybody. 
- for (auto i = 0u; i < _n; ++i) { - setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); - setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); - } - } -#else - PAR_BLOCK{ - auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + PAR_BLOCK + { + auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, + FFLAS::StrategyParameter::Threads); int M = _primesCount; - //SYNCH_GROUP({ - FORBLOCK1D(iter, M, sp, - TASK(MODE(CONSTREFERENCE(digits) ),{ - for(auto j=iter.begin(); j!=iter.end(); ++j) { + SYNCH_GROUP({ + FORBLOCK1D( + iter, M, sp, TASK(MODE(CONSTREFERENCE(digits)), { + for (auto j = iter.begin(); j != iter.end(); ++j) { auto pj = _primes[j]; auto& FR = _FR[j]; uint64_t upj = pj; @@ -382,7 +334,8 @@ namespace LinBox { // Euclidian division so that rj = pj Qj + Rj uint64_t uR; for (auto i = 0u; i < _n; ++i) { - Integer::divmod(_qMatrix.refEntry(i, j), uR, _rMatrix.getEntry(i, j), upj); + Integer::divmod(_qMatrix.refEntry(i, j), uR, + _rMatrix.getEntry(i, j), upj); // @note No need to init, because we know that uR < pj, // so that would do an extra check. FR[i] = static_cast(uR); @@ -395,18 +348,16 @@ namespace LinBox { // Store the very same result in an RNS system, // but fact is all the primes of the RNS system are bigger - // than the modulus used to compute the digit, we just copy the result for - // everybody. + // than the modulus used to compute the digit, we just copy + // the result for everybody. for (auto i = 0u; i < _n; ++i) { setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); } } - }) - ) - //}); + })) + }); } -#endif // commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); // ----- Compute the next residues! 
@@ -445,40 +396,29 @@ namespace LinBox { auto rnsStride = 0u; for (auto h = 0u; h < _rnsPrimesCount; ++h) { auto& rnsF = _rnsSystem->_field_rns[h]; -#if 0 -#pragma omp parallel for - for (auto j = 0u; j < _primesCount; ++j) { - auto& rnsPrimeInverse = _rnsPrimesInverses[j]; - auto stridePrimeInverse = rnsPrimeInverse._stride; - auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse]; - - for (auto i = 0u; i < _n; ++i) { - rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)], - rnsPrimeInverseForRnsPrimeH); - } - } -#else - PAR_BLOCK{ - auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + PAR_BLOCK + { + auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, + FFLAS::StrategyParameter::Threads); int M = _primesCount; - //SYNCH_GROUP({ - FORBLOCK1D(iter, M, sp, - TASK(MODE(CONSTREFERENCE(digits) ),{ - for(auto j=iter.begin(); j!=iter.end(); ++j) { - auto& rnsPrimeInverse = _rnsPrimesInverses[j]; - auto stridePrimeInverse = rnsPrimeInverse._stride; - auto rnsPrimeInverseForRnsPrimeH = rnsPrimeInverse._ptr[h * stridePrimeInverse]; - - for (auto i = 0u; i < _n; ++i) { - rnsF.mulin(_rnsR._ptr[rnsStride + (i * _primesCount + j)], + SYNCH_GROUP({ + FORBLOCK1D(iter, M, sp, TASK(MODE(CONSTREFERENCE(digits)), { + for (auto j = iter.begin(); j != iter.end(); ++j) { + auto& rnsPrimeInverse = _rnsPrimesInverses[j]; + auto stridePrimeInverse = rnsPrimeInverse._stride; + auto rnsPrimeInverseForRnsPrimeH = + rnsPrimeInverse._ptr[h * stridePrimeInverse]; + + for (auto i = 0u; i < _n; ++i) { + rnsF.mulin( + _rnsR._ptr[rnsStride + (i * _primesCount + j)], rnsPrimeInverseForRnsPrimeH); - } - } - }) - ); + } + } + }))}); } -#endif + rnsStride += _rnsR._stride; } // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h index f5dc17f92..c1b03c06f 100644 --- 
a/linbox/algorithms/rational-cra-builder-full-multip.h +++ b/linbox/algorithms/rational-cra-builder-full-multip.h @@ -70,11 +70,14 @@ namespace LinBox { commentator().start("[RationalCRABuilderFullMultip] CRT Reconstruction"); Father_t::result(num, false); + std::cout << "num[0]: " << num[0] << std::endl; + std::cout << "numBound: " << numBound << std::endl; commentator().stop("[RationalCRABuilderFullMultip] CRT Reconstruction"); commentator().start("[RationalCRABuilderFullMultip] Rational Reconstruction"); den = 1; const auto& mod = Father_t::getModulus(); + std::cout << "mod: " << mod << std::endl; Integer nd; for (auto num_it = num.begin(); num_it != num.end(); ++num_it) { iterativeratrecon(*num_it, nd, den, mod, numBound); diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h index 00fe6e92e..ba00bc071 100644 --- a/linbox/solutions/hadamard-bound.h +++ b/linbox/solutions/hadamard-bound.h @@ -446,8 +446,9 @@ namespace LinBox { bNorm += 1; } - data.denBound = hadamardBound.bound; - data.numBound = hadamardBound.boundOverMinNorm * bNorm; + // @note RR expects the bounds to be strict, this is why we add a + 1 + data.denBound = hadamardBound.bound + 1; + data.numBound = hadamardBound.boundOverMinNorm * bNorm + 1; if (data.denBound == 0 || data.numBound == 0) { data.solutionLogBound = 0.0; } diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 5222050b6..63b7af584 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -234,7 +234,9 @@ namespace LinBox { //! that the provided denominator is minimal. // ----- For DixonRNS method. - uint32_t primesCount = 8u; //!< How many primes to use lifting will be done over p = p1p2...pl. + //! How many primes to use lifting will be done over p = p1p2...pl. + //! -1 means automatically set to a heuristic value. + uint32_t primesCount = -1u; RnsFgemmType rnsFgemmType = RnsFgemmType::ParallelRnsOnly; // ----- For random-based systems. 
diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 388fb5e78..d153765c2 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -89,6 +89,9 @@ namespace LinBox { _lc.ring().mulin(radices[j], _lc.prime(j)); } } + for (auto j = 0u; j < _lc.primesCount(); ++j) { + std::cout << "radices[" << j << "] " << radices[j] << std::endl; + } commentator().stop("[MultiModLifting] Lifting"); // CRT reconstruction from paddicAccumulations @@ -109,8 +112,9 @@ namespace LinBox { commentator().stop("[MultiModLifting] CRT Reconstruction Progress"); // Rational reconstruction - // @note RR expects the bounds to be strict, this is why we add a + 1 - craBuilder.result(xNum, xDen, _lc.numBound() + 1); + craBuilder.result(xNum, xDen, _lc.numBound()); + std::cout << "xNum[0] " << xNum[0] << std::endl; + std::cout << "xDen " << xDen << std::endl; return true; } diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 3ebe444ae..af17eef6c 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -97,14 +97,14 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul { std::cout << "Checking result..." << std::endl; - // ResultVector RAx(RA.field(), Rb.size()); - // RA.apply(RAx, x); - - // VectorDomain VD(RA.field()); - // if (!VD.areEqual(RAx, Rb)) { - // print_error(x, A, b, "Ax != b"); - // return false; - // } + ResultVector RAx(RA.field(), Rb.size()); + RA.apply(RAx, x); + + VectorDomain VD(RA.field()); + if (!VD.areEqual(RAx, Rb)) { + print_error(x, A, b, "Ax != b"); + return false; + } std::cout << "Result OK !" << std::endl; From e840f25193aaa4da4a45508968b13259ea424c0c Mon Sep 17 00:00:00 2001 From: "A. 
Breust" Date: Mon, 5 Aug 2019 10:45:25 +0200 Subject: [PATCH 50/63] CLean up and last fixes --- .../algorithms/multi-mod-lifting-container.h | 3 +-- .../rational-cra-builder-full-multip.h | 3 --- linbox/solutions/solve/solve-dixon-rns.h | 18 ++++++------------ tests/test-solve-full.C | 14 +++++++++----- 4 files changed, 16 insertions(+), 22 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 0b39457d9..04a60d0e3 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -108,7 +108,6 @@ namespace LinBox { if (_primesCount == -1u) { PAR_BLOCK { _primesCount = 6 * NUM_THREADS; } } - std::cout << _primesCount << std::endl; _primes.resize(_primesCount); @@ -248,7 +247,7 @@ namespace LinBox { // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...) _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct); - std::cout << "_iterationsCount " << _iterationsCount << std::endl; + // std::cout << "_iterationsCount " << _iterationsCount << std::endl; } //----- Locals setup diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h index c1b03c06f..f5dc17f92 100644 --- a/linbox/algorithms/rational-cra-builder-full-multip.h +++ b/linbox/algorithms/rational-cra-builder-full-multip.h @@ -70,14 +70,11 @@ namespace LinBox { commentator().start("[RationalCRABuilderFullMultip] CRT Reconstruction"); Father_t::result(num, false); - std::cout << "num[0]: " << num[0] << std::endl; - std::cout << "numBound: " << numBound << std::endl; commentator().stop("[RationalCRABuilderFullMultip] CRT Reconstruction"); commentator().start("[RationalCRABuilderFullMultip] Rational Reconstruction"); den = 1; const auto& mod = Father_t::getModulus(); - std::cout << "mod: " << mod << std::endl; Integer nd; for (auto num_it = num.begin(); num_it != num.end(); ++num_it) { iterativeratrecon(*num_it, nd, den, 
mod, numBound); diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index d153765c2..5d6cd798f 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -80,7 +80,7 @@ namespace LinBox { for (auto i = 0u; i < _lc.length(); ++i) { _lc.next(digits); - #pragma omp parallel for +#pragma omp parallel for for (auto j = 0u; j < _lc.primesCount(); ++j) { // @fixme @cpernet digits being a field vector, this will implicitly cast // each of its elements to a Integer, is there something better? @@ -89,16 +89,12 @@ namespace LinBox { _lc.ring().mulin(radices[j], _lc.prime(j)); } } - for (auto j = 0u; j < _lc.primesCount(); ++j) { - std::cout << "radices[" << j << "] " << radices[j] << std::endl; - } commentator().stop("[MultiModLifting] Lifting"); // CRT reconstruction from paddicAccumulations commentator().start("[MultiModLifting] CRT Reconstruction Progress"); using CRAField = Givaro::Modular; - RationalCRABuilderFullMultip craBuilder(_lc.log2Bound() - / 1.4427); // 1.4427 = 1 / log(2) + RationalCRABuilderFullMultip craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2) { CRAField field(radices[0]); @@ -113,8 +109,6 @@ namespace LinBox { // Rational reconstruction craBuilder.result(xNum, xDen, _lc.numBound()); - std::cout << "xNum[0] " << xNum[0] << std::endl; - std::cout << "xDen " << xDen << std::endl; return true; } @@ -138,8 +132,8 @@ namespace LinBox { * Dense solving. */ template - void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, - const Vector& b, const Method::DixonRNS& m) + void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, + const Method::DixonRNS& m) { // @fixme We should use some code from DixonSolver... // But that's hard so we just assume that A is square and invertible. @@ -166,8 +160,8 @@ namespace LinBox { * \brief Solve specialisation for DixonRNS on dense matrices. 
*/ template - void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, - const Vector& b, const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) + void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, + const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) { commentator().start("solve.dixon-rns.integer.dense"); diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index af17eef6c..d9f825b26 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -93,9 +93,11 @@ namespace { } template -bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, ResultVector& Rb) +bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, ResultVector& Rb, bool verbose) { - std::cout << "Checking result..." << std::endl; + if (verbose) { + std::cout << "Checking result..." << std::endl; + } ResultVector RAx(RA.field(), Rb.size()); RA.apply(RAx, x); @@ -106,7 +108,9 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul return false; } - std::cout << "Result OK !" << std::endl; + if (verbose) { + std::cout << "Result OK !" << std::endl; + } return true; } @@ -145,11 +149,11 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R bool ok = true; try { solve(x, A, b, method); - ok = check_result(x, A, b, RA, Rb); + ok = check_result(x, A, b, RA, Rb, verbose); // if (ok) { // solveInPlace(x, A, b, method); - // ok = check_result(x, A, b, RA, Rb); + // ok = check_result(x, A, b, RA, Rb, verbose); // } } catch (...) { print_error(x, A, b, "throws error"); From afc509d5ae10a9bf6f11400dc90e7c7523323168 Mon Sep 17 00:00:00 2001 From: "A. Breust" Date: Wed, 14 Aug 2019 10:00:34 +0200 Subject: [PATCH 51/63] Using FOR1D directly. Still wrong results! 
--- .../dixon-solver/dixon-solver-dense.inl | 26 +++- linbox/algorithms/lifting-container.h | 2 +- .../algorithms/multi-mod-lifting-container.h | 115 +++++++++-------- .../multi-mod-rational-reconstruction.h | 117 ++++++++++++++++++ linbox/solutions/solve/solve-dixon-rns.h | 98 +-------------- tests/test-solve-full.C | 2 +- 6 files changed, 201 insertions(+), 159 deletions(-) create mode 100644 linbox/algorithms/multi-mod-rational-reconstruction.h diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl index ed0978683..c6a2a81b3 100644 --- a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl +++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl @@ -24,8 +24,10 @@ #include "linbox/util/debug.h" #include "linbox/algorithms/lifting-container.h" +#include "linbox/algorithms/multi-mod-lifting-container.h" #include "linbox/algorithms/matrix-inverse.h" #include "linbox/algorithms/rational-reconstruction.h" +#include "linbox/algorithms/multi-mod-rational-reconstruction.h" namespace LinBox { @@ -128,15 +130,27 @@ namespace LinBox { } } while (notfr); - typedef DixonLiftingContainer> LiftingContainer; - commentator().start("CLASSIC DIXON LIFTING"); - LiftingContainer lc(_ring, *F, A, *FMP, b, _prime); - RationalReconstruction re(lc); - if (!re.getRational(num, den, 0)) { + // commentator().start("CLASSIC DIXON LIFTING"); + // typedef DixonLiftingContainer> LiftingContainer; + // LiftingContainer lc(_ring, *F, A, *FMP, b, _prime); + // RationalReconstruction re(lc); + // if (!re.getRational(num, den, 0)) { + // delete FMP; + // return SS_FAILED; + // } + // commentator().stop("CLASSIC DIXON LIFTING"); + + commentator().start("MULTI MOD DIXON LIFTING"); + using LiftingContainer = MultiModLiftingContainer; + Method::DixonRNS m; // @fixme Get from? 
+ LiftingContainer lc(_ring, _genprime, A, b, m); + MultiModRationalReconstruction re(lc); + if (!re.getRational(num, den)) { delete FMP; return SS_FAILED; } - commentator().stop("CLASSIC DIXON LIFTING"); + commentator().stop("MULTI MOD DIXON LIFTING"); + #ifdef RSTIMING ttNonsingularSolve.update(re, lc); #endif diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h index 9664f75f4..2a86bafa6 100644 --- a/linbox/algorithms/lifting-container.h +++ b/linbox/algorithms/lifting-container.h @@ -162,8 +162,8 @@ namespace LinBox // _length = logp(L, Prime) = log2(L) * ln(2) / ln(Prime) double primeLog2 = Givaro::logtwo(Prime); _length = std::ceil(hb.solutionLogBound / primeLog2); // round up instead of down - std::cout << "_length "<< _length << std::endl; #ifdef DEBUG_LC + std::cout << "_length "<< _length << std::endl; std::cout<<" norms computed, p = "<<_p<<"\n"; std::cout<<" N = "< primes; for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) { auto p = *primeGenerator; @@ -131,14 +134,20 @@ namespace LinBox { // @note std::lower_bound finds the iterator where to put p in the sorted // container. The name of the routine might be strange, but, hey, that's not my - // fault. + // fault. We check if the prime is already listed. auto lb = std::lower_bound(primes.begin(), primes.end(), p); if (lb != primes.end() && *lb == p) { + if (trialsLeft == 0) { + throw LinboxError("[MultiModLiftingContainer] Not enough primes."); + } + --j; + --trialsLeft; continue; } // Inserting the primes at the right place to keep the array sorted + std::cout << "Adding " << Integer(p) << std::endl; primes.insert(lb, p); } @@ -165,6 +174,7 @@ namespace LinBox { // Setting fields up for (auto& pj : _primes) { _fields.emplace_back(pj); + std::cout << Integer(pj) << std::endl; } // Initialize all inverses @@ -247,7 +257,7 @@ namespace LinBox { // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...) 
_iterationsCount = std::ceil(_log2Bound / log2PrimesProduct); - // std::cout << "_iterationsCount " << _iterationsCount << std::endl; + std::cout << "_iterationsCount " << _iterationsCount << std::endl; } //----- Locals setup @@ -321,40 +331,35 @@ namespace LinBox { auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Threads); int M = _primesCount; - SYNCH_GROUP({ - FORBLOCK1D( - iter, M, sp, TASK(MODE(CONSTREFERENCE(digits)), { - for (auto j = iter.begin(); j != iter.end(); ++j) { - auto pj = _primes[j]; - auto& FR = _FR[j]; - uint64_t upj = pj; - - // @note There is no VectorDomain::divmod yet. - // Euclidian division so that rj = pj Qj + Rj - uint64_t uR; - for (auto i = 0u; i < _n; ++i) { - Integer::divmod(_qMatrix.refEntry(i, j), uR, - _rMatrix.getEntry(i, j), upj); - // @note No need to init, because we know that uR < pj, - // so that would do an extra check. - FR[i] = static_cast(uR); - } - - // digit = A^{-1} * R mod pj - auto& digit = digits[j]; - auto& B = _B[j]; - B.apply(digit, FR); - - // Store the very same result in an RNS system, - // but fact is all the primes of the RNS system are bigger - // than the modulus used to compute the digit, we just copy - // the result for everybody. - for (auto i = 0u; i < _n; ++i) { - setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); - setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); - } - } - })) + FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), { + auto pj = _primes[j]; + auto& FR = _FR[j]; + uint64_t upj = pj; + + // @note There is no VectorDomain::divmod yet. + // Euclidian division so that rj = pj Qj + Rj + uint64_t uR; + for (auto i = 0u; i < _n; ++i) { + Integer::divmod(_qMatrix.refEntry(i, j), uR, + _rMatrix.getEntry(i, j), upj); + // @note No need to init, because we know that uR < pj, + // so that would do an extra check. 
+ FR[i] = static_cast(uR); + } + + // digit = A^{-1} * R mod pj + auto& digit = digits[j]; + auto& B = _B[j]; + B.apply(digit, FR); + + // Store the very same result in an RNS system, + // but fact is all the primes of the RNS system are bigger + // than the modulus used to compute the digit, we just copy + // the result for everybody. + for (auto i = 0u; i < _n; ++i) { + setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); + setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); + } }); } // commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); @@ -368,8 +373,6 @@ namespace LinBox { using FGEMMParallel = FFLAS::ParSeqHelper::Parallel; - // @fixme @cpernet @jgdumas Should we move that PAR_BLOCK outside of the function - // and let the user do it? // commentator().start("[MultiModLifting] FGEMM R <= R - Ac"); // Firstly compute R <= R - A c as a fgemm within the RNS domain. if (_method.rnsFgemmType == RnsFgemmType::BothSequential) { @@ -384,7 +387,6 @@ namespace LinBox { else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) { rns_fgemm(); } - // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); // We divide each residues by the according pj, which is done by multiplying. 
@@ -401,21 +403,18 @@ namespace LinBox { auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Threads); int M = _primesCount; - SYNCH_GROUP({ - FORBLOCK1D(iter, M, sp, TASK(MODE(CONSTREFERENCE(digits)), { - for (auto j = iter.begin(); j != iter.end(); ++j) { - auto& rnsPrimeInverse = _rnsPrimesInverses[j]; - auto stridePrimeInverse = rnsPrimeInverse._stride; - auto rnsPrimeInverseForRnsPrimeH = - rnsPrimeInverse._ptr[h * stridePrimeInverse]; - - for (auto i = 0u; i < _n; ++i) { - rnsF.mulin( - _rnsR._ptr[rnsStride + (i * _primesCount + j)], - rnsPrimeInverseForRnsPrimeH); - } - } - }))}); + FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), { + auto& rnsPrimeInverse = _rnsPrimesInverses[j]; + auto stridePrimeInverse = rnsPrimeInverse._stride; + auto rnsPrimeInverseForRnsPrimeH = + rnsPrimeInverse._ptr[h * stridePrimeInverse]; + + for (auto i = 0u; i < _n; ++i) { + rnsF.mulin( + _rnsR._ptr[rnsStride + (i * _primesCount + j)], + rnsPrimeInverseForRnsPrimeH); + } + }); } rnsStride += _rnsR._stride; diff --git a/linbox/algorithms/multi-mod-rational-reconstruction.h b/linbox/algorithms/multi-mod-rational-reconstruction.h new file mode 100644 index 000000000..6fae6a357 --- /dev/null +++ b/linbox/algorithms/multi-mod-rational-reconstruction.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2019 LinBox Team + * + * ========LICENCE======== + * This file is part of the library LinBox. + * + * LinBox is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * ========LICENCE======== + */ + +#pragma once + +namespace LinBox { + /** + * From a MultiModLiftingContainer, will build + * the solution on each prime, then will do a CRT reconstruction, + * before reconstructing the rational. + * + * This does not do early termination. + */ + template + class MultiModRationalReconstruction { + using Ring = typename LiftingContainer::Ring; + using IElement = typename LiftingContainer::IElement; + using IVector = typename LiftingContainer::IVector; + using FElement = typename LiftingContainer::FElement; + using FVector = typename LiftingContainer::FVector; + + public: + MultiModRationalReconstruction(LiftingContainer& lc) + : _lc(lc) + { + } + + bool getRational(IVector& xNum, IElement& xDen) + { + // Early out when the numerator is bounded by zero. + if (_lc.numBound() == 0) { + for (auto i = 0u; i < _lc.length(); ++i) { + _lc.ring().assign(xNum[i], _lc.ring().zero); + } + _lc.ring().assign(xDen, _lc.ring().one); + return true; + } + + commentator().start("[MultiModLifting] Lifting"); + + // Temporary structure to store a ci for each pj + std::vector digits; + digits.reserve(_lc.primesCount()); + for (auto& F : _lc.primesFields()) { + digits.emplace_back(F, _lc.size()); + } + + // The pj^i for each pj + std::vector radices(_lc.primesCount(), 1); + + // Stores each c0 + c1 pj + ... + ck pj^k for each pj + std::vector padicAccumulations(_lc.primesCount(), _lc.ring()); + for (auto j = 0u; j < _lc.primesCount(); ++j) { + padicAccumulations[j].resize(_lc.size()); + } + + // @fixme Better use PolEval (or will it cause memory explosion?) 
+ VectorDomain IVD(_lc.ring()); + for (auto i = 0u; i < _lc.length(); ++i) { + _lc.next(digits); + + #pragma omp parallel for + for (auto j = 0u; j < _lc.primesCount(); ++j) { + // @fixme @cpernet digits being a field vector, this will implicitly cast + // each of its elements to a Integer, is there something better? + // Or else, we just need an overload of Givaro::ZRing().axpyin() with a double as last parameter + IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci + _lc.ring().mulin(radices[j], _lc.prime(j)); + } + } + commentator().stop("[MultiModLifting] Lifting"); + + // CRT reconstruction from paddicAccumulations + commentator().start("[MultiModLifting] CRT Reconstruction Progress"); + using CRAField = Givaro::Modular; + RationalCRABuilderFullMultip craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2) + + { + CRAField field(radices[0]); + craBuilder.initialize(field, padicAccumulations[0]); + } + + for (auto j = 1u; j < _lc.primesCount(); ++j) { + CRAField field(radices[j]); + craBuilder.progress(field, padicAccumulations[j]); + } + commentator().stop("[MultiModLifting] CRT Reconstruction Progress"); + + // Rational reconstruction + craBuilder.result(xNum, xDen, _lc.numBound()); + + return true; + } + + private: + LiftingContainer& _lc; + }; +} diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h index 5d6cd798f..89dfb5cbd 100644 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ b/linbox/solutions/solve/solve-dixon-rns.h @@ -23,101 +23,9 @@ #pragma once #include +#include namespace LinBox { - /** - * From a MultiModLiftingContainer, will build - * the solution on each prime, then will do a CRT reconstruction, - * before reconstructing the rational. - * - * This does not do early termination. 
- */ - template - class MultiModRationalReconstruction { - using Ring = typename LiftingContainer::Ring; - using IElement = typename LiftingContainer::IElement; - using IVector = typename LiftingContainer::IVector; - using FElement = typename LiftingContainer::FElement; - using FVector = typename LiftingContainer::FVector; - - public: - MultiModRationalReconstruction(LiftingContainer& lc) - : _lc(lc) - { - } - - bool getRational(IVector& xNum, IElement& xDen) - { - // Early out when the numerator is bounded by zero. - if (_lc.numBound() == 0) { - for (auto i = 0u; i < _lc.length(); ++i) { - _lc.ring().assign(xNum[i], _lc.ring().zero); - } - _lc.ring().assign(xDen, _lc.ring().one); - return true; - } - - commentator().start("[MultiModLifting] Lifting"); - - // Temporary structure to store a ci for each pj - std::vector digits; - digits.reserve(_lc.primesCount()); - for (auto& F : _lc.primesFields()) { - digits.emplace_back(F, _lc.size()); - } - - // The pj^i for each pj - std::vector radices(_lc.primesCount(), 1); - - // Stores each c0 + c1 pj + ... + ck pj^k for each pj - std::vector padicAccumulations(_lc.primesCount(), _lc.ring()); - for (auto j = 0u; j < _lc.primesCount(); ++j) { - padicAccumulations[j].resize(_lc.size()); - } - - // @fixme Better use PolEval (or will it cause memory explosion?) - VectorDomain IVD(_lc.ring()); - for (auto i = 0u; i < _lc.length(); ++i) { - _lc.next(digits); - -#pragma omp parallel for - for (auto j = 0u; j < _lc.primesCount(); ++j) { - // @fixme @cpernet digits being a field vector, this will implicitly cast - // each of its elements to a Integer, is there something better? 
- // Or else, we just need an overload of Givaro::ZRing().axpyin() with a double as last parameter - IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci - _lc.ring().mulin(radices[j], _lc.prime(j)); - } - } - commentator().stop("[MultiModLifting] Lifting"); - - // CRT reconstruction from paddicAccumulations - commentator().start("[MultiModLifting] CRT Reconstruction Progress"); - using CRAField = Givaro::Modular; - RationalCRABuilderFullMultip craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2) - - { - CRAField field(radices[0]); - craBuilder.initialize(field, padicAccumulations[0]); - } - - for (auto j = 1u; j < _lc.primesCount(); ++j) { - CRAField field(radices[j]); - craBuilder.progress(field, padicAccumulations[j]); - } - commentator().stop("[MultiModLifting] CRT Reconstruction Progress"); - - // Rational reconstruction - craBuilder.result(xNum, xDen, _lc.numBound()); - - return true; - } - - private: - LiftingContainer& _lc; - }; - - // @fixme Move that to a file - and make it be a RationalSolver template class DixonRNSSolver { public: @@ -172,9 +80,13 @@ namespace LinBox { using PrimeGenerator = PrimeIterator; PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); + // @fixme TO BE REMOVED DixonRNSSolver solver(A.field(), primeGenerator); solver.solve(xNum, xDen, A, b, m); + DixonSolver classicSolver(A.field(), primeGenerator); + classicSolver.solveNonsingular(xNum, xDen, A, b, false, m.trialsBeforeFailure); + commentator().stop("solve.dixon-rns.integer.dense"); // @fixme Implement something like that diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index d9f825b26..f12668faa 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -329,7 +329,7 @@ int main(int argc, char** argv) // seed, verbose); // ----- Rational DixonRNS - ok = ok && test_dense_solve(Method::DixonRNS(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + ok = ok && 
test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // ----- Modular Auto // ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); From 0a68ceb0a14a0232307eda70a742f9f98de78997 Mon Sep 17 00:00:00 2001 From: "A. Breust" Date: Fri, 16 Aug 2019 13:53:06 +0200 Subject: [PATCH 52/63] 'Fixed' threading bug --- .../algorithms/multi-mod-lifting-container.h | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index a7b1b12fc..101b2881f 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -99,9 +99,9 @@ namespace LinBox { { linbox_check(A.rowdim() == A.coldim()); - std::cout << "----------" << std::endl; - A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl; - std::cout << "b: " << b << std::endl; + // std::cout << "----------" << std::endl; + // A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl; + // std::cout << "b: " << b << std::endl; // This will contain the primes or our MultiMod basis _primesCount = m.primesCount; @@ -124,7 +124,7 @@ namespace LinBox { double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n)); _rnsPrimesCount = std::ceil(rnsBasisBitSize / (primeGenerator.getBits() - 1)); _rnsPrimes.resize(_rnsPrimesCount); - std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl; + // std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl; auto trialsLeft = m.trialsBeforeFailure; std::vector primes; @@ -147,16 +147,23 @@ namespace LinBox { } // Inserting the primes at the right place to keep the array sorted - std::cout << "Adding " << Integer(p) << std::endl; primes.insert(lb, p); } // We take the smallest primes for our MultiMod basis std::copy(primes.begin(), primes.begin() + _primesCount, _primes.begin()); + // for (auto i = 
0u; i < _primes.size(); ++i) { + // std::cout << "p" << i << " = " << Integer(_primes[i]) << std::endl; + // } + // And the others for our RNS basis std::copy(primes.begin() + _primesCount, primes.end(), _rnsPrimes.begin()); + // for (auto i = 0u; i < _rnsPrimes.size(); ++i) { + // std::cout << "q" << i << " = " << Integer(_rnsPrimes[i]) << std::endl; + // } + // We check that we really need all the primes within the RNS basis, // as the first count was just an upper estimation. double bitSize = 0.0; @@ -174,7 +181,6 @@ namespace LinBox { // Setting fields up for (auto& pj : _primes) { _fields.emplace_back(pj); - std::cout << Integer(pj) << std::endl; } // Initialize all inverses @@ -257,7 +263,7 @@ namespace LinBox { // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...) _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct); - std::cout << "_iterationsCount " << _iterationsCount << std::endl; + // std::cout << "_iterationsCount " << _iterationsCount << std::endl; } //----- Locals setup @@ -325,10 +331,13 @@ namespace LinBox { VectorDomain IVD(_ring); BlasMatrixDomain IMD(_ring); -// commentator().start("[MultiModLifting] c = A^{-1} r mod p"); + // commentator().start("[MultiModLifting] c = A^{-1} r mod p"); PAR_BLOCK { - auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, + // @fixme @zhuh Can't get that working with NUM_THREADS, + // any idea what makes it wrong? 
+ // ./test-solve-full -n 1 -m 1 -b 50 -v -l + auto sp = SPLITTER(1 /* NUM_THREADS */, FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Threads); int M = _primesCount; FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), { @@ -403,7 +412,7 @@ namespace LinBox { auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Threads); int M = _primesCount; - FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), { + FOR1D(j, M, sp, { auto& rnsPrimeInverse = _rnsPrimesInverses[j]; auto stridePrimeInverse = rnsPrimeInverse._stride; auto rnsPrimeInverseForRnsPrimeH = From 2f70e969fabaf1187488b01f151d7d3f20521e26 Mon Sep 17 00:00:00 2001 From: "A. Breust" Date: Fri, 16 Aug 2019 15:15:06 +0200 Subject: [PATCH 53/63] Merged DixonRNS within Dixon --- benchmarks/benchmark-dense-solve.C | 14 ++- .../dixon-solver/dixon-solver-dense.h | 4 +- .../dixon-solver/dixon-solver-dense.inl | 88 +++++++++-------- .../algorithms/multi-mod-lifting-container.h | 6 +- linbox/solutions/methods.h | 14 ++- linbox/solutions/solve.h | 6 -- linbox/solutions/solve/solve-dixon-rns.h | 99 ------------------- linbox/solutions/solve/solve-dixon.h | 2 +- tests/test-solve-full.C | 9 +- 9 files changed, 75 insertions(+), 167 deletions(-) delete mode 100644 linbox/solutions/solve/solve-dixon-rns.h diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 63faf090b..19fb5505d 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -54,7 +54,7 @@ namespace { int n = 500; int bits = 10; int seed = -1; - int primesCount = 8; + int primesCount = -1; std::string dispatchString = "Auto"; std::string methodString = "Auto"; std::string rnsFgemmString = "ParallelRnsOnly"; @@ -117,7 +117,6 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met else if (args.methodString == "DenseElimination") solve(X, A, B, Method::DenseElimination(method)); else if (args.methodString == "SparseElimination") solve(X, A, B, 
Method::SparseElimination(method)); else if (args.methodString == "Dixon") solve(X, A, B, Method::Dixon(method)); - else if (args.methodString == "DixonRNS") solve(X, A, B, Method::DixonRNS(method)); else if (args.methodString == "CRA") solve(X, A, B, Method::CRAAuto(method)); else if (args.methodString == "SymbolicNumericOverlap") solve(X, A, B, Method::SymbolicNumericOverlap(method)); else if (args.methodString == "SymbolicNumericNorm") solve(X, A, B, Method::SymbolicNumericNorm(method)); @@ -149,11 +148,11 @@ int main(int argc, char** argv) {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed}, {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString}, - {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &args.primesCount}, + {'p', "-p", "Enable multi-modular method, and tells how many primes to use.", TYPE_INT, &args.primesCount}, {'t', "-t", "Number of threads.", TYPE_INT, &numThreads }, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " - "Dixon, DixonRNS, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, " + "Dixon, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, " "Blackbox, Wiedemann, Lanczos).", TYPE_STR, &args.methodString}, END_OF_ARGUMENTS}; @@ -176,7 +175,12 @@ int main(int argc, char** argv) MethodBase method; method.pCommunicator = &communicator; - method.primesCount = args.primesCount; + if (args.primesCount > 0) { + method.multiModularLifting = true; + method.primesCount = args.primesCount; + } else { + method.multiModularLifting = false; + } if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; 
diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.h b/linbox/algorithms/dixon-solver/dixon-solver-dense.h index 0d378e69f..6f0357403 100644 --- a/linbox/algorithms/dixon-solver/dixon-solver-dense.h +++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.h @@ -89,6 +89,7 @@ namespace LinBox { mutable Prime _prime; Ring _ring; Field _field; + Method::Dixon _method; BlasMatrixDomain _bmdf; @@ -113,10 +114,11 @@ namespace LinBox { * @param r a Ring, set by default * @param rp a RandomPrime generator, set by default */ - DixonSolver(const Ring& r = Ring(), const RandomPrime& rp = RandomPrime()) + DixonSolver(const Ring& r = Ring(), const RandomPrime& rp = RandomPrime(), const Method::Dixon& method = Method::Dixon()) : lastCertificate(r, 0) , _genprime(rp) , _ring(r) + , _method(method) { _genprime.setBits(FieldTraits::bestBitSize()); _prime = *_genprime; diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl index c6a2a81b3..4935548f2 100644 --- a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl +++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl @@ -130,26 +130,23 @@ namespace LinBox { } } while (notfr); - // commentator().start("CLASSIC DIXON LIFTING"); - // typedef DixonLiftingContainer> LiftingContainer; - // LiftingContainer lc(_ring, *F, A, *FMP, b, _prime); - // RationalReconstruction re(lc); - // if (!re.getRational(num, den, 0)) { - // delete FMP; - // return SS_FAILED; - // } - // commentator().stop("CLASSIC DIXON LIFTING"); - - commentator().start("MULTI MOD DIXON LIFTING"); - using LiftingContainer = MultiModLiftingContainer; - Method::DixonRNS m; // @fixme Get from? 
- LiftingContainer lc(_ring, _genprime, A, b, m); - MultiModRationalReconstruction re(lc); - if (!re.getRational(num, den)) { - delete FMP; - return SS_FAILED; + if (_method.multiModularLifting) { + using LiftingContainer = MultiModLiftingContainer; + LiftingContainer lc(_ring, _genprime, A, b, _method); + MultiModRationalReconstruction re(lc); + if (!re.getRational(num, den)) { + delete FMP; + return SS_FAILED; + } + } else { + using LiftingContainer = DixonLiftingContainer>; + LiftingContainer lc(_ring, *F, A, *FMP, b, _prime); + RationalReconstruction re(lc); + if (!re.getRational(num, den, 0)) { + delete FMP; + return SS_FAILED; + } } - commentator().stop("MULTI MOD DIXON LIFTING"); #ifdef RSTIMING ttNonsingularSolve.update(re, lc); @@ -287,8 +284,6 @@ namespace LinBox { SolverReturnStatus DixonSolver::solveApparentlyInconsistent( const BlasMatrix& A, TAS& tas, BlasMatrix* Atp_minor_inv, size_t rank, const MethodBase& method) { - using LiftingContainer = DixonLiftingContainer, BlasMatrix>; - if (!method.certifyInconsistency) return SS_INCONSISTENT; // @fixme Put these as class members! 
@@ -311,15 +306,24 @@ namespace LinBox { ttCheckConsistency += tCheckConsistency; #endif - LiftingContainer lc(_ring, _field, At_minor, *Atp_minor_inv, zt, _prime); - RationalReconstruction re(lc); - BlasVector shortNum(A.field(), rank); Integer shortDen; - // Dirty, but should not be called under normal circumstances - if (!re.getRational(shortNum, shortDen, 0)) { - return SS_FAILED; + if (_method.multiModularLifting) { + using LiftingContainer = MultiModLiftingContainer; + LiftingContainer lc(_ring, _genprime, At_minor, zt, _method); + MultiModRationalReconstruction re(lc); + if (!re.getRational(shortNum, shortDen)) { + return SS_FAILED; + } + } + else { + using LiftingContainer = DixonLiftingContainer, BlasMatrix>; + LiftingContainer lc(_ring, _field, At_minor, *Atp_minor_inv, zt, _prime); + RationalReconstruction re(lc); + if (!re.getRational(shortNum, shortDen, 0)) { + return SS_FAILED; + } } #ifdef RSTIMING @@ -597,8 +601,6 @@ namespace LinBox { SolverReturnStatus DixonSolver::monolithicSolve( Vector1& num, Integer& den, const IMatrix& A, const Vector2& b, Method::Dixon method) { - using LiftingContainer = DixonLiftingContainer, BlasMatrix>; - if (method.certifyMinimalDenominator && !method.certifyInconsistency) { method.certifyInconsistency = true; std::cerr << "WARNING: forcing certifyInconsistency due to certifyMinimalDenominator" << std::endl; @@ -716,21 +718,27 @@ namespace LinBox { BMDI.mulin_right(tas.Q, newb); newb.resize(rank); - // ----- Do lifting on sub matrix + // ----- Do lifting on sub matrix and reconstruct BlasMatrix BBA_minor(A_minor); - commentator().start("CLASSIC DIXON LIFTING"); - LiftingContainer lc(_ring, _field, BBA_minor, *Ap_minor_inv, newb, _prime); - - // ----- Reconstruct rational - - RationalReconstruction re(lc); VectorFraction resultVF(_ring, rank); - if (!re.getRational(resultVF.numer, resultVF.denom, 0)) { - // dirty, but should not be called - return SS_FAILED; + + if (_method.multiModularLifting) { + using LiftingContainer 
= MultiModLiftingContainer; + LiftingContainer lc(_ring, _genprime, BBA_minor, newb, _method); + MultiModRationalReconstruction re(lc); + if (!re.getRational(resultVF.numer, resultVF.denom)) { + return SS_FAILED; + } + } + else { + using LiftingContainer = DixonLiftingContainer, BlasMatrix>; + LiftingContainer lc(_ring, _field, BBA_minor, *Ap_minor_inv, newb, _prime); + RationalReconstruction re(lc); + if (!re.getRational(resultVF.numer, resultVF.denom, 0)) { + return SS_FAILED; + } } - commentator().stop("CLASSIC DIXON LIFTING"); #ifdef RSTIMING ttSystemSolve.update(re, lc); diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 101b2881f..71e0a0008 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -57,7 +57,7 @@ namespace LinBox { * (iii) y = CRT_Reconstruct(y1, ..., yl) * (iv) x = Rational_Reconstruct(y) * - * One can configure how many primes are used with `Method::DixonRNS.primeBaseLength`. + * One can configure how many primes are used with `Method::Dixon.primesCount`. * According to the paper, a value of lp = 2 (ln(n) + log2(||A||)) or without the factor 2 * can be used, but it depends on the problem, really. */ @@ -88,7 +88,7 @@ namespace LinBox { // @fixme Split to inline file MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, - const IVector& b, const Method::DixonRNS& m) + const IVector& b, const Method::Dixon& m) : _ring(ring) , _method(m) , _A(A) @@ -473,7 +473,7 @@ namespace LinBox { public: // @fixme BACK TO PRIVATE! const Ring& _ring; - Method::DixonRNS _method; // A copy of the user-provided method. + Method::Dixon _method; // A copy of the user-provided method. 
// The problem: A^{-1} * b const IMatrix& _A; diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 63b7af584..063a44dbf 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -232,9 +232,12 @@ namespace LinBox { SingularSolutionType singularSolutionType = SingularSolutionType::Random; bool certifyMinimalDenominator = false; //!< Whether the solver should try to find a certificate //! that the provided denominator is minimal. - - // ----- For DixonRNS method. - //! How many primes to use lifting will be done over p = p1p2...pl. + // @fixme Make an auto switch for multi modular lifting, based on matrix size. + // Whether to use the multi-modular Dixon lifter. + // (A BLAS Based C Library for Exact Linear Algebra on Integer Matrices - Chen, Storjohann ISSAC 2005) + // https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf + bool multiModularLifting = true; + //! How many primes to use, multi mod lifting will be done over p = p1p2...pl. + //! -1 means automatically set to a heuristic value. uint32_t primesCount = -1u; RnsFgemmType rnsFgemmType = RnsFgemmType::ParallelRnsOnly; @@ -284,11 +287,6 @@ namespace LinBox { // (Numerische Mathematik - Dixon 1982) DEFINE_METHOD(Dixon, RingCategories::IntegerTag); - // Method::DixonRNS uses RNS features over Dixon's p-adic lifting. - // (A BLAS Based C Library for Exact Linear Algebra on Integer Matrices - Chen, Storjohann ISSAC 2005) - // https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf - DEFINE_METHOD(DixonRNS, RingCategories::IntegerTag); - // Method::ChineseRemainder uses the chinese remainder algorithm // to solve the problem on multiple modular domains, // and finally reconstruct the solution. 
diff --git a/linbox/solutions/solve.h b/linbox/solutions/solve.h index 2a0e8f1dc..e827167c6 100644 --- a/linbox/solutions/solve.h +++ b/linbox/solutions/solve.h @@ -84,11 +84,6 @@ namespace LinBox { * | - SparseMatrix > `DixonSolver<..., Method::SparseElimination>` * | - Otherwise > Error * - Otherwise > Error - * - Method::DixonRNS - * - IntegerTag - * | - DenseMatrix > `DixonRNSSolver` - * | - Otherwise > Error - * - Otherwise > Error * - Method::Blackbox > Method::Wiedemann * - Method::Wiedemann * - ModularTag > `WiedemannSolver` @@ -342,7 +337,6 @@ namespace LinBox { // Integer-based #include "./solve/solve-cra.h" #include "./solve/solve-dixon.h" -#include "./solve/solve-dixon-rns.h" #include "./solve/solve-numeric-symbolic.h" // Blackbox diff --git a/linbox/solutions/solve/solve-dixon-rns.h b/linbox/solutions/solve/solve-dixon-rns.h deleted file mode 100644 index 89dfb5cbd..000000000 --- a/linbox/solutions/solve/solve-dixon-rns.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright(C) LinBox - * - * ========LICENCE======== - * This file is part of the library LinBox. - * - * LinBox is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * ========LICENCE======== - */ - -#pragma once - -#include -#include - -namespace LinBox { - template - class DixonRNSSolver { - public: - DixonRNSSolver(const Ring& ring, PrimeGenerator& primeGenerator) - : _ring(ring) - , _primeGenerator(primeGenerator) - { - /* @todo */ - } - - /** - * Dense solving. - */ - template - void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, - const Method::DixonRNS& m) - { - // @fixme We should use some code from DixonSolver... - // But that's hard so we just assume that A is square and invertible. - linbox_check(A.rowdim() == A.coldim()); - - using LiftingContainer = MultiModLiftingContainer; - - commentator().start("[MultiModLifting] Init"); - LiftingContainer lc(_ring, _primeGenerator, A, b, m); - MultiModRationalReconstruction re(lc); - commentator().stop("[MultiModLifting] Init"); - - if (!re.getRational(xNum, xDen)) { - std::cerr << "OUCH!" << std::endl; - } - } - - private: - const Ring& _ring; - PrimeGenerator& _primeGenerator; - }; - - /** - * \brief Solve specialisation for DixonRNS on dense matrices. - */ - template - void solve(RVector& xNum, typename RVector::Element& xDen, const DenseMatrix& A, const Vector& b, - const RingCategories::IntegerTag& tag, const Method::DixonRNS& m) - { - commentator().start("solve.dixon-rns.integer.dense"); - - // @fixme We don't know if we can use ModularBalanced, - // because of the rational reconstruction which might be - // implicitly requiring 0-{p-1} representation of the p-adic sequence elements. 
- using Field = Givaro::Modular; - using PrimeGenerator = PrimeIterator; - PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); - - // @fixme TO BE REMOVED - DixonRNSSolver solver(A.field(), primeGenerator); - solver.solve(xNum, xDen, A, b, m); - - DixonSolver classicSolver(A.field(), primeGenerator); - classicSolver.solveNonsingular(xNum, xDen, A, b, false, m.trialsBeforeFailure); - - commentator().stop("solve.dixon-rns.integer.dense"); - - // @fixme Implement something like that - // if (status == SS_INCONSISTENT) { - // throw LinboxMathInconsistentSystem("From DixonRNS method."); - // } else if (status == SS_FAILED || status == SS_BAD_PRECONDITIONER) { - // throw LinboxError("From DixonRNS method."); - // } - } -} \ No newline at end of file diff --git a/linbox/solutions/solve/solve-dixon.h b/linbox/solutions/solve/solve-dixon.h index 3afddc0ca..4a988673e 100644 --- a/linbox/solutions/solve/solve-dixon.h +++ b/linbox/solutions/solve/solve-dixon.h @@ -96,7 +96,7 @@ namespace LinBox { PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); using Solver = DixonSolver::type>; - Solver dixonSolve(A.field(), primeGenerator); + Solver dixonSolve(A.field(), primeGenerator, m); // Either A is known to be non-singular, or we just don't know yet. 
int maxTrials = m.trialsBeforeFailure; diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index f12668faa..e278bdb80 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -272,8 +272,12 @@ int main(int argc, char** argv) } if (primesCount > 0) { + method.multiModularLifting = true; method.primesCount = primesCount; } + else { + method.multiModularLifting = false; + } if (vectorBitSize < 0) { vectorBitSize = bitSize; @@ -314,7 +318,7 @@ int main(int argc, char** argv) // // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // ----- Rational Dixon - // ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // // @fixme Dixon does not compile // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -328,9 +332,6 @@ int main(int argc, char** argv) // ok = ok && test_sparse_solve(Method::SymbolicNumericNorm(method), ZZ, QQ, m, n, bitSize, vectorBitSize, // seed, verbose); - // ----- Rational DixonRNS - ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - // // ----- Modular Auto // ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_sparse_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); From 0dc46e1dc82a7458f18634e33f77b4a5992522cf Mon Sep 17 00:00:00 2001 From: ZHG Date: Mon, 19 Aug 2019 09:50:10 +0200 Subject: [PATCH 54/63] Fixed library compilation --- linbox/algorithms/multi-mod-rational-reconstruction.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/linbox/algorithms/multi-mod-rational-reconstruction.h 
b/linbox/algorithms/multi-mod-rational-reconstruction.h index 6fae6a357..240042b97 100644 --- a/linbox/algorithms/multi-mod-rational-reconstruction.h +++ b/linbox/algorithms/multi-mod-rational-reconstruction.h @@ -22,11 +22,13 @@ #pragma once +#include "./rational-cra-builder-full-multip.h" + namespace LinBox { /** * From a MultiModLiftingContainer, will build * the solution on each prime, then will do a CRT reconstruction, - * before reconstructing the rational. + * before reconstructing the rational. * * This does not do early termination. */ From cc7a0a50df55f5a172112ae7550f45986186da08 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Tue, 20 Aug 2019 10:43:16 +0200 Subject: [PATCH 55/63] Quick adjustment for THE BUG --- linbox/algorithms/multi-mod-lifting-container.h | 4 +--- linbox/solutions/solve/solve-dixon.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 71e0a0008..d41eb0e2a 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -109,8 +109,6 @@ namespace LinBox { PAR_BLOCK { _primesCount = 6 * NUM_THREADS; } } - // @fixme !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - _primesCount = 2; _primes.resize(_primesCount); // Some preparation work @@ -337,7 +335,7 @@ namespace LinBox { // @fixme @zhuh Can't get that working with NUM_THREADS, // any idea what makes it wrong? 
// ./test-solve-full -n 1 -m 1 -b 50 -v -l - auto sp = SPLITTER(1 /* NUM_THREADS */, FFLAS::CuttingStrategy::Row, + auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Threads); int M = _primesCount; FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), { diff --git a/linbox/solutions/solve/solve-dixon.h b/linbox/solutions/solve/solve-dixon.h index 4a988673e..9f29fd3b5 100644 --- a/linbox/solutions/solve/solve-dixon.h +++ b/linbox/solutions/solve/solve-dixon.h @@ -89,7 +89,7 @@ namespace LinBox { commentator().start("solve.dixon.integer.dense"); linbox_check((A.coldim() == xNum.size()) && (A.rowdim() == b.size())); - // @fixme Using Givaro::ModularBalanced for the field makes Dixon fail... + // @note Using Givaro::ModularBalanced would make Dixon and MultiModLiftingContainer fail... using Matrix = DenseMatrix; using Field = Givaro::Modular; using PrimeGenerator = PrimeIterator; From cd928254f935112106be2a9558b3e06de2594bb7 Mon Sep 17 00:00:00 2001 From: Jean-Guillaume Dumas Date: Wed, 28 Aug 2019 10:35:55 +0200 Subject: [PATCH 56/63] Now using DenseVector --- linbox/algorithms/last-invariant-factor.h | 42 +++++++++-------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/linbox/algorithms/last-invariant-factor.h b/linbox/algorithms/last-invariant-factor.h index 80aa67880..b3cbd9b3c 100644 --- a/linbox/algorithms/last-invariant-factor.h +++ b/linbox/algorithms/last-invariant-factor.h @@ -51,7 +51,7 @@ namespace LinBox protected: - typedef BlasVector DVect; + typedef DenseVector DVect; Ring r; mutable typename Ring::RandIter _gen; Solver solver; @@ -110,20 +110,16 @@ namespace LinBox Integer r_den; //std::vector > result (A.coldim()); //typename std::vector >::iterator result_p; - // vector b, RHS, 32-bit int is good enough - std::vector b(A.rowdim()); - typename std::vector::iterator b_p; - typename Vector::const_iterator Prime_p; + DenseVector b(r, A.rowdim()); Integer pri, quo, rem, itmp; for (; count < threshold; ++ 
count) { // assign b to be a random vector - for (b_p = b.begin(); b_p != b.end(); ++ b_p) { -// * b_p = rand() % 268435456 - 134217728; // may need to change to use ring's random gen. -// // dpritcha, 2004-07-26 - _gen( itmp ); - * b_p = (int)itmp; + for (auto b_p = b.begin(); b_p != b.end(); ++ b_p) { + _gen( itmp ); + //@enhancement vector b, RHS, 32-bit is good enough + * b_p = Integer((int32_t)itmp); } // try to solve Ax = b over Ring @@ -138,7 +134,7 @@ namespace LinBox } // filter out primes in PRIMEL from lif. if (!r. isZero (lif)) - for ( Prime_p = PrimeL.begin(); + for ( auto Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { r.init (pri, *Prime_p); @@ -172,21 +168,17 @@ namespace LinBox Integer r1_den, r2_den; //std::vector > result (A.coldim()); //typename std::vector >::iterator result_p; - // vector b, RHS, 32-bit int is good enough - std::vector b1(A. rowdim()), b2(A. rowdim()); - typename std::vector::iterator b_p; - typename Vector::const_iterator Prime_p; + //@enhancement vector b, RHS, 32-bit instead would be good enough + DenseVector b1(r, A. rowdim()), b2(r, A. rowdim()); Integer pri, quo, rem; for (; count < (threshold + 1) / 2; ++ count) { // assign b to be a random vector - for (b_p = b1. begin(); b_p != b1. end(); ++ b_p) { -// * b_p = rand(); - *b_p = _gen.random();//(* b_p); + for (auto b_p = b1. begin(); b_p != b1. end(); ++ b_p) { + _gen.random(*b_p); } - for (b_p = b2. begin(); b_p != b2. end(); ++ b_p) { -// * b_p = rand(); - *b_p = _gen.random();//(* b_p); + for (auto b_p = b2. begin(); b_p != b2. end(); ++ b_p) { + _gen.random(*b_p); } // try to solve Ax = b1, b2 over Ring tmp1 = solver. solveNonsingular(r1_num, r1_den, A, b1); @@ -243,7 +235,7 @@ namespace LinBox // filter out primes in PRIMEL from lif. if (!r. 
isZero (lif)) - for ( Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { + for ( auto Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { r.init (pri, *Prime_p); do { r.quoRem(quo,rem,lif,pri); @@ -253,7 +245,7 @@ namespace LinBox } r. gcdin (Bonus, lif); if (!r. isZero (Bonus)) - for ( Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { + for ( auto Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { r.init (pri, *Prime_p); do { r.quoRem(quo,rem,Bonus,pri); @@ -275,13 +267,11 @@ namespace LinBox if (r_num.size()!=A. coldim()) return lif=0; Integer r_den; DVect b(r,A.rowdim()); - typename DVect::iterator b_p; - //typename Vector::const_iterator Prime_p; Integer pri, quo, rem; // assign b to be a random vector - for (b_p = b.begin(); b_p != b.end(); ++ b_p) { + for (auto b_p = b.begin(); b_p != b.end(); ++ b_p) { // * b_p = rand() % 268435456 - 134217728; // may need to change to use ring's random gen. // // dpritcha, 2004-07-26 _gen( * b_p ); From 7a56154ee2dfe374d0ae153425089e6b06d460a0 Mon Sep 17 00:00:00 2001 From: Jean-Guillaume Dumas Date: Wed, 28 Aug 2019 11:29:51 +0200 Subject: [PATCH 57/63] sequential parseq must have only one thread --- linbox/algorithms/multi-mod-lifting-container.h | 17 ++++++++++------- .../multi-mod-rational-reconstruction.h | 1 + 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index d41eb0e2a..c97391e84 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -314,7 +314,7 @@ namespace LinBox { Integer denBound() const { return _denBound; } uint32_t primesCount() const { return _primesCount; } - const FElement& prime(uint32_t index) const { return _primes.at(index); } + FElement prime(uint32_t index) const { return _primes.at(index); } const std::vector& primesFields() const { return _fields; } // -------------- 
@@ -328,10 +328,13 @@ namespace LinBox { { VectorDomain IVD(_ring); BlasMatrixDomain IMD(_ring); + size_t numthreads; // commentator().start("[MultiModLifting] c = A^{-1} r mod p"); PAR_BLOCK { + numthreads = NUM_THREADS; + // @fixme @zhuh Can't get that working with NUM_THREADS, // any idea what makes it wrong? // ./test-solve-full -n 1 -m 1 -b 50 -v -l @@ -383,16 +386,16 @@ namespace LinBox { // commentator().start("[MultiModLifting] FGEMM R <= R - Ac"); // Firstly compute R <= R - A c as a fgemm within the RNS domain. if (_method.rnsFgemmType == RnsFgemmType::BothSequential) { - rns_fgemm(); + rns_fgemm(1,1); } else if (_method.rnsFgemmType == RnsFgemmType::BothParallel) { - rns_fgemm(); + rns_fgemm(numthreads,numthreads); } else if (_method.rnsFgemmType == RnsFgemmType::ParallelFgemmOnly) { - rns_fgemm(); + rns_fgemm(1,numthreads); } else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) { - rns_fgemm(); + rns_fgemm(numthreads,1); } // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); @@ -452,7 +455,7 @@ namespace LinBox { // @note This allows us to factor out some of the rns fgemm variants common code. 
template - inline void rns_fgemm() + inline void rns_fgemm(size_t threads1, size_t threads2) { PAR_BLOCK { @@ -460,7 +463,7 @@ namespace LinBox { using MMHelper = FFLAS::MMHelper; - ComposedParSeqHelper composedParSeqHelper(NUM_THREADS, NUM_THREADS); + ComposedParSeqHelper composedParSeqHelper(threads1, threads2); MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, diff --git a/linbox/algorithms/multi-mod-rational-reconstruction.h b/linbox/algorithms/multi-mod-rational-reconstruction.h index 240042b97..cd5018c2a 100644 --- a/linbox/algorithms/multi-mod-rational-reconstruction.h +++ b/linbox/algorithms/multi-mod-rational-reconstruction.h @@ -89,6 +89,7 @@ namespace LinBox { _lc.ring().mulin(radices[j], _lc.prime(j)); } } + commentator().stop("[MultiModLifting] Lifting"); // CRT reconstruction from paddicAccumulations From ab9ac916db54716bce9e09d298e558ce51b155b2 Mon Sep 17 00:00:00 2001 From: Jean-Guillaume Dumas Date: Wed, 28 Aug 2019 16:53:47 +0200 Subject: [PATCH 58/63] indent --- .../algorithms/multi-mod-lifting-container.h | 4 +- tests/test-last-invariant-factor.C | 62 +++++++++---------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index c97391e84..55df94d73 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -181,6 +181,7 @@ namespace LinBox { _fields.emplace_back(pj); } + // Initialize all inverses // @note An inverse mod some p within DixonSolver was already computed, // and pass through to the lifting container. 
Here, we could use that, but we have @@ -199,12 +200,11 @@ namespace LinBox { auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Threads); int M = _primesCount; - FOR1D(j, M, sp, MODE(CONSTREFERENCE(nullities)), { + FOR1D(j, M, sp, MODE(WRITE(nullities)), { auto& F = _fields[j]; BlasMatrixDomain bmd(F); bmd.invin(_B[j], nullities[j]); }); - for (auto nullity : nullities) { if (nullity > 0) { // @fixme Should redraw another prime! diff --git a/tests/test-last-invariant-factor.C b/tests/test-last-invariant-factor.C index 8b270bc3e..d3a6e0fca 100644 --- a/tests/test-last-invariant-factor.C +++ b/tests/test-last-invariant-factor.C @@ -51,19 +51,19 @@ using namespace LinBox; template bool testRandom(const Ring& R, - LIF& lif, - LinBox::VectorStream& stream1) + LIF& lif, + LinBox::VectorStream& stream1) { std::ostringstream str; str << "Testing last invariant factor:"; - commentator().start (str.str ().c_str (), "testRandom", stream1.m ()); + commentator().start (str.str ().c_str (), "testRandom", stream1.m ()); - bool ret = true; + bool ret = true; - VectorDomain VD (R); + VectorDomain VD (R); Vector d(R); @@ -73,19 +73,19 @@ bool testRandom(const Ring& R, int n = int(d. 
size()); - while (stream1) { + while (stream1) { - commentator().startIteration ((unsigned)stream1.j ()); + commentator().startIteration ((unsigned)stream1.j ()); - std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION); + std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION); - bool iter_passed = true; + bool iter_passed = true; stream1.next (d); report << "Input vector: "; VD.write (report, d); - report << endl; + report << endl; BlasMatrix D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n); @@ -117,8 +117,8 @@ bool testRandom(const Ring& R, R.assign(e[(size_t)i],R.one); U.apply(tmp1, e); D.apply(tmp2, tmp1); - // LinBox::BlasSubvector > col_p_v(R,*col_p); - // L.apply(col_p_v, tmp2); + // LinBox::BlasSubvector > col_p_v(R,*col_p); + // L.apply(col_p_v, tmp2); L.apply(*col_p, tmp2); R.assign(e[(size_t)i],R.zero); } @@ -157,24 +157,24 @@ bool testRandom(const Ring& R, ret = iter_passed = false; - if (!iter_passed) + if (!iter_passed) - commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR) + commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR) << "ERROR: Computed last invariant factor is incorrect" << endl; - commentator().stop ("done"); + commentator().stop ("done"); - commentator().progress (); + commentator().progress (); - } + } - //stream1.reset (); + //stream1.reset (); - commentator().stop (MSG_STATUS (ret), (const char *) 0, "testRandom"); + commentator().stop (MSG_STATUS (ret), (const char *) 0, "testRandom"); - return ret; + return ret; } @@ -182,30 +182,30 @@ int main(int argc, char** argv) { - bool pass = true; + bool pass = true; - static size_t n = 10; + static size_t n = 10; static unsigned int iterations = 1; - static Argument args[] = { - { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT, &n }, - { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT, &iterations }, + static Argument args[] = { + { 'n', "-n N", "Set order of 
test matrices to N.", TYPE_INT, &n }, + { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT, &iterations }, END_OF_ARGUMENTS - }; + }; parseArguments (argc, argv, args); - typedef Givaro::ZRing Ring; + typedef Givaro::ZRing Ring; - Ring R; Ring::RandIter gen(R); + Ring R; Ring::RandIter gen(R); commentator().start("Last invariant factor test suite", "LIF"); - commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5); + commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5); - RandomDenseStream s1 (R, gen, n, iterations); + RandomDenseStream s1 (R, gen, n, iterations); typedef DixonSolver, PrimeIterator > Solver; // typedef DixonSolver, LinBox::PrimeIterator > Solver; @@ -219,7 +219,7 @@ int main(int argc, char** argv) if (!testRandom(R, lif, s1)) pass = false; commentator().stop("Last invariant factor test suite"); - return pass ? 0 : -1; + return pass ? 0 : -1; } // Local Variables: From 4c47717bc69fd196cfbc71049b07a56ba3f081d9 Mon Sep 17 00:00:00 2001 From: Jean-Guillaume Dumas Date: Wed, 28 Aug 2019 21:28:55 +0200 Subject: [PATCH 59/63] improve test possibilities --- linbox/algorithms/last-invariant-factor.h | 3 ++ .../rational-cra-builder-full-multip.h | 18 +++++++- tests/test-last-invariant-factor.C | 41 +++++++++++-------- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/linbox/algorithms/last-invariant-factor.h b/linbox/algorithms/last-invariant-factor.h index b3cbd9b3c..2c3315cb5 100644 --- a/linbox/algorithms/last-invariant-factor.h +++ b/linbox/algorithms/last-invariant-factor.h @@ -124,6 +124,9 @@ namespace LinBox // try to solve Ax = b over Ring tmp = solver.solveNonsingular(r_num, r_den, A, b); + + // std::clog << "r_den: " << r_den << std::endl; + // If no solution found if (tmp != SS_OK) { r.assign (lif, r.zero); diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h index f5dc17f92..0e1c68b5a 100644 --- 
a/linbox/algorithms/rational-cra-builder-full-multip.h +++ b/linbox/algorithms/rational-cra-builder-full-multip.h @@ -92,8 +92,24 @@ namespace LinBox protected: Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn) { +/* std::clog << "iterativeratrecon" + << ", u1: " << u1 + << ", new_den: " << new_den + << ", old_den: " << old_den + << ", m1: " << m1 + << ", sn: " << sn + ; +*/ Integer a; - _ZZ.RationalReconstruction(a, new_den, u1*=old_den, m1, sn); + bool success = _ZZ.RationalReconstruction(a, new_den, u1*=old_den, m1, sn, true, false); + if (! success) + std::cerr << " ***** RationalReconstruction FAILURE ***** "; +/* + std::clog << ", AFTER" + << ", a: " << a + << ", new_den: " << new_den + << std::endl; +*/ return u1=a; } }; diff --git a/tests/test-last-invariant-factor.C b/tests/test-last-invariant-factor.C index d3a6e0fca..9205268f4 100644 --- a/tests/test-last-invariant-factor.C +++ b/tests/test-last-invariant-factor.C @@ -49,8 +49,8 @@ using namespace LinBox; -template -bool testRandom(const Ring& R, +template +bool testRandom(const Ring& R, RandIter& gen, LIF& lif, LinBox::VectorStream& stream1) { @@ -77,7 +77,8 @@ bool testRandom(const Ring& R, commentator().startIteration ((unsigned)stream1.j ()); - std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION); +// std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION); + std::ostream &report = std::clog; bool iter_passed = true; @@ -87,7 +88,7 @@ bool testRandom(const Ring& R, VD.write (report, d); report << endl; - BlasMatrix D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n); + DenseMatrix D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n); int i, j; @@ -100,15 +101,14 @@ bool testRandom(const Ring& R, for (j = 0; j < i; ++ j) { - R.init(L[(size_t)i][(size_t)j], int64_t(rand() % 10)); - - R.init(U[(size_t)j][(size_t)i], int64_t(rand() % 10)); + 
gen.random(L[(size_t)i][(size_t)j]); + gen.random(U[(size_t)j][(size_t)i]); } - BlasVector tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n); + DenseVector tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n); - typename BlasMatrix::ColIterator col_p; + typename DenseMatrix::ColIterator col_p; i = 0; for (col_p = A.colBegin(); @@ -117,8 +117,6 @@ bool testRandom(const Ring& R, R.assign(e[(size_t)i],R.one); U.apply(tmp1, e); D.apply(tmp2, tmp1); - // LinBox::BlasSubvector > col_p_v(R,*col_p); - // L.apply(col_p_v, tmp2); L.apply(*col_p, tmp2); R.assign(e[(size_t)i],R.zero); } @@ -183,23 +181,30 @@ int main(int argc, char** argv) bool pass = true; - + int seed = -1; static size_t n = 10; + static size_t bits = 30; static unsigned int iterations = 1; static Argument args[] = { { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT, &n }, + { 'b', "-b B", "Set bit size to B.", TYPE_INT, &bits }, { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT, &iterations }, + {'s', "-s", "Seed for randomness.", TYPE_INT, &seed}, END_OF_ARGUMENTS }; parseArguments (argc, argv, args); + if (seed < 0) { + seed = time(nullptr); + } + typedef Givaro::ZRing Ring; - Ring R; Ring::RandIter gen(R); + Ring R; Ring::RandIter gen(R, seed, bits); commentator().start("Last invariant factor test suite", "LIF"); @@ -214,11 +219,15 @@ int main(int argc, char** argv) LIF lif; - lif. setThreshold (30); + lif.setThreshold (30); + + if (!testRandom(R, gen, lif, s1)) pass = false; - if (!testRandom(R, lif, s1)) pass = false; + if (!pass) { + std::cerr << "Failed with seed: " << seed << std::endl; + } - commentator().stop("Last invariant factor test suite"); + commentator().stop("Last invariant factor test suite"); return pass ? 
0 : -1; } From 99da8495fd713c6cb49b09f5c6c5b4f9224894fa Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Thu, 29 Aug 2019 17:11:08 +0200 Subject: [PATCH 60/63] Added a static_assert when MultiModLiftingContainer is used with anything other than Modular --- linbox/algorithms/multi-mod-lifting-container.h | 2 ++ tests/test-last-invariant-factor.C | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 55df94d73..be9580584 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -68,6 +68,8 @@ namespace LinBox { public: using Ring = _Ring; using Field = _Field; + // @fixme Currently not handling other cases... + static_assert(std::is_same>::value, "MultiModLifting requires Modular."); using PrimeGenerator = _PrimeGenerator; using RNSSystem = FFPACK::rns_double; diff --git a/tests/test-last-invariant-factor.C b/tests/test-last-invariant-factor.C index 9205268f4..8667f5d02 100644 --- a/tests/test-last-invariant-factor.C +++ b/tests/test-last-invariant-factor.C @@ -212,8 +212,8 @@ int main(int argc, char** argv) RandomDenseStream s1 (R, gen, n, iterations); - typedef DixonSolver, PrimeIterator > Solver; - // typedef DixonSolver, LinBox::PrimeIterator > Solver; + // typedef DixonSolver, PrimeIterator > Solver; + typedef DixonSolver, LinBox::PrimeIterator > Solver; typedef LastInvariantFactor LIF; From fa4911fb6d910dbfdcd512d0c9eb15039d92def2 Mon Sep 17 00:00:00 2001 From: Alexis Breust Date: Fri, 30 Aug 2019 11:34:46 +0200 Subject: [PATCH 61/63] Fixed HadamardBound bug --- linbox/algorithms/multi-mod-lifting-container.h | 7 ++----- linbox/solutions/hadamard-bound.h | 11 ++++++++--- tests/test-hadamard-bound.C | 12 ++++++------ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 
be9580584..399081894 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -337,13 +337,10 @@ namespace LinBox { { numthreads = NUM_THREADS; - // @fixme @zhuh Can't get that working with NUM_THREADS, - // any idea what makes it wrong? - // ./test-solve-full -n 1 -m 1 -b 50 -v -l auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, FFLAS::StrategyParameter::Threads); int M = _primesCount; - FOR1D(j, M, sp, MODE(CONSTREFERENCE(digits)), { + FOR1D(j, M, sp, { auto pj = _primes[j]; auto& FR = _FR[j]; uint64_t upj = pj; @@ -360,8 +357,8 @@ namespace LinBox { } // digit = A^{-1} * R mod pj + const auto& B = _B[j]; auto& digit = digits[j]; - auto& B = _B[j]; B.apply(digit, FR); // Store the very same result in an RNS system, diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h index ba00bc071..43d25fe23 100644 --- a/linbox/solutions/hadamard-bound.h +++ b/linbox/solutions/hadamard-bound.h @@ -249,6 +249,7 @@ namespace LinBox { Integer rowBound; HadamardRowBound(rowBound, A); #ifdef DEBUG_HADAMARD_BOUND + A.write(std::clog) << std::endl; std::clog << "rowBound:=" << rowBound << ';' << std::endl; #endif @@ -283,15 +284,19 @@ namespace LinBox { /** * Precise Hadamard bound (bound on determinant) by taking the minimum * of the column-wise and the row-wise euclidean norm. - * - * The result is expressed as bit size. 
*/ template - double HadamardBound(const IMatrix& A) + Integer HadamardBound(const IMatrix& A) { return DetailedHadamardBound(A).bound; } + template + double HadamardLogBound(const IMatrix& A) + { + return Givaro::logtwo(HadamardBound(A)); + } + // ----- Fast Hadamard bound template diff --git a/tests/test-hadamard-bound.C b/tests/test-hadamard-bound.C index 8e7a602f8..6487c7b70 100644 --- a/tests/test-hadamard-bound.C +++ b/tests/test-hadamard-bound.C @@ -42,8 +42,8 @@ bool test(const Ring& F, const TMatrix& A, const TVector& b) // ---- Determinant // Compute the bounds - double hb = HadamardBound(A); - double fastHb = FastHadamardBound(A); + double hb = HadamardLogBound(A); + double fastHb = FastHadamardLogBound(A); // Compute the effective determinant Integer detA; @@ -73,17 +73,17 @@ bool test(const Ring& F, const TMatrix& A, const TVector& b) solve(num, den, A, b); for (size_t i = 0u; i < num.size(); ++i) { - if (Givaro::logtwo(Givaro::abs(num[i])) > rationalSolveHb.numLogBound + ESPILON) { + if (Givaro::abs(num[i]) > rationalSolveHb.numBound) { std::cerr << "The rational solve Hadamard bound does not bound the numerator." << std::endl; - std::cerr << "num[i]: " << Givaro::logtwo(Givaro::abs(num[i])) << " > " << rationalSolveHb.numLogBound + std::cerr << "num[i]: " << Givaro::abs(num[i]) << " > " << rationalSolveHb.numBound << std::endl; return false; } } - if (Givaro::logtwo(Givaro::abs(den)) > rationalSolveHb.denLogBound + ESPILON) { + if (Givaro::abs(den) > rationalSolveHb.denBound) { std::cerr << "The rational solve Hadamard bound does not bound the denominator." 
<< std::endl; - std::cerr << "den: " << Givaro::logtwo(den) << " > " << rationalSolveHb.denLogBound << std::endl; + std::cerr << "den: " << den << " > " << rationalSolveHb.denBound << std::endl; return false; } From b62610c209605c96ac8fdd844d1dad1699ff4080 Mon Sep 17 00:00:00 2001 From: Jean-Guillaume Dumas Date: Wed, 4 Mar 2020 19:20:46 +0100 Subject: [PATCH 62/63] Update multi-mod-lifting-container.h --- linbox/algorithms/multi-mod-lifting-container.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index bd09766b4..2eb6ed9d1 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -48,7 +48,7 @@ namespace LinBox { * | for i = 1 .. l: * | | (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi * | | ci = Bi ri mod pi < Matrix-vector in Z/pZ - * | | yi = yi + ci * pi^(i-1) < Done over ZZ + * | | yi = yi + ci * pi^(j-1) < Done over ZZ * | V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ * | for i = 1 .. l: * | | ri = Qi + (Vi / pi) From 4a5a774f7d6a31e9bce253f857ccaa0070476fce Mon Sep 17 00:00:00 2001 From: Romain Lebreton Date: Thu, 5 Mar 2020 15:18:40 +0100 Subject: [PATCH 63/63] Update multi-mod-lifting-container.h --- linbox/algorithms/multi-mod-lifting-container.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linbox/algorithms/multi-mod-lifting-container.h b/linbox/algorithms/multi-mod-lifting-container.h index 2eb6ed9d1..545160804 100644 --- a/linbox/algorithms/multi-mod-lifting-container.h +++ b/linbox/algorithms/multi-mod-lifting-container.h @@ -47,7 +47,7 @@ namespace LinBox { * for j = 1 .. k: * | for i = 1 .. 
l: * | | (Qi, Ri) = such that ri = pi Qi + Ri with |Ri| < pi - * | | ci = Bi ri mod pi < Matrix-vector in Z/pZ + * | | ci = Bi Ri mod pi < Matrix-vector in Z/pZ * | | yi = yi + ci * pi^(j-1) < Done over ZZ * | V = [R1|...|Rl] - A [c1|...|cl] < Matrix-matrix in ZZ * | for i = 1 .. l: