diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 4df5af1a2..4d4083fb8 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -54,8 +54,10 @@ namespace { int n = 500; int bits = 10; int seed = -1; + int primesCount = -1; std::string dispatchString = "Auto"; std::string methodString = "Auto"; + std::string rnsFgemmString = "ParallelRnsOnly"; }; template @@ -145,6 +147,8 @@ int main(int argc, char** argv) {'b', "-b", "bit size", TYPE_INT, &args.bits}, {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed}, {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, + {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &args.rnsFgemmString}, + {'p', "-p", "Enable multi-modular method, and tells how many primes to use.", TYPE_INT, &args.primesCount}, {'t', "-t", "Number of threads.", TYPE_INT, &numThreads }, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " @@ -171,11 +175,26 @@ int main(int argc, char** argv) MethodBase method; method.pCommunicator = &communicator; + if (args.primesCount > 0) { + method.multiModularLifting = true; + method.primesCount = args.primesCount; + } else { + method.multiModularLifting = false; + } if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; else method.dispatch = Dispatch::Auto; + if (args.rnsFgemmString == "BothParallel") method.rnsFgemmType = RnsFgemmType::BothParallel; + else if (args.rnsFgemmString == "BothSequential") method.rnsFgemmType = RnsFgemmType::BothSequential; + else if (args.rnsFgemmString == "ParallelRnsOnly") method.rnsFgemmType = RnsFgemmType::ParallelRnsOnly; + else if (args.rnsFgemmString == "ParallelFgemmOnly") method.rnsFgemmType = RnsFgemmType::ParallelFgemmOnly; + else { + std::cerr << "-r RNS-FGEMM type should be either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly" << std::endl; + return EXIT_FAILURE; + } + // Real benchmark bool isModular = false; diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.h b/linbox/algorithms/dixon-solver/dixon-solver-dense.h index 0d378e69f..6f0357403 100644 --- a/linbox/algorithms/dixon-solver/dixon-solver-dense.h +++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.h @@ -89,6 +89,7 @@ namespace LinBox { mutable Prime _prime; Ring _ring; Field _field; + Method::Dixon _method; BlasMatrixDomain _bmdf; @@ -113,10 +114,11 @@ namespace LinBox { * @param r a Ring, set by default * @param rp a RandomPrime generator, set by default */ - DixonSolver(const Ring& r = Ring(), const RandomPrime& rp = RandomPrime()) + DixonSolver(const Ring& r = Ring(), const RandomPrime& rp = RandomPrime(), const Method::Dixon& method = Method::Dixon()) : lastCertificate(r, 0) , _genprime(rp) , _ring(r) + , _method(method) { _genprime.setBits(FieldTraits::bestBitSize()); _prime = *_genprime; diff --git a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl index 37b214bad..12098a085 100644 --- a/linbox/algorithms/dixon-solver/dixon-solver-dense.inl +++ b/linbox/algorithms/dixon-solver/dixon-solver-dense.inl @@ -24,8 +24,10 @@ #include "linbox/util/debug.h" #include "linbox/algorithms/lifting-container.h" +#include "linbox/algorithms/multi-mod-lifting-container.h" #include "linbox/algorithms/matrix-inverse.h" #include "linbox/algorithms/rational-reconstruction.h" +#include "linbox/algorithms/multi-mod-rational-reconstruction.h" namespace LinBox { @@ -128,13 +130,24 @@ namespace LinBox { } } while (notfr); - typedef DixonLiftingContainer> LiftingContainer; - LiftingContainer lc(_ring, *F, A, *FMP, b, _prime); - RationalReconstruction re(lc); - if (!re.getRational(num, den, 0)) { - delete FMP; - return SS_FAILED; + if (_method.multiModularLifting) { + using LiftingContainer = MultiModLiftingContainer; + LiftingContainer lc(_ring, _genprime, A, b, _method); + MultiModRationalReconstruction re(lc); + if (!re.getRational(num, den)) { + delete FMP; + return SS_FAILED; + } + } else { + using LiftingContainer = DixonLiftingContainer>; + LiftingContainer lc(_ring, *F, A, *FMP, b, _prime); + RationalReconstruction re(lc); + if (!re.getRational(num, den, 0)) { + delete FMP; + return SS_FAILED; + } } + #ifdef RSTIMING ttNonsingularSolve.update(re, lc); #endif @@ -271,8 +284,6 @@ namespace LinBox { SolverReturnStatus DixonSolver::solveApparentlyInconsistent( const BlasMatrix& A, TAS& tas, BlasMatrix* Atp_minor_inv, size_t rank, const MethodBase& method) { - using LiftingContainer = DixonLiftingContainer, BlasMatrix>; - if (!method.certifyInconsistency) return SS_INCONSISTENT; // @fixme Put these as class members! @@ -295,15 +306,24 @@ namespace LinBox { ttCheckConsistency += tCheckConsistency; #endif - LiftingContainer lc(_ring, _field, At_minor, *Atp_minor_inv, zt, _prime); - RationalReconstruction re(lc); - BlasVector shortNum(A.field(), rank); Integer shortDen; - // Dirty, but should not be called under normal circumstances - if (!re.getRational(shortNum, shortDen, 0)) { - return SS_FAILED; + if (_method.multiModularLifting) { + using LiftingContainer = MultiModLiftingContainer; + LiftingContainer lc(_ring, _genprime, At_minor, zt, _method); + MultiModRationalReconstruction re(lc); + if (!re.getRational(shortNum, shortDen)) { + return SS_FAILED; + } + } + else { + using LiftingContainer = DixonLiftingContainer, BlasMatrix>; + LiftingContainer lc(_ring, _field, At_minor, *Atp_minor_inv, zt, _prime); + RationalReconstruction re(lc); + if (!re.getRational(shortNum, shortDen, 0)) { + return SS_FAILED; + } } #ifdef RSTIMING @@ -699,18 +719,26 @@ namespace LinBox { BMDI.mulin_right(tas.Q, newb); newb.resize(rank); - // ----- Do lifting on sub matrix + // ----- Do lifting on sub matrix and reconstruct BlasMatrix BBA_minor(A_minor); - LiftingContainer lc(_ring, _field, BBA_minor, *Ap_minor_inv, newb, _prime); - - // ----- Reconstruct rational - - RationalReconstruction re(lc); VectorFraction resultVF(_ring, rank); - if (!re.getRational(resultVF.numer, resultVF.denom, 0)) { - // dirty, but should not be called - return SS_FAILED; + + if (_method.multiModularLifting) { + using LiftingContainer = MultiModLiftingContainer; + LiftingContainer lc(_ring, _genprime, BBA_minor, newb, _method); + MultiModRationalReconstruction re(lc); + if (!re.getRational(resultVF.numer, resultVF.denom)) { + return SS_FAILED; + } + } + else { + using LiftingContainer = DixonLiftingContainer, BlasMatrix>; + LiftingContainer lc(_ring, _field, BBA_minor, *Ap_minor_inv, newb, _prime); + RationalReconstruction re(lc); + if (!re.getRational(resultVF.numer, resultVF.denom, 0)) { + return SS_FAILED; + } } #ifdef RSTIMING diff --git a/linbox/algorithms/last-invariant-factor.h b/linbox/algorithms/last-invariant-factor.h index 80aa67880..2c3315cb5 100644 --- a/linbox/algorithms/last-invariant-factor.h +++ b/linbox/algorithms/last-invariant-factor.h @@ -51,7 +51,7 @@ namespace LinBox protected: - typedef BlasVector DVect; + typedef DenseVector DVect; Ring r; mutable typename Ring::RandIter _gen; Solver solver; @@ -110,24 +110,23 @@ namespace LinBox Integer r_den; //std::vector > result (A.coldim()); //typename std::vector >::iterator result_p; - // vector b, RHS, 32-bit int is good enough - std::vector b(A.rowdim()); - typename std::vector::iterator b_p; - typename Vector::const_iterator Prime_p; + DenseVector b(r, A.rowdim()); Integer pri, quo, rem, itmp; for (; count < threshold; ++ count) { // assign b to be a random vector - for (b_p = b.begin(); b_p != b.end(); ++ b_p) { -// * b_p = rand() % 268435456 - 134217728; // may need to change to use ring's random gen. -// // dpritcha, 2004-07-26 - _gen( itmp ); - * b_p = (int)itmp; + for (auto b_p = b.begin(); b_p != b.end(); ++ b_p) { + _gen( itmp ); + //@enhancement vector b, RHS, 32-bit is good enough + * b_p = Integer((int32_t)itmp); } // try to solve Ax = b over Ring tmp = solver.solveNonsingular(r_num, r_den, A, b); + + // std::clog << "r_den: " << r_den << std::endl; + // If no solution found if (tmp != SS_OK) { r.assign (lif, r.zero); @@ -138,7 +137,7 @@ namespace LinBox } // filter out primes in PRIMEL from lif. if (!r. isZero (lif)) - for ( Prime_p = PrimeL.begin(); + for ( auto Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { r.init (pri, *Prime_p); @@ -172,21 +171,17 @@ namespace LinBox Integer r1_den, r2_den; //std::vector > result (A.coldim()); //typename std::vector >::iterator result_p; - // vector b, RHS, 32-bit int is good enough - std::vector b1(A. rowdim()), b2(A. rowdim()); - typename std::vector::iterator b_p; - typename Vector::const_iterator Prime_p; + //@enhancement vector b, RHS, 32-bit instead would be good enough + DenseVector b1(r, A. rowdim()), b2(r, A. rowdim()); Integer pri, quo, rem; for (; count < (threshold + 1) / 2; ++ count) { // assign b to be a random vector - for (b_p = b1. begin(); b_p != b1. end(); ++ b_p) { -// * b_p = rand(); - *b_p = _gen.random();//(* b_p); + for (auto b_p = b1. begin(); b_p != b1. end(); ++ b_p) { + _gen.random(*b_p); } - for (b_p = b2. begin(); b_p != b2. end(); ++ b_p) { -// * b_p = rand(); - *b_p = _gen.random();//(* b_p); + for (auto b_p = b2. begin(); b_p != b2. end(); ++ b_p) { + _gen.random(*b_p); } // try to solve Ax = b1, b2 over Ring tmp1 = solver. solveNonsingular(r1_num, r1_den, A, b1); @@ -243,7 +238,7 @@ namespace LinBox // filter out primes in PRIMEL from lif. if (!r. isZero (lif)) - for ( Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { + for ( auto Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { r.init (pri, *Prime_p); do { r.quoRem(quo,rem,lif,pri); @@ -253,7 +248,7 @@ namespace LinBox } r. gcdin (Bonus, lif); if (!r. isZero (Bonus)) - for ( Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { + for ( auto Prime_p = PrimeL.begin(); Prime_p != PrimeL.end(); ++ Prime_p) { r.init (pri, *Prime_p); do { r.quoRem(quo,rem,Bonus,pri); @@ -275,13 +270,11 @@ namespace LinBox if (r_num.size()!=A. coldim()) return lif=0; Integer r_den; DVect b(r,A.rowdim()); - typename DVect::iterator b_p; - //typename Vector::const_iterator Prime_p; Integer pri, quo, rem; // assign b to be a random vector - for (b_p = b.begin(); b_p != b.end(); ++ b_p) { + for (auto b_p = b.begin(); b_p != b.end(); ++ b_p) { // * b_p = rand() % 268435456 - 134217728; // may need to change to use ring's random gen. // // dpritcha, 2004-07-26 _gen( * b_p ); diff --git a/linbox/algorithms/lifting-container.h b/linbox/algorithms/lifting-container.h index d0527076f..2a86bafa6 100644 --- a/linbox/algorithms/lifting-container.h +++ b/linbox/algorithms/lifting-container.h @@ -155,14 +155,15 @@ namespace LinBox this->_intRing.convert(Prime,_p); auto hb = RationalSolveHadamardBound(A, b); - N = Integer(1) << static_cast(std::ceil(hb.numLogBound)); - D = Integer(1) << static_cast(std::ceil(hb.denLogBound)); + N = hb.numBound; + D = hb.denBound; // L = N * D * 2 // _length = logp(L, Prime) = log2(L) * ln(2) / ln(Prime) double primeLog2 = Givaro::logtwo(Prime); - _length = std::ceil((1 + hb.numLogBound + hb.denLogBound) / primeLog2); // round up instead of down + _length = std::ceil(hb.solutionLogBound / primeLog2); // round up instead of down #ifdef DEBUG_LC + std::cout << "_length "<< _length << std::endl; std::cout<<" norms computed, p = "<<_p<<"\n"; std::cout<<" N = "<."); + using PrimeGenerator = _PrimeGenerator; + + using RNSSystem = FFPACK::rns_double; + using RNSDomain = FFPACK::RNSInteger; + using RNSElement = typename RNSDomain::Element; + using RNSElementPtr = typename RNSDomain::Element_ptr; + + using IElement = typename Ring::Element; + using IMatrix = DenseMatrix<_Ring>; + using IVector = DenseVector<_Ring>; + using FElement = typename Field::Element; + using FMatrix = DenseMatrix<_Field>; + using FVector = DenseVector<_Field>; + + public: + // ------------------- + // ----- Main behavior + + // @fixme Split to inline file + MultiModLiftingContainer(const Ring& ring, PrimeGenerator primeGenerator, const IMatrix& A, + const IVector& b, const Method::Dixon& m) + : _ring(ring) + , _method(m) + , _A(A) + , _b(b) + , _n(A.rowdim()) + , _rMatrix(_ring) + , _qMatrix(_ring) + { + linbox_check(A.rowdim() == A.coldim()); + + // std::cout << "----------" << std::endl; + // A.write(std::cout << "A: ", Tag::FileFormat::Maple) << std::endl; + // std::cout << "b: " << b << std::endl; + + // This will contain the primes or our MultiMod basis + _primesCount = m.primesCount; + if (_primesCount == -1u) { + PAR_BLOCK { _primesCount = 6 * NUM_THREADS; } + } + + _primes.resize(_primesCount); + + // Some preparation work + Integer infinityNormA; + InfinityNorm(infinityNormA, A); + double logInfinityNormA = Givaro::logtwo(infinityNormA); + + { + // Based on Chen-Storjohann's paper, this is the bit size + // of the needed RNS basis for the residue computation + double rnsBasisBitSize = std::ceil(1.0 + Givaro::logtwo(1 + infinityNormA * _n)); + _rnsPrimesCount = std::ceil(rnsBasisBitSize / (primeGenerator.getBits() - 1)); + _rnsPrimes.resize(_rnsPrimesCount); + // std::cout << "_rnsPrimesCount: " << _rnsPrimesCount << std::endl; + + auto trialsLeft = m.trialsBeforeFailure; + std::vector primes; + for (auto j = 0u; j < _primesCount + _rnsPrimesCount; ++j) { + auto p = *primeGenerator; + ++primeGenerator; + + // @note std::lower_bound finds the iterator where to put p in the sorted + // container. The name of the routine might be strange, but, hey, that's not my + // fault. We check if the prime is already listed. + auto lb = std::lower_bound(primes.begin(), primes.end(), p); + if (lb != primes.end() && *lb == p) { + if (trialsLeft == 0) { + throw LinboxError("[MultiModLiftingContainer] Not enough primes."); + } + + --j; + --trialsLeft; + continue; + } + + // Inserting the primes at the right place to keep the array sorted + primes.insert(lb, p); + } + + // We take the smallest primes for our MultiMod basis + std::copy(primes.begin(), primes.begin() + _primesCount, _primes.begin()); + + // for (auto i = 0u; i < _primes.size(); ++i) { + // std::cout << "p" << i << " = " << Integer(_primes[i]) << std::endl; + // } + + // And the others for our RNS basis + std::copy(primes.begin() + _primesCount, primes.end(), _rnsPrimes.begin()); + + // for (auto i = 0u; i < _rnsPrimes.size(); ++i) { + // std::cout << "q" << i << " = " << Integer(_rnsPrimes[i]) << std::endl; + // } + + // We check that we really need all the primes within the RNS basis, + // as the first count was just an upper estimation. + double bitSize = 0.0; + for (int h = _rnsPrimes.size() - 1; h >= 0; --h) { + bitSize += Givaro::logtwo(_rnsPrimes[h]); + + if (bitSize > rnsBasisBitSize && h > 0) { + _rnsPrimes.erase(_rnsPrimes.begin(), _rnsPrimes.begin() + h); + _rnsPrimesCount -= h; + break; + } + } + } + + // Setting fields up + for (auto& pj : _primes) { + _fields.emplace_back(pj); + } + + + // Initialize all inverses + // @note An inverse mod some p within DixonSolver was already computed, + // and pass through to the lifting container. Here, we could use that, but we have + // to keep control of generated primes, so that the RNS base has bigger primes + // than the . + // commentator().start("[MMLifting][Init] A^{-1} mod pj precomputations"); + { + _BB.reserve(_primesCount); + for (auto& F : _fields) { + _BB.emplace_back(A, F); + } + + PAR_BLOCK + { + std::vector nullities(_primesCount); + auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, + FFLAS::StrategyParameter::Threads); + int M = _primesCount; + FOR1D(j, M, sp, MODE(WRITE(nullities)), { + auto& F = _fields[j]; + BlasMatrixDomain bmd(F); + bmd.invin(_BB[j], nullities[j]); + }); + for (auto nullity : nullities) { + if (nullity > 0) { + // @fixme Should redraw another prime! + std::cout << "----------------------------- NULLITY" << std::endl; + throw LinBoxError("Wrong prime, sorry."); + } + } + } + } + // commentator().stop("[MMLifting][Init] A^{-1} mod pj precomputations"); + + // Making A into the RNS domain + { + _rnsSystem = new RNSSystem(_rnsPrimes); + _rnsDomain = new RNSDomain(*_rnsSystem); + _rnsA = FFLAS::fflas_new(*_rnsDomain, _n, _n); + _rnsc = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); + _rnsR = FFLAS::fflas_new(*_rnsDomain, _n, _primesCount); + _rnsPrimesInverses = FFLAS::fflas_new(*_rnsDomain, _primesCount); + + // @note So that 2^(16*cmax) is the max element of A. + double cmax = logInfinityNormA / 16.; + FFLAS::finit_rns(*_rnsDomain, _n, _n, std::ceil(cmax), A.getPointer(), A.stride(), + _rnsA); + } + + // Compute the inverses of pj for each RNS prime + { + for (auto j = 0u; j < _primesCount; ++j) { + auto prime = _primes[j]; + + auto& rnsPrimeInverse = _rnsPrimesInverses[j]; + auto stride = rnsPrimeInverse._stride; + + for (auto h = 0u; h < _rnsPrimesCount; ++h) { + auto& rnsF = _rnsSystem->_field_rns[h]; + auto& primeInverse = rnsPrimeInverse._ptr[h * stride]; + rnsF.inv(primeInverse, prime); + } + } + } + + // Compute how many iterations are needed + { + double log2PrimesProduct = 0.0; + for (auto& pj : _primes) { + log2PrimesProduct += Givaro::logtwo(pj); + } + + auto hb = RationalSolveHadamardBound(A, b); + _log2Bound = hb.solutionLogBound; + _numBound = hb.numBound; + _denBound = hb.denBound; + + // _iterationsCount = log2(2 * N * D) / log2(p1 * p2 * ...) + _iterationsCount = std::ceil(_log2Bound / log2PrimesProduct); + // std::cout << "_iterationsCount " << _iterationsCount << std::endl; + } + + //----- Locals setup + + _rMatrix = IMatrix(_ring, _n, _primesCount); + _qMatrix = IMatrix(_ring, _n, _primesCount); + + _FR.reserve(_primesCount); + for (auto j = 0u; j < _primesCount; ++j) { + auto& F = _fields[j]; + + _FR.emplace_back(F, _n); + + // Initialize all residues to b + for (auto i = 0u; i < _n; ++i) { + _rMatrix.refEntry(i, j) = _b[i]; + } + } + } + + ~MultiModLiftingContainer() + { + FFLAS::fflas_delete(_rnsR); + FFLAS::fflas_delete(_rnsc); + FFLAS::fflas_delete(_rnsA); + delete _rnsDomain; + delete _rnsSystem; + } + + // -------------------------- + // ----- LiftingContainer API + + const Ring& ring() const final { return _ring; } + + /// The length of the container. + size_t length() const final { return _iterationsCount; } + + /// The dimension of the problem/solution. + size_t size() const final { return _n; } + + /// @note Useless, but in the API. + const IElement& prime() const final { return _ring.one; } + + // ------------------------------ + // ----- NOT LiftingContainer API + // ----- but still needed + + double log2Bound() const { return _log2Bound; } + Integer numBound() const { return _numBound; } + Integer denBound() const { return _denBound; } + + uint32_t primesCount() const { return _primesCount; } + FElement prime(uint32_t index) const { return _primes.at(index); } + const std::vector& primesFields() const { return _fields; } + + // -------------- + // ----- Iterator + + /** + * Returns false if the next digit cannot be computed (bad modulus). + * c is a vector of integers but all element are below p = p1 * ... * pl + */ + bool next(std::vector& digits) + { + VectorDomain IVD(_ring); + BlasMatrixDomain IMD(_ring); + size_t numthreads; + + // commentator().start("[MultiModLifting] c = A^{-1} r mod p"); + PAR_BLOCK + { + numthreads = NUM_THREADS; + + auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, + FFLAS::StrategyParameter::Threads); + int M = _primesCount; + FOR1D(j, M, sp, { + auto pj = _primes[j]; + auto& FR = _FR[j]; + uint64_t upj = pj; + + // @note There is no VectorDomain::divmod yet. + // Euclidian division so that rj = pj Qj + Rj + uint64_t uR; + for (auto i = 0u; i < _n; ++i) { + Integer::divmod(_qMatrix.refEntry(i, j), uR, + _rMatrix.getEntry(i, j), upj); + // @note No need to init, because we know that uR < pj, + // so that would do an extra check. + FR[i] = static_cast(uR); + } + + // digit = A^{-1} * R mod pj + const auto& B = _BB[j]; + auto& digit = digits[j]; + B.apply(digit, FR); + + // Store the very same result in an RNS system, + // but fact is all the primes of the RNS system are bigger + // than the modulus used to compute the digit, we just copy + // the result for everybody. + for (auto i = 0u; i < _n; ++i) { + setRNSMatrixElementAllResidues(_rnsR, _primesCount, i, j, FR[i]); + setRNSMatrixElementAllResidues(_rnsc, _primesCount, i, j, digit[i]); + } + }); + } + // commentator().stop("[MultiModLifting] c = A^{-1} r mod p"); + + // ----- Compute the next residues! + + // r <= Q + (R - A c) / p + + using RNSParallel = FFLAS::ParSeqHelper::Parallel; + using FGEMMParallel = FFLAS::ParSeqHelper::Parallel; + + // commentator().start("[MultiModLifting] FGEMM R <= R - Ac"); + // Firstly compute R <= R - A c as a fgemm within the RNS domain. + if (_method.rnsFgemmType == RnsFgemmType::BothSequential) { + rns_fgemm(1,1); + } + else if (_method.rnsFgemmType == RnsFgemmType::BothParallel) { + rns_fgemm(numthreads,numthreads); + } + else if (_method.rnsFgemmType == RnsFgemmType::ParallelFgemmOnly) { + rns_fgemm(1,numthreads); + } + else if (_method.rnsFgemmType == RnsFgemmType::ParallelRnsOnly) { + rns_fgemm(numthreads,1); + } + // commentator().stop("[MultiModLifting] FGEMM R <= R - Ac"); + + // We divide each residues by the according pj, which is done by multiplying. + // @note The matrix _rnsR is RNS-major, meaning that it is stored + // as [R mod q0][R mod q1][...] where [R mod qh] represents a full matrix. + // We use this fact to keep better cache coherency. + // commentator().start("[MultiModLifting] MUL FOR INV R <= R / p"); + auto rnsStride = 0u; + for (auto h = 0u; h < _rnsPrimesCount; ++h) { + auto& rnsF = _rnsSystem->_field_rns[h]; + + PAR_BLOCK + { + auto sp = SPLITTER(NUM_THREADS, FFLAS::CuttingStrategy::Row, + FFLAS::StrategyParameter::Threads); + int M = _primesCount; + FOR1D(j, M, sp, { + auto& rnsPrimeInverse = _rnsPrimesInverses[j]; + auto stridePrimeInverse = rnsPrimeInverse._stride; + auto rnsPrimeInverseForRnsPrimeH = + rnsPrimeInverse._ptr[h * stridePrimeInverse]; + + for (auto i = 0u; i < _n; ++i) { + rnsF.mulin( + _rnsR._ptr[rnsStride + (i * _primesCount + j)], + rnsPrimeInverseForRnsPrimeH); + } + }); + } + + rnsStride += _rnsR._stride; + } + // commentator().stop("[MultiModLifting] MUL FOR INV R <= R / p"); + + // commentator().start("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + FFLAS::fconvert_rns(*_rnsDomain, _n, _primesCount, 0, _rMatrix.getWritePointer(), + _primesCount, _rnsR + 0); + IMD.addin(_rMatrix, _qMatrix); + // commentator().stop("[MultiModLifting] CONVERT TO INTEGER r <= Q + R"); + + return true; + } + + private: + // Helper function, setting all residues of a matrix element to the very same value. + // This doesn't check the moduli. + inline void setRNSMatrixElementAllResidues(RNSElementPtr& A, size_t lda, size_t i, size_t j, + double value) + { + auto& Aij = A[i * lda + j]; + auto stride = Aij._stride; + for (auto h = 0u; h < _rnsPrimesCount; ++h) { + Aij._ptr[h * stride] = value; + } + } + + // @note This allows us to factor out some of the rns fgemm variants common code. + template + inline void rns_fgemm(size_t threads1, size_t threads2) + { + PAR_BLOCK + { + using ComposedParSeqHelper = FFLAS::ParSeqHelper::Compose; + using MMHelper = + FFLAS::MMHelper; + ComposedParSeqHelper composedParSeqHelper(threads1, threads2); + MMHelper mmHelper(*_rnsDomain, -1, composedParSeqHelper); + + FFLAS::fgemm(*_rnsDomain, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, _n, + _primesCount, _n, _rnsDomain->mOne, _rnsA, _n, _rnsc, _primesCount, + _rnsDomain->one, _rnsR, _primesCount, mmHelper); + } + } + + public: // @fixme BACK TO PRIVATE! + const Ring& _ring; + Method::Dixon _method; // A copy of the user-provided method. + + // The problem: A^{-1} * b + const IMatrix& _A; + const IVector& _b; + + double _log2Bound; + Integer _numBound; + Integer _denBound; + + RNSSystem* _rnsSystem = nullptr; + RNSDomain* _rnsDomain = nullptr; + RNSElementPtr _rnsA; // The matrix A, but in the RNS system + // A matrix of digits c[j], being the current digits mod pj, in the RNS system + RNSElementPtr _rnsc; + RNSElementPtr _rnsR; + size_t _rnsPrimesCount = 0u; + // Stores the inverse of pj within the RNS base prime into _rnsPrimesInverses[j] + RNSElementPtr _rnsPrimesInverses; + + std::vector _primes; + std::vector _rnsPrimes; + // Length of the ci sequence. So that p^{k-1} > 2ND (Hadamard bound). + size_t _iterationsCount = 0u; + size_t _n = 0u; // Row/column dimension of A. + size_t _primesCount = 0u; // How many primes. Equal to _primes.size(). + + std::vector _BB; // Inverses of A mod p[i] + std::vector _fields; // All fields Modular + + //----- Iteration + + std::vector _FR; + IMatrix _rMatrix; + IMatrix _qMatrix; + }; +} diff --git a/linbox/algorithms/multi-mod-rational-reconstruction.h b/linbox/algorithms/multi-mod-rational-reconstruction.h new file mode 100644 index 000000000..cd5018c2a --- /dev/null +++ b/linbox/algorithms/multi-mod-rational-reconstruction.h @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2019 LinBox Team + * + * ========LICENCE======== + * This file is part of the library LinBox. + * + * LinBox is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * ========LICENCE======== + */ + +#pragma once + +#include "./rational-cra-builder-full-multip.h" + +namespace LinBox { + /** + * From a MultiModLiftingContainer, will build + * the solution on each prime, then will do a CRT reconstruction, + * before reconstructing the rational.95 + * + * This does not do early termination. + */ + template + class MultiModRationalReconstruction { + using Ring = typename LiftingContainer::Ring; + using IElement = typename LiftingContainer::IElement; + using IVector = typename LiftingContainer::IVector; + using FElement = typename LiftingContainer::FElement; + using FVector = typename LiftingContainer::FVector; + + public: + MultiModRationalReconstruction(LiftingContainer& lc) + : _lc(lc) + { + } + + bool getRational(IVector& xNum, IElement& xDen) + { + // Early out when the numerator is bounded by zero. + if (_lc.numBound() == 0) { + for (auto i = 0u; i < _lc.length(); ++i) { + _lc.ring().assign(xNum[i], _lc.ring().zero); + } + _lc.ring().assign(xDen, _lc.ring().one); + return true; + } + + commentator().start("[MultiModLifting] Lifting"); + + // Temporary structure to store a ci for each pj + std::vector digits; + digits.reserve(_lc.primesCount()); + for (auto& F : _lc.primesFields()) { + digits.emplace_back(F, _lc.size()); + } + + // The pj^i for each pj + std::vector radices(_lc.primesCount(), 1); + + // Stores each c0 + c1 pj + ... + ck pj^k for each pj + std::vector padicAccumulations(_lc.primesCount(), _lc.ring()); + for (auto j = 0u; j < _lc.primesCount(); ++j) { + padicAccumulations[j].resize(_lc.size()); + } + + // @fixme Better use PolEval (or will it cause memory explosion?) + VectorDomain IVD(_lc.ring()); + for (auto i = 0u; i < _lc.length(); ++i) { + _lc.next(digits); + + #pragma omp parallel for + for (auto j = 0u; j < _lc.primesCount(); ++j) { + // @fixme @cpernet digits being a field vector, this will implicitly cast + // each of its elements to a Integer, is there something better? + // Or else, we just need an overload of Givaro::ZRing().axpyin() with a double as last parameter + IVD.axpyin(padicAccumulations[j], radices[j], digits[j]); // y <- y + p^i * ci + _lc.ring().mulin(radices[j], _lc.prime(j)); + } + } + + commentator().stop("[MultiModLifting] Lifting"); + + // CRT reconstruction from paddicAccumulations + commentator().start("[MultiModLifting] CRT Reconstruction Progress"); + using CRAField = Givaro::Modular; + RationalCRABuilderFullMultip craBuilder(_lc.log2Bound() / 1.4427); // 1.4427 = 1 / log(2) + + { + CRAField field(radices[0]); + craBuilder.initialize(field, padicAccumulations[0]); + } + + for (auto j = 1u; j < _lc.primesCount(); ++j) { + CRAField field(radices[j]); + craBuilder.progress(field, padicAccumulations[j]); + } + commentator().stop("[MultiModLifting] CRT Reconstruction Progress"); + + // Rational reconstruction + craBuilder.result(xNum, xDen, _lc.numBound()); + + return true; + } + + private: + LiftingContainer& _lc; + }; +} diff --git a/linbox/algorithms/numeric-solver-lapack.h b/linbox/algorithms/numeric-solver-lapack.h index 8361e12e4..00c99a2ec 100644 --- a/linbox/algorithms/numeric-solver-lapack.h +++ b/linbox/algorithms/numeric-solver-lapack.h @@ -29,6 +29,7 @@ #ifndef __LINBOX_numeric_solver_lapack_H #define __LINBOX_numeric_solver_lapack_H +#include "fflas-ffpack/config.h" #if defined(__FFLASFFPACK_HAVE_LAPACK) diff --git a/linbox/algorithms/polynomial-matrix/polynomial-matrix-domain.h b/linbox/algorithms/polynomial-matrix/polynomial-matrix-domain.h old mode 100755 new mode 100644 diff --git a/linbox/algorithms/rational-cra-builder-full-multip.h b/linbox/algorithms/rational-cra-builder-full-multip.h index e6828228a..0e1c68b5a 100644 --- a/linbox/algorithms/rational-cra-builder-full-multip.h +++ b/linbox/algorithms/rational-cra-builder-full-multip.h @@ -65,11 +65,51 @@ namespace LinBox return num; } + template + Vect& result (Vect &num, Integer& den, const Integer& numBound) + { + commentator().start("[RationalCRABuilderFullMultip] CRT Reconstruction"); + Father_t::result(num, false); + commentator().stop("[RationalCRABuilderFullMultip] CRT Reconstruction"); + + commentator().start("[RationalCRABuilderFullMultip] Rational Reconstruction"); + den = 1; + const auto& mod = Father_t::getModulus(); + Integer nd; + for (auto num_it = num.begin(); num_it != num.end(); ++num_it) { + iterativeratrecon(*num_it, nd, den, mod, numBound); + + if (nd > 1) { + for (auto t02 = num.begin(); t02 != num_it; ++t02) + *t02 *= nd; + den *= nd; + } + } + commentator().stop("[RationalCRABuilderFullMultip] Rational Reconstruction"); + return num; + } + protected: - Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& s) + Integer& iterativeratrecon(Integer& u1, Integer& new_den, const Integer& old_den, const Integer& m1, const Integer& sn) { +/* std::clog << "iterativeratrecon" + << ", u1: " << u1 + << ", new_den: " << new_den + << ", old_den: " << old_den + << ", m1: " << m1 + << ", sn: " << sn + ; +*/ Integer a; - _ZZ.RationalReconstruction(a, new_den, u1*=old_den, m1, s); + bool success = _ZZ.RationalReconstruction(a, new_den, u1*=old_den, m1, sn, true, false); + if (! success) + std::cerr << " ***** RationalReconstruction FAILURE ***** "; +/* + std::clog << ", AFTER" + << ", a: " << a + << ", new_den: " << new_den + << std::endl; +*/ return u1=a; } }; diff --git a/linbox/algorithms/rns.h b/linbox/algorithms/rns.h index 2a7fffa16..0a966acf1 100644 --- a/linbox/algorithms/rns.h +++ b/linbox/algorithms/rns.h @@ -78,6 +78,7 @@ namespace LinBox * @param l max recoverable bits * @param ps bitsize of the primes (defaulting to 21 because...) */ + RNS() {} RNS(size_t l, size_t ps=21) ; /*x Create a RNS with given primes. * @param primes given basis of primes @@ -97,14 +98,13 @@ namespace LinBox /*! Inits cra. */ void initCRA() ; + template + void init(const std::vector& primes); + /*! Computes \c result corresponding to the \c residues. * */ void cra(integer & result, const std::vector & residues); - /*! Computes \c result corresponding to the \c residues. - * - */ - void cra(std::vector & result, const std::vector > & residues); /*! Computes \c result corresponding to the iteration. * @@ -112,12 +112,6 @@ namespace LinBox template void cra(Ivect & result, Iteration & iter) ; - template - void cra(Tinteger & result, Tresidue & residues); - - template - void convert(Tinteger & result, Tresidue & residues) ; - // mixed radix }; diff --git a/linbox/algorithms/rns.inl b/linbox/algorithms/rns.inl index b38a227eb..1723f94c9 100644 --- a/linbox/algorithms/rns.inl +++ b/linbox/algorithms/rns.inl @@ -56,7 +56,7 @@ namespace LinBox if (curint>maxint) break; PrimeIterator genprimes( (unsigned int) (_ps_+penalty) ); - size_t p = genprimes.randomPrime() ; + size_t p = *genprimes ; ++genprimes; primeset.insert(p); if (lg < primeset.size()) { @@ -104,6 +104,21 @@ namespace LinBox return ; } + template + template + void RNS::init(const std::vector& primes) + { + _size_ = primes.size(); + _primes_.resize(_size_); + _PrimeDoms_.resize(_size_); + for (auto i = 0u; i < _size_; ++i) { + _primes_[i] = size_t(primes[i]); + _PrimeDoms_[i] = Field(primes[i]); + } + + _CRT_ = CRTSystem(_PrimeDoms_); + } + template void RNS::cra(integer & result, const std::vector & residues) @@ -183,7 +198,7 @@ namespace LinBox if (curint>maxint) break; PrimeIterator genprimes((unsigned int) (_ps_+penalty) ); - size_t p = genprimes.randomPrime() ; + size_t p = *genprimes ; ++genprimes; primeset.insert(p); if (lg < primeset.size()) { diff --git a/linbox/matrix/matrixdomain/blas-matrix-domain.h b/linbox/matrix/matrixdomain/blas-matrix-domain.h index e235a9dbd..5c09fc5d0 100644 --- a/linbox/matrix/matrixdomain/blas-matrix-domain.h +++ b/linbox/matrix/matrixdomain/blas-matrix-domain.h @@ -631,15 +631,6 @@ namespace LinBox return B.swap(A); } - - //- Inversion w singular check - // template - // Matrix& inv( Matrix &Ainv, const Matrix &A, int& nullity) const - // { - // nullity = BlasMatrixDomainInv()(field(),Ainv,A); - // return Ainv; - // } - //! Inversion w singular check template Matrix1& inv( Matrix1 &Ainv, const Matrix2 &A, int& nullity) const @@ -648,7 +639,6 @@ namespace LinBox return Ainv; } - //! Inversion (the matrix A is modified) w singular check template Matrix1& invin( Matrix1 &Ainv, Matrix2 &A, int& nullity) const @@ -657,6 +647,17 @@ namespace LinBox return Ainv; } + //! Inversion (the matrix A is modified) w singular check + template + Matrix& invin(Matrix& A, int& nullity) const + { + // @fixme @cpernet Apparently FFLAS has a new method that does + // inversion really in place, we should update this code. + Matrix tmp(A); + nullity = BlasMatrixDomainInv()(field(),A,tmp); + return A; + } + //! Rank template unsigned int rank(const Matrix &A) const diff --git a/linbox/solutions/hadamard-bound.h b/linbox/solutions/hadamard-bound.h index 8216994e2..43d25fe23 100644 --- a/linbox/solutions/hadamard-bound.h +++ b/linbox/solutions/hadamard-bound.h @@ -33,120 +33,112 @@ namespace LinBox { // ----- Vector norm - // Returns false if the vector is null, true otherwise template - bool vectorLogNorm(double& logNorm, const ConstIterator& begin, const ConstIterator& end) + void vectorNormSquared(Integer& normSquared, const ConstIterator& begin, + const ConstIterator& end) { - Integer normSquared = 0; + normSquared = 0; for (ConstIterator it = begin; it != end; ++it) { // Whatever field element it is, // it should be able to store the square without // loss of information. normSquared += (*it) * (*it); } - - if (normSquared == 0) { - logNorm = 0.0; - return false; // Vector is zero - } -#ifdef DEBUG_HADAMARD_BOUND - std::clog << "normSquared:=" << normSquared << ';' << std::endl; - std::clog << "vectorLogNorm:=" << (Givaro::logtwo(normSquared) / 2.0) << ';' << std::endl; -#endif - logNorm = Givaro::logtwo(normSquared) / 2.0; - return true; } // ----- Detailed Hadamard bound - struct HadamardLogBoundDetails { + struct HadamarBoundDetails { /** - * Bit size of the minimal hadamard bound + * The minimal hadamard bound * between the row-wise and the col-wise ones. * * min { HadamardRow(A), HadamardCol(A) } */ - double logBound; + Integer bound; + /** - * Bit size of the minimal hadamard bound + * The minimal hadamard bound * divided by the min of the norm vectors * between the row-wise and the col-wise ones. * * min { HadamardRow(A) / min || Ai,* ||, * HadamardCol(A) / min || A*,j || } */ - double logBoundOverMinNorm; + Integer boundOverMinNorm; }; /** * Precise Hadamard bound (bound on determinant) by taking * the row-wise euclidean norm. - * - * The result is expressed as bit size. */ template - void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A) + void HadamardRowBound(Integer& bound, const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - HadamardRowLogBound(logBound, minLogNorm, A, tag); + HadamardRowBound(bound, A, tag); } template - void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) + void HadamardRowBound(Integer& bound, const IMatrix& A, + const MatrixCategories::RowColMatrixTag& tag) { - logBound = 0.0; - minLogNorm = std::numeric_limits::infinity(); + bound = 1; for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) { - double rowLogNorm; - if (vectorLogNorm(rowLogNorm, rowIt->begin(), rowIt->end())) { - if (rowLogNorm < minLogNorm) { - minLogNorm = rowLogNorm; - } - } - else { - logBound = 0.0; - minLogNorm = 0.0; + Integer rowNormSquared; + vectorNormSquared(rowNormSquared, rowIt->begin(), rowIt->end()); + + if (rowNormSquared == 0) { + bound = 0; return; } - logBound += rowLogNorm; + + bound *= rowNormSquared; + } + + // Square-root (upper bound) + Integer rem; + bound = Givaro::sqrtrem(bound, rem); + if (rem != 0) { + bound += 1; } } template - void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowMatrixTag& tag) + void HadamardRowBound(Integer& bound, const IMatrix& A, + const MatrixCategories::RowMatrixTag& tag) { - logBound = 0.0; - minLogNorm = std::numeric_limits::infinity(); + bound = 1; for (auto rowIt = A.rowBegin(); rowIt != A.rowEnd(); ++rowIt) { Integer normSquared = 0; for (const auto& pair : *rowIt) { normSquared += (pair.second) * (pair.second); } + if (normSquared == 0) { - logBound = 0.0; - minLogNorm = 0.0; + bound = 0; return; } - double logNormSquared = Givaro::logtwo(normSquared); - if (logNormSquared < minLogNorm) { - minLogNorm = logNormSquared; - } - logBound += logNormSquared; + bound *= normSquared; } - // Square-root - logBound /= 2.0; - minLogNorm /= 2.0; + // Square-root (upper bound) + Integer rem; + bound = Givaro::sqrtrem(bound, rem); + if (rem != 0) { + bound += 1; + } } template - void HadamardRowLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::BlackboxTag& tag) + void HadamardRowBound(Integer& bound, const IMatrix& A, + const MatrixCategories::BlackboxTag& tag) { DenseMatrix ACopy(A); - HadamardRowLogBound(logBound, minLogNorm, ACopy); + HadamardRowBound(bound, ACopy); } /** @@ -156,40 +148,51 @@ namespace LinBox { * The result is expressed as bit size. */ template - void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A) + void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - HadamardColLogBound(logBound, minLogNorm, A, tag); + HadamardColBound(bound, minNormSquared, A, tag); } template - void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) + void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::RowColMatrixTag& tag) { - logBound = 0.0; - minLogNorm = std::numeric_limits::infinity(); + bound = 1; + minNormSquared = -1; typename IMatrix::ConstColIterator colIt; for (colIt = A.colBegin(); colIt != A.colEnd(); ++colIt) { - double colLogNorm; - if (vectorLogNorm(colLogNorm, colIt->begin(), colIt->end())) { - if (colLogNorm < minLogNorm) { - minLogNorm = colLogNorm; - } - } - else { - logBound = 0.0; - minLogNorm = 0.0; + Integer colNormSquared; + vectorNormSquared(colNormSquared, colIt->begin(), colIt->end()); + + if (colNormSquared == 0) { + bound = 0; + minNormSquared = 0; return; } - logBound += colLogNorm; + + if (minNormSquared < 0 || colNormSquared < minNormSquared) { + minNormSquared = colNormSquared; + } + + bound *= colNormSquared; + } + + // Square-root (upper bound) + Integer rem; + bound = Givaro::sqrtrem(bound, rem); + if (rem != 0) { + bound += 1; } } template - void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::RowMatrixTag& tag) + void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::RowMatrixTag& tag) { - logBound = 0.0; - minLogNorm = std::numeric_limits::infinity(); + bound = 1; + minNormSquared = -1; // This vector contains the norm squared for each columns. std::vector columnsNormsSquared(A.coldim()); @@ -200,30 +203,35 @@ namespace LinBox { } // All the norms have been computed, we check which one is the smallest - // and compute the product (aka sum bitsize-wise) of them to make the logBound. + // and compute the product (aka sum bitsize-wise) of them to make the bound. for (const Integer& normSquared : columnsNormsSquared) { if (normSquared == 0) { - logBound = 0.0; - minLogNorm = 0.0; + bound = 0; + minNormSquared = 0; return; } - double logNormSquared = Givaro::logtwo(normSquared); - if (logNormSquared < minLogNorm) { - minLogNorm = logNormSquared; + + if (minNormSquared < 0 || normSquared < minNormSquared) { + minNormSquared = normSquared; } - logBound += logNormSquared; + + bound *= normSquared; } - // Square-root - logBound /= 2.0; - minLogNorm /= 2.0; + // Square-root (upper bound) + Integer rem; + bound = Givaro::sqrtrem(bound, rem); + if (rem != 0) { + bound += 1; + } } template - void HadamardColLogBound(double& logBound, double& minLogNorm, const IMatrix& A, const MatrixCategories::BlackboxTag& tag) + void HadamardColBound(Integer& bound, Integer& minNormSquared, const IMatrix& A, + const MatrixCategories::BlackboxTag& tag) { DenseMatrix ACopy(A); - HadamardColLogBound(logBound, minLogNorm, ACopy); + HadamardColBound(bound, minNormSquared, ACopy); } /** @@ -233,34 +241,39 @@ namespace LinBox { * The results are expressed as bit size. */ template - HadamardLogBoundDetails DetailedHadamardBound(const IMatrix& A) + HadamarBoundDetails DetailedHadamardBound(const IMatrix& A) { - double rowLogBound = 0.0; - double rowMinLogNorm = 0.0; - HadamardRowLogBound(rowLogBound, rowMinLogNorm, A); - double rowLogBoundOverMinNorm = rowLogBound - rowMinLogNorm; + // @note We can't use the rowBoundOverMinNorm because + // the rational solve Hadamard bound uses it for the numerator bound. + + Integer rowBound; + HadamardRowBound(rowBound, A); #ifdef DEBUG_HADAMARD_BOUND - std::clog << "rowLogBound:=" << rowLogBound << ';' << std::endl; - std::clog << "rowMinLogNorm:=" << rowMinLogNorm << ';' << std::endl; - std::clog << "rowLogBoundOverMinNorm:=" << rowLogBoundOverMinNorm << ';' << std::endl; + A.write(std::clog) << std::endl; + std::clog << "rowBound:=" << rowBound << ';' << std::endl; #endif - double colLogBound = 0.0; - double colMinLogNorm = 0.0; - HadamardColLogBound(colLogBound, colMinLogNorm, A); - double colLogBoundOverMinNorm = colLogBound - colMinLogNorm; + Integer rem; + Integer colBound; + Integer colMinNormSquared; + HadamardColBound(colBound, colMinNormSquared, A); + Integer colBoundOverMinNorm; + Integer::divmod(colBoundOverMinNorm, rem, colBound, Givaro::sqrt(colMinNormSquared)); + if (rem != 0) { + colBoundOverMinNorm += 1; + } #ifdef DEBUG_HADAMARD_BOUND - std::clog << "colLogBound:=" << colLogBound << ';' << std::endl; - std::clog << "colMinLogNorm:=" << colMinLogNorm << ';' << std::endl; - std::clog << "colLogBoundOverMinNorm:=" << colLogBoundOverMinNorm << ';' << std::endl; + std::clog << "colBound:=" << colBound << ';' << std::endl; + std::clog << "colMinNormSquared:=" << colMinNormSquared << ';' << std::endl; + std::clog << "colBoundOverMinNorm:=" << colBoundOverMinNorm << ';' << std::endl; #endif - HadamardLogBoundDetails data; - data.logBound = std::min(rowLogBound, colLogBound); - data.logBoundOverMinNorm = std::min(rowLogBoundOverMinNorm, colLogBoundOverMinNorm); + HadamarBoundDetails data; + data.bound = (rowBound < colBound) ? rowBound : colBound; + data.boundOverMinNorm = colBoundOverMinNorm; #ifdef DEBUG_HADAMARD_BOUND - std::clog << "logBound:=" << data.logBound << ';' << std::endl; - std::clog << "logBoundOverMinNorm:=" << data.logBoundOverMinNorm << ';' << std::endl; + std::clog << "bound:=" << data.bound << ';' << std::endl; + std::clog << "boundOverMinNorm:=" << data.boundOverMinNorm << ';' << std::endl; #endif return data; @@ -271,18 +284,27 @@ namespace LinBox { /** * Precise Hadamard bound (bound on determinant) by taking the minimum * of the column-wise and the row-wise euclidean norm. - * - * The result is expressed as bit size. */ template - double HadamardBound(const IMatrix& A) + Integer HadamardBound(const IMatrix& A) { - return DetailedHadamardBound(A).logBound; + return DetailedHadamardBound(A).bound; } - // ----- Fast Hadamard bound + template + double HadamardLogBound(const IMatrix& A) + { + return Givaro::logtwo(HadamardBound(A)); + } + // ----- Fast Hadamard bound + template + inline Integer& InfinityNorm(Integer& max, const IMatrix& A) + { + typename MatrixTraits::MatrixCategory tag; + return InfinityNorm(max, A, tag); + } /** * Returns the maximal absolute value. @@ -294,9 +316,9 @@ namespace LinBox { return InfinityNorm(max, ACopy, MatrixCategories::RowColMatrixTag()); } - template - inline Integer& InfinityNorm(Integer& max, const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) + inline Integer& InfinityNorm(Integer& max, const IMatrix& A, + const MatrixCategories::RowColMatrixTag& tag) { max = 0; for (auto it = A.Begin(); it != A.End(); ++it) { @@ -310,95 +332,96 @@ namespace LinBox { return max; } - /** - * Returns the bit size of the Hadamard bound. - * This is a larger estimation but faster to compute. - */ + /** + * Returns the bit size of the Hadamard bound. + * This is a larger estimation but faster to compute. + */ template - inline double FastHadamardBound(const IMatrix& A, const Integer& infnorm) + inline double FastHadamardLogBound(const IMatrix& A, const Integer& infinityNorm) { - if (infnorm == 0) { + if (infinityNorm == 0) { return 0.0; } uint64_t n = std::max(A.rowdim(), A.coldim()); - double logBound = static_cast(n) * (Givaro::logtwo(n) / 2.0 + Givaro::logtwo(infnorm)); - return logBound; + double bound = + static_cast(n) * (Givaro::logtwo(n) / 2.0 + Givaro::logtwo(infinityNorm)); + return bound; } template - inline double FastHadamardBound(const IMatrix& A, const MatrixCategories::RowColMatrixTag& tag) + inline double FastHadamardLogBound(const IMatrix& A, + const MatrixCategories::RowColMatrixTag& tag) { - Integer infnorm; - InfinityNorm(infnorm, A, tag); - return FastHadamardBound(A, infnorm); + Integer infinityNorm; + InfinityNorm(infinityNorm, A, tag); + return FastHadamardLogBound(A, infinityNorm); } template - inline double FastHadamardBound(const IMatrix& A, const MatrixCategories::BlackboxTag& tag) + inline double FastHadamardLogBound(const IMatrix& A, const MatrixCategories::BlackboxTag& tag) { DenseMatrix ACopy(A); - return FastHadamardBound(ACopy); + return FastHadamardLogBound(ACopy); } template - inline double FastHadamardBound(const IMatrix& A) + inline double FastHadamardLogBound(const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - return FastHadamardBound(A, tag); + return FastHadamardLogBound(A, tag); } - /** - * Bound on the coefficients of the characteristic polynomial - * @bib "Efficient Computation of the Characteristic Polynomial". Dumas Pernet Wan ISSAC'05. - * - */ + /** + * Bound on the coefficients of the characteristic polynomial + * @bib "Efficient Computation of the Characteristic Polynomial". Dumas Pernet Wan ISSAC'05. + * + */ template - inline double FastCharPolyDumasPernetWanBound(const IMatrix& A, const Integer& infnorm) + inline double FastCharPolyDumasPernetWanBound(const IMatrix& A, const Integer& infinityNorm) { - // .105815875 = 0.21163275 / 2 - return FastHadamardBound(A, infnorm) + A.coldim()*.105815875; + // .105815875 = 0.21163275 / 2 + return FastHadamardLogBound(A, infinityNorm) + A.coldim() * .105815875; } - /** - * A.J. Goldstein et R.L. Graham. - * A Hadamard-type bound on the coefficients of - * a determinant of polynomials. - * SIAM Review, volume 15, 1973, pages 657-658. - * - */ + /** + * A.J. Goldstein et R.L. Graham. + * A Hadamard-type bound on the coefficients of + * a determinant of polynomials. + * SIAM Review, volume 15, 1973, pages 657-658. + * + */ template - inline double FastCharPolyGoldsteinGrahamBound(const IMatrix& A, const Integer& infnorm) + inline double FastCharPolyGoldsteinGrahamBound(const IMatrix& A, const Integer& infinityNorm) { - Integer ggb(infnorm); + Integer ggb(infinityNorm); ggb *= static_cast(A.coldim()); ggb += 2; - ggb *= infnorm; + ggb *= infinityNorm; ++ggb; - return Givaro::logtwo(ggb)*A.coldim()/2.0; + return Givaro::logtwo(ggb) * A.coldim() / 2.0; } template inline double FastCharPolyHadamardBound(const IMatrix& A) { typename MatrixTraits::MatrixCategory tag; - Integer infnorm; - InfinityNorm(infnorm, A, tag); - const double DPWbound = FastCharPolyDumasPernetWanBound(A, infnorm); - const double GGbound = FastCharPolyGoldsteinGrahamBound(A, infnorm); + Integer infinityNorm; + InfinityNorm(infinityNorm, A, tag); + const double DPWbound = FastCharPolyDumasPernetWanBound(A, infinityNorm); + const double GGbound = FastCharPolyGoldsteinGrahamBound(A, infinityNorm); #ifdef DEBUG_HADAMARD_BOUND std::clog << "DPWbound: " << DPWbound << std::endl; std::clog << "GGbound : " << GGbound << std::endl; #endif - return std::min(DPWbound,GGbound); + return std::min(DPWbound, GGbound); } - // ----- Rational solve bound struct RationalSolveHadamardBoundData { - double numLogBound; // log2(N) - double denLogBound; // log2(D) + Integer numBound; // N + Integer denBound; // D double solutionLogBound; // log2(2 * N * D) }; @@ -410,30 +433,46 @@ namespace LinBox { * @note Matrix and Vector should be over Integer. */ template - typename std::enable_if::categoryTag, RingCategories::IntegerTag>::value, + typename std::enable_if::categoryTag, + RingCategories::IntegerTag>::value, RationalSolveHadamardBoundData>::type RationalSolveHadamardBound(const Matrix& A, const Vector& b) { RationalSolveHadamardBoundData data; auto hadamardBound = DetailedHadamardBound(A); - double bLogNorm; - vectorLogNorm(bLogNorm, b.begin(), b.end()); + Integer bNormSquared; + vectorNormSquared(bNormSquared, b.begin(), b.end()); + + // Square-root of bNormSquared (upper bound) + Integer rem; + Integer bNorm = Givaro::sqrtrem(bNormSquared, rem); + if (rem != 0) { + bNorm += 1; + } - data.numLogBound = hadamardBound.logBoundOverMinNorm + bLogNorm + 1.0; - data.denLogBound = hadamardBound.logBound; - data.solutionLogBound = data.numLogBound + data.denLogBound + 1.0; + // @note RR expects the bounds to be strict, this is why we add a + 1 + data.denBound = hadamardBound.bound + 1; + data.numBound = hadamardBound.boundOverMinNorm * bNorm + 1; + if (data.denBound == 0 || data.numBound == 0) { + data.solutionLogBound = 0.0; + } + else { + data.solutionLogBound = 1.0 + Givaro::logtwo(data.numBound) + + Givaro::logtwo(data.denBound); // log2(2 * N * D) + } #ifdef DEBUG_HADAMARD_BOUND - std::clog << "numLogBound:=" << data.numLogBound << ';' << std::endl; - std::clog << "denLogBound:=" << data.denLogBound << ';' << std::endl; + std::clog << "numBound:=" << data.numBound << ';' << std::endl; + std::clog << "denBound:=" << data.denBound << ';' << std::endl; #endif return data; } /// @fixme Needed to solve-cra.h, but can't be used yet. template - typename std::enable_if::categoryTag, RingCategories::RationalTag>::value, + typename std::enable_if::categoryTag, + RingCategories::RationalTag>::value, RationalSolveHadamardBoundData>::type RationalSolveHadamardBound(const Matrix& A, const Vector& b) { diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 881de1959..063a44dbf 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -178,6 +178,17 @@ namespace LinBox { Linear, }; + /** + * When running FFLAS's fgemm on an RNS structure, + * how the composed ParSeqHelper should be configured. + */ + enum class RnsFgemmType { + BothParallel, + BothSequential, + ParallelRnsOnly, + ParallelFgemmOnly, + }; + /** * Holds everything a method needs to know about the problem. * @@ -221,6 +232,15 @@ namespace LinBox { SingularSolutionType singularSolutionType = SingularSolutionType::Random; bool certifyMinimalDenominator = false; //!< Whether the solver should try to find a certificate //! that the provided denominator is minimal. + // @fixme Make a auto switch for multi modular lifting, based on matrix size. + // Whether to use the multi-modular Dixon lifter. + // (A BLAS Based C Library for Exact Linear Algebra on Integer Matrices - Chen, Storjohann ISSAC 2005) + // https://cs.uwaterloo.ca/~astorjoh/p92-chen.pdf + bool multiModularLifting = true; + //! How many primes to use, multi mod lifting will be done over p = p1p2...pl. + //! -1 means automatically set to a heuristic value. + uint32_t primesCount = -1u; + RnsFgemmType rnsFgemmType = RnsFgemmType::ParallelRnsOnly; // ----- For random-based systems. size_t trialsBeforeFailure = LINBOX_DEFAULT_TRIALS_BEFORE_FAILURE; //!< Maximum number of trials before giving up. diff --git a/linbox/solutions/solve/solve-dixon.h b/linbox/solutions/solve/solve-dixon.h index b55650a14..84ed7cbbd 100644 --- a/linbox/solutions/solve/solve-dixon.h +++ b/linbox/solutions/solve/solve-dixon.h @@ -89,14 +89,14 @@ namespace LinBox { commentator().start("solve.dixon.integer.dense"); linbox_check((A.coldim() == xNum.size()) && (A.rowdim() == b.size())); - // @fixme Using Givaro::ModularBalanced for the field makes Dixon fail... + // @note Using Givaro::ModularBalanced would make Dixon and MultiModLiftingContainer fail... using Matrix = DenseMatrix; using Field = Givaro::Modular; using PrimeGenerator = PrimeIterator; PrimeGenerator primeGenerator(FieldTraits::bestBitSize(A.coldim())); using Solver = DixonSolver::type>; - Solver dixonSolve(A.field(), primeGenerator); + Solver dixonSolve(A.field(), primeGenerator, m); // Either A is known to be non-singular, or we just don't know yet. int maxTrials = m.trialsBeforeFailure; diff --git a/tests/test-hadamard-bound.C b/tests/test-hadamard-bound.C index 8e7a602f8..6487c7b70 100644 --- a/tests/test-hadamard-bound.C +++ b/tests/test-hadamard-bound.C @@ -42,8 +42,8 @@ bool test(const Ring& F, const TMatrix& A, const TVector& b) // ---- Determinant // Compute the bounds - double hb = HadamardBound(A); - double fastHb = FastHadamardBound(A); + double hb = HadamardLogBound(A); + double fastHb = FastHadamardLogBound(A); // Compute the effective determinant Integer detA; @@ -73,17 +73,17 @@ bool test(const Ring& F, const TMatrix& A, const TVector& b) solve(num, den, A, b); for (size_t i = 0u; i < num.size(); ++i) { - if (Givaro::logtwo(Givaro::abs(num[i])) > rationalSolveHb.numLogBound + ESPILON) { + if (Givaro::abs(num[i]) > rationalSolveHb.numBound) { std::cerr << "The rational solve Hadamard bound does not bound the numerator." << std::endl; - std::cerr << "num[i]: " << Givaro::logtwo(Givaro::abs(num[i])) << " > " << rationalSolveHb.numLogBound + std::cerr << "num[i]: " << Givaro::abs(num[i]) << " > " << rationalSolveHb.numBound << std::endl; return false; } } - if (Givaro::logtwo(Givaro::abs(den)) > rationalSolveHb.denLogBound + ESPILON) { + if (Givaro::abs(den) > rationalSolveHb.denBound) { std::cerr << "The rational solve Hadamard bound does not bound the denominator." << std::endl; - std::cerr << "den: " << Givaro::logtwo(den) << " > " << rationalSolveHb.denLogBound << std::endl; + std::cerr << "den: " << den << " > " << rationalSolveHb.denBound << std::endl; return false; } diff --git a/tests/test-last-invariant-factor.C b/tests/test-last-invariant-factor.C index 297e50b6f..7722e23df 100644 --- a/tests/test-last-invariant-factor.C +++ b/tests/test-last-invariant-factor.C @@ -48,21 +48,21 @@ using namespace LinBox; -template -bool testRandom(const Ring& R, - LIF& lif, - LinBox::VectorStream& stream1) +template +bool testRandom(const Ring& R, RandIter& gen, + LIF& lif, + LinBox::VectorStream& stream1) { std::ostringstream str; str << "Testing last invariant factor:"; - commentator().start (str.str ().c_str (), "testRandom", stream1.m ()); + commentator().start (str.str ().c_str (), "testRandom", stream1.m ()); - bool ret = true; + bool ret = true; - VectorDomain VD (R); + VectorDomain VD (R); Vector d(R); @@ -72,21 +72,22 @@ bool testRandom(const Ring& R, int n = int(d. size()); - while (stream1) { + while (stream1) { - commentator().startIteration ((unsigned)stream1.j ()); + commentator().startIteration ((unsigned)stream1.j ()); - std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION); +// std::ostream &report = commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_DESCRIPTION); + std::ostream &report = std::clog; - bool iter_passed = true; + bool iter_passed = true; stream1.next (d); report << "Input vector: "; VD.write (report, d); - report << endl; + report << endl; - BlasMatrix D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n); + DenseMatrix D(R, n, n), L(R, n, n), U(R, n, n), A(R,n,n); int i, j; @@ -99,15 +100,14 @@ bool testRandom(const Ring& R, for (j = 0; j < i; ++ j) { - R.init(L[(size_t)i][(size_t)j], int64_t(rand() % 10)); - - R.init(U[(size_t)j][(size_t)i], int64_t(rand() % 10)); + gen.random(L[(size_t)i][(size_t)j]); + gen.random(U[(size_t)j][(size_t)i]); } - BlasVector tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n); + DenseVector tmp1(R,(size_t)n), tmp2(R,(size_t)n), e(R,(size_t)n); - typename BlasMatrix::ColIterator col_p; + typename DenseMatrix::ColIterator col_p; i = 0; for (col_p = A.colBegin(); @@ -116,8 +116,6 @@ bool testRandom(const Ring& R, R.assign(e[(size_t)i],R.one); U.apply(tmp1, e); D.apply(tmp2, tmp1); - // LinBox::BlasSubvector > col_p_v(R,*col_p); - // L.apply(col_p_v, tmp2); L.apply(*col_p, tmp2); R.assign(e[(size_t)i],R.zero); } @@ -156,24 +154,24 @@ bool testRandom(const Ring& R, ret = iter_passed = false; - if (!iter_passed) + if (!iter_passed) - commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR) + commentator().report (Commentator::LEVEL_IMPORTANT, INTERNAL_ERROR) << "ERROR: Computed last invariant factor is incorrect" << endl; - commentator().stop ("done"); + commentator().stop ("done"); - commentator().progress (); + commentator().progress (); - } + } - //stream1.reset (); + //stream1.reset (); - commentator().stop (MSG_STATUS (ret), (const char *) 0, "testRandom"); + commentator().stop (MSG_STATUS (ret), (const char *) 0, "testRandom"); - return ret; + return ret; } @@ -181,44 +179,55 @@ int main(int argc, char** argv) { - bool pass = true; - - static size_t n = 10; + bool pass = true; + int seed = -1; + static size_t n = 10; + static size_t bits = 30; static unsigned int iterations = 1; - static Argument args[] = { - { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT, &n }, - { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT, &iterations }, + static Argument args[] = { + { 'n', "-n N", "Set order of test matrices to N.", TYPE_INT, &n }, + { 'b', "-b B", "Set bit size to B.", TYPE_INT, &bits }, + { 'i', "-i I", "Perform each test for I iterations.", TYPE_INT, &iterations }, + {'s', "-s", "Seed for randomness.", TYPE_INT, &seed}, END_OF_ARGUMENTS - }; + }; parseArguments (argc, argv, args); - typedef Givaro::ZRing Ring; + if (seed < 0) { + seed = time(nullptr); + } - Ring R; Ring::RandIter gen(R); + typedef Givaro::ZRing Ring; + + Ring R; Ring::RandIter gen(R, seed, bits); commentator().start("Last invariant factor test suite", "LIF"); - commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5); + commentator().getMessageClass (INTERNAL_DESCRIPTION).setMaxDepth (5); - RandomDenseStream s1 (R, gen, n, iterations); + RandomDenseStream s1 (R, gen, n, iterations); - typedef DixonSolver, PrimeIterator > Solver; - // typedef DixonSolver, LinBox::PrimeIterator > Solver; + // typedef DixonSolver, PrimeIterator > Solver; + typedef DixonSolver, LinBox::PrimeIterator > Solver; typedef LastInvariantFactor LIF; LIF lif; - lif. setThreshold (30); + lif.setThreshold (30); + + if (!testRandom(R, gen, lif, s1)) pass = false; - if (!testRandom(R, lif, s1)) pass = false; + if (!pass) { + std::cerr << "Failed with seed: " << seed << std::endl; + } - commentator().stop("Last invariant factor test suite"); - return pass ? 0 : -1; + commentator().stop("Last invariant factor test suite"); + return pass ? 0 : -1; } // Local Variables: diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 2a5ee6f86..0f43b57ce 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -96,8 +96,12 @@ namespace { } template -bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, ResultVector& Rb) +bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, ResultVector& Rb, bool verbose) { + if (verbose) { + std::cout << "Checking result..." << std::endl; + } + ResultVector RAx(RA.field(), Rb.size()); RA.apply(RAx, x); @@ -107,6 +111,10 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul return false; } + if (verbose) { + std::cout << "Result OK !" << std::endl; + } + return true; } @@ -144,10 +152,12 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R bool ok = true; try { solve(x, A, b, method); - ok = ok && check_result(x, A, b, RA, Rb); + ok = check_result(x, A, b, RA, Rb, verbose); - solveInPlace(x, A, b, method); - ok = ok && check_result(x, A, b, RA, Rb); + // if (ok) { + // solveInPlace(x, A, b, method); + // ok = check_result(x, A, b, RA, Rb, verbose); + // } } catch (...) { print_error(x, A, b, "throws error"); return false; @@ -208,15 +218,18 @@ int main(int argc, char** argv) Integer q = 131071; bool verbose = false; bool loop = false; + int primesCount = -1; int seed = -1; int bitSize = 10; int vectorBitSize = -1; int m = 32; int n = 24; std::string dispatchString = "Auto"; + std::string rnsFgemmString = "ParallelRnsOnly"; static Argument args[] = { {'q', "-q", "Field characteristic.", TYPE_INTEGER, &q}, + {'p', "-p", "For multi-modular methods, how many primes to use.", TYPE_INT, &primesCount}, {'v', "-v", "Enable verbose mode.", TYPE_BOOL, &verbose}, {'l', "-l", "Infinite loop of tests.", TYPE_BOOL, &loop}, {'s', "-s", "Seed for randomness.", TYPE_INT, &seed}, @@ -225,6 +238,7 @@ int main(int argc, char** argv) {'m', "-m", "Row dimension of matrices.", TYPE_INT, &m}, {'n', "-n", "Column dimension of matrices.", TYPE_INT, &n}, {'d', "-d", "Dispatch mode (either Auto, Sequential, SMP or Distributed).", TYPE_STR, &dispatchString}, + {'r', "-r", "RNS-FGEMM type (either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly).", TYPE_STR, &rnsFgemmString}, END_OF_ARGUMENTS}; parseArguments(argc, argv, args); @@ -247,6 +261,27 @@ int main(int argc, char** argv) return EXIT_FAILURE; } + if (rnsFgemmString == "BothParallel") + method.rnsFgemmType = RnsFgemmType::BothParallel; + else if (rnsFgemmString == "BothSequential") + method.rnsFgemmType = RnsFgemmType::BothSequential; + else if (rnsFgemmString == "ParallelRnsOnly") + method.rnsFgemmType = RnsFgemmType::ParallelRnsOnly; + else if (rnsFgemmString == "ParallelFgemmOnly") + method.rnsFgemmType = RnsFgemmType::ParallelFgemmOnly; + else { + std::cerr << "-r RNS-FGEMM type should be either BothParallel, BothSequential, ParallelRnsOnly or ParallelFgemmOnly" << std::endl; + return EXIT_FAILURE; + } + + if (primesCount > 0) { + method.multiModularLifting = true; + method.primesCount = primesCount; + } + else { + method.multiModularLifting = false; + } + if (vectorBitSize < 0) { vectorBitSize = bitSize; } @@ -288,9 +323,9 @@ int main(int argc, char** argv) // ----- Rational Dixon ok = ok && test_dense_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - // @fixme Dixon does not compile - // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // ok = ok && test_sparse_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); + // // @fixme Dixon does not compile + // // ok = ok && test_blackbox_solve(Method::Dixon(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ----- Rational SymbolicNumeric // @note SymbolicNumeric methods are only implemented on DenseMatrix @@ -301,30 +336,30 @@ int main(int argc, char** argv) // ok = ok && test_sparse_solve(Method::SymbolicNumericNorm(method), ZZ, QQ, m, n, bitSize, vectorBitSize, // seed, verbose); - // ----- Modular Auto - ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); - - // ----- Modular Blackbox - ok = ok && test_dense_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); - - // ----- Modular DenseElimination - ok = ok && test_dense_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); - - // ----- Modular SparseElimination - ok = ok && test_dense_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); - - // ----- Modular Wiedemann - ok = ok && test_dense_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_sparse_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); - ok = ok && test_blackbox_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); + // // ----- Modular Auto + // ok = ok && test_dense_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::Auto(method), F, F, m, n, 0, 0, seed, verbose); + + // // ----- Modular Blackbox + // ok = ok && test_dense_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::Blackbox(method), F, F, m, n, 0, 0, seed, verbose); + + // // ----- Modular DenseElimination + // ok = ok && test_dense_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::DenseElimination(method), F, F, m, n, 0, 0, seed, verbose); + + // // ----- Modular SparseElimination + // ok = ok && test_dense_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::SparseElimination(method), F, F, m, n, 0, 0, seed, verbose); + + // // ----- Modular Wiedemann + // ok = ok && test_dense_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_sparse_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); + // ok = ok && test_blackbox_solve(Method::Wiedemann(method), F, F, m, n, 0, 0, seed, verbose); // ----- Modular Lanczos // @fixme Dense is segfaulting