Rewrite explanatory comments in Reducer

quaternic · quaternic · commit 21987d8c280e · 2025-09-16T23:17:33.000+03:00
diff --git a/libm/src/math/support/modular.rs b/libm/src/math/support/modular.rs
@@ -97,78 +97,128 @@ where
         let m = n << 1;
         assert!(x < m);
 
-        // We need q and r s.t. RR/2 = qm + r, and `0 <= r < m`
-        // As R/4 < m < R/2,
-        // we have R <= q < 2R
-        // so let q = R + f
-        // RR/2 = (R + f)m + r
-        // R(R/2 - m) = fm + r
-
-        // v = R/2 - m < R/4 < m
-        let v = (_1 << (U::BITS - 1)) - m;
-        let (f, r) = v.widen_hi().checked_narrowing_div_rem(m).unwrap();
-
-        // xq < qm <= RR/2
-        // 2xq < RR
-        // 2xq = 2xR + 2xf;
-        let _2x: U = x << 1;
+        // We need to compute the parameters
+        // `q = (RR/2) / m`
+        // `r = (RR/2) % m`
+
+        // Since `m` is in `(R/4, R/2)`, the quotient `q` is in `[R, 2R)`, and
+        // it would overflow in `U` if computed directly. Instead, we compute
+        // `f = q - R`, which is in `[0, R)`. To do so, we simply subtract `Rm`
+        // from the dividend, which doesn't change the remainder:
+        // `f = R(R/2 - m) / m`
+        // `r = R(R/2 - m) % m`
+        let dividend = ((_1 << (U::BITS - 1)) - m).widen_hi();
+        let (f, r) = dividend.checked_narrowing_div_rem(m).unwrap();
+
+        // As `x < m`, `xq < qm <= RR/2`
+        // Thus `2xq = 2xR + 2xf` does not overflow in `U::D`.
+        let _2x = x + x;
         let _2xq = _2x.widen_hi() + _2x.widen_mul(f);
         Self { m, r, _2xq }
     }
 
-    /// Extract the current remainder in the range `[0, 2n)`
+    /// Extract the current remainder `x` in the range `[0, 2n)`
     fn partial_remainder(&self) -> U {
-        // RR/2 = qm + r, 0 <= r < m
-        // 2xq = uR + v, 0 <= v < R
-        // muR = 2mxq - mv
-        // = xRR - 2xr - mv
-        // mu + (2xr + mv)/R == xR
-
-        // 0 <= 2xq < RR
-        // R <= q < 2R
-        // 0 <= x < R/2
-        // R/4 < m < R/2
-        // 0 <= r < m
-        // 0 <= mv < mR
-        // 0 <= 2xr < rR < mR
-
-        // 0 <= (2xr + mv)/R < 2m
-        // Add `mu` to each term to obtain:
-        // mu <= xR < mu + 2m
-
-        // Since `0 <= 2m < R`, `xR` is the only multiple of `R` between
-        // `mu` and `m(u+2)`, so the high half of `m(u+2)` must equal `x`.
-        let _1 = U::ONE;
-        self.m.widen_mul(self._2xq.hi() + (_1 + _1)).hi()
+        // `RR/2 = qm + r`, where `0 <= r < m`
+        // `2xq = uR + v`,  where `0 <= v < R`
+
+        // The goal is to recover the current value of `x` from the value `2xq`
+        // that we actually store. Conceptually, we could multiply it by `m` to
+        // obtain `2xqm == 2x(RR/2 - r) == xRR - 2xr`, where `0 <= 2xr < RR`,
+        // and then round that up to a multiple of `RR` and divide by `RR` to
+        // get `x`. However, we can avoid multiplying the full double-wide
+        // `2xq` by making a couple of adjustments:
+
+        // First, let's only use the high half `u` for the product, and
+        // include an additional error term due to the truncation:
+        //  `mu = xR - (2xr + mv)/R`
+
+        // Next, show bounds for the error term
+        //  `0 <= mv < mR` follows from `0 <= v < R`
+        //  `0 <= 2xr < mR` follows from `0 <= x < m < R/2` and `0 <= r < m`
+        // Adding those together, we have:
+        //  `0 <= (mv + 2xr)/R < 2m`
+        // Which also implies:
+        //  `0 < 2m - (mv + 2xr)/R <= 2m < R`
+
+        // For that reason, we can use `u + 2` as the factor to obtain
+        //  `m(u + 2) = xR + (2m - (mv + 2xr)/R)`
+        // By the previous inequality, the second term fits neatly in the lower
+        // half, so we get exactly `x` as the high half.
+        let u = self._2xq.hi();
+        let _2 = U::ONE + U::ONE;
+        self.m.widen_mul(u + _2).hi()
+
+        // Additionally, we should ensure that `u + 2` cannot overflow:
+        // Since `x < m` and `2qm <= RR`,
+        //  `2xq <= 2q(m-1) <= RR - 2q`
+        // As we also have `q > R` (since `m < R/2` implies `(R + 1)m < RR/2`),
+        //  `2xq < RR - 2R`
+        // which is sufficient.
     }
 
     /// Replace the remainder `x` with `(x << k) - un`,
     /// for a suitable quotient `u`, which is returned.
+    ///
+    /// Requires that `k < U::BITS`.
     fn shift_reduce(&mut self, k: u32) -> U {
         assert!(k < U::BITS);
-        // 2xq << k = aRR/2 + b;
+
+        // First, split the shifted value:
+        // `2xq << k = aRR/2 + b`, where `0 <= b < RR/2`
         let a = self._2xq.hi() >> (U::BITS - 1 - k);
         let (low, high) = (self._2xq << k).lo_hi();
         let b = U::D::from_lo_hi(low, high & (U::MAX >> 1));
 
+        // Then, subtract `2anq = aqm`:
+        // ```
         // (2xq << k) - aqm
         // = aRR/2 + b - aqm
         // = a(RR/2 - qm) + b
         // = ar + b
+        // ```
         self._2xq = a.widen_mul(self.r) + b;
         a
+
+        // Since `a` is at most the high half of `2xq`, we have
+        //  `a + 2 < R` (shown above, in `partial_remainder`)
+        // Using that together with `b < RR/2` and `r < m < R/2`,
+        // we get `(a + 2)r + b < RR`, so
+        //  `ar + b < RR - 2r = 2mq`
+        // which shows that the new remainder still satisfies `x < m`.
     }
 
+    // NB: `word_reduce()` is just the special case `shift_reduce(U::BITS - 1)`
+    // that optimizes especially well. The correspondence is that `a == u` and
+    //  `b == (v >> 1).widen_hi()`
+    //
     /// Replace the remainder `x` with `x(R/2) - un`,
     /// for a suitable quotient `u`, which is returned.
     fn word_reduce(&mut self) -> U {
-        // 2xq = uR + v
-        let (v, u) = self._2xq.lo_hi();
-        // xqR - uqm
+        // To do so, we replace `2xq = uR + v` with
+        // ```
+        // 2 * (x(R/2) - un) * q
+        // = xqR - 2unq
+        // = xqR - uqm
         // = uRR/2 + vR/2 - uRR/2 + ur
         // = ur + (v/2)R
+        // ```
+        let (v, u) = self._2xq.lo_hi();
         self._2xq = u.widen_mul(self.r) + U::widen_hi(v >> 1);
         u
+
+        // Additional notes:
+        //  1. As `v` is the low bits of `2xq`, it is even and can be halved.
+        //  2. The new remainder is `(xr + mv/2) / R` (see below)
+        //      and since `v < R`, `r < m`, `x < m < R/2`,
+        //      that is also strictly less than `m`.
+        // ```
+        // (x(R/2) - un)R
+        //      = xRR/2 - (m/2)uR
+        //      = x(qm + r) - (m/2)(2xq - v)
+        //      = xqm + xr - xqm + mv/2
+        //      = xr + mv/2
+        // ```
     }
 }