From 122af526925f21f9de5624aaa2902aa1d6d180fc Mon Sep 17 00:00:00 2001
From: Chip Hogg <chogg@aurora.tech>
Date: Tue, 19 Nov 2024 18:51:34 -0500
Subject: [PATCH] Use Pollard's rho instead of more trial division (#326)

In our current implementation, we can't handle any number whose factor
is bigger than, roughly, "the thousandth odd number after the hundredth
prime" --- "thousandth" because that's roughly where compiler iteration
limits kick in, and "hundredth prime" because we try the first hundred
primes in our initial trial division step.  This works out to 2,541
(although again, this is only the _approximate_ limit).

Pollard's rho algorithm requires a number of iterations _on average_
that is proportional to the square root of `p`, the smallest prime
factor.  Thus, we expect it to have good success rates for numbers whose
smallest factor is up to roughly one million, which is a lot higher than
2,541.

In practice, I've found some numbers that we can't factor with our
current implementation, but can if we use Pollard's rho.  I've included
one of them in a test case.  However, there are other factors (see #217)
that even the current version of Pollard's rho can't factor.  If we
can't find an approach that works for these, we may just have to live
with it.

Helps #217.
---
 au/code/au/utility/factoring.hh           | 58 +++++++++++++++++++----
 au/code/au/utility/test/factoring_test.cc | 15 ++++++
 2 files changed, 63 insertions(+), 10 deletions(-)
diff --git a/au/code/au/utility/factoring.hh b/au/code/au/utility/factoring.hh
index 9fd39c2d..e8bf7cfd 100644
--- a/au/code/au/utility/factoring.hh
+++ b/au/code/au/utility/factoring.hh
@@ -29,6 +29,50 @@ constexpr bool is_prime(std::uintmax_t n) {
     return baillie_psw(n) == PrimeResult::PROBABLY_PRIME;
 }
 
+// Compute the next step for Pollard's rho algorithm factoring `n`, with parameter `t`.
+constexpr std::uintmax_t x_squared_plus_t_mod_n(std::uintmax_t x,
+                                                std::uintmax_t t,
+                                                std::uintmax_t n) {
+    return add_mod(mul_mod(x, x, n), t, n);
+}
+
+constexpr std::uintmax_t absolute_diff(std::uintmax_t a, std::uintmax_t b) {
+    return a > b ? a - b : b - a;
+}
+
+// Pollard's rho algorithm, using Brent's cycle detection method.
+//
+// Precondition: `n` is known to be composite.
+constexpr std::uintmax_t find_pollard_rho_factor(std::uintmax_t n) {
+    // The outer loop tries separate _parameterizations_ of Pollard's rho.  We try a finite number
+    // of them just to guarantee that we terminate.  But in practice, the vast overwhelming majority
+    // will succeed on the first iteration, and we don't expect that any will _ever_ come anywhere
+    // _near_ to hitting this limit.
+    for (std::uintmax_t t = 1u; t < n / 2u; ++t) {
+        std::size_t max_cycle_length = 1u;
+        std::size_t cycle_length = 1u;
+        std::uintmax_t tortoise = 2u;
+        std::uintmax_t hare = x_squared_plus_t_mod_n(tortoise, t, n);
+
+        std::uintmax_t factor = gcd(n, absolute_diff(tortoise, hare));
+        while (factor == 1u) {
+            if (max_cycle_length == cycle_length) {
+                tortoise = hare;
+                max_cycle_length *= 2u;
+                cycle_length = 0u;
+            }
+            hare = x_squared_plus_t_mod_n(hare, t, n);
+            ++cycle_length;
+            factor = gcd(n, absolute_diff(tortoise, hare));
+        }
+        if (factor < n) {
+            return factor;
+        }
+    }
+    // Failure case: we think this should be unreachable (in practice) with any composite `n`.
+    return n;
+}
+
 template <typename T = void>
 struct FirstPrimesImpl {
     static constexpr uint16_t values[] = {
@@ -51,7 +95,6 @@ using FirstPrimes = FirstPrimesImpl<>;
 // Undefined unless (n > 1).
 constexpr std::uintmax_t find_prime_factor(std::uintmax_t n) {
     const auto &first_primes = FirstPrimes::values;
-    const auto &n_primes = FirstPrimes::N;
 
     // First, do trial division against the first N primes.
     for (const auto &p : first_primes) {
@@ -69,16 +112,11 @@ constexpr std::uintmax_t find_prime_factor(std::uintmax_t n) {
         return n;
     }
 
-    // If we're here, we know `n` is composite, so continue with trial division for all odd numbers.
-    std::uintmax_t factor = first_primes[n_primes - 1u] + 2u;
-    while (factor * factor <= n) {
-        if (n % factor == 0u) {
-            return factor;
-        }
-        factor += 2u;
+    auto factor = find_pollard_rho_factor(n);
+    while (!is_prime(factor)) {
+        factor = find_pollard_rho_factor(factor);
     }
-
-    return n;
+    return factor;
 }
 
 // Find the largest power of `factor` which divides `n`.
diff --git a/au/code/au/utility/test/factoring_test.cc b/au/code/au/utility/test/factoring_test.cc
index 2a9bd883..54e76930 100644
--- a/au/code/au/utility/test/factoring_test.cc
+++ b/au/code/au/utility/test/factoring_test.cc
@@ -77,6 +77,21 @@ TEST(FindFactor, CanFactorNumbersWithLargePrimeFactor) {
                 AnyOf(Eq(1999u), Eq(9'007'199'254'740'881u)));
 }
 
+TEST(FindFactor, CanFactorChallengingCompositeNumbers) {
+    // For ideas, see numbers in the "best solution" column in the various tables in
+    // <https://miller-rabin.appspot.com/>.
+    {
+        // Also passes for trial division.
+        constexpr auto factor = find_prime_factor(7'999'252'175'582'851u);
+        EXPECT_THAT(factor, AnyOf(Eq(9'227u), Eq(894'923u), Eq(968'731u)));
+    }
+    {
+        // Fails for trial division: requires Pollard's rho.
+        constexpr auto factor = find_prime_factor(55'245'642'489'451u);
+        EXPECT_THAT(factor, AnyOf(Eq(3'716'371u), Eq(14'865'481u)));
+    }
+}
+
 TEST(IsPrime, FalseForLessThan2) {
     EXPECT_FALSE(is_prime(0u));
     EXPECT_FALSE(is_prime(1u));