Merge pull request #90 from idontgetoutmuch/master

Binomial Sampler
haskell · Jun 15, 2024 · 08c6be9 · 08c6be9
2 parents 33c89e1 + 4b2bc36
commit 08c6be9
Show file tree

Hide file tree

Showing 7 changed files with 560 additions and 4 deletions.
diff --git a/System/Random/MWC/Distributions.hs b/System/Random/MWC/Distributions.hs
@@ -27,6 +27,7 @@ module System.Random.MWC.Distributions
     , geometric0
     , geometric1
     , bernoulli
+    , binomial
       -- ** Multivariate
     , dirichlet
       -- * Permutations
@@ -51,6 +52,7 @@ import System.Random.Stateful (StatefulGen(..),Uniform(..),UniformRange(..),unif
 import qualified Data.Vector.Unboxed         as I
 import qualified Data.Vector.Generic         as G
 import qualified Data.Vector.Generic.Mutable as M
+import Numeric.SpecFunctions (logFactorial)
 
 -- Unboxed 2-tuple
 data T = T {-# UNPACK #-} !Double {-# UNPACK #-} !Double
@@ -355,3 +357,131 @@ pkgError func msg = error $ "System.Random.MWC.Distributions." ++ func ++
 --   (2007). Gaussian random number generators.
 --   /ACM Computing Surveys/ 39(4).
 --   <http://www.cse.cuhk.edu.hk/~phwl/mt/public/archives/papers/grng_acmcs07.pdf>
+--
+-- * Kachitvichyanukul, V. and Schmeiser, B. W.  Binomial Random
+--   Variate Generation.  Communications of the ACM, 31, 2 (February,
+--   1988) 216. <https://dl.acm.org/doi/pdf/10.1145/42372.42381>
+--   Here's an example of how the algorithm's sampling regions look
+--   ![Something](docs/RecreateFigure.svg)
+
+-- | Random variate generator for Binomial distribution
+--
+-- The probability of getting exactly k successes in n trials is
+-- given by the probability mass function:
+--
+-- \[
+-- f(k;n,p) = \Pr(X = k) = \binom n k  p^k(1-p)^{n-k}
+-- \]
+binomial :: forall g m . StatefulGen g m
+         => Int               -- ^ Number of trials
+         -> Double            -- ^ Probability of success (returning True)
+         -> g                 -- ^ Generator
+         -> m Int
+{-# INLINE binomial #-}
+binomial nTrials prob gen
+  | nTrials <= 0             = pkgError "binomial" "number of trials must be positive"
+  | prob < 0.0 || prob > 1.0 = pkgError "binomial" "probability must be >= 0 and <= 1"
+  | prob == 0.0 = return 0
+  | prob == 1.0 = return nTrials
+  | otherwise = do let (p', flipped) = if prob > 0.5 then (1.0 - prob, True) else (prob, False)
+                   ix <- if fromIntegral nTrials * p' < bInvThreshold
+                         then binomialInv nTrials p' gen
+                         else binomialTPE nTrials p' gen
+                   if flipped
+                     then return $ nTrials - ix
+                     else return ix
+
+  where
+    binomialTPE n p g =
+      let q    = 1 - p
+          np   = fromIntegral n * p
+          ffm  = np + p
+          bigM = floor ffm
+          -- Half integer mean (tip of triangle)
+          xm   = fromIntegral bigM + 0.5
+          npq  = np * q
+
+          -- p1: the distance to the left and right edges of the triangle
+          -- region below the target distribution; since height=1, also:
+          -- area of region (half base * height)
+          p1 = fromIntegral (floor (2.195 * sqrt npq - 4.6 * q) :: Int) + 0.5
+          -- Left edge of triangle
+          xl = xm - p1
+          -- Right edge of triangle
+          xr = xm + p1
+          c  = 0.134 + 20.5 / (15.3 + fromIntegral bigM)
+          -- p1 + area of parallelogram region
+          p2 = p1 * (1.0 + c + c)
+          al = (ffm - xl) / (ffm - xl * p)
+          lambdaL = al * (1.0 + 0.5 * al)
+          ar = (xr - ffm) / (xr * q)
+          lambdaR = ar * (1.0 + 0.5 * ar)
+
+          -- p2 + area of left tail
+          p3 = p2 + c / lambdaL
+          -- p3 + area of right tail
+          p4 = p3 + c / lambdaR
+
+          -- Acceptance / rejection comparison
+          step5 :: Int -> Double -> m Int
+          step5 ix v
+            | var <= accept = return $ if p > 0 then ix else n - ix
+            | otherwise     = hh
+            where
+              var = log v
+              accept = logFactorial bigM + logFactorial (n - bigM) -
+                       logFactorial ix - logFactorial (n - ix) +
+                       fromIntegral (ix - bigM) * log (p / q)
+
+          h :: Double -> Double -> m Int
+          h u v | -- Triangular region
+                  u <= p1 = return $ floor $ xm - p1 * v + u
+
+                  -- Parallelogram region
+                | u <= p2 = do let x = xl + (u - p1) / c
+                                   w = v * c + 1.0 - abs (x - xm) / p1
+                               if w > 1 || w <= 0
+                                then hh
+                                else do let ix = floor x
+                                        step5 ix w
+
+                  -- Left tail
+                | u <= p3 = do let ix = floor $ xl + log v / lambdaL
+                               if ix < 0
+                                 then hh
+                                 else do let w = v * (u - p2) * lambdaL
+                                         step5 ix w
+
+                  -- Right tail
+                | otherwise = do let ix = floor $ xr - log v / lambdaR
+                                 if ix > n
+                                   then hh
+                                   else do let w = v * (u - p3) * lambdaR
+                                           step5 ix w
+
+          hh = do
+            u <- uniformRM (0.0, p4) g
+            v <- uniformDoublePositive01M g
+            h u v
+
+      in hh
+
+    binomialInv :: StatefulGen g m => Int -> Double -> g -> m Int
+    binomialInv n p g = do
+      let q = 1 - p
+          s = p / q
+          a = fromIntegral (n + 1) * s
+          r = q^n
+          f (rPrev, uPrev, xPrev) = (rNew, uNew, xNew)
+            where
+              uNew = uPrev - rPrev
+              xNew = xPrev + 1
+              rNew = rPrev * ((a / fromIntegral xNew) - s)
+      u <- uniformDoublePositive01M g
+      let (_, _, x) = until (\(t, v, _) -> v <= t) f (r, u, 0) in return x
+
+    -- Threshold for preferring the BINV algorithm / inverse cdf
+    -- logic. The paper suggests 10, Ranlib uses 30, R uses 30, Rust uses
+    -- 10 and GSL uses 14.
+    bInvThreshold :: Double
+    bInvThreshold = 10
diff --git a/bench/Benchmark.hs b/bench/Benchmark.hs
@@ -7,6 +7,7 @@ import Data.Word
 import Data.Proxy
 import qualified Data.Vector.Unboxed as U
 import qualified System.Random as R
+import System.Random.Stateful (StatefulGen)
 import System.Random.MWC
 import System.Random.MWC.Distributions
 import System.Random.MWC.CondensedTable
@@ -91,6 +92,11 @@ main = do
         , bench "gamma,a<1"   $ whnfIO $ loop iter (gamma 0.5 1   mwc :: IO Double)
         , bench "gamma,a>1"   $ whnfIO $ loop iter (gamma 2   1   mwc :: IO Double)
         , bench "chiSquare"   $ whnfIO $ loop iter (chiSquare 4   mwc :: IO Double)
+        , bench "binomial"    $ whnfIO $ loop iter (binomial 1400 0.4 mwc :: IO Int)
+        , bench "beta binomial 10" $ whnfIO $ loop iter (betaBinomial 600 400 10 mwc :: IO Int)
+        , bench "beta binomial 100" $ whnfIO $ loop iter (betaBinomial 600 400 100 mwc :: IO Int)
+        , bench "beta binomial table 10" $ whnfIO $ loop iter (betaBinomialTable 600 400 10 mwc :: IO Int)
+        , bench "beta binomial table 100" $ whnfIO $ loop iter (betaBinomialTable 600 400 100 mwc :: IO Int)
         ]
         -- Test sampling performance. Table creation must be floated out!
       , bgroup "CT/gen" $ concat
@@ -132,3 +138,13 @@ main = do
       , bench "Int"    $ whnfIO $ loop iter (M.random mtg :: IO Int)
       ]
     ]
+
+betaBinomial :: StatefulGen g m => Double -> Double -> Int -> g -> m Int
+betaBinomial a b n g = do
+  p <- beta a b g
+  binomial n p g
+
+betaBinomialTable :: StatefulGen g m => Double -> Double -> Int -> g -> m Int
+betaBinomialTable a b n g = do
+  p <- beta a b g
+  genFromTable (tableBinomial n p) g
diff --git a/changelog.md b/changelog.md
@@ -1,3 +1,12 @@
+## Changes in 0.15.1.0
+
+  * Additon of binomial sampler using the rejection sampling method in
+    Kachitvichyanukul, V. and Schmeiser, B. W.  Binomial Random
+    Variate Generation.  Communications of the ACM, 31, 2 (February,
+    1988) 216. <https://dl.acm.org/doi/pdf/10.1145/42372.42381>. A more
+    efficient basis for e.g. the beta binomial distribution:
+	`beta a b g >>= \p -> binomial n p g`.
+
 ## Changes in 0.15.0.2
 
   * Doctests on 32-bit platforms are fixed. (#79)