fimad · fimad · Aug 27, 2021 · Aug 23, 2021 · Aug 24, 2021 · Aug 24, 2021
diff --git a/prometheus-client/prometheus-client.cabal b/prometheus-client/prometheus-client.cabal
@@ -1,5 +1,5 @@
 name:                prometheus-client
-version:             1.0.1
+version:             1.1.0
 synopsis:            Haskell client library for http://prometheus.io.
 description:         Haskell client library for http://prometheus.io.
 homepage:            https://github.com/fimad/prometheus-haskell
@@ -41,13 +41,15 @@ library
     , clock
     , containers
     , deepseq
+    , primitive
     , mtl                >=2
     , stm                >=2.3
     , transformers
     , transformers-compat
     , utf8-string
     , exceptions
     , text
+    , data-sketches
   ghc-options: -Wall
 
 test-suite doctest
@@ -83,6 +85,8 @@ test-suite spec
     , deepseq
     , exceptions
     , text
+    , primitive
+    , data-sketches
   ghc-options: -Wall
 
 benchmark bench

diff --git a/prometheus-client/src/Prometheus/Metric/Summary.hs b/prometheus-client/src/Prometheus/Metric/Summary.hs
@@ -1,6 +1,5 @@
 {-# language BangPatterns #-}
 {-# language OverloadedStrings #-}
-
 module Prometheus.Metric.Summary (
     Summary
 ,   Quantile
@@ -9,187 +8,105 @@ module Prometheus.Metric.Summary (
 ,   observe
 ,   observeDuration
 ,   getSummary
-
-,   dumpEstimator
-,   emptyEstimator
-,   Estimator (..)
-,   Item (..)
-,   insert
-,   compress
-,   query
-,   invariant
 ) where
 
 import Prometheus.Info
 import Prometheus.Metric
 import Prometheus.Metric.Observer
 import Prometheus.MonadMonitor
 
-import qualified Control.Concurrent.STM as STM
+import Control.Concurrent.MVar
 import Control.DeepSeq
+import Control.Monad
 import Control.Monad.IO.Class
+import Control.Monad.Primitive
 import qualified Data.ByteString.UTF8 as BS
-import Data.Foldable (foldr')
-import Data.Int (Int64)
-import Data.Monoid ((<>))
 import qualified Data.Text as T
+import DataSketches.Quantiles.RelativeErrorQuantile
+import qualified DataSketches.Quantiles.RelativeErrorQuantile as ReqSketch
+import Data.Maybe (mapMaybe)
+import Prelude hiding (maximum)
+import qualified Prelude
+import Data.Word
+
+data Summary = MkSummary
+  { reqSketch :: MVar (ReqSketch (PrimState IO))
+  , quantiles :: [Quantile]
+  }
 
+instance NFData Summary where
+  rnf (MkSummary a b) = a `seq` b `deepseq` ()
 
-newtype Summary = MkSummary (STM.TVar Estimator)
 
-instance NFData Summary where
-  rnf (MkSummary a) = a `seq` ()
+type Quantile = (Rational, Rational)
+
+-- | K is a parameter divisible by two, in the range 4-1024 used in the RelativeErrorQuantile algorithm to 
+-- determine how many items must be retained per compaction section. As the value increases, the accuracy
+-- of the sketch increases as well. This function iterates on the k value starting from 6 
+-- (conservative on space, but reasonably accurate) until it finds a K value that satisfies the specified 
+-- error bounds for the given quantile. Note: this algorithm maintains highest accuracy for the upper tail 
+-- of the quantile when passed the 'HighRanksAreAccurate', sampling out more items at lower ranks during 
+-- the compaction process. Thus, extremely tight error bounds on low quantile values may cause this 
+-- function to return 'Nothing'.
+--
+-- If another smart constructor was exposed for summary creation, specific k values & LowRanksAreAccurate
+-- could be used to refine accuracy settings to bias towards lower quantiles when retaining accurate samples.
+determineK :: Quantile -> Maybe Word32
+determineK (rank_, acceptableError) = go 6
+    where
+        go k =
+            let rse = relativeStandardError (fromIntegral k) (fromRational rank_) HighRanksAreAccurate 50000
+            in if abs (rse - fromRational rank_) <= fromRational acceptableError
+                then Just k
+                else if k < 1024
+                    then go (k + 2)
+                    else Nothing
+
 
 -- | Creates a new summary metric with a given name, help string, and a list of
 -- quantiles. A reasonable set set of quantiles is provided by
 -- 'defaultQuantiles'.
 summary :: Info -> [Quantile] -> Metric Summary
-summary info quantiles = Metric $ do
-    valueTVar <- STM.newTVarIO (emptyEstimator quantiles)
-    return (MkSummary valueTVar, collectSummary info valueTVar)
-
-withSummary :: MonadMonitor m
-            => Summary -> (Estimator -> Estimator) -> m ()
-withSummary (MkSummary !valueTVar) f =
-    doIO $ STM.atomically $ do
-        STM.modifyTVar' valueTVar compress
-        STM.modifyTVar' valueTVar f
+summary info quantiles_ = Metric $ do
+    rs <- mkReqSketch kInt HighRanksAreAccurate
+    mv <- newMVar $ rs {criterion = (:<=)}
+    let summary_ = MkSummary mv quantiles_
+    return (summary_, collectSummary info summary_)
+    where
+        kInt = fromIntegral $ case mapMaybe determineK quantiles_ of
+          [] -> error "Unable to create a Summary meeting the provided quantile precision requirements"
+          xs -> Prelude.maximum xs
 
 instance Observer Summary where
     -- | Adds a new observation to a summary metric.
-    observe s v = withSummary s (insert v)
+    observe s v = doIO $ withMVar (reqSketch s) (`ReqSketch.insert` v)
 
 -- | Retrieves a list of tuples containing a quantile and its associated value.
 getSummary :: MonadIO m => Summary -> m [(Rational, Double)]
-getSummary (MkSummary valueTVar) = liftIO $ do
-    estimator <- STM.atomically $ do
-        STM.modifyTVar' valueTVar compress
-        STM.readTVar valueTVar
-    let quantiles = map fst $ estQuantiles estimator
-    let values = map (query estimator) quantiles
-    return $ zip quantiles values
-
-collectSummary :: Info -> STM.TVar Estimator -> IO [SampleGroup]
-collectSummary info valueTVar = STM.atomically $ do
-    STM.modifyTVar' valueTVar compress
-    estimator@(Estimator count itemSum _ _) <- STM.readTVar valueTVar
-    let quantiles = map fst $ estQuantiles estimator
-    let samples =  map (toSample estimator) quantiles
+getSummary (MkSummary sketchVar quantiles_) = liftIO $ withMVar sketchVar $ \sketch -> do
+  forM quantiles_ $ \qv ->
+    (,) <$> pure (fst qv) <*> ReqSketch.quantile sketch (fromRational $ fst qv)
+
+collectSummary :: Info -> Summary -> IO [SampleGroup]
+collectSummary info (MkSummary sketchVar quantiles_) = withMVar sketchVar $ \sketch -> do
+    itemSum <- ReqSketch.sum sketch
+    count_ <- ReqSketch.count sketch
+    estimatedQuantileValues <- forM quantiles_ $ \qv ->
+      (,) <$> pure (fst qv) <*> ReqSketch.quantile sketch (toDouble $ fst qv)
     let sumSample = Sample (metricName info <> "_sum") [] (bsShow itemSum)
-    let countSample = Sample (metricName info <> "_count") [] (bsShow count)
-    return [SampleGroup info SummaryType $ samples ++ [sumSample, countSample]]
+    let countSample = Sample (metricName info <> "_count") [] (bsShow count_)
+    return [SampleGroup info SummaryType $ map toSample estimatedQuantileValues ++ [sumSample, countSample]]
     where
         bsShow :: Show s => s -> BS.ByteString
         bsShow = BS.fromString . show
 
-        toSample estimator q =
+        toSample :: (Rational, Double) -> Sample
+        toSample (q, estimatedValue) =
             Sample (metricName info) [("quantile", T.pack . show $ toDouble q)] $
-                bsShow $ query estimator q
+                bsShow estimatedValue
 
         toDouble :: Rational -> Double
         toDouble = fromRational
 
-dumpEstimator :: Summary -> IO Estimator
-dumpEstimator (MkSummary valueTVar) =
-    STM.atomically $ STM.readTVar valueTVar
-
--- | A quantile is a pair of a quantile value and an associated acceptable error
--- value.
-type Quantile = (Rational, Rational)
-
-data Item = Item {
-    itemValue :: Double
-,   itemG     :: !Int64
-,   itemD     :: !Int64
-} deriving (Eq, Show)
-
-instance Ord Item where
-    compare a b = itemValue a `compare` itemValue b
-
-data Estimator = Estimator {
-    estCount      :: !Int64
-,   estSum        :: !Double
-,   estQuantiles  :: [Quantile]
-,   estItems      :: [Item]
-} deriving (Show)
-
 defaultQuantiles :: [Quantile]
-defaultQuantiles = [(0.5, 0.05), (0.9, 0.01), (0.99, 0.001)]
-
-emptyEstimator :: [Quantile] -> Estimator
-emptyEstimator quantiles = Estimator 0 0 quantiles []
-
-insert :: Double -> Estimator -> Estimator
-insert value estimator@(Estimator oldCount oldSum quantiles items) =
-        newEstimator $ insertItem 0 items
-    where
-        newEstimator = Estimator (oldCount + 1) (oldSum + value) quantiles
-
-        insertItem _ [] = [Item value 1 0]
-        insertItem r [x]
-            -- The first two cases cover the scenario where the initial size of
-            -- the list is one.
-            | r == 0 && value < itemValue x = Item value 1 0 : [x]
-            | r == 0                        = x : [Item value 1 0]
-            -- The last case covers the scenario where the we have walked off
-            -- the end of a list with more than 1 element in the final case of
-            -- insertItem in which case we already know that x < value.
-            | otherwise                     = x : [Item value 1 0]
-        insertItem r (x:y:xs)
-            -- This first case only covers the scenario where value is less than
-            -- the first item in a multi-item list. For subsequent steps of
-            -- a multi valued list, this case cannot happen as it would have
-            -- fallen through to the case below in the previous step.
-            | value <= itemValue x = Item value 1 0 : x : y : xs
-            | value <= itemValue y = x : Item value 1 (calcD $ r + itemG x)
-                                       : y : xs
-            | otherwise            = x : insertItem (itemG x + r) (y : xs)
-
-        calcD r = max 0
-                $ floor (invariant estimator (fromIntegral r)) - 1
-
-
-compress :: Estimator -> Estimator
-compress est@(Estimator _ _ _ [])    = est
-compress est@(Estimator _ _ _ items) = est {
-        estItems = (minItem :)
-                 $ foldr' compressPair []
-                 $ drop 1  -- The exact minimum item must be kept exactly.
-                 $ zip items
-                 $ scanl (+) 0 (map itemG items)
-    }
-    where
-        minItem = head items
-        compressPair (a, _) [] = [a]
-        compressPair (a@(Item _ aG _), r) (b@(Item bVal bG bD):bs)
-            | bD == 0             = a : b : bs
-            | aG + bG + bD <= inv = Item bVal (aG + bG) bD : bs
-            | otherwise           = a : b : bs
-            where
-                inv = floor $ invariant est (fromIntegral r)
-
-query :: Estimator -> Rational -> Double
-query est@(Estimator count _ _ items) q = findQuantile allRs items
-    where
-        allRs = scanl (+) 0 $ map itemG items
-
-        n = fromIntegral count
-        f = invariant est
-
-        rank  = q * n
-        bound = rank + (f rank / 2)
-
-        findQuantile _        []   = 0 / 0  -- NaN
-        findQuantile _        [a]  = itemValue a
-        findQuantile (_:bR:rs) (a@(Item{}):b@(Item _ bG bD):xs)
-            | fromIntegral (bR + bG + bD) > bound = itemValue a
-            | otherwise            = findQuantile (bR:rs) (b:xs)
-        findQuantile _        _    = error "Query impossibility"
-
-invariant :: Estimator -> Rational -> Rational
-invariant (Estimator count _ quantiles _) r = max 1
-                                            $ minimum $ map fj quantiles
-    where
-        n = fromIntegral count
-        fj (q, e) | q * n <= r = 2 * e * r / q
-                  | otherwise  = 2 * e * (n - r) / (1 - q)
+defaultQuantiles = [(0.5, 0.05), (0.9, 0.01), (0.99, 0.001)]
diff --git a/prometheus-client/tests/Prometheus/Metric/SummarySpec.hs b/prometheus-client/tests/Prometheus/Metric/SummarySpec.hs
@@ -2,9 +2,6 @@
 
 module Prometheus.Metric.SummarySpec (
     spec
-
-,   prop_boundedRank
-,   prop_invariant
 ,   rankOf
 ) where
 
@@ -44,7 +41,6 @@ spec = describe "Prometheus.Metric.Summary" $ do
             m <- register $ summary (Info "name" "help") quantiles
             mapM_ (m `observe`) observations
             checkQuantiles m smallWindowSize =<< getQuantiles quantiles m
-    context "Maintains invariants" invariantTests
     where
       checkBadObservations observations =
         it ("computes quantiles correctly for " ++ show observations) $ do
@@ -116,7 +112,6 @@ checkQuantiles :: Summary
                -> Double
                -> [(Rational, Rational, Double)] -> IO ()
 checkQuantiles m windowSize values = do
-    estimator <- dumpEstimator m
     forM_ values $ \(q, e, actual) -> do
         let expected = fromIntegral $ (ceiling $ fromRat q * windowSize :: Int) - 1
         let minValue = expected - (fromRat e * windowSize)
@@ -127,9 +122,6 @@ checkQuantiles m windowSize values = do
                 ,   " was not within acceptable error range (", show e , "). "
                 ,   "Got ", show actual, ", but wanted ", show expected
                 ,   " (", show minValue, " <= v <= ", show maxValue, ")."
-                ,   if estCount estimator <= 100
-                        then "\nEstimator = " ++ show estimator
-                        else ""
                 ]
 
 quantiles :: [Quantile]
@@ -143,66 +135,6 @@ getQuantiles qs s = do
     where
         sortQuantiles = sortBy (\(a, _) (b, _) -> compare a b)
 
---------------------------------------------------------------------------------
--- QuickCheck tests
-
-invariantTests :: Spec
-invariantTests = do
-    it "Maintains g + d is bounded above by the invariant f" $
-        property prop_invariant
-    it "Compression maintains g + d is bounded above by the invariant f" $
-        property prop_invariantAfterCompress
-    it "Maintains that rank is bounded by r + g and r + g + d" $
-        property prop_boundedRank
-    it "Compression maintains that rank is bounded by r + g and r + g + d" $
-        property prop_boundedRankAfterCompress
-
-prop_invariant :: NonEmptyList Double -> Property
-prop_invariant (NonEmpty events) =
-    let estimator = estimatorAfterObserving events
-        rvgds  = rvgdsFromEstimator estimator
-    in  whenFail (putStrLn $ "[(R, V, G, D)] -> " ++ show rvgds) $
-        flip all rvgds $ \(r, _, g, d) ->
-        let f = invariant estimator (fromIntegral r)
-        in  fromIntegral (g + d) <= f
-
-prop_invariantAfterCompress :: NonEmptyList Double -> Property
-prop_invariantAfterCompress (NonEmpty events) =
-    let estimator = estimatorAfterObserving events
-        rvgds  = rvgdsFromEstimator $ compress estimator
-    in  whenFail (putStrLn $ "[(R, V, G, D)] -> " ++ show rvgds) $
-        flip all rvgds $ \(r, _, g, d) ->
-        let f = invariant estimator (fromIntegral r)
-        in  fromIntegral (g + d) <= f
-
-prop_boundedRank :: NonEmptyList Double -> Property
-prop_boundedRank (NonEmpty events) =
-    let rvgds = rvgdsFromEstimator $ estimatorAfterObserving events
-        vs    = map (\(_, v, _, _) -> v) rvgds
-    in  whenFail (putStrLn $ "[(R, V, G, D)] -> " ++ show rvgds) $
-        flip all rvgds $ \(r, v, g, d) ->
-        let (minRank, maxRank) = rankOf v vs
-        in  r + g <= maxRank && minRank <= r + g + d
-
-prop_boundedRankAfterCompress :: NonEmptyList Double -> Property
-prop_boundedRankAfterCompress (NonEmpty events) =
-    let rvgds = rvgdsFromEstimator $ estimatorAfterObserving events
-        vs    = map (\(_, v, _, _) -> v) rvgds
-    in  whenFail (putStrLn $ "[(R, V, G, D)] -> " ++ show rvgds) $
-        flip all rvgds $ \(r, v, g, d) ->
-        let (minRank, maxRank) = rankOf v vs
-        in  r + g <= maxRank && minRank <= r + g + d
-
-rvgdsFromEstimator :: Estimator -> [(Int64, Double, Int64, Int64)]
-rvgdsFromEstimator estimator = rvgds
-    where
-        items     = estItems estimator
-        rs        = scanl (+) 0 $ map itemG items
-        rvgds     = zipWith (\r (Item v g d) -> (r, v, g, d)) rs items
-
-estimatorAfterObserving :: [Double] -> Estimator
-estimatorAfterObserving = foldr insert (emptyEstimator [(0.5, 0)])
-
 -- | Return a tuple that describes the range of that an element's true rank can
 -- be in. For example, in the list [0, 0, 0, 1] the result for querying 0 will
 -- be (0, 2) and the result for querying 1 will be (3, 3).