Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace Summary algorithm with improved implementation #62

Merged
merged 7 commits into from
Aug 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion prometheus-client/prometheus-client.cabal
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: prometheus-client
version: 1.0.1
version: 1.1.0
synopsis: Haskell client library for http://prometheus.io.
description: Haskell client library for http://prometheus.io.
homepage: https://github.com/fimad/prometheus-haskell
Expand Down Expand Up @@ -41,13 +41,15 @@ library
, clock
, containers
, deepseq
, primitive
, mtl >=2
, stm >=2.3
, transformers
, transformers-compat
, utf8-string
, exceptions
, text
, data-sketches
ghc-options: -Wall

test-suite doctest
Expand Down Expand Up @@ -83,6 +85,8 @@ test-suite spec
, deepseq
, exceptions
, text
, primitive
, data-sketches
ghc-options: -Wall

benchmark bench
Expand Down
215 changes: 66 additions & 149 deletions prometheus-client/src/Prometheus/Metric/Summary.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{-# language BangPatterns #-}
{-# language OverloadedStrings #-}

module Prometheus.Metric.Summary (
Summary
, Quantile
Expand All @@ -9,187 +8,105 @@ module Prometheus.Metric.Summary (
, observe
, observeDuration
, getSummary

, dumpEstimator
, emptyEstimator
, Estimator (..)
, Item (..)
, insert
, compress
, query
, invariant
) where

import Prometheus.Info
import Prometheus.Metric
import Prometheus.Metric.Observer
import Prometheus.MonadMonitor

import qualified Control.Concurrent.STM as STM
import Control.Concurrent.MVar
import Control.DeepSeq
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Primitive
import qualified Data.ByteString.UTF8 as BS
import Data.Foldable (foldr')
import Data.Int (Int64)
import Data.Monoid ((<>))
import qualified Data.Text as T
import DataSketches.Quantiles.RelativeErrorQuantile
import qualified DataSketches.Quantiles.RelativeErrorQuantile as ReqSketch
import Data.Maybe (mapMaybe)
import Prelude hiding (maximum)
import qualified Prelude
import Data.Word

data Summary = MkSummary
{ reqSketch :: MVar (ReqSketch (PrimState IO))
, quantiles :: [Quantile]
}

instance NFData Summary where
rnf (MkSummary a b) = a `seq` b `deepseq` ()

newtype Summary = MkSummary (STM.TVar Estimator)

instance NFData Summary where
rnf (MkSummary a) = a `seq` ()
type Quantile = (Rational, Rational)

-- | K is a parameter divisible by two, in the range 4-1024 used in the RelativeErrorQuantile algorithm to
-- determine how many items must be retained per compaction section. As the value increases, the accuracy
-- of the sketch increases as well. This function iterates on the k value starting from 6
-- (conservative on space, but reasonably accurate) until it finds a K value that satisfies the specified
-- error bounds for the given quantile. Note: this algorithm maintains highest accuracy for the upper tail
-- of the quantile when passed the 'HighRanksAreAccurate', sampling out more items at lower ranks during
-- the compaction process. Thus, extremely tight error bounds on low quantile values may cause this
-- function to return 'Nothing'.
--
-- If another smart constructor was exposed for summary creation, specific k values & LowRanksAreAccurate
-- could be used to refine accuracy settings to bias towards lower quantiles when retaining accurate samples.
determineK :: Quantile -> Maybe Word32
fimad marked this conversation as resolved.
Show resolved Hide resolved
determineK (rank_, acceptableError) = go 6
where
go k =
let rse = relativeStandardError (fromIntegral k) (fromRational rank_) HighRanksAreAccurate 50000
in if abs (rse - fromRational rank_) <= fromRational acceptableError
then Just k
else if k < 1024
then go (k + 2)
else Nothing


-- | Creates a new summary metric with a given name, help string, and a list of
-- quantiles. A reasonable set set of quantiles is provided by
-- 'defaultQuantiles'.
summary :: Info -> [Quantile] -> Metric Summary
summary info quantiles = Metric $ do
valueTVar <- STM.newTVarIO (emptyEstimator quantiles)
return (MkSummary valueTVar, collectSummary info valueTVar)

withSummary :: MonadMonitor m
=> Summary -> (Estimator -> Estimator) -> m ()
withSummary (MkSummary !valueTVar) f =
doIO $ STM.atomically $ do
STM.modifyTVar' valueTVar compress
STM.modifyTVar' valueTVar f
summary info quantiles_ = Metric $ do
rs <- mkReqSketch kInt HighRanksAreAccurate
mv <- newMVar $ rs {criterion = (:<=)}
let summary_ = MkSummary mv quantiles_
return (summary_, collectSummary info summary_)
where
kInt = fromIntegral $ case mapMaybe determineK quantiles_ of
[] -> error "Unable to create a Summary meeting the provided quantile precision requirements"
xs -> Prelude.maximum xs

instance Observer Summary where
-- | Adds a new observation to a summary metric.
observe s v = withSummary s (insert v)
observe s v = doIO $ withMVar (reqSketch s) (`ReqSketch.insert` v)

-- | Retrieves a list of tuples containing a quantile and its associated value.
getSummary :: MonadIO m => Summary -> m [(Rational, Double)]
getSummary (MkSummary valueTVar) = liftIO $ do
estimator <- STM.atomically $ do
STM.modifyTVar' valueTVar compress
STM.readTVar valueTVar
let quantiles = map fst $ estQuantiles estimator
let values = map (query estimator) quantiles
return $ zip quantiles values

collectSummary :: Info -> STM.TVar Estimator -> IO [SampleGroup]
collectSummary info valueTVar = STM.atomically $ do
STM.modifyTVar' valueTVar compress
estimator@(Estimator count itemSum _ _) <- STM.readTVar valueTVar
let quantiles = map fst $ estQuantiles estimator
let samples = map (toSample estimator) quantiles
getSummary (MkSummary sketchVar quantiles_) = liftIO $ withMVar sketchVar $ \sketch -> do
forM quantiles_ $ \qv ->
(,) <$> pure (fst qv) <*> ReqSketch.quantile sketch (fromRational $ fst qv)

collectSummary :: Info -> Summary -> IO [SampleGroup]
collectSummary info (MkSummary sketchVar quantiles_) = withMVar sketchVar $ \sketch -> do
itemSum <- ReqSketch.sum sketch
count_ <- ReqSketch.count sketch
estimatedQuantileValues <- forM quantiles_ $ \qv ->
(,) <$> pure (fst qv) <*> ReqSketch.quantile sketch (toDouble $ fst qv)
let sumSample = Sample (metricName info <> "_sum") [] (bsShow itemSum)
let countSample = Sample (metricName info <> "_count") [] (bsShow count)
return [SampleGroup info SummaryType $ samples ++ [sumSample, countSample]]
let countSample = Sample (metricName info <> "_count") [] (bsShow count_)
return [SampleGroup info SummaryType $ map toSample estimatedQuantileValues ++ [sumSample, countSample]]
where
bsShow :: Show s => s -> BS.ByteString
bsShow = BS.fromString . show

toSample estimator q =
toSample :: (Rational, Double) -> Sample
toSample (q, estimatedValue) =
Sample (metricName info) [("quantile", T.pack . show $ toDouble q)] $
bsShow $ query estimator q
bsShow estimatedValue

toDouble :: Rational -> Double
toDouble = fromRational

dumpEstimator :: Summary -> IO Estimator
dumpEstimator (MkSummary valueTVar) =
STM.atomically $ STM.readTVar valueTVar

-- | A quantile is a pair of a quantile value and an associated acceptable error
-- value.
type Quantile = (Rational, Rational)

data Item = Item {
itemValue :: Double
, itemG :: !Int64
, itemD :: !Int64
} deriving (Eq, Show)

instance Ord Item where
compare a b = itemValue a `compare` itemValue b

data Estimator = Estimator {
estCount :: !Int64
, estSum :: !Double
, estQuantiles :: [Quantile]
, estItems :: [Item]
} deriving (Show)

defaultQuantiles :: [Quantile]
defaultQuantiles = [(0.5, 0.05), (0.9, 0.01), (0.99, 0.001)]

emptyEstimator :: [Quantile] -> Estimator
emptyEstimator quantiles = Estimator 0 0 quantiles []

insert :: Double -> Estimator -> Estimator
insert value estimator@(Estimator oldCount oldSum quantiles items) =
newEstimator $ insertItem 0 items
where
newEstimator = Estimator (oldCount + 1) (oldSum + value) quantiles

insertItem _ [] = [Item value 1 0]
insertItem r [x]
-- The first two cases cover the scenario where the initial size of
-- the list is one.
| r == 0 && value < itemValue x = Item value 1 0 : [x]
| r == 0 = x : [Item value 1 0]
-- The last case covers the scenario where the we have walked off
-- the end of a list with more than 1 element in the final case of
-- insertItem in which case we already know that x < value.
| otherwise = x : [Item value 1 0]
insertItem r (x:y:xs)
-- This first case only covers the scenario where value is less than
-- the first item in a multi-item list. For subsequent steps of
-- a multi valued list, this case cannot happen as it would have
-- fallen through to the case below in the previous step.
| value <= itemValue x = Item value 1 0 : x : y : xs
| value <= itemValue y = x : Item value 1 (calcD $ r + itemG x)
: y : xs
| otherwise = x : insertItem (itemG x + r) (y : xs)

calcD r = max 0
$ floor (invariant estimator (fromIntegral r)) - 1


compress :: Estimator -> Estimator
compress est@(Estimator _ _ _ []) = est
compress est@(Estimator _ _ _ items) = est {
estItems = (minItem :)
$ foldr' compressPair []
$ drop 1 -- The exact minimum item must be kept exactly.
$ zip items
$ scanl (+) 0 (map itemG items)
}
where
minItem = head items
compressPair (a, _) [] = [a]
compressPair (a@(Item _ aG _), r) (b@(Item bVal bG bD):bs)
| bD == 0 = a : b : bs
| aG + bG + bD <= inv = Item bVal (aG + bG) bD : bs
| otherwise = a : b : bs
where
inv = floor $ invariant est (fromIntegral r)

query :: Estimator -> Rational -> Double
query est@(Estimator count _ _ items) q = findQuantile allRs items
where
allRs = scanl (+) 0 $ map itemG items

n = fromIntegral count
f = invariant est

rank = q * n
bound = rank + (f rank / 2)

findQuantile _ [] = 0 / 0 -- NaN
findQuantile _ [a] = itemValue a
findQuantile (_:bR:rs) (a@(Item{}):b@(Item _ bG bD):xs)
| fromIntegral (bR + bG + bD) > bound = itemValue a
| otherwise = findQuantile (bR:rs) (b:xs)
findQuantile _ _ = error "Query impossibility"

invariant :: Estimator -> Rational -> Rational
invariant (Estimator count _ quantiles _) r = max 1
$ minimum $ map fj quantiles
where
n = fromIntegral count
fj (q, e) | q * n <= r = 2 * e * r / q
| otherwise = 2 * e * (n - r) / (1 - q)
defaultQuantiles = [(0.5, 0.05), (0.9, 0.01), (0.99, 0.001)]
68 changes: 0 additions & 68 deletions prometheus-client/tests/Prometheus/Metric/SummarySpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@

module Prometheus.Metric.SummarySpec (
spec

, prop_boundedRank
, prop_invariant
, rankOf
) where

Expand Down Expand Up @@ -44,7 +41,6 @@ spec = describe "Prometheus.Metric.Summary" $ do
m <- register $ summary (Info "name" "help") quantiles
mapM_ (m `observe`) observations
checkQuantiles m smallWindowSize =<< getQuantiles quantiles m
context "Maintains invariants" invariantTests
where
checkBadObservations observations =
it ("computes quantiles correctly for " ++ show observations) $ do
Expand Down Expand Up @@ -116,7 +112,6 @@ checkQuantiles :: Summary
-> Double
-> [(Rational, Rational, Double)] -> IO ()
checkQuantiles m windowSize values = do
estimator <- dumpEstimator m
forM_ values $ \(q, e, actual) -> do
let expected = fromIntegral $ (ceiling $ fromRat q * windowSize :: Int) - 1
let minValue = expected - (fromRat e * windowSize)
Expand All @@ -127,9 +122,6 @@ checkQuantiles m windowSize values = do
, " was not within acceptable error range (", show e , "). "
, "Got ", show actual, ", but wanted ", show expected
, " (", show minValue, " <= v <= ", show maxValue, ")."
, if estCount estimator <= 100
then "\nEstimator = " ++ show estimator
else ""
]

quantiles :: [Quantile]
Expand All @@ -143,66 +135,6 @@ getQuantiles qs s = do
where
sortQuantiles = sortBy (\(a, _) (b, _) -> compare a b)

--------------------------------------------------------------------------------
-- QuickCheck tests

invariantTests :: Spec
invariantTests = do
it "Maintains g + d is bounded above by the invariant f" $
property prop_invariant
it "Compression maintains g + d is bounded above by the invariant f" $
property prop_invariantAfterCompress
it "Maintains that rank is bounded by r + g and r + g + d" $
property prop_boundedRank
it "Compression maintains that rank is bounded by r + g and r + g + d" $
property prop_boundedRankAfterCompress

prop_invariant :: NonEmptyList Double -> Property
prop_invariant (NonEmpty events) =
let estimator = estimatorAfterObserving events
rvgds = rvgdsFromEstimator estimator
in whenFail (putStrLn $ "[(R, V, G, D)] -> " ++ show rvgds) $
flip all rvgds $ \(r, _, g, d) ->
let f = invariant estimator (fromIntegral r)
in fromIntegral (g + d) <= f

prop_invariantAfterCompress :: NonEmptyList Double -> Property
prop_invariantAfterCompress (NonEmpty events) =
let estimator = estimatorAfterObserving events
rvgds = rvgdsFromEstimator $ compress estimator
in whenFail (putStrLn $ "[(R, V, G, D)] -> " ++ show rvgds) $
flip all rvgds $ \(r, _, g, d) ->
let f = invariant estimator (fromIntegral r)
in fromIntegral (g + d) <= f

prop_boundedRank :: NonEmptyList Double -> Property
prop_boundedRank (NonEmpty events) =
let rvgds = rvgdsFromEstimator $ estimatorAfterObserving events
vs = map (\(_, v, _, _) -> v) rvgds
in whenFail (putStrLn $ "[(R, V, G, D)] -> " ++ show rvgds) $
flip all rvgds $ \(r, v, g, d) ->
let (minRank, maxRank) = rankOf v vs
in r + g <= maxRank && minRank <= r + g + d

prop_boundedRankAfterCompress :: NonEmptyList Double -> Property
prop_boundedRankAfterCompress (NonEmpty events) =
let rvgds = rvgdsFromEstimator $ estimatorAfterObserving events
vs = map (\(_, v, _, _) -> v) rvgds
in whenFail (putStrLn $ "[(R, V, G, D)] -> " ++ show rvgds) $
flip all rvgds $ \(r, v, g, d) ->
let (minRank, maxRank) = rankOf v vs
in r + g <= maxRank && minRank <= r + g + d

rvgdsFromEstimator :: Estimator -> [(Int64, Double, Int64, Int64)]
rvgdsFromEstimator estimator = rvgds
where
items = estItems estimator
rs = scanl (+) 0 $ map itemG items
rvgds = zipWith (\r (Item v g d) -> (r, v, g, d)) rs items

estimatorAfterObserving :: [Double] -> Estimator
estimatorAfterObserving = foldr insert (emptyEstimator [(0.5, 0)])

-- | Return a tuple that describes the range of that an element's true rank can
-- be in. For example, in the list [0, 0, 0, 1] the result for querying 0 will
-- be (0, 2) and the result for querying 1 will be (3, 3).
Expand Down
Loading