diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 06471fc7c9..bc6aea6f4e 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -35,6 +35,9 @@ * Update the Pytorch library to version 2.3.1. (See {ml-pull}2688[#2688].) * Allow the user to force a detector to shift time series state by a specific amount. (See {ml-pull}2695[#2695].) +* Improve variance estimation for anomaly detection when record counts + suddenly drop. (See {ml-pull}2677[#2677].) + == {es} version 8.15.0 @@ -53,13 +56,6 @@ * Handle any exception thrown by inference. (See {ml-pull}2680[#2680].) -== {es} version 8.14.1 - -=== Enhancements - -* Improve memory allocation management for JSON processing to reduce memory usage. - (See {ml-pull}2679[#2679].) - == {es} version 8.14.0 === Bug Fixes diff --git a/include/core/CSmallVector.h b/include/core/CSmallVector.h index 63fd32b5a0..4cf885f609 100644 --- a/include/core/CSmallVector.h +++ b/include/core/CSmallVector.h @@ -142,6 +142,21 @@ class CSmallVector : public boost::container::small_vector { return *this; } + std::string toDelimited(const std::string& delimiter = ", ") const { + std::string result; + for (size_type i = 0; i < this->size(); ++i) { + result += std::to_string((*this)[i]); + if (i < this->size() - 1) { + result += delimiter; + } + // Reserve space to minimize concatenation overhead. 
+ if (i == 0) { + result.reserve(result.size() * this->size()); + } + } + return result; + } + private: TBase& baseRef() { return *this; } const TBase& baseRef() const { return *this; } diff --git a/include/maths/common/CBasicStatisticsPersist.h b/include/maths/common/CBasicStatisticsPersist.h index 022594cee7..218a300d3b 100644 --- a/include/maths/common/CBasicStatisticsPersist.h +++ b/include/maths/common/CBasicStatisticsPersist.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -99,6 +100,11 @@ template inline std::string typeToString(const CSymmetricMatrix& value) { return value.toDelimited(); } + +template +inline std::string typeToString(const core::CSmallVector& value) { + return value.toDelimited(); +} } template diff --git a/include/model/CAnomalyDetector.h b/include/model/CAnomalyDetector.h index c4c56b5695..2b817a950b 100644 --- a/include/model/CAnomalyDetector.h +++ b/include/model/CAnomalyDetector.h @@ -259,7 +259,7 @@ class MODEL_EXPORT CAnomalyDetector : public CMonitoredResource { void resetBucket(core_t::TTime bucketStart); //! Release memory that is no longer needed - void releaseMemory(core_t::TTime samplingCutoffTime); + void releaseMemory(); //! Print the detector memory usage to the given stream void showMemoryUsage(std::ostream& stream) const; diff --git a/include/model/CAnomalyDetectorModelConfig.h b/include/model/CAnomalyDetectorModelConfig.h index 4bc0f89bbd..63153252e6 100644 --- a/include/model/CAnomalyDetectorModelConfig.h +++ b/include/model/CAnomalyDetectorModelConfig.h @@ -102,17 +102,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! Default maximum number of buckets for receiving out of order records. static const std::size_t DEFAULT_LATENCY_BUCKETS; - //! Default amount by which metric sample count is reduced for fine-grained - //! sampling when there is no latency. - static const std::size_t DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY; - - //! 
Default amount by which metric sample count is reduced for fine-grained - //! sampling when there is latency. - static const std::size_t DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY; - - //! Default amount by which the metric sample queue expands when it is full. - static const double DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR; - //! Bucket length corresponding to the default decay and learn rates. static const core_t::TTime STANDARD_BUCKET_LENGTH; //@} @@ -444,9 +433,6 @@ class MODEL_EXPORT CAnomalyDetectorModelConfig { //! bucket length. double bucketNormalizationFactor() const; - //! The time window during which samples are accepted. - core_t::TTime samplingAgeCutoff() const; - private: //! Bucket length. core_t::TTime m_BucketLength{0}; diff --git a/include/model/CBucketGatherer.h b/include/model/CBucketGatherer.h index 24c4256733..c94ee1ae4f 100644 --- a/include/model/CBucketGatherer.h +++ b/include/model/CBucketGatherer.h @@ -328,7 +328,7 @@ class MODEL_EXPORT CBucketGatherer { virtual bool resetBucket(core_t::TTime bucketStart) = 0; //! Release memory that is no longer needed - virtual void releaseMemory(core_t::TTime samplingCutoffTime) = 0; + virtual void releaseMemory() = 0; //! Remove the values in queue for the people or attributes //! in \p toRemove. @@ -378,9 +378,7 @@ class MODEL_EXPORT CBucketGatherer { //! //! \param[in] time The time of interest. //! \param[out] result Filled in with the feature data at \p time. - virtual void featureData(core_t::TTime time, - core_t::TTime bucketLength, - TFeatureAnyPrVec& result) const = 0; + virtual void featureData(core_t::TTime time, TFeatureAnyPrVec& result) const = 0; //! Get a reference to the owning data gatherer. const CDataGatherer& dataGatherer() const; @@ -388,9 +386,6 @@ class MODEL_EXPORT CBucketGatherer { //! Has this pid/cid pair had only explicit null records? bool hasExplicitNullsOnly(core_t::TTime time, std::size_t pid, std::size_t cid) const; - //! 
Create samples if possible for the bucket pointed out by \p time. - virtual void sample(core_t::TTime time) = 0; - //! Persist state by passing information \p inserter. virtual void acceptPersistInserter(core::CStatePersistInserter& inserter) const = 0; diff --git a/include/model/CDataGatherer.h b/include/model/CDataGatherer.h index b15e7514ec..f6d7c88872 100644 --- a/include/model/CDataGatherer.h +++ b/include/model/CDataGatherer.h @@ -162,10 +162,6 @@ class MODEL_EXPORT CDataGatherer { //! \param[in] features The features of the data to model. //! \param[in] startTime The start of the time interval for which //! to gather data. - //! \param[in] sampleCountOverride for the number of measurements - //! in a statistic. (Note that this is intended for testing only.) - //! A zero value means that the data gatherer class will determine - //! an appropriate value for the bucket length and data rate. CDataGatherer(model_t::EAnalysisCategory gathererType, model_t::ESummaryMode summaryMode, const SModelParams& modelParams, @@ -177,8 +173,7 @@ class MODEL_EXPORT CDataGatherer { const TStrVec& influenceFieldNames, const CSearchKey& key, const TFeatureVec& features, - core_t::TTime startTime, - int sampleCountOverride); + core_t::TTime startTime); //! Construct from a state document. CDataGatherer(model_t::EAnalysisCategory gathererType, @@ -334,10 +329,9 @@ class MODEL_EXPORT CDataGatherer { //! \tparam T The type of the feature data. template bool featureData(core_t::TTime time, - core_t::TTime bucketLength, std::vector>& result) const { TFeatureAnyPrVec rawFeatureData; - m_BucketGatherer->featureData(time, bucketLength, rawFeatureData); + m_BucketGatherer->featureData(time, rawFeatureData); bool succeeded = true; @@ -487,36 +481,6 @@ class MODEL_EXPORT CDataGatherer { bool isAttributeActive(std::size_t cid) const; //@} - //! \name Metric - //@{ - //! Get the current number of measurements in a sample for - //! the model of the entity identified by \p id. - //! - //! 
If we are performing temporal analysis we have one sample - //! count per person and if we are performing population analysis - //! we have one sample count per attribute. - double sampleCount(std::size_t id) const; - - //! Get the effective number of measurements in a sample for - //! the model of the entity identified by \p id. - //! - //! If we are performing temporal analysis we have one sample - //! count per person and if we are performing population analysis - //! we have one sample count per attribute. - double effectiveSampleCount(std::size_t id) const; - - //! Reset the number of measurements in a sample for the entity - //! identified \p id. - //! - //! If we are performing individual analysis we have one sample - //! count per person and if we are performing population analysis - //! we have one sample count per attribute. - void resetSampleCount(std::size_t id); - - //! Get the sample counts. - const TSampleCountsPtr& sampleCounts() const; - //@} - //! \name Time //@{ //! Get the start of the current bucketing time interval. @@ -578,7 +542,7 @@ class MODEL_EXPORT CDataGatherer { bool resetBucket(core_t::TTime bucketStart); //! Release memory that is no longer needed - void releaseMemory(core_t::TTime samplingCutoffTime); + void releaseMemory(); //! Get the global configuration parameters. const SModelParams& params() const; @@ -701,8 +665,7 @@ class MODEL_EXPORT CDataGatherer { const std::string& attributeFieldName, const std::string& valueFieldName, const TStrVec& influenceFieldNames, - core_t::TTime startTime, - unsigned int sampleCountOverride); + core_t::TTime startTime); private: //! The type of the bucket gatherer(s) used. @@ -739,9 +702,6 @@ class MODEL_EXPORT CDataGatherer { //! If true the gatherer will process missing person field values. bool m_UseNull; - - //! The object responsible for managing sample counts. 
- TSampleCountsPtr m_SampleCounts; }; } } diff --git a/include/model/CEventRateBucketGatherer.h b/include/model/CEventRateBucketGatherer.h index 3a1b5fafe4..4bba2433cc 100644 --- a/include/model/CEventRateBucketGatherer.h +++ b/include/model/CEventRateBucketGatherer.h @@ -259,7 +259,7 @@ class MODEL_EXPORT CEventRateBucketGatherer final : public CBucketGatherer { bool resetBucket(core_t::TTime bucketStart) override; //! Release memory that is no longer needed - void releaseMemory(core_t::TTime samplingCutoffTime) override; + void releaseMemory() override; //! \name Features //@{ @@ -268,15 +268,10 @@ class MODEL_EXPORT CEventRateBucketGatherer final : public CBucketGatherer { //! //! \param[in] time The time of interest. //! \param[out] result Filled in with the feature data at \p time. - void featureData(core_t::TTime time, - core_t::TTime bucketLength, - TFeatureAnyPrVec& result) const override; + void featureData(core_t::TTime time, TFeatureAnyPrVec& result) const override; //@} private: - //! No-op. - void sample(core_t::TTime time) override; - //! Append the counts by person for the bucketing interval containing //! \p time. //! diff --git a/include/model/CGathererTools.h b/include/model/CGathererTools.h deleted file mode 100644 index 823e3f3217..0000000000 --- a/include/model/CGathererTools.h +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. 
- */ - -#ifndef INCLUDED_ml_model_CGathererTools_h -#define INCLUDED_ml_model_CGathererTools_h - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace ml { -namespace core { -class CStatePersistInserter; -class CStateRestoreTraverser; -} -namespace model { - -//! \brief A collection of utility functionality for the CDataGatherer -//! and CBucketGatherer hierarchies. -//! -//! DESCRIPTION:\n -//! A collection of utility functions primarily intended for use by the -//! the CDataGatherer and CBucketGatherer hierarchies. -//! -//! IMPLEMENTATION DECISIONS:\n -//! This class is really just a proxy for a namespace, but a class has -//! been intentionally used to provide a single point for the declaration -//! and definition of utility functions within the model library. As such -//! all member functions should be static and it should be state-less. -//! If your functionality doesn't fit this pattern just make it a nested -//! class. -class MODEL_EXPORT CGathererTools { -public: - using TDoubleVec = std::vector; - using TOptionalDouble = std::optional; - using TSampleVec = std::vector; - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; - using TMedianAccumulator = maths::common::CFixedQuantileSketch<30>; - using TMinAccumulator = maths::common::CBasicStatistics::SMin::TAccumulator; - using TMaxAccumulator = maths::common::CBasicStatistics::SMax::TAccumulator; - using TVarianceAccumulator = - maths::common::CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMultivariateMeanAccumulator = CMetricMultivariateStatistic; - using TMultivariateMinAccumulator = CMetricMultivariateStatistic; - using TMultivariateMaxAccumulator = CMetricMultivariateStatistic; - - //! \brief Mean arrival time gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the mean time between - //! 
measurements. - class MODEL_EXPORT CArrivalTimeGatherer { - public: - using TAccumulator = TMeanAccumulator; - - public: - //! The earliest possible time. - static const core_t::TTime FIRST_TIME; - - public: - CArrivalTimeGatherer(); - - //! Get the mean arrival time in this bucketing interval. - TOptionalDouble featureData() const; - - //! Update the state with a new measurement. - //! - //! \param[in] time The time of the measurement. - inline void add(core_t::TTime time) { this->add(time, 1); } - - //! Update the state with a measurement count. - //! - //! \param[in] time The end time of the \p count messages. - //! \param[in] count The count of measurements. - inline void add(core_t::TTime time, unsigned int count) { - if (m_LastTime == FIRST_TIME) { - m_LastTime = time; - } else { - m_Value.add(static_cast(time - m_LastTime) / - static_cast(count)); - m_LastTime = time; - } - } - - //! Update the state to represent the start of a new bucket. - void startNewBucket(); - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - - //! Create from part of an XML document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //@} - - //! Get the checksum of this gatherer. - uint64_t checksum() const; - - //! Print this gatherer for debug. - std::string print() const; - - private: - //! The last time a message was added. - core_t::TTime m_LastTime; - - //! The mean time between messages received in the current - //! bucketing interval. - TAccumulator m_Value; - }; - - //! \brief Mean statistic gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the arithmetic mean of - //! a fixed number of measurements, which are supplied to the add - //! function. - //! - //! This also computes the mean of all measurements in the current - //! bucketing interval. - using TMeanGatherer = CSampleGatherer; - - //! 
\brief Multivariate mean statistic gatherer. - //! - //! See TMeanGatherer for details. - using TMultivariateMeanGatherer = - CSampleGatherer; - - //! \brief Median statistic gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the median of a fixed number - //! of measurements, which are supplied to the add function. - using TMedianGatherer = - CSampleGatherer; - - // TODO Add multivariate median. - - //! \brief Minimum statistic gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the minimum of a fixed number - //! of measurements, which are supplied to the add function. - //! - //! This also computes the minimum of all measurements in the current - //! bucketing interval. - using TMinGatherer = CSampleGatherer; - - //! \brief Multivariate minimum statistic gatherer. - //! - //! See TMinGatherer for details. - using TMultivariateMinGatherer = - CSampleGatherer; - - //! \brief Maximum statistic gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the maximum of a fixed number - //! of measurements, which are supplied to the add function. - //! - //! This also computes the maximum of all measurements in the current - //! bucketing interval. - using TMaxGatherer = CSampleGatherer; - - //! \brief Multivariate maximum statistic gatherer. - //! - //! See TMaxGatherer for details. - using TMultivariateMaxGatherer = - CSampleGatherer; - - //! \brief Variance statistic gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the variance of a fixed number - //! of measurements, which are supplied to the add function. - //! - //! This also computes the variance of all measurements in the current - //! bucketing interval. - using TVarianceGatherer = - CSampleGatherer; - - // TODO Add multivariate variance. - - //! \brief Bucket sum gatherer. - //! - //! DESCRIPTION:\n - //! Wraps up the functionality to sample the sum of a metric quantity - //! in a bucketing interval. 
- class MODEL_EXPORT CSumGatherer { - public: - using TDouble1Vec = core::CSmallVector; - using TStrVec = std::vector; - using TStrVecCItr = TStrVec::const_iterator; - using TOptionalStr = std::optional; - using TOptionalStrVec = std::vector; - using TSampleVecQueue = CBucketQueue; - using TSampleVecQueueItr = TSampleVecQueue::iterator; - using TSampleVecQueueCItr = TSampleVecQueue::const_iterator; - using TOptionalStrDoubleUMap = boost::unordered_map; - using TOptionalStrDoubleUMapCItr = TOptionalStrDoubleUMap::const_iterator; - using TOptionalStrDoubleUMapQueue = CBucketQueue; - using TOptionalStrDoubleUMapQueueCRItr = TOptionalStrDoubleUMapQueue::const_reverse_iterator; - using TOptionalStrDoubleUMapQueueVec = std::vector; - - public: - CSumGatherer(const SModelParams& params, - std::size_t dimension, - core_t::TTime startTime, - core_t::TTime bucketLength, - TStrVecCItr beginInfluencers, - TStrVecCItr endInfluencers); - - //! Get the dimension of the underlying statistic. - std::size_t dimension() const; - - //! Get the feature data for the current bucketing interval. - SMetricFeatureData featureData(core_t::TTime time, - core_t::TTime bucketLength, - const TSampleVec& emptySample) const; - - //! Returns false. - bool sample(core_t::TTime time, unsigned int sampleCount); - - //! Update the state with a new measurement. - //! - //! \param[in] time The time of \p value. - //! \param[in] value The measurement value. - //! \param[in] influences The influencing field values which - //! label \p value. 
- void add(core_t::TTime time, - const TDouble1Vec& value, - unsigned int count, - unsigned int /*sampleCount*/, - const TOptionalStrVec& influences) { - TSampleVec& sum = m_BucketSums.get(time); - if (sum.empty()) { - core_t::TTime bucketLength = m_BucketSums.bucketLength(); - sum.push_back(CSample(maths::common::CIntegerTools::floor(time, bucketLength), - TDoubleVec(1, 0.0), 1.0, 0.0)); - } - (sum[0].value())[0] += value[0]; - sum[0].count() += static_cast(count); - for (std::size_t i = 0; i < influences.size(); ++i) { - if (!influences[i]) { - continue; - } - TOptionalStrDoubleUMap& sums = m_InfluencerBucketSums[i].get(time); - sums[influences[i]] += value[0]; - } - } - - //! Update the state to represent the start of a new bucket. - void startNewBucket(core_t::TTime time); - - //! Reset bucket. - void resetBucket(core_t::TTime bucketStart); - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - - //! Create from part of a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - //@} - - //! Get the checksum of this gatherer. - uint64_t checksum() const; - - //! Debug the memory used by this gatherer. - void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; - - //! Get the memory used by this gatherer. - std::size_t memoryUsage() const; - - //! Print this gatherer for debug. - std::string print() const; - - //! Is the gatherer holding redundant data? - bool isRedundant(core_t::TTime samplingCutoffTime) const; - - private: - //! Classifies the sum series. - CDataClassifier m_Classifier; - - //! The sum for each bucket within the latency window. - TSampleVecQueue m_BucketSums; - - //! The sum for each influencing field value and bucket within - //! the latency window. 
- TOptionalStrDoubleUMapQueueVec m_InfluencerBucketSums; - }; -}; -} -} - -#endif // INCLUDED_ml_model_CGathererTools_h diff --git a/include/model/CIndividualModelDetail.h b/include/model/CIndividualModelDetail.h index b6c1277430..4ed1050744 100644 --- a/include/model/CIndividualModelDetail.h +++ b/include/model/CIndividualModelDetail.h @@ -85,7 +85,7 @@ void CIndividualModel::sampleBucketStatistics(core_t::TTime startTime, time < endTime; time += bucketLength) { this->CIndividualModel::sampleBucketStatistics(time, time + bucketLength, resourceMonitor); - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); for (auto& feature_ : featureData) { T& data = feature_.second; LOG_TRACE(<< model_t::print(feature_.first) << " data = " << data); diff --git a/include/model/CMetricBucketGatherer.h b/include/model/CMetricBucketGatherer.h index d55e6292ab..edcd895a84 100644 --- a/include/model/CMetricBucketGatherer.h +++ b/include/model/CMetricBucketGatherer.h @@ -209,7 +209,7 @@ class MODEL_EXPORT CMetricBucketGatherer final : public CBucketGatherer { bool resetBucket(core_t::TTime bucketStart) override; //! Release memory that is no longer needed - void releaseMemory(core_t::TTime samplingCutoffTime) override; + void releaseMemory() override; //! \name Features //@{ @@ -218,15 +218,10 @@ class MODEL_EXPORT CMetricBucketGatherer final : public CBucketGatherer { //! //! \param[in] time The time of interest. //! \param[out] result Filled in with the feature data at \p time. - void featureData(core_t::TTime time, - core_t::TTime bucketLength, - TFeatureAnyPrVec& result) const override; + void featureData(core_t::TTime time, TFeatureAnyPrVec& result) const override; //@} private: - //! Create samples if possible for the bucket pointed out by \p time. - void sample(core_t::TTime time) override; - //! Resize the necessary data structures so they can accommodate //! the person and attribute identified by \p pid and \p cid, //! 
respectively. diff --git a/include/model/CMetricMultivariateStatistic.h b/include/model/CMetricMultivariateStatistic.h index 17ff8fa9e2..7c81bedc3f 100644 --- a/include/model/CMetricMultivariateStatistic.h +++ b/include/model/CMetricMultivariateStatistic.h @@ -19,11 +19,10 @@ #include -#include +#include #include #include -#include namespace ml { namespace model { @@ -53,30 +52,29 @@ class CMetricMultivariateStatistic { static const std::string VALUE_TAG; public: - CMetricMultivariateStatistic(std::size_t n) : m_Values(n) {} + explicit CMetricMultivariateStatistic(std::size_t dimension) + : m_Values(dimension) {} //! Persist to a state document. void persist(core::CStatePersistInserter& inserter) const { for (std::size_t i = 0; i < m_Values.size(); ++i) { - CMetricStatisticWrappers::persist(m_Values[i], VALUE_TAG, inserter); + metric_stat_shims::persist(m_Values[i], VALUE_TAG, inserter); } } //! Restore from the supplied state document traverser. bool restore(core::CStateRestoreTraverser& traverser) { - std::size_t i = 0; + std::size_t i{0}; do { - const std::string& name = traverser.name(); - if (name == VALUE_TAG) { - if (CMetricStatisticWrappers::restore(traverser, m_Values[i++]) == false) { - LOG_ERROR(<< "Invalid statistic in " << traverser.value()); - return false; - } - } + const std::string& name{traverser.name()}; + RESTORE(VALUE_TAG, metric_stat_shims::restore(traverser, m_Values[i++])) } while (traverser.next()); return true; } + //! Get the statistic dimension. + std::size_t dimension() const { return m_Values.size(); } + //! Add a new measurement. //! //! \param[in] value The value of the statistic. @@ -95,15 +93,14 @@ class CMetricMultivariateStatistic { //! Returns the aggregated value of all the measurements. 
TDouble1Vec value() const { - std::size_t dimension = m_Values.size(); - TDouble1Vec result(dimension); - for (std::size_t i = 0; i < dimension; ++i) { - TDouble1Vec vi = CMetricStatisticWrappers::value(m_Values[i]); + TDouble1Vec result(this->dimension()); + for (std::size_t i = 0; i < this->dimension(); ++i) { + const auto& vi = metric_stat_shims::value(m_Values[i]); if (vi.size() > 1) { - result.resize(vi.size() * dimension); + result.resize(vi.size() * this->dimension()); } for (std::size_t j = 0; j < vi.size(); ++j) { - result[i + j * dimension] = vi[j]; + result[i + j * this->dimension()] = vi[j]; } } return result; @@ -112,24 +109,21 @@ class CMetricMultivariateStatistic { //! Returns the aggregated value of all the measurements suitable //! for computing influence. TDouble1Vec influencerValue() const { - std::size_t dimension = m_Values.size(); - TDouble1Vec result(dimension); - for (std::size_t i = 0; i < dimension; ++i) { - TDouble1Vec vi = CMetricStatisticWrappers::influencerValue(m_Values[i]); + TDouble1Vec result(this->dimension()); + for (std::size_t i = 0; i < this->dimension(); ++i) { + const auto& vi = metric_stat_shims::influencerValue(m_Values[i]); if (vi.size() > 1) { - result.resize(vi.size() * dimension); + result.resize(vi.size() * this->dimension()); } for (std::size_t j = 0; j < vi.size(); ++j) { - result[i + j * dimension] = vi[j]; + result[i + j * this->dimension()] = vi[j]; } } return result; } //! Returns the count of all the measurements. - double count() const { - return CMetricStatisticWrappers::count(m_Values[0]); - } + double count() const { return metric_stat_shims::count(m_Values[0]); } //! Combine two partial statistics. 
const CMetricMultivariateStatistic& operator+=(const CMetricMultivariateStatistic& rhs) { diff --git a/include/model/CMetricPartialStatistic.h b/include/model/CMetricPartialStatistic.h deleted file mode 100644 index 3767854891..0000000000 --- a/include/model/CMetricPartialStatistic.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#ifndef INCLUDED_ml_model_CMetricPartialStatistic_h -#define INCLUDED_ml_model_CMetricPartialStatistic_h - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include - -namespace ml { -namespace model { - -//! \brief A partial metric statistic. -//! -//! DESCRIPTION:\n -//! A partial statistic is composed by the value of the statistic -//! and the mean time of the measurements that compose the statistic. -//! The class groups the two together and provides functions in order -//! to add measurements to the statistic, combine it with others -//! and read its values. -//! -//! \tparam STATISTIC has the following requirements: -//! -# Member function void add(double value, unsigned int count) -//! -# Implementations for every function in CMetricStatisticsWrapper -//! -# Member operator += -//! -# Supported by maths::common::CChecksum::calculate -//! -# Supported by core::memory_debug::dynamicSize -//! -# Supported by core::memory::dynamicSize -//! 
-# Have overload of operator<< -template -class CMetricPartialStatistic { -public: - using TDouble1Vec = core::CSmallVector; - using TMeanAccumulator = - maths::common::CBasicStatistics::SSampleMean::TAccumulator; - -public: - static const std::string VALUE_TAG; - static const std::string TIME_TAG; - -public: - CMetricPartialStatistic(std::size_t dimension) - : m_Value(CMetricStatisticWrappers::template make(dimension)) {} - - //! Persist to a state document. - void persist(core::CStatePersistInserter& inserter) const { - CMetricStatisticWrappers::persist(m_Value, VALUE_TAG, inserter); - inserter.insertValue(TIME_TAG, m_Time.toDelimited()); - } - - //! Restore from the supplied state document traverser. - bool restore(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - if (name == VALUE_TAG) { - if (CMetricStatisticWrappers::restore(traverser, m_Value) == false) { - LOG_ERROR(<< "Invalid statistic in " << traverser.value()); - return false; - } - } else if (name == TIME_TAG) { - if (m_Time.fromDelimited(traverser.value()) == false) { - LOG_ERROR(<< "Invalid time in " << traverser.value()); - return false; - } - } - } while (traverser.next()); - return true; - } - - //! Add a new measurement. - //! - //! \param[in] value The value of the statistic. - //! \param[in] time The time of the statistic. - //! \param[in] count The number of measurements in the statistic. - inline void add(const TDouble1Vec& value, core_t::TTime time, unsigned int count) { - CMetricStatisticWrappers::add(value, count, m_Value); - m_Time.add(static_cast(time), count); - } - - //! Returns the aggregated value of all the measurements. - inline TDouble1Vec value() const { - return CMetricStatisticWrappers::value(m_Value); - } - - //! Returns the combined count of all the measurements. - inline double count() const { - return maths::common::CBasicStatistics::count(m_Time); - } - - //! Returns the mean time of all the measurements. 
- inline core_t::TTime time() const { - return static_cast(maths::common::CBasicStatistics::mean(m_Time) + 0.5); - } - - //! Combine two partial statistics. - inline const CMetricPartialStatistic& operator+=(const CMetricPartialStatistic& rhs) { - m_Value += rhs.m_Value; - m_Time += rhs.m_Time; - return *this; - } - - //! Get the checksum of the partial statistic - inline uint64_t checksum(uint64_t seed) const { - seed = maths::common::CChecksum::calculate(seed, m_Value); - return maths::common::CChecksum::calculate(seed, m_Time); - } - - //! Debug the memory used by the statistic. - inline void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { - mem->setName("CMetricPartialStatistic", sizeof(*this)); - core::memory_debug::dynamicSize("m_Value", m_Value, mem); - core::memory_debug::dynamicSize("m_Time", m_Time, mem); - } - - //! Get the memory used by the statistic. - inline std::size_t memoryUsage() const { - return sizeof(*this) + core::memory::dynamicSize(m_Value) + - core::memory::dynamicSize(m_Time); - } - - //! Print partial statistic - inline std::string print() const { - std::ostringstream result; - result << m_Value << ' ' << maths::common::CBasicStatistics::mean(m_Time); - return result.str(); - } - -private: - STATISTIC m_Value; - TMeanAccumulator m_Time; -}; - -template -const std::string CMetricPartialStatistic::VALUE_TAG("a"); -template -const std::string CMetricPartialStatistic::TIME_TAG("b"); -} -} - -#endif // INCLUDED_ml_model_CMetricPartialStatistic_h diff --git a/include/model/CMetricStatGatherer.h b/include/model/CMetricStatGatherer.h new file mode 100644 index 0000000000..8f1e5a7424 --- /dev/null +++ b/include/model/CMetricStatGatherer.h @@ -0,0 +1,517 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. 
Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#ifndef INCLUDED_ml_model_CMetricStatGatherer_h +#define INCLUDED_ml_model_CMetricStatGatherer_h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace ml { +namespace model { +namespace metric_stat_gatherer_detail { + +using TDouble1Vec = core::CSmallVector; +using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; +using TMinAccumulator = maths::common::CBasicStatistics::SMin::TAccumulator; +using TMaxAccumulator = maths::common::CBasicStatistics::SMax::TAccumulator; +using TVarianceAccumulator = maths::common::CBasicStatistics::SSampleMeanVar::TAccumulator; +using TMedianAccumulator = maths::common::CFixedQuantileSketch<30>; +using TMultivariateMeanAccumulator = CMetricMultivariateStatistic; +using TSampleVec = std::vector; + +//! \brief Manages persistence of influence bucket statistics. 
+template +class CStrStatUMapSerializer { +public: + using TStrStatUMap = boost::unordered_map; + +public: + static const std::string MAP_KEY_TAG; + static const std::string MAP_VALUE_TAG; + explicit CStrStatUMapSerializer(const STAT& initial) : m_Initial(initial) {} + + void operator()(const TStrStatUMap& map, core::CStatePersistInserter& inserter) const { + using TStrCRef = std::reference_wrapper; + using TStatCRef = std::reference_wrapper; + using TStrCRefStatCRefPr = std::pair; + using TStrCRefStatCRefPrVec = std::vector; + TStrCRefStatCRefPrVec ordered; + ordered.reserve(map.size()); + for (const auto& stat : map) { + ordered.emplace_back(stat.first, stat.second); + } + std::sort(ordered.begin(), ordered.end(), maths::common::COrderings::SFirstLess{}); + for (const auto& stat : ordered) { + inserter.insertValue(MAP_KEY_TAG, stat.first); + metric_stat_shims::persist(core::unwrap_ref(stat.second), MAP_VALUE_TAG, inserter); + } + } + + bool operator()(TStrStatUMap& map, core::CStateRestoreTraverser& traverser) const { + std::string key; + do { + const std::string& name{traverser.name()}; + RESTORE_NO_ERROR(MAP_KEY_TAG, key = traverser.value()) + RESTORE(MAP_VALUE_TAG, + metric_stat_shims::restore( + traverser, map.emplace(key, m_Initial).first->second)) + } while (traverser.next()); + return true; + } + +private: + STAT m_Initial; +}; + +template +using TStrStatUMapQueue = CBucketQueue>; +template +using TStrStatUMapQueueSerializer = + typename TStrStatUMapQueue::template CSerializer>; + +class CMidTime { +public: + explicit CMidTime(core_t::TTime bucketLength) : m_Time{bucketLength / 2} {} + + core_t::TTime value() const { return m_Time; } + + void add(core_t::TTime, unsigned int) {} + + std::string toDelimited() const { + return core::CStringUtils::typeToString(m_Time); + } + bool fromDelimited(const std::string& value) { + return core::CStringUtils::stringToType(value, m_Time); + } + std::uint64_t checksum() const { + return static_cast(m_Time); + } + 
+private: + core_t::TTime m_Time{0}; +}; + +class CLastTime { +public: + explicit CLastTime(core_t::TTime) {} + + core_t::TTime value() const { return m_Time; } + + void add(core_t::TTime time, unsigned int) { m_Time = time; } + + std::string toDelimited() const { + return core::CStringUtils::typeToString(m_Time); + } + bool fromDelimited(const std::string& value) { + return core::CStringUtils::stringToType(value, m_Time); + } + std::uint64_t checksum() const { + return static_cast(m_Time); + } + +private: + core_t::TTime m_Time{0}; +}; + +class CMeanTime { +public: + explicit CMeanTime(core_t::TTime) {} + + core_t::TTime value() const { + return static_cast( + std::round(maths::common::CBasicStatistics::mean(m_Time))); + } + + void add(core_t::TTime time, unsigned int count) { + m_Time.add(static_cast(time), count); + } + + std::string toDelimited() const { return m_Time.toDelimited(); } + bool fromDelimited(const std::string& value) { + return m_Time.fromDelimited(value); + } + std::uint64_t checksum() const { return m_Time.checksum(); } + +private: + TMeanAccumulator m_Time; +}; + +//! \brief Gathers a STAT statistic and it's bucket time. 
+template +class CStatGatherer { +public: + using TStat = STAT; + +public: + static const std::string COUNT_TAG; + static const std::string STAT_TAG; + static const std::string TIME_TAG; + explicit CStatGatherer(core_t::TTime bucketLength, const STAT& initial) + : m_Time{bucketLength}, m_Stat{initial} {} + + std::size_t dimension() const { + return metric_stat_shims::dimension(m_Stat); + } + core_t::TTime bucketTime(core_t::TTime time) const { + return time + m_Time.value(); + } + TDouble1Vec value() const { return metric_stat_shims::value(m_Stat); } + double count() const { return metric_stat_shims::count(m_Stat); } + double varianceScale() const { return 1.0; } + TSampleVec samples(core_t::TTime time) const { + return {{this->bucketTime(time), this->value(), 1.0, + static_cast(this->count())}}; + } + + void add(core_t::TTime bucketTime, const TDouble1Vec& value, unsigned int count) { + if (metric_stat_shims::wouldAdd(value, m_Stat)) { + m_Time.add(bucketTime, count); + metric_stat_shims::add(value, count, m_Stat); + } + } + + void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + metric_stat_shims::persist(m_Time, TIME_TAG, inserter); + metric_stat_shims::persist(m_Stat, STAT_TAG, inserter); + } + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE(TIME_TAG, metric_stat_shims::restore(traverser, m_Time)) + RESTORE(STAT_TAG, metric_stat_shims::restore(traverser, m_Stat)) + } while (traverser.next()); + return true; + } + + std::uint64_t checksum() const { + return maths::common::CChecksum::calculate(m_Time.checksum(), m_Stat); + } + +private: + TIME m_Time; + STAT m_Stat; +}; + +using TSumGatherer = CStatGatherer; +using TMeanGatherer = CStatGatherer; +using TMultivariateMeanGatherer = CStatGatherer; +using TMedianGatherer = CStatGatherer; +using TMinGatherer = CStatGatherer; +using TMaxGatherer = CStatGatherer; +using TVarianceGatherer = CStatGatherer; + 
+template +std::ostream& operator<<(std::ostream& os, const CStatGatherer& statGatherer) { + os << "CStatGatherer(dim=" << statGatherer.dimension() << ", value=" + << maths::common::basic_statistics_detail::typeToString(statGatherer.value()) + << ", count=" << statGatherer.count() + << ", varianceScale=" << statGatherer.varianceScale() << ")"; + return os; +} + +template +const std::string CStatGatherer::COUNT_TAG{"a"}; +template +const std::string CStatGatherer::STAT_TAG{"b"}; +template +const std::string CStatGatherer::TIME_TAG{"c"}; +template +const std::string CStrStatUMapSerializer::MAP_KEY_TAG{"d"}; +template +const std::string CStrStatUMapSerializer::MAP_VALUE_TAG{"e"}; +} // metric_stat_gatherer_detail:: + +//! \brief Bucket metric statistic gatherer. +template +class CMetricStatGatherer { +public: + using TDouble1Vec = metric_stat_gatherer_detail::TDouble1Vec; + using TStrVecCItr = std::vector::const_iterator; + + static const std::string CLASSIFIER_TAG; + static const std::string BUCKET_STATS_TAG; + static const std::string INFLUENCER_BUCKET_STATS_TAG; + +private: + using TBaseStat = typename STAT::TStat; + using TStatQueue = CBucketQueue; + using TStrBaseStatUMap = boost::unordered_map; + using TStrBaseStatUMapQueue = CBucketQueue; + using TStrBaseStatUMapQueueVec = std::vector; + using TStrVec = std::vector; + using TOptionalStr = std::optional; + using TOptionalStrVec = std::vector; + using TStrStatUMapQueueSerializer = + metric_stat_gatherer_detail::TStrStatUMapQueueSerializer; + +public: + CMetricStatGatherer(std::size_t latencyBuckets, + std::size_t dimension, + core_t::TTime startTime, + core_t::TTime bucketLength, + TStrVecCItr beginInfluencers, + TStrVecCItr endInfluencers) + : m_BaseStat{metric_stat_shims::makeStat(dimension)}, + m_BucketStats(latencyBuckets, + bucketLength, + startTime, + STAT{bucketLength, metric_stat_shims::makeStat(dimension)}), + m_InfluencerBucketStats( + std::distance(beginInfluencers, endInfluencers), + 
TStrBaseStatUMapQueue(latencyBuckets, bucketLength, startTime, TStrBaseStatUMap(1))) { + } + + //! Get the dimension of the underlying statistic. + std::size_t dimension() const { return m_BaseStat.dimension(); } + + //! Get the feature data for the current bucketing interval. + SMetricFeatureData featureData(core_t::TTime time) const { + + using namespace metric_stat_gatherer_detail; + using TStrCRef = std::reference_wrapper; + using TStrCRefDouble1VecDoublePrPrVecVec = SMetricFeatureData::TStrCRefDouble1VecDoublePrPrVecVec; + + const auto& gatherer = m_BucketStats.get(time); + + if (gatherer.value().empty() == false) { + TStrCRefDouble1VecDoublePrPrVecVec influenceValues( + m_InfluencerBucketStats.size()); + for (std::size_t i = 0; i < m_InfluencerBucketStats.size(); ++i) { + const auto& influencerStats = m_InfluencerBucketStats[i].get(time); + influenceValues[i].reserve(influencerStats.size()); + for (const auto & [ name, stat ] : influencerStats) { + influenceValues[i].emplace_back( + TStrCRef(name), + std::make_pair(metric_stat_shims::influencerValue(stat), + metric_stat_shims::count(stat))); + } + } + + time = maths::common::CIntegerTools::floor(time, m_BucketStats.bucketLength()); + return {gatherer.bucketTime(time), + gatherer.value(), + gatherer.varianceScale(), + gatherer.count(), + influenceValues, + m_Classifier.isInteger(), + m_Classifier.isNonNegative(), + gatherer.samples(time)}; + } + + return {m_Classifier.isInteger(), m_Classifier.isNonNegative(), {}}; + } + + //! Update the state with a new measurement. + //! + //! \param[in] time The time of \p value. + //! \param[in] value The measurement value. + //! \param[in] influences The influencing field values which label \p value. 
+ void add(core_t::TTime time, + const TDouble1Vec& value, + unsigned int count, + const TOptionalStrVec& influences) { + core_t::TTime bucketTime{time % m_BucketStats.bucketLength()}; + m_Classifier.add(FEATURE, value, count); + m_BucketStats.get(time).add(bucketTime, value, count); + for (std::size_t i = 0; i < influences.size(); ++i) { + if (influences[i] != std::nullopt) { + metric_stat_shims::add(value, count, + m_InfluencerBucketStats[i] + .get(time) + .emplace(*influences[i], m_BaseStat) + .first->second); + } + } + } + + //! Update the state to represent the start of a new bucket. + void startNewBucket(core_t::TTime time) { + m_BucketStats.push(STAT{m_BucketStats.bucketLength(), m_BaseStat}, time); + for (auto& stat : m_InfluencerBucketStats) { + stat.push(TStrBaseStatUMap(1), time); + } + } + + //! Reset bucket. + void resetBucket(core_t::TTime bucketStart) { + m_BucketStats.get(bucketStart) = STAT{m_BucketStats.bucketLength(), m_BaseStat}; + for (auto& stat : m_InfluencerBucketStats) { + stat.get(bucketStart).clear(); + } + } + + //! \name Persistence + //@{ + //! Persist state by passing information to the supplied inserter + void acceptPersistInserter(core::CStatePersistInserter& inserter) const { + using namespace metric_stat_gatherer_detail; + inserter.insertLevel(CLASSIFIER_TAG, [this](core::CStatePersistInserter& inserter_) { + m_Classifier.acceptPersistInserter(inserter_); + }); + if (m_BucketStats.empty() == false) { + inserter.insertLevel(BUCKET_STATS_TAG, [this](core::CStatePersistInserter& inserter_) { + m_BucketStats.acceptPersistInserter(inserter_); + }); + } + TStrStatUMapQueueSerializer influencerSerializer{ + TStrBaseStatUMap(1), CStrStatUMapSerializer(m_BaseStat)}; + for (const auto& stats : m_InfluencerBucketStats) { + inserter.insertLevel(INFLUENCER_BUCKET_STATS_TAG, [&](auto& inserter_) { + influencerSerializer(stats, inserter_); + }); + } + } + + //! Create from part of a state document. 
+ bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + using namespace metric_stat_gatherer_detail; + TStrStatUMapQueueSerializer influencerSerializer{ + TStrBaseStatUMap(1), CStrStatUMapSerializer(m_BaseStat)}; + std::size_t i{0}; + do { + const std::string& name{traverser.name()}; + RESTORE(CLASSIFIER_TAG, + traverser.traverseSubLevel([this](core::CStateRestoreTraverser& traverser_) { + return m_Classifier.acceptRestoreTraverser(traverser_); + })) + RESTORE(BUCKET_STATS_TAG, + traverser.traverseSubLevel([this](core::CStateRestoreTraverser& traverser_) { + return m_BucketStats.acceptRestoreTraverser(traverser_); + })) + RESTORE(INFLUENCER_BUCKET_STATS_TAG, + i < m_InfluencerBucketStats.size() && + traverser.traverseSubLevel([&](auto& traverser_) { + return influencerSerializer(m_InfluencerBucketStats[i++], traverser_); + })); + } while (traverser.next()); + return true; + } + //@} + + //! Get the checksum of this gatherer. + std::uint64_t checksum() const { + std::uint64_t seed{static_cast(m_Classifier.isInteger())}; + seed = maths::common::CChecksum::calculate(seed, m_Classifier.isNonNegative()); + seed = maths::common::CChecksum::calculate(seed, m_BucketStats); + return maths::common::CChecksum::calculate(seed, m_InfluencerBucketStats); + } + + //! Debug the memory used by this gatherer. + void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { + mem->setName("CMetricStatGatherer"); + core::memory_debug::dynamicSize("m_BaseStat", m_BaseStat, mem); + core::memory_debug::dynamicSize("m_BucketStats", m_BucketStats, mem); + core::memory_debug::dynamicSize("m_InfluencerBucketStats", + m_InfluencerBucketStats, mem); + } + + //! Get the memory used by this gatherer. + std::size_t memoryUsage() const { + return core::memory::dynamicSize(m_BaseStat) + + core::memory::dynamicSize(m_BucketStats) + + core::memory::dynamicSize(m_InfluencerBucketStats); + } + + //! Is the gatherer holding redundant data? 
+ bool isRedundant() const { + for (const auto& bucket : m_BucketStats) { + if (bucket.count() > 0) { + return false; + } + } + return true; + } + +private: + //! Gathers influencer stats. + TBaseStat m_BaseStat; + + //! Classifies the stat series. + CDataClassifier m_Classifier; + + //! The stat for each bucket within the latency window. + TStatQueue m_BucketStats; + + //! The stat for each influencing field value and bucket within the latency window. + TStrBaseStatUMapQueueVec m_InfluencerBucketStats; +}; + +template +const std::string CMetricStatGatherer::CLASSIFIER_TAG{"a"}; +template +const std::string CMetricStatGatherer::BUCKET_STATS_TAG{"b"}; +template +const std::string CMetricStatGatherer::INFLUENCER_BUCKET_STATS_TAG{"c"}; + +//! \brief Sum statistic gatherer. +using TSumGatherer = + CMetricStatGatherer; + +//! \brief Mean statistic gatherer. +using TMeanGatherer = + CMetricStatGatherer; + +//! \brief Multivariate mean statistic gatherer. +using TMultivariateMeanGatherer = + CMetricStatGatherer; + +//! \brief Median statistic gatherer. +using TMedianGatherer = + CMetricStatGatherer; + +//! \brief Minimum statistic gatherer. +using TMinGatherer = + CMetricStatGatherer; + +//! \brief Maximum statistic gatherer. +using TMaxGatherer = + CMetricStatGatherer; + +//! \brief Variance statistic gatherer. +using TVarianceGatherer = + CMetricStatGatherer; +} +} + +#endif // INCLUDED_ml_model_CMetricStatGatherer_h diff --git a/include/model/CMetricStatShims.h b/include/model/CMetricStatShims.h new file mode 100644 index 0000000000..b1e47132d5 --- /dev/null +++ b/include/model/CMetricStatShims.h @@ -0,0 +1,236 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. 
Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#ifndef INCLUDED_ml_model_CMetricStatShims_h +#define INCLUDED_ml_model_CMetricStatShims_h + +#include +#include +#include + +#include +#include +#include + +#include + +namespace ml { +namespace model { +template +class CMetricMultivariateStatistic; + +//! \brief Accumulates the sum and count of a metric. +class CSumAccumulator { +public: + double value() const { return m_Sum; } + unsigned int count() const { return std::max(m_Count, 1U); } + + void add(double value, unsigned int count) { + m_Sum += value; + m_Count += count; + } + + std::string toDelimited() const { + return core::CStringUtils::typeToString(m_Count) + "," + + core::CStringUtils::typeToStringPrecise(m_Sum, core::CIEEE754::E_DoublePrecision); + } + + bool fromDelimited(const std::string& value) { + std::size_t delimPos{value.find(',')}; + if (delimPos == std::string::npos) { + LOG_ERROR(<< "Invalid sum in '" << value << "'"); + return false; + } + if (core::CStringUtils::stringToType(value.substr(0, delimPos), m_Count) == false) { + LOG_ERROR(<< "Invalid sum in '" << value.substr(0, delimPos) << "'"); + return false; + } + if (core::CStringUtils::stringToType(value.substr(delimPos + 1), m_Sum) == false) { + LOG_ERROR(<< "Invalid sum in '" << value.substr(delimPos + 1) << "'"); + return false; + } + return true; + } + + std::uint64_t checksum() const { + return maths::common::CChecksum::calculate(m_Count, m_Sum); + } + +private: + unsigned int m_Count{0}; + double m_Sum{0.0}; +}; + +inline std::ostream& operator<<(std::ostream& os, const CSumAccumulator& acc) { + os << "CSumAccumulator(" << acc.toDelimited() << ")"; + return os; +} + 
+namespace metric_stat_shims { +using TDouble1Vec = core::CSmallVector; +using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; +using TVarianceAccumulator = maths::common::CBasicStatistics::SSampleMeanVar::TAccumulator; +using TMedianAccumulator = maths::common::CFixedQuantileSketch<30>; + +template +class StatFactory { +public: + static STAT make(std::size_t) { return STAT{}; } +}; +template +class StatFactory> { +public: + static CMetricMultivariateStatistic make(std::size_t dimension) { + return CMetricMultivariateStatistic{dimension}; + } +}; +template +STAT makeStat(std::size_t dimension) { + return StatFactory::make(dimension); +} + +template +std::size_t dimension(const STAT&) { + return 1; +} +template +std::size_t dimension(const CMetricMultivariateStatistic& stat) { + return stat.dimension(); +} + +template +bool wouldAdd(const TDouble1Vec&, STAT&) { + return true; +} +template +bool wouldAdd(const TDouble1Vec& value, + maths::common::CBasicStatistics::COrderStatisticsStack& stat) { + return stat.wouldAdd(value[0]); +} + +template +void add(const TDouble1Vec& value, unsigned int count, STAT& stat) { + stat.add(value[0], count); +} +template +void add(const TDouble1Vec& value, unsigned int count, CMetricMultivariateStatistic& stat) { + stat.add(value, count); +} + +inline TDouble1Vec value(const CSumAccumulator& stat) { + return {stat.value()}; +} +inline TDouble1Vec value(const TMeanAccumulator& stat) { + return {maths::common::CBasicStatistics::mean(stat)}; +} +inline TDouble1Vec value(const TVarianceAccumulator& stat) { + if (maths::common::CBasicStatistics::count(stat) >= 2.0) { + return {maths::common::CBasicStatistics::maximumLikelihoodVariance(stat), + maths::common::CBasicStatistics::mean(stat)}; + } + return {}; +} +inline TDouble1Vec value(const TMedianAccumulator& stat) { + double result; + if (stat.quantile(50.0, result) == false) { + return {}; + } + return {result}; +} +template +TDouble1Vec +value(const 
maths::common::CBasicStatistics::COrderStatisticsStack& stat) { + return {stat[0]}; +} +template +TDouble1Vec value(const CMetricMultivariateStatistic& stat) { + return stat.value(); +} + +template +TDouble1Vec influencerValue(const STAT& stat) { + return value(stat); +} +inline TDouble1Vec influencerValue(const TVarianceAccumulator& stat) { + // We always return an influence value (independent of the count) + // because this is not used to directly compute a variance only + // to adjust the bucket variance. + return {maths::common::CBasicStatistics::maximumLikelihoodVariance(stat), + maths::common::CBasicStatistics::mean(stat)}; +} +template +TDouble1Vec influencerValue(const CMetricMultivariateStatistic& stat) { + return stat.influencerValue(); +} + +inline double count(const CSumAccumulator& stat) { + return static_cast(stat.count()); +} +inline double count(const TMeanAccumulator& stat) { + return static_cast(maths::common::CBasicStatistics::count(stat)); +} +inline double count(const TVarianceAccumulator& stat) { + return static_cast(maths::common::CBasicStatistics::count(stat)); +} +inline double count(const TMedianAccumulator& stat) { + return stat.count(); +} +template +inline double +count(const maths::common::CBasicStatistics::COrderStatisticsStack& /*stat*/) { + return 1.0; +} +template +double count(const CMetricMultivariateStatistic& stat) { + return stat.count(); +} + +template +void persist(const STAT& stat, const std::string& tag, core::CStatePersistInserter& inserter) { + inserter.insertValue(tag, stat.toDelimited()); +} +inline void persist(const TMedianAccumulator& stat, + const std::string& tag, + core::CStatePersistInserter& inserter) { + inserter.insertLevel( + tag, [&](auto& inserter_) { stat.acceptPersistInserter(inserter_); }); +} +template +inline void persist(const CMetricMultivariateStatistic& stat, + const std::string& tag, + core::CStatePersistInserter& inserter) { + inserter.insertLevel(tag, [&](auto& inserter_) { 
stat.persist(inserter_); }); +} + +template +bool restore(core::CStateRestoreTraverser& traverser, STAT& stat) { + if (stat.fromDelimited(traverser.value()) == false) { + LOG_ERROR(<< "Invalid statistic in " << traverser.value()); + return false; + } + return true; +} +inline bool restore(core::CStateRestoreTraverser& traverser, TMedianAccumulator& stat) { + return traverser.traverseSubLevel([&](auto& traverser_) { + return stat.acceptRestoreTraverser(traverser_); + }); +} +template +inline bool restore(core::CStateRestoreTraverser& traverser, + CMetricMultivariateStatistic& stat) { + return traverser.traverseSubLevel( + [&](auto& traverser_) { return stat.restore(traverser_); }); +} +} +} +} + +#endif // INCLUDED_ml_model_CMetricStatShims_h diff --git a/include/model/CMetricStatisticWrappers.h b/include/model/CMetricStatisticWrappers.h deleted file mode 100644 index 97ca4e3347..0000000000 --- a/include/model/CMetricStatisticWrappers.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#ifndef INCLUDED_ml_model_CMetricStatisticWrappers_h -#define INCLUDED_ml_model_CMetricStatisticWrappers_h - -#include - -#include -#include -#include -#include - -#include - -#include -#include - -namespace ml { -namespace model { -template -class CMetricMultivariateStatistic; - -namespace metric_statistic_wrapper_detail { - -//! \brief Makes a univariate metric statistic. 
-template -struct SMake { - static STATISTIC dispatch(std::size_t /*dimension*/) { return STATISTIC(); } -}; -//! \brief Makes a multivariate metric statistic. -template -struct SMake> { - static CMetricMultivariateStatistic dispatch(std::size_t dimension) { - return CMetricMultivariateStatistic(dimension); - } -}; - -} // metric_statistic_wrapper_detail:: - -//! \brief Provides wrappers for all aggregate metric statistics -//! for which we gather data. -//! -//! DESCTIPTION:\n -//! This shim is used by CPartialStatistic and CSampleGatherer to -//! provide a common interface into the various types of operation -//! which those classes need. -//! -//! It provides static functions for getting the statistic value -//! and count if possible, and persisting and restoring them all -//! of which delegate to the appropriate statistic functions. -struct MODEL_EXPORT CMetricStatisticWrappers { - using TDouble1Vec = core::CSmallVector; - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; - using TVarianceAccumulator = - maths::common::CBasicStatistics::SSampleMeanVar::TAccumulator; - using TMedianAccumulator = maths::common::CFixedQuantileSketch<30>; - - //! Make a statistic. - template - static STATISTIC make(std::size_t dimension) { - return metric_statistic_wrapper_detail::SMake::dispatch(dimension); - } - - //! Add \p value to an order statistic. - template - static void - add(const TDouble1Vec& value, - unsigned int count, - maths::common::CBasicStatistics::COrderStatisticsStack& stat) { - stat.add(value[0], count); - } - //! Add \p value to a mean statistic. - static void add(const TDouble1Vec& value, unsigned int count, TMeanAccumulator& stat) { - stat.add(value[0], count); - } - //! Add \p value to a variance statistic. - static void add(const TDouble1Vec& value, unsigned int count, TVarianceAccumulator& stat) { - stat.add(value[0], count); - } - //! Add \p value to a median statistic. 
- static void add(const TDouble1Vec& value, unsigned int count, TMedianAccumulator& stat) { - stat.add(value[0], count); - } - //! Add \p value to a multivariate statistic. - template - static void add(const TDouble1Vec& value, - unsigned int count, - CMetricMultivariateStatistic& stat) { - stat.add(value, count); - } - - //! Get the median value of an order statistic. - template - static TDouble1Vec - value(const maths::common::CBasicStatistics::COrderStatisticsStack& stat) { - return TDouble1Vec{stat[0]}; - } - //! Get the value of a mean statistic. - static TDouble1Vec value(const TMeanAccumulator& stat) { - return TDouble1Vec{maths::common::CBasicStatistics::mean(stat)}; - } - //! Get the value of a variance statistic. - static TDouble1Vec value(const TVarianceAccumulator& stat) { - TDouble1Vec result; - if (maths::common::CBasicStatistics::count(stat) >= 2.0) { - result.assign({maths::common::CBasicStatistics::maximumLikelihoodVariance(stat), - maths::common::CBasicStatistics::mean(stat)}); - } - return result; - } - //! Get the value of a median statistic. - static TDouble1Vec value(const TMedianAccumulator& stat) { - double result; - if (!stat.quantile(50.0, result)) { - return TDouble1Vec{0.0}; - } - return TDouble1Vec{result}; - } - //! Get the value of a multivariate statistic. - template - static TDouble1Vec value(const CMetricMultivariateStatistic& stat) { - return stat.value(); - } - - //! Forward to the value function. - template - static TDouble1Vec influencerValue(const STATISTIC& stat) { - return value(stat); - } - //! Get the variance influence value. - static TDouble1Vec influencerValue(const TVarianceAccumulator& stat) { - // We always return an influence value (independent of the count) - // because this is not used to directly compute a variance only - // to adjust the bucket variance. 
- TDouble1Vec result(2); - result[0] = maths::common::CBasicStatistics::maximumLikelihoodVariance(stat); - result[1] = maths::common::CBasicStatistics::mean(stat); - return result; - } - //! Get the value suitable for computing influence of a multivariate - //! statistic. - template - static TDouble1Vec influencerValue(const CMetricMultivariateStatistic& stat) { - return stat.influencerValue(); - } - - //! Returns 1.0 since this is not available. - template - static double - count(const maths::common::CBasicStatistics::COrderStatisticsStack& /*stat*/) { - return 1.0; - } - //! Get the count of the statistic. - static double count(const TMeanAccumulator& stat) { - return static_cast(maths::common::CBasicStatistics::count(stat)); - } - //! Get the count of the statistic. - static double count(const TVarianceAccumulator& stat) { - return static_cast(maths::common::CBasicStatistics::count(stat)); - } - //! Get the count of the statistic. - static double count(const TMedianAccumulator& stat) { return stat.count(); } - //! Get the count of a multivariate statistic. - template - static double count(const CMetricMultivariateStatistic& stat) { - return stat.count(); - } - - //! Persist an order statistic. - template - static void - persist(const maths::common::CBasicStatistics::COrderStatisticsStack& stat, - const std::string& tag, - core::CStatePersistInserter& inserter) { - inserter.insertValue(tag, stat.toDelimited()); - } - //! Persist a mean statistic. - static void persist(const TMeanAccumulator& stat, - const std::string& tag, - core::CStatePersistInserter& inserter) { - inserter.insertValue(tag, stat.toDelimited()); - } - //! Persist a variance statistic. - static void persist(const TVarianceAccumulator& stat, - const std::string& tag, - core::CStatePersistInserter& inserter) { - inserter.insertValue(tag, stat.toDelimited()); - } - //! Persist a median statistic. 
- static void persist(const TMedianAccumulator& stat, - const std::string& tag, - core::CStatePersistInserter& inserter) { - inserter.insertLevel(tag, std::bind(&TMedianAccumulator::acceptPersistInserter, - &stat, std::placeholders::_1)); - } - //! Persist a multivariate statistic. - template - static void persist(const CMetricMultivariateStatistic& stat, - const std::string& tag, - core::CStatePersistInserter& inserter) { - inserter.insertLevel(tag, std::bind(&CMetricMultivariateStatistic::persist, - &stat, std::placeholders::_1)); - } - - //! Restore an order statistic. - template - static inline bool - restore(core::CStateRestoreTraverser& traverser, - maths::common::CBasicStatistics::COrderStatisticsStack& stat) { - if (stat.fromDelimited(traverser.value()) == false) { - LOG_ERROR(<< "Invalid statistic in " << traverser.value()); - return false; - } - return true; - } - //! Restore a mean statistic. - static bool restore(core::CStateRestoreTraverser& traverser, TMeanAccumulator& stat) { - if (stat.fromDelimited(traverser.value()) == false) { - LOG_ERROR(<< "Invalid mean in " << traverser.value()); - return false; - } - return true; - } - //! Restore a variance statistic. - static bool restore(core::CStateRestoreTraverser& traverser, TVarianceAccumulator& stat) { - if (stat.fromDelimited(traverser.value()) == false) { - LOG_ERROR(<< "Invalid variance in " << traverser.value()); - return false; - } - return true; - } - //! Restore a median statistic. - static bool restore(core::CStateRestoreTraverser& traverser, TMedianAccumulator& stat) { - return traverser.traverseSubLevel(std::bind(&TMedianAccumulator::acceptRestoreTraverser, - &stat, std::placeholders::_1)); - } - //! Restore a multivariate statistic. 
- template - static bool restore(core::CStateRestoreTraverser& traverser, - CMetricMultivariateStatistic& stat) { - return traverser.traverseSubLevel( - std::bind(&CMetricMultivariateStatistic::restore, &stat, - std::placeholders::_1)); - } -}; -} -} - -#endif // INCLUDED_ml_model_CMetricStatisticWrappers_h diff --git a/include/model/CModelDetailsView.h b/include/model/CModelDetailsView.h index 09b42bcae9..78ec3904f9 100644 --- a/include/model/CModelDetailsView.h +++ b/include/model/CModelDetailsView.h @@ -97,9 +97,7 @@ class MODEL_EXPORT CModelDetailsView { virtual const CAnomalyDetectorModel& base() const = 0; //! Get the count variance scale. - virtual double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const = 0; + virtual double countVarianceScale() const = 0; //! Returns true if the terms are empty or they contain the key. static bool contains(const TStrSet& terms, const std::string& key); @@ -134,9 +132,7 @@ class MODEL_EXPORT CEventRateModelDetailsView : public CModelDetailsView { private: const CAnomalyDetectorModel& base() const override; - double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const override; + double countVarianceScale() const override; private: //! The model. @@ -159,9 +155,7 @@ class MODEL_EXPORT CEventRatePopulationModelDetailsView : public CModelDetailsVi private: const CAnomalyDetectorModel& base() const override; - double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const override; + double countVarianceScale() const override; private: //! The model. @@ -184,9 +178,7 @@ class MODEL_EXPORT CMetricModelDetailsView : public CModelDetailsView { private: const CAnomalyDetectorModel& base() const override; - double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const override; + double countVarianceScale() const override; private: //! The model. 
@@ -209,9 +201,7 @@ class MODEL_EXPORT CMetricPopulationModelDetailsView : public CModelDetailsView private: const CAnomalyDetectorModel& base() const override; - double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const override; + double countVarianceScale() const override; private: //! The model. diff --git a/include/model/CModelFactory.h b/include/model/CModelFactory.h index 623f88db86..5dcf8ae3a0 100644 --- a/include/model/CModelFactory.h +++ b/include/model/CModelFactory.h @@ -126,15 +126,13 @@ class MODEL_EXPORT CModelFactory { //! time we need extra data to initialize a data gatherer. struct MODEL_EXPORT SGathererInitializationData { SGathererInitializationData(core_t::TTime startTime, - const std::string& partitionFieldValue, - unsigned int sampleOverrideCount = 0u); + const std::string& partitionFieldValue); //! This constructor is to simplify unit testing. SGathererInitializationData(const core_t::TTime startTime); core_t::TTime s_StartTime; const std::string& s_PartitionFieldValue; - unsigned int s_SampleOverrideCount; }; public: @@ -287,10 +285,6 @@ class MODEL_EXPORT CModelFactory { //! Set the features which will be modeled. virtual void features(const TFeatureVec& features) = 0; - //! Set the amount by which metric sample count is reduced for - //! fine-grained sampling when there is latency. - void sampleCountFactor(std::size_t sampleCountFactor); - //! Set whether the model should exclude frequent hitters from the //! calculations. void excludeFrequent(model_t::EExcludeFrequent excludeFrequent); @@ -321,10 +315,6 @@ class MODEL_EXPORT CModelFactory { //! models. void initialDecayRateMultiplier(double multiplier); - //! Set the maximum number of times we'll update a person's model - //! in a bucketing interval. - void maximumUpdatesPerBucket(double maximumUpdatesPerBucket); - //! 
Set the prune window scale factor minimum void pruneWindowScaleMinimum(double factor); diff --git a/include/model/CSampleCounts.h b/include/model/CSampleCounts.h deleted file mode 100644 index 911154201c..0000000000 --- a/include/model/CSampleCounts.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#ifndef INCLUDED_ml_model_CSampleCounts_h -#define INCLUDED_ml_model_CSampleCounts_h - -#include -#include - -#include - -#include - -#include -#include - -namespace ml { -namespace core { -class CStatePersistInserter; -class CStateRestoreTraverser; -} -namespace model { -class CDataGatherer; - -//! \brief Manages setting of sample counts. -//! -//! DESCRIPTION:\n -//! To handle variable data rates we gather statistics on an almost -//! fixed number of measurements (since the distribution of these -//! statistics is a function of the measurement count). The correction -//! we apply to account for the varying measurement count in the -//! likelihood function is approximate and so if the mean bucket -//! bucket count wanders too far from the sample count we reset the -//! the sample count. -class MODEL_EXPORT CSampleCounts { -public: - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; - using TMeanAccumulatorVec = std::vector; - using TSizeVec = std::vector; - -public: - explicit CSampleCounts(unsigned int sampleCountOverride = 0); - - //! 
Create a copy that will result in the same persisted state as the - //! original. This is effectively a copy constructor that creates a - //! copy that's only valid for a single purpose. The boolean flag is - //! redundant except to create a signature that will not be mistaken for - //! a general purpose copy constructor. - CSampleCounts(bool isForPersistence, const CSampleCounts& other); - - CSampleCounts* cloneForPersistence() const; - - //! Persist the sample counts to a state document. - void acceptPersistInserter(core::CStatePersistInserter& inserter) const; - - //! Restore some sample counts from a state document traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); - - //! Get the sample count identified by \p id. - unsigned int count(std::size_t id) const; - - //! Get the effective sample count identified by \p id. - double effectiveSampleCount(std::size_t id) const; - - //! Reset the sample count identified by \p id. - void resetSampleCount(const CDataGatherer& gatherer, std::size_t id); - - //! Update the effective sample variances to reflect new sample for \p id. - void updateSampleVariance(std::size_t id); - - //! Update the mean non-zero bucket counts and age the count data. - void updateMeanNonZeroBucketCount(std::size_t id, double count, double alpha); - - //! Refresh the sample count identified by \p id. - void refresh(const CDataGatherer& gatherer); - - //! Recycle the sample counts identified by \p idsToRemove. - void recycle(const TSizeVec& idsToRemove); - - //! Remove all traces of attributes whose identifiers are - //! greater than or equal to \p lowestIdToRemove. - void remove(std::size_t lowestIdToRemove); - - //! Resize the sample counts so they can accommodate \p id. - void resize(std::size_t id); - - //! Get the sample counts checksum. - uint64_t checksum(const CDataGatherer& gatherer) const; - - //! Debug the memory used by this object. 
- void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; - - //! Get the memory used by this object. - std::size_t memoryUsage() const; - - //! Clear the sample counts. - void clear(); - -private: - using TUIntVec = std::vector; - -private: - //! Get the name of the entity identified by \p id. - const std::string& name(const CDataGatherer& gatherer, std::size_t id) const; - -private: - //! This overrides the sample counts if non-zero. - unsigned int m_SampleCountOverride; - - //! The "fixed" number of measurements in a sample. - TUIntVec m_SampleCounts; - - //! The mean number of measurements per bucket. - TMeanAccumulatorVec m_MeanNonZeroBucketCounts; - - //! The effective sample variance in the data supplied to the - //! model. The sample count is reset if the mean bucket count - //! moves too far from the current value. This is an approximate - //! estimate of the effective variance, due to the averaging - //! process, of the samples with which the model has been updated. - TMeanAccumulatorVec m_EffectiveSampleVariances; -}; - -} // model -} // ml - -#endif // INCLUDED_ml_model_CSampleCounts_h diff --git a/include/model/CSampleGatherer.h b/include/model/CSampleGatherer.h deleted file mode 100644 index cbc7a4de03..0000000000 --- a/include/model/CSampleGatherer.h +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. 
- */ - -#ifndef INCLUDED_ml_model_CSampleGatherer_h -#define INCLUDED_ml_model_CSampleGatherer_h - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -namespace ml { -namespace model { - -//! \brief Metric statistic gatherer. -//! -//! DESCRIPTION:\n -//! Wraps up the functionality to sample a statistic of a fixed -//! number of metric values, which is supplied to the add function, -//! for a latency window. -//! -//! This also computes the statistic value of all metric values -//! and for each influencing field values for every bucketing -//! interval in the latency window. -//! -//! \tparam STATISTIC This must satisfy the requirements imposed -//! by CMetricPartialStatistic. -template -class CSampleGatherer { -public: - using TDouble1Vec = core::CSmallVector; - using TStrVec = std::vector; - using TStrVecCItr = TStrVec::const_iterator; - using TStrCRef = std::reference_wrapper; - using TDouble1VecDoublePr = std::pair; - using TStrCRefDouble1VecDoublePrPr = std::pair; - using TStrCRefDouble1VecDoublePrPrVec = std::vector; - using TStrCRefDouble1VecDoublePrPrVecVec = std::vector; - using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; - using TSampleQueue = CSampleQueue; - using TSampleVec = typename TSampleQueue::TSampleVec; - using TMetricPartialStatistic = CMetricPartialStatistic; - using TStatBucketQueue = CBucketQueue; - using TOptionalStr = std::optional; - using TOptionalStrVec = std::vector; - using TOptionalStrStatUMap = - boost::unordered_map; - using TOptionalStrStatUMapBucketQueue = CBucketQueue; - using TOptionalStrStatUMapBucketQueueVec = std::vector; - -public: - static const std::string CLASSIFIER_TAG; - static const std::string SAMPLE_STATS_TAG; - static const std::string BUCKET_STATS_TAG; - static const std::string INFLUENCER_BUCKET_STATS_TAG; - 
static const std::string DIMENSION_TAG; - -public: - CSampleGatherer(const SModelParams& params, - std::size_t dimension, - core_t::TTime startTime, - core_t::TTime bucketLength, - TStrVecCItr beginInfluencers, - TStrVecCItr endInfluencers) - : m_Dimension(dimension), m_SampleStats(dimension, - params.s_SampleCountFactor, - params.s_LatencyBuckets, - params.s_SampleQueueGrowthFactor, - bucketLength), - m_BucketStats(params.s_LatencyBuckets, - bucketLength, - startTime, - TMetricPartialStatistic(dimension)), - m_InfluencerBucketStats( - std::distance(beginInfluencers, endInfluencers), - TOptionalStrStatUMapBucketQueue(params.s_LatencyBuckets + 3, - bucketLength, - startTime, - TOptionalStrStatUMap(1))) {} - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter. - void acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertValue(DIMENSION_TAG, m_Dimension); - inserter.insertLevel(CLASSIFIER_TAG, - std::bind(&CDataClassifier::acceptPersistInserter, - &m_Classifier, std::placeholders::_1)); - if (m_SampleStats.empty() == false) { - inserter.insertLevel(SAMPLE_STATS_TAG, - std::bind(&TSampleQueue::acceptPersistInserter, - &m_SampleStats, std::placeholders::_1)); - } - if (m_BucketStats.empty() == false) { - inserter.insertLevel( - BUCKET_STATS_TAG, - std::bind(TStatBucketQueueSerializer(TMetricPartialStatistic(m_Dimension)), - std::cref(m_BucketStats), std::placeholders::_1)); - } - for (const auto& stats : m_InfluencerBucketStats) { - inserter.insertLevel( - INFLUENCER_BUCKET_STATS_TAG, - std::bind(TOptionalStrStatUMapBucketQueueSerializer( - TOptionalStrStatUMap(1), - COptionalStrStatUMapSerializer(m_Dimension)), - std::cref(stats), std::placeholders::_1)); - } - } - - //! Create from part of a state document. 
- bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - std::size_t i = 0; - do { - const std::string& name = traverser.name(); - TMetricPartialStatistic stat(m_Dimension); - RESTORE_BUILT_IN(DIMENSION_TAG, m_Dimension) - RESTORE(CLASSIFIER_TAG, traverser.traverseSubLevel(std::bind( - &CDataClassifier::acceptRestoreTraverser, - &m_Classifier, std::placeholders::_1))) - RESTORE(SAMPLE_STATS_TAG, traverser.traverseSubLevel(std::bind( - &TSampleQueue::acceptRestoreTraverser, - &m_SampleStats, std::placeholders::_1))) - RESTORE(BUCKET_STATS_TAG, - traverser.traverseSubLevel(std::bind( - TStatBucketQueueSerializer(TMetricPartialStatistic(m_Dimension)), - std::ref(m_BucketStats), std::placeholders::_1))) - RESTORE(INFLUENCER_BUCKET_STATS_TAG, - i < m_InfluencerBucketStats.size() && - traverser.traverseSubLevel(std::bind( - TOptionalStrStatUMapBucketQueueSerializer( - TOptionalStrStatUMap(1), COptionalStrStatUMapSerializer(m_Dimension)), - std::ref(m_InfluencerBucketStats[i++]), std::placeholders::_1))) - } while (traverser.next()); - return true; - } - //@} - - //! Get the dimension of the underlying statistic. - std::size_t dimension() const { return m_Dimension; } - - //! Get the feature data for the bucketing interval containing - //! \p time. - //! - //! \param[in] time The start time of the sampled bucket. - //! \param[in] effectiveSampleCount The effective historical - //! number of measurements in a sample. 
- SMetricFeatureData featureData(core_t::TTime time, - core_t::TTime /*bucketLength*/, - double effectiveSampleCount) const { - const TMetricPartialStatistic& bucketPartial = m_BucketStats.get(time); - double count = bucketPartial.count(); - if (count > 0.0) { - core_t::TTime bucketTime = bucketPartial.time(); - TDouble1Vec bucketValue = bucketPartial.value(); - if (bucketValue.size() > 0) { - TStrCRefDouble1VecDoublePrPrVecVec influenceValues( - m_InfluencerBucketStats.size()); - for (std::size_t i = 0; i < m_InfluencerBucketStats.size(); ++i) { - const TOptionalStrStatUMap& influencerStats = - m_InfluencerBucketStats[i].get(time); - influenceValues[i].reserve(influencerStats.size()); - for (const auto& stat : influencerStats) { - influenceValues[i].emplace_back( - std::cref(*stat.first), - std::make_pair( - CMetricStatisticWrappers::influencerValue(stat.second), - CMetricStatisticWrappers::count(stat.second))); - } - } - return {bucketTime, - bucketValue, - model_t::varianceScale(FEATURE, effectiveSampleCount, count), - count, - influenceValues, - m_Classifier.isInteger(), - m_Classifier.isNonNegative(), - m_Samples}; - } - } - return {m_Classifier.isInteger(), m_Classifier.isNonNegative(), m_Samples}; - } - - //! Create samples if possible for the given bucket. - //! - //! \param[in] time The start time of the sampled bucket. - //! \param[in] sampleCount The measurement count in a sample. - //! \return True if there are new samples and false otherwise. - bool sample(core_t::TTime time, unsigned int sampleCount) { - if (sampleCount > 0 && m_SampleStats.canSample(time)) { - TSampleVec newSamples; - m_SampleStats.sample(time, sampleCount, FEATURE, newSamples); - m_Samples.insert(m_Samples.end(), newSamples.begin(), newSamples.end()); - return !newSamples.empty(); - } - return false; - } - - //! Update the state with a new measurement. - //! - //! \param[in] time The time of \p value. - //! \param[in] value The measurement value. - //! 
\param[in] sampleCount The measurement count in a sample. - //! \param[in] influences The influencing field values which - //! label \p value. - inline void add(core_t::TTime time, - const TDouble1Vec& value, - unsigned int sampleCount, - const TOptionalStrVec& influences) { - this->add(time, value, 1, sampleCount, influences); - } - - //! Update the state with a new mean statistic. - //! - //! \param[in] time The approximate time of \p statistic. - //! \param[in] statistic The statistic value. - //! \param[in] count The number of measurements in \p statistic. - //! \param[in] sampleCount The measurement count in a sample. - //! \param[in] influences The influencing field values which - //! label \p value. - void add(core_t::TTime time, - const TDouble1Vec& statistic, - unsigned int count, - unsigned int sampleCount, - const TOptionalStrVec& influences) { - if (sampleCount > 0) { - m_SampleStats.add(time, statistic, count, sampleCount); - } - m_BucketStats.get(time).add(statistic, time, count); - m_Classifier.add(FEATURE, statistic, count); - std::size_t n = std::min(influences.size(), m_InfluencerBucketStats.size()); - for (std::size_t i = 0; i < n; ++i) { - if (!influences[i]) { - continue; - } - TOptionalStrStatUMap& stats = m_InfluencerBucketStats[i].get(time); - auto j = stats - .emplace(influences[i], - CMetricStatisticWrappers::template make(m_Dimension)) - .first; - CMetricStatisticWrappers::add(statistic, count, j->second); - } - } - - //! Update the state to represent the start of a new bucket. - void startNewBucket(core_t::TTime time) { - m_BucketStats.push(TMetricPartialStatistic(m_Dimension), time); - for (auto& stats : m_InfluencerBucketStats) { - stats.push(TOptionalStrStatUMap(1), time); - } - m_Samples.clear(); - } - - //! Reset the bucket state for the bucket containing \p bucketStart. 
- void resetBucket(core_t::TTime bucketStart) { - m_BucketStats.get(bucketStart) = TMetricPartialStatistic(m_Dimension); - for (auto& stats : m_InfluencerBucketStats) { - stats.get(bucketStart) = TOptionalStrStatUMap(1); - } - m_SampleStats.resetBucket(bucketStart); - } - - //! Is the gatherer holding redundant data? - bool isRedundant(core_t::TTime samplingCutoffTime) const { - if (m_SampleStats.latestEnd() >= samplingCutoffTime) { - return false; - } - for (const auto& bucket : m_BucketStats) { - if (bucket.count() > 0.0) { - return false; - } - } - return true; - } - - //! Get the checksum of this gatherer. - uint64_t checksum() const { - uint64_t seed = static_cast(m_Classifier.isInteger()); - seed = maths::common::CChecksum::calculate(seed, m_Classifier.isNonNegative()); - seed = maths::common::CChecksum::calculate(seed, m_SampleStats); - seed = maths::common::CChecksum::calculate(seed, m_BucketStats); - return maths::common::CChecksum::calculate(seed, m_InfluencerBucketStats); - } - - //! Debug the memory used by this gatherer. - void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { - mem->setName("CSampleGatherer", sizeof(*this)); - core::memory_debug::dynamicSize("m_SampleStats", m_SampleStats, mem); - core::memory_debug::dynamicSize("m_BucketStats", m_BucketStats, mem); - core::memory_debug::dynamicSize("m_InfluencerBucketStats", - m_InfluencerBucketStats, mem); - core::memory_debug::dynamicSize("m_Samples", m_Samples, mem); - } - - //! Get the memory used by this gatherer. - std::size_t memoryUsage() const { - return sizeof(*this) + core::memory::dynamicSize(m_SampleStats) + - core::memory::dynamicSize(m_BucketStats) + - core::memory::dynamicSize(m_InfluencerBucketStats) + - core::memory::dynamicSize(m_Samples); - } - - //! Print this gatherer for debug. 
- std::string print() const { - std::ostringstream result; - result << m_Classifier.isInteger() << ' ' << m_Classifier.isNonNegative() - << ' ' << m_BucketStats.print() << ' ' << m_SampleStats.print() - << ' ' << m_Samples << ' ' << m_InfluencerBucketStats; - return result.str(); - } - -private: - static const std::string MAP_KEY_TAG; - static const std::string MAP_VALUE_TAG; - -private: - //! \brief Manages persistence of bucket statistics. - struct SStatSerializer { - void operator()(const TMetricPartialStatistic& stat, - core::CStatePersistInserter& inserter) const { - stat.persist(inserter); - } - - bool operator()(TMetricPartialStatistic& stat, - core::CStateRestoreTraverser& traverser) const { - return stat.restore(traverser); - } - }; - using TStatBucketQueueSerializer = - typename TStatBucketQueue::template CSerializer; - - //! \brief Manages persistence of influence bucket statistics. - class COptionalStrStatUMapSerializer { - public: - COptionalStrStatUMapSerializer(std::size_t dimension) - : m_Initial(CMetricStatisticWrappers::template make(dimension)) {} - - void operator()(const TOptionalStrStatUMap& map, - core::CStatePersistInserter& inserter) const { - using TStatCRef = std::reference_wrapper; - using TStrCRefStatCRefPr = std::pair; - using TStrCRefStatCRefPrVec = std::vector; - TStrCRefStatCRefPrVec ordered; - ordered.reserve(map.size()); - for (const auto& stat : map) { - ordered.emplace_back(TStrCRef(*stat.first), TStatCRef(stat.second)); - } - std::sort(ordered.begin(), ordered.end(), - maths::common::COrderings::SFirstLess()); - for (const auto& stat : ordered) { - inserter.insertValue(MAP_KEY_TAG, stat.first); - CMetricStatisticWrappers::persist(stat.second.get(), MAP_VALUE_TAG, inserter); - } - } - - bool operator()(TOptionalStrStatUMap& map, - core::CStateRestoreTraverser& traverser) const { - std::string key; - do { - const std::string& name = traverser.name(); - RESTORE_NO_ERROR(MAP_KEY_TAG, key = traverser.value()) - 
RESTORE(MAP_VALUE_TAG, - CMetricStatisticWrappers::restore( - traverser, map.insert({key, m_Initial}).first->second)) - } while (traverser.next()); - return true; - } - - private: - STATISTIC m_Initial; - }; - using TOptionalStrStatUMapBucketQueueSerializer = - typename TOptionalStrStatUMapBucketQueue::template CSerializer; - -private: - //! The dimension of the statistic being gathered. - std::size_t m_Dimension; - - //! Classifies the sampled statistics. - CDataClassifier m_Classifier; - - //! The queue holding the partial aggregate statistics within - //! latency window used for building samples. - TSampleQueue m_SampleStats; - - //! The aggregation of the measurements received for each - //! bucket within latency window. - TStatBucketQueue m_BucketStats; - - //! The aggregation of the measurements received for each - //! bucket and influencing field within latency window. - TOptionalStrStatUMapBucketQueueVec m_InfluencerBucketStats; - - //! The samples of the aggregate statistic in the current - //! bucketing interval. - TSampleVec m_Samples; -}; - -template -const std::string CSampleGatherer::CLASSIFIER_TAG("a"); -template -const std::string CSampleGatherer::SAMPLE_STATS_TAG("b"); -template -const std::string CSampleGatherer::BUCKET_STATS_TAG("c"); -template -const std::string CSampleGatherer::INFLUENCER_BUCKET_STATS_TAG("d"); -template -const std::string CSampleGatherer::DIMENSION_TAG("e"); -template -const std::string CSampleGatherer::MAP_KEY_TAG("a"); -template -const std::string CSampleGatherer::MAP_VALUE_TAG("b"); - -//! Overload print operator for feature data. 
-MODEL_EXPORT -inline std::ostream& operator<<(std::ostream& o, const SMetricFeatureData& fd) { - return o << fd.print(); -} -} -} - -#endif // INCLUDED_ml_model_CSampleGatherer_h diff --git a/include/model/CSampleQueue.h b/include/model/CSampleQueue.h deleted file mode 100644 index 983afb504a..0000000000 --- a/include/model/CSampleQueue.h +++ /dev/null @@ -1,514 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#ifndef INCLUDED_ml_model_CSampleQueue_h -#define INCLUDED_ml_model_CSampleQueue_h - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace ml { -namespace model { - -//! \brief A queue that manages the sampling of statistics -//! -//! DESCRIPTION:\n -//! A queue which collects measurements and creates samples when -//! there are enough measurements and the time is appropriate. -//! The queue handles the creation of samples during a latency -//! window by creating smaller sub-samples which get updated -//! as new measurements are received. The new measurements are -//! combined into existing sub-samples if their time is close enough -//! or they are placed into newly created sub-samples. -//! -//! The template STATISTIC has to comply with the requirements of -//! the CMetricPartialStatistic template. 
-template -class CSampleQueue { -private: - using TDouble1Vec = core::CSmallVector; - using TMetricPartialStatistic = CMetricPartialStatistic; - -private: - //! A struct grouping together the data that form a sub-sample. - //! A sub-sample is comprised of a partial statistic and a start - //! and an end time marking the interval range for the sub-sample. - struct SSubSample { - static const std::string SAMPLE_START_TAG; - static const std::string SAMPLE_END_TAG; - static const std::string SAMPLE_TAG; - - SSubSample(std::size_t dimension, core_t::TTime time) - : s_Statistic(dimension), s_Start(time), s_End(time) {} - - void add(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) { - s_Statistic.add(measurement, time, count); - // Using explicit tests instead of std::min and std::max to work - // around g++ 4.1 optimiser bug - if (time < s_Start) { - s_Start = time; - } - if (time > s_End) { - s_End = time; - } - } - - //! Check if \p time overlaps the interval or doesn't extend - //! it to be more than \p targetSpan long. - bool isClose(core_t::TTime time, core_t::TTime targetSpan) const { - if (time > s_End) { - return time < s_Start + targetSpan; - } - if (time >= s_Start) { - return true; - } - return time > s_End - targetSpan; - } - - // This assumes that buckets are aligned to n * bucketLength - bool isInSameBucket(core_t::TTime time, core_t::TTime bucketLength) const { - core_t::TTime timeBucket = maths::common::CIntegerTools::floor(time, bucketLength); - core_t::TTime subSampleBucket = - maths::common::CIntegerTools::floor(s_Start, bucketLength); - return timeBucket == subSampleBucket; - } - - //! Combine the statistic and construct the union interval. - const SSubSample& operator+=(const SSubSample& rhs) { - s_Statistic += rhs.s_Statistic; - s_Start = std::min(s_Start, rhs.s_Start); - s_End = std::max(s_End, rhs.s_End); - return *this; - } - - //! Persist to a state document. 
- void acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(SAMPLE_TAG, - std::bind(&TMetricPartialStatistic::persist, - &s_Statistic, std::placeholders::_1)); - inserter.insertValue(SAMPLE_START_TAG, s_Start); - inserter.insertValue(SAMPLE_END_TAG, s_End); - } - - //! Restore from a state document. - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - if (name == SAMPLE_TAG) { - if (traverser.traverseSubLevel( - std::bind(&TMetricPartialStatistic::restore, &s_Statistic, - std::placeholders::_1)) == false) { - LOG_ERROR(<< "Invalid sample value"); - return false; - } - } else if (name == SAMPLE_START_TAG) { - if (core::CStringUtils::stringToType(traverser.value(), s_Start) == false) { - LOG_ERROR(<< "Invalid attribute identifier in " - << traverser.value()); - return false; - } - } else if (name == SAMPLE_END_TAG) { - if (core::CStringUtils::stringToType(traverser.value(), s_End) == false) { - LOG_ERROR(<< "Invalid attribute identifier in " - << traverser.value()); - return false; - } - } - } while (traverser.next()); - return true; - } - - //! Get a checksum of this sub-sample. - std::uint64_t checksum() const { - std::uint64_t seed = maths::common::CChecksum::calculate(0, s_Statistic); - seed = maths::common::CChecksum::calculate(seed, s_Start); - return maths::common::CChecksum::calculate(seed, s_End); - } - - //! Debug the memory used by the sub-sample. - void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { - mem->setName("SSubSample", sizeof(*this)); - core::memory_debug::dynamicSize("s_Statistic", s_Statistic, mem); - } - - //! Get the memory used by the sub-sample. - std::size_t memoryUsage() const { - return sizeof(*this) + core::memory::dynamicSize(s_Statistic); - } - - //! Print the sub-sample for debug. 
- std::string print() const { - return "{[" + core::CStringUtils::typeToString(s_Start) + ", " + - core::CStringUtils::typeToString(s_End) + "] -> " + - s_Statistic.print() + "}"; - } - - TMetricPartialStatistic s_Statistic; - core_t::TTime s_Start; - core_t::TTime s_End; - }; - -public: - using TQueue = boost::circular_buffer; - using iterator = typename TQueue::iterator; - using reverse_iterator = typename TQueue::reverse_iterator; - using const_reverse_iterator = typename TQueue::const_reverse_iterator; - using TSampleVec = std::vector; - using TOptionalSubSample = std::optional; - -public: - static const std::string SUB_SAMPLE_TAG; - -public: - //! Constructs a new queue. - //! - //! \param[in] dimension The dimension of the metric statistic. - //! \param[in] sampleCountFactor The queue attempts to keep the sub-samples - //! size to the current sample count divided by the sampleCountFactor. - //! \param[in] latencyBuckets The number of buckets that are in the latency window. - //! \param[in] growthFactor The factor with which the queue's size grows whenever - //! a new item is inserted and the queue is full. - //! \param[in] bucketLength The bucket length. - CSampleQueue(std::size_t dimension, - std::size_t sampleCountFactor, - std::size_t latencyBuckets, - double growthFactor, - core_t::TTime bucketLength) - : m_Dimension(dimension), - m_Queue(std::max(sampleCountFactor * latencyBuckets, std::size_t(1))), - m_SampleCountFactor(sampleCountFactor), m_GrowthFactor(growthFactor), - m_BucketLength(bucketLength), - m_Latency(static_cast(latencyBuckets) * bucketLength) {} - - //! Adds a measurement to the queue. - //! - //! \param[in] time The time of the measurement. - //! \param[in] measurement The value of the measurement. - //! \param[in] count The count of the measurement. - //! \param[in] sampleCount The target sample count. 
- void add(core_t::TTime time, const TDouble1Vec& measurement, unsigned int count, unsigned int sampleCount) { - if (m_Queue.empty()) { - this->pushFrontNewSubSample(measurement, time, count); - } else if (time >= m_Queue[0].s_Start) { - this->addAfterLatestStartTime(measurement, time, count, sampleCount); - } else { - this->addHistorical(measurement, time, count, sampleCount); - } - } - - //! Can the queue possible create samples? - bool canSample(core_t::TTime bucketStart) const { - core_t::TTime bucketEnd = bucketStart + m_BucketLength - 1; - return m_Queue.empty() ? false : m_Queue.back().s_End <= bucketEnd; - } - - //! Combines as many sub-samples as possible in order to create samples. - //! - //! \param[in] bucketStart The start time of the bucket to sample. - //! \param[in] sampleCount The target sample count. - //! \param[in] feature The feature to which the measurements correspond. - //! \param[out] samples The newly created samples. - void sample(core_t::TTime bucketStart, - unsigned int sampleCount, - model_t::EFeature feature, - TSampleVec& samples) { - core_t::TTime latencyCutoff = bucketStart + m_BucketLength - 1; - TOptionalSubSample combinedSubSample; - - while (m_Queue.empty() == false && m_Queue.back().s_End <= latencyCutoff) { - if (combinedSubSample) { - *combinedSubSample += m_Queue.back(); - } else { - combinedSubSample = TOptionalSubSample(m_Queue.back()); - } - - m_Queue.pop_back(); - - double count = combinedSubSample->s_Statistic.count(); - double countIncludingNext = - (m_Queue.empty()) ? 
count : count + m_Queue.back().s_Statistic.count(); - double countRatio = sampleCount / count; - double countRatioIncludingNext = sampleCount / countIncludingNext; - - if (countIncludingNext >= sampleCount && - (std::abs(1.0 - countRatio) <= std::abs(1.0 - countRatioIncludingNext))) { - TDouble1Vec sample = combinedSubSample->s_Statistic.value(); - core_t::TTime sampleTime = combinedSubSample->s_Statistic.time(); - double vs = model_t::varianceScale(feature, sampleCount, count); - samples.emplace_back(sampleTime, sample, vs, count); - combinedSubSample = TOptionalSubSample(); - } - } - - if (combinedSubSample) { - m_Queue.push_back(*combinedSubSample); - } - } - - void resetBucket(core_t::TTime bucketStart) { - // The queue is ordered in descending sub-sample start time. - - iterator firstEarlierThanBucket = - std::upper_bound(m_Queue.begin(), m_Queue.end(), bucketStart, timeLater); - - // This is equivalent to lower_bound(., ., bucketStart + m_BucketLength - 1, .); - iterator latestWithinBucket = std::upper_bound( - m_Queue.begin(), m_Queue.end(), bucketStart + m_BucketLength, timeLater); - - m_Queue.erase(latestWithinBucket, firstEarlierThanBucket); - } - - //! Returns the item in the queue at position \p index. - const SSubSample& operator[](std::size_t index) const { - return m_Queue[index]; - } - - //! Returns the size of the queue. - std::size_t size() const { return m_Queue.size(); } - - //! Returns the capacity of the queue. - std::size_t capacity() const { return m_Queue.capacity(); } - - //! Is the queue empty? - bool empty() const { return m_Queue.empty(); } - - core_t::TTime latestEnd() const { - return m_Queue.empty() ? 0 : m_Queue.front().s_End; - } - - //! \name Persistence - //@{ - //! Persist state by passing information to the supplied inserter. 
- void acceptPersistInserter(core::CStatePersistInserter& inserter) const { - for (const_reverse_iterator itr = m_Queue.rbegin(); itr != m_Queue.rend(); ++itr) { - inserter.insertLevel(SUB_SAMPLE_TAG, - std::bind(&SSubSample::acceptPersistInserter, - *itr, std::placeholders::_1)); - } - } - - //! Restore by getting information from the state document traverser. - bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - if (name == SUB_SAMPLE_TAG) { - SSubSample subSample(m_Dimension, 0); - if (traverser.traverseSubLevel( - std::bind(&SSubSample::acceptRestoreTraverser, - &subSample, std::placeholders::_1)) == false) { - LOG_ERROR(<< "Invalid sub-sample in " << traverser.value()); - return false; - } - this->resizeIfFull(); - m_Queue.push_front(subSample); - } - } while (traverser.next()); - - return true; - } - //@} - - //! Returns the checksum of the queue. - uint64_t checksum() const { - return maths::common::CChecksum::calculate(0, m_Queue); - } - - //! Debug the memory used by the queue. - void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { - mem->setName("CSampleQueue", sizeof(*this)); - core::memory_debug::dynamicSize("m_Queue", m_Queue, mem); - } - - //! Get the memory used by the queue. - std::size_t memoryUsage() const { - return sizeof(*this) + core::memory::dynamicSize(m_Queue); - } - - //! Prints the contents of the queue. 
- std::string print() const { - return core::CContainerPrinter::print(m_Queue); - } - -private: - void pushFrontNewSubSample(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) { - this->resizeIfFull(); - SSubSample newSubSample(m_Dimension, time); - newSubSample.s_Statistic.add(measurement, time, count); - m_Queue.push_front(newSubSample); - } - - void pushBackNewSubSample(const TDouble1Vec& measurement, core_t::TTime time, unsigned int count) { - this->resizeIfFull(); - SSubSample newSubSample(m_Dimension, time); - newSubSample.s_Statistic.add(measurement, time, count); - m_Queue.push_back(newSubSample); - } - - void insertNewSubSample(iterator pos, - const TDouble1Vec& measurement, - core_t::TTime time, - unsigned int count) { - this->resizeIfFull(); - SSubSample newSubSample(m_Dimension, time); - newSubSample.s_Statistic.add(measurement, time, count); - m_Queue.insert(pos, newSubSample); - } - - void resizeIfFull() { - if (m_Queue.full()) { - std::size_t currentSize = m_Queue.size(); - std::size_t newSize = static_cast( - static_cast(currentSize) * (1.0 + m_GrowthFactor)); - m_Queue.set_capacity(std::max(newSize, currentSize + 1)); - } - } - - void addAfterLatestStartTime(const TDouble1Vec& measurement, - core_t::TTime time, - unsigned int count, - unsigned int sampleCount) { - if (time >= m_Queue[0].s_End && - this->shouldCreateNewSubSampleAfterLatest(time, sampleCount)) { - this->pushFrontNewSubSample(measurement, time, count); - } else { - m_Queue[0].add(measurement, time, count); - } - } - - bool shouldCreateNewSubSampleAfterLatest(core_t::TTime time, unsigned int sampleCount) { - if (m_Queue[0].s_Statistic.count() >= - static_cast(this->targetSubSampleCount(sampleCount))) { - return true; - } - - // If latency is non-zero, we also want to check whether the new measurement - // is too far from the latest sub-sample or whether they belong in different buckets. 
- if (m_Latency > 0) { - if (!m_Queue[0].isClose(time, this->targetSubSampleSpan()) || - !m_Queue[0].isInSameBucket(time, m_BucketLength)) { - return true; - } - } - return false; - } - - core_t::TTime targetSubSampleSpan() const { - return (m_BucketLength + static_cast(m_SampleCountFactor) - 1) / - static_cast(m_SampleCountFactor); - } - - std::size_t targetSubSampleCount(unsigned int sampleCount) const { - return static_cast(sampleCount) / m_SampleCountFactor; - } - - void addHistorical(const TDouble1Vec& measurement, - core_t::TTime time, - unsigned int count, - unsigned int sampleCount) { - // We have to resize before we do the search of the upper bound. Otherwise, - // a later resize will invalidate the upper bound iterator. - this->resizeIfFull(); - - reverse_iterator upperBound = - std::upper_bound(m_Queue.rbegin(), m_Queue.rend(), time, timeEarlier); - core_t::TTime targetSubSampleSpan = this->targetSubSampleSpan(); - - if (upperBound == m_Queue.rbegin()) { - if ((upperBound->s_Statistic.count() >= - static_cast(this->targetSubSampleCount(sampleCount))) || - !upperBound->isClose(time, targetSubSampleSpan) || - !(*upperBound).isInSameBucket(time, m_BucketLength)) { - this->pushBackNewSubSample(measurement, time, count); - } else { - upperBound->add(measurement, time, count); - } - return; - } - - SSubSample& left = *(upperBound - 1); - SSubSample& right = *upperBound; - if (time <= left.s_End) { - left.add(measurement, time, count); - return; - } - bool sameBucketWithLeft = left.isInSameBucket(time, m_BucketLength); - bool sameBucketWithRight = right.isInSameBucket(time, m_BucketLength); - std::size_t spaceLimit = this->targetSubSampleCount(sampleCount); - bool leftHasSpace = static_cast(left.s_Statistic.count()) < spaceLimit; - bool rightHasSpace = static_cast(right.s_Statistic.count()) < spaceLimit; - core_t::TTime leftDistance = time - left.s_End; - core_t::TTime rightDistance = right.s_Start - time; - SSubSample& candidate = 
maths::common::COrderings::lexicographicalCompare( - -static_cast(sameBucketWithLeft), - -static_cast(leftHasSpace), leftDistance, - -static_cast(sameBucketWithRight), - -static_cast(rightHasSpace), rightDistance) - ? left - : right; - - if (candidate.isInSameBucket(time, m_BucketLength) && - (candidate.isClose(time, targetSubSampleSpan) || - right.s_Start <= left.s_End + targetSubSampleSpan)) { - candidate.add(measurement, time, count); - return; - } - this->insertNewSubSample(upperBound.base(), measurement, time, count); - } - - static bool timeEarlier(core_t::TTime time, const SSubSample& subSample) { - return time < subSample.s_Start; - } - - static bool timeLater(core_t::TTime time, const SSubSample& subSample) { - return time > subSample.s_Start; - } - -private: - std::size_t m_Dimension; - TQueue m_Queue; - std::size_t m_SampleCountFactor; - double m_GrowthFactor; - core_t::TTime m_BucketLength; - core_t::TTime m_Latency; -}; - -template -const std::string CSampleQueue::SSubSample::SAMPLE_START_TAG("a"); -template -const std::string CSampleQueue::SSubSample::SAMPLE_END_TAG("b"); -template -const std::string CSampleQueue::SSubSample::SAMPLE_TAG("c"); -template -const std::string CSampleQueue::SUB_SAMPLE_TAG("a"); -} -} - -#endif // INCLUDED_ml_model_CSampleQueue_h diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index acbcc14c04..43266e851b 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -429,14 +429,6 @@ bool isSumFeature(EFeature feature); MODEL_EXPORT double varianceScale(EFeature feature, double sampleCount, double count); -//! Check if the feature is sampled. -MODEL_EXPORT -bool isSampled(EFeature feature); - -//! Get the minimum useful sample count for a feature. -MODEL_EXPORT -unsigned minimumSampleCount(EFeature feature); - //! Offset count features so that their range starts at zero. 
MODEL_EXPORT double offsetCountToZero(EFeature feature, double count); @@ -614,21 +606,19 @@ std::string print(EFeature feature); //! These enumerate the distinct types of metric statistic //! which we gather. enum EMetricCategory { - E_Mean, - E_Min, - E_Max, - E_Sum, - E_MultivariateMean, - E_MultivariateMin, - E_MultivariateMax, - E_Median, - E_Variance + E_Mean = 0, + E_Min = 1, + E_Max = 2, + E_Sum = 3, + E_MultivariateMean = 4, + E_Median = 7, + E_Variance = 8 // If you add any more enumeration values here then be sure to update the // constant beneath too! }; //! Must correspond to the number of enumeration values of EMetricCategory -const size_t NUM_METRIC_CATEGORIES = 9; +constexpr std::size_t NUM_METRIC_CATEGORIES{9}; //! Get the metric feature data corresponding to \p feature //! if there is one. diff --git a/include/model/SModelParams.h b/include/model/SModelParams.h index eb9827a526..ab492893eb 100644 --- a/include/model/SModelParams.h +++ b/include/model/SModelParams.h @@ -133,20 +133,8 @@ struct MODEL_EXPORT SModelParams { //! The frequency at which to exclude an attribute. double s_ExcludeAttributeFrequency; - //! The maximum number of times we'll update a metric model in a bucket. - double s_MaximumUpdatesPerBucket; - //! The number of buckets that are within the latency window. std::size_t s_LatencyBuckets; - - //! The factor to divide sample count in order to determine size of sub-samples. - std::size_t s_SampleCountFactor; - - //! The factor that determines how much the sample queue grows. - double s_SampleQueueGrowthFactor; - - //! The time window during which samples are accepted. - core_t::TTime s_SamplingAgeCutoff; //@} //! 
\name Model Life-Cycle Management diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 82757df65e..635136a967 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -681,7 +681,7 @@ void CAnomalyJob::outputResults(core_t::TTime bucketStartTime) { continue; } detector->buildResults(bucketStartTime, bucketStartTime + bucketLength, results); - detector->releaseMemory(bucketStartTime - m_ModelConfig.samplingAgeCutoff()); + detector->releaseMemory(); this->generateModelPlot(bucketStartTime, bucketStartTime + bucketLength, *detector, modelPlotData); diff --git a/lib/model/CAnomalyDetector.cc b/lib/model/CAnomalyDetector.cc index f8c36d0057..102dc670f8 100644 --- a/lib/model/CAnomalyDetector.cc +++ b/lib/model/CAnomalyDetector.cc @@ -615,8 +615,8 @@ void CAnomalyDetector::resetBucket(core_t::TTime bucketStart) { m_DataGatherer->resetBucket(bucketStart); } -void CAnomalyDetector::releaseMemory(core_t::TTime samplingCutoffTime) { - m_DataGatherer->releaseMemory(samplingCutoffTime); +void CAnomalyDetector::releaseMemory() { + m_DataGatherer->releaseMemory(); } void CAnomalyDetector::showMemoryUsage(std::ostream& stream) const { diff --git a/lib/model/CAnomalyDetectorModelConfig.cc b/lib/model/CAnomalyDetectorModelConfig.cc index 551a7e4320..0e35cb009e 100644 --- a/lib/model/CAnomalyDetectorModelConfig.cc +++ b/lib/model/CAnomalyDetectorModelConfig.cc @@ -57,9 +57,6 @@ core_t::TTime validateBucketLength(core_t::TTime length) { const std::string CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER(","); const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_BUCKET_LENGTH(300); const std::size_t CAnomalyDetectorModelConfig::DEFAULT_LATENCY_BUCKETS(0); -const std::size_t CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY(1); -const std::size_t CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY(10); -const double CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR(0.1); const 
core_t::TTime CAnomalyDetectorModelConfig::STANDARD_BUCKET_LENGTH(1800); const double CAnomalyDetectorModelConfig::DEFAULT_DECAY_RATE(0.0005); const double CAnomalyDetectorModelConfig::DEFAULT_INITIAL_DECAY_RATE_MULTIPLIER(4.0); @@ -68,7 +65,7 @@ const double CAnomalyDetectorModelConfig::DEFAULT_INDIVIDUAL_MINIMUM_MODE_FRACTI const double CAnomalyDetectorModelConfig::DEFAULT_POPULATION_MINIMUM_MODE_FRACTION(0.05); const double CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_CLUSTER_SPLIT_COUNT(12.0); const double CAnomalyDetectorModelConfig::DEFAULT_CATEGORY_DELETE_FRACTION(0.8); -const std::size_t CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE(36u); +const std::size_t CAnomalyDetectorModelConfig::DEFAULT_COMPONENT_SIZE(36); const core_t::TTime CAnomalyDetectorModelConfig::DEFAULT_MINIMUM_TIME_TO_DETECT_CHANGE(core::constants::DAY); const core_t::TTime @@ -732,19 +729,13 @@ void CAnomalyDetectorModelConfig::modelPruneWindow(core_t::TTime modelPruneWindo m_ModelPruneWindow = modelPruneWindow; } -core_t::TTime CAnomalyDetectorModelConfig::samplingAgeCutoff() const { - return m_Factories.begin()->second->modelParams().s_SamplingAgeCutoff; -} - namespace { const std::string ONLINE_LEARN_RATE_PROPERTY("learnrate"); const std::string DECAY_RATE_PROPERTY("decayrate"); const std::string INITIAL_DECAY_RATE_MULTIPLIER_PROPERTY("initialdecayratemultiplier"); -const std::string MAXIMUM_UPDATES_PER_BUCKET_PROPERTY("maximumupdatesperbucket"); const std::string INDIVIDUAL_MODE_FRACTION_PROPERTY("individualmodefraction"); const std::string POPULATION_MODE_FRACTION_PROPERTY("populationmodefraction"); const std::string COMPONENT_SIZE_PROPERTY("componentsize"); -const std::string SAMPLE_COUNT_FACTOR_PROPERTY("samplecountfactor"); const std::string PRUNE_WINDOW_SCALE_MINIMUM("prunewindowscaleminimum"); const std::string PRUNE_WINDOW_SCALE_MAXIMUM("prunewindowscalemaximum"); const std::string AGGREGATION_STYLE_PARAMS("aggregationstyleparams"); @@ -800,18 +791,6 @@ bool 
CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre for (auto& factory : m_Factories) { factory.second->initialDecayRateMultiplier(multiplier); } - } else if (propName == MAXIMUM_UPDATES_PER_BUCKET_PROPERTY) { - double maximumUpdatesPerBucket; - if (core::CStringUtils::stringToType(propValue, maximumUpdatesPerBucket) == false || - maximumUpdatesPerBucket < 0.0) { - LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); - result = false; - continue; - } - - for (auto& factory : m_Factories) { - factory.second->maximumUpdatesPerBucket(maximumUpdatesPerBucket); - } } else if (propName == INDIVIDUAL_MODE_FRACTION_PROPERTY) { double fraction; if (core::CStringUtils::stringToType(propValue, fraction) == false || @@ -853,16 +832,6 @@ bool CAnomalyDetectorModelConfig::processStanza(const boost::property_tree::ptre for (auto& factory : m_Factories) { factory.second->componentSize(componentSize); } - } else if (propName == SAMPLE_COUNT_FACTOR_PROPERTY) { - int factor; - if (core::CStringUtils::stringToType(propValue, factor) == false || factor < 0) { - LOG_ERROR(<< "Invalid value for property " << propName << " : " << propValue); - result = false; - continue; - } - for (auto& factory : m_Factories) { - factory.second->sampleCountFactor(factor); - } } else if (propName == PRUNE_WINDOW_SCALE_MINIMUM) { double factor; if (core::CStringUtils::stringToType(propValue, factor) == false) { diff --git a/lib/model/CBucketGatherer.cc b/lib/model/CBucketGatherer.cc index a4f85e23c4..8ca34461eb 100644 --- a/lib/model/CBucketGatherer.cc +++ b/lib/model/CBucketGatherer.cc @@ -337,7 +337,6 @@ void CBucketGatherer::sampleNow(core_t::TTime sampleBucketStart) { sampleBucketStart + (m_DataGatherer.params().s_LatencyBuckets + 1) * this->bucketLength() - 1; this->timeNow(timeNow); - this->sample(sampleBucketStart); } void CBucketGatherer::skipSampleNow(core_t::TTime sampleBucketStart) { diff --git a/lib/model/CCountingModelFactory.cc 
b/lib/model/CCountingModelFactory.cc index ac11f9dcbf..8b9b997976 100644 --- a/lib/model/CCountingModelFactory.cc +++ b/lib/model/CCountingModelFactory.cc @@ -64,7 +64,7 @@ CCountingModelFactory::makeDataGatherer(const SGathererInitializationData& initD return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(), m_SummaryCountFieldName, initData.s_PartitionFieldValue, m_PersonFieldName, EMPTY_STRING, EMPTY_STRING, {}, - this->searchKey(), m_Features, initData.s_StartTime, 0); + this->searchKey(), m_Features, initData.s_StartTime); } CDataGatherer* diff --git a/lib/model/CDataGatherer.cc b/lib/model/CDataGatherer.cc index d224feb9aa..b731054480 100644 --- a/lib/model/CDataGatherer.cc +++ b/lib/model/CDataGatherer.cc @@ -24,7 +24,6 @@ #include #include -#include #include #include @@ -153,8 +152,7 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, const TStrVec& influenceFieldNames, const CSearchKey& key, const TFeatureVec& features, - core_t::TTime startTime, - int sampleCountOverride) + core_t::TTime startTime) : m_GathererType(gathererType), m_Features(detail::sanitize(features, gathererType)), m_SummaryMode(summaryMode), m_Params(modelParams), m_SearchKey(key), @@ -171,8 +169,8 @@ CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, std::sort(m_Features.begin(), m_Features.end()); this->createBucketGatherer(gathererType, summaryCountFieldName, - personFieldName, attributeFieldName, valueFieldName, - influenceFieldNames, startTime, sampleCountOverride); + personFieldName, attributeFieldName, + valueFieldName, influenceFieldNames, startTime); } CDataGatherer::CDataGatherer(model_t::EAnalysisCategory gathererType, @@ -217,9 +215,6 @@ CDataGatherer::CDataGatherer(bool isForPersistence, const CDataGatherer& other) LOG_ABORT(<< "This constructor only creates clones for persistence"); } m_BucketGatherer.reset(other.m_BucketGatherer->cloneForPersistence()); - if (other.m_SampleCounts) { - 
m_SampleCounts.reset(other.m_SampleCounts->cloneForPersistence()); - } } CDataGatherer::~CDataGatherer() { @@ -378,10 +373,6 @@ void CDataGatherer::recyclePeople(const TSizeVec& peopleToRemove) { m_BucketGatherer->recyclePeople(peopleToRemove); - if (!this->isPopulation() && m_SampleCounts) { - m_SampleCounts->recycle(peopleToRemove); - } - m_PeopleRegistry.recycleNames(peopleToRemove, DEFAULT_PERSON_NAME); core::CProgramCounters::counter(counter_t::E_TSADNumberPrunedItems) += peopleToRemove.size(); @@ -392,10 +383,6 @@ void CDataGatherer::removePeople(std::size_t lowestPersonToRemove) { return; } - if (!this->isPopulation() && m_SampleCounts) { - m_SampleCounts->remove(lowestPersonToRemove); - } - m_BucketGatherer->removePeople(lowestPersonToRemove); m_PeopleRegistry.removeNames(lowestPersonToRemove); @@ -442,10 +429,6 @@ void CDataGatherer::recycleAttributes(const TSizeVec& attributesToRemove) { return; } - if (this->isPopulation() && m_SampleCounts) { - m_SampleCounts->recycle(attributesToRemove); - } - m_BucketGatherer->recycleAttributes(attributesToRemove); m_AttributesRegistry.recycleNames(attributesToRemove, DEFAULT_ATTRIBUTE_NAME); @@ -458,10 +441,6 @@ void CDataGatherer::removeAttributes(std::size_t lowestAttributeToRemove) { return; } - if (this->isPopulation() && m_SampleCounts) { - m_SampleCounts->remove(lowestAttributeToRemove); - } - m_BucketGatherer->removeAttributes(lowestAttributeToRemove); m_AttributesRegistry.removeNames(lowestAttributeToRemove); @@ -483,34 +462,6 @@ std::size_t CDataGatherer::addAttribute(const std::string& attribute, resourceMonitor, addedAttribute); } -double CDataGatherer::sampleCount(std::size_t id) const { - if (m_SampleCounts) { - return static_cast(m_SampleCounts->count(id)); - } else { - LOG_ERROR(<< "Sample count for non-metric gatherer"); - return 0.0; - } -} - -double CDataGatherer::effectiveSampleCount(std::size_t id) const { - if (m_SampleCounts) { - return m_SampleCounts->effectiveSampleCount(id); - } else { - 
LOG_ERROR(<< "Effective sample count for non-metric gatherer"); - return 0.0; - } -} - -void CDataGatherer::resetSampleCount(std::size_t id) { - if (m_SampleCounts) { - m_SampleCounts->resetSampleCount(*this, id); - } -} - -const CDataGatherer::TSampleCountsPtr& CDataGatherer::sampleCounts() const { - return m_SampleCounts; -} - core_t::TTime CDataGatherer::currentBucketStartTime() const { return m_BucketGatherer->currentBucketStartTime(); } @@ -549,9 +500,6 @@ std::uint64_t CDataGatherer::checksum() const { result = maths::common::CChecksum::calculate(result, m_AttributesRegistry); result = maths::common::CChecksum::calculate(result, m_SummaryMode); result = maths::common::CChecksum::calculate(result, m_Features); - if (m_SampleCounts) { - result = maths::common::CChecksum::calculate(result, m_SampleCounts->checksum(*this)); - } result = maths::common::CChecksum::calculate(result, m_BucketGatherer); LOG_TRACE(<< "checksum = " << result); @@ -564,7 +512,6 @@ void CDataGatherer::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& core::memory_debug::dynamicSize("m_Features", m_Features, mem); core::memory_debug::dynamicSize("m_PeopleRegistry", m_PeopleRegistry, mem); core::memory_debug::dynamicSize("m_AttributesRegistry", m_AttributesRegistry, mem); - core::memory_debug::dynamicSize("m_SampleCounts", m_SampleCounts, mem); core::memory_debug::dynamicSize("m_BucketGatherer", m_BucketGatherer, mem); } @@ -573,7 +520,6 @@ std::size_t CDataGatherer::memoryUsage() const { mem += core::memory::dynamicSize(m_PartitionFieldValue); mem += core::memory::dynamicSize(m_PeopleRegistry); mem += core::memory::dynamicSize(m_AttributesRegistry); - mem += core::memory::dynamicSize(m_SampleCounts); mem += core::memory::dynamicSize(m_BucketGatherer); return mem; } @@ -585,10 +531,7 @@ bool CDataGatherer::useNull() const { void CDataGatherer::clear() { m_PeopleRegistry.clear(); m_AttributesRegistry.clear(); - if (m_SampleCounts) { - m_SampleCounts->clear(); - } - if 
(m_BucketGatherer) { + if (m_BucketGatherer != nullptr) { m_BucketGatherer->clear(); } } @@ -597,9 +540,9 @@ bool CDataGatherer::resetBucket(core_t::TTime bucketStart) { return m_BucketGatherer->resetBucket(bucketStart); } -void CDataGatherer::releaseMemory(core_t::TTime samplingCutoffTime) { +void CDataGatherer::releaseMemory() { if (this->isPopulation()) { - m_BucketGatherer->releaseMemory(samplingCutoffTime); + m_BucketGatherer->releaseMemory(); } } @@ -617,13 +560,6 @@ void CDataGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) inserter.insertLevel(ATTRIBUTES_REGISTRY_TAG, std::bind(&CDynamicStringIdRegistry::acceptPersistInserter, m_AttributesRegistry, std::placeholders::_1)); - - if (m_SampleCounts) { - inserter.insertLevel(SAMPLE_COUNTS_TAG, - std::bind(&CSampleCounts::acceptPersistInserter, - m_SampleCounts.get(), std::placeholders::_1)); - } - inserter.insertLevel(BUCKET_GATHERER_TAG, std::bind(&CDataGatherer::persistBucketGatherers, this, std::placeholders::_1)); } @@ -767,11 +703,6 @@ bool CDataGatherer::acceptRestoreTraverser(const std::string& summaryCountFieldN traverser.traverseSubLevel( std::bind(&CDynamicStringIdRegistry::acceptRestoreTraverser, &m_AttributesRegistry, std::placeholders::_1))) - RESTORE_SETUP_TEARDOWN( - SAMPLE_COUNTS_TAG, m_SampleCounts = std::make_unique(0), - traverser.traverseSubLevel(std::bind(&CSampleCounts::acceptRestoreTraverser, - m_SampleCounts.get(), std::placeholders::_1)), - /**/) RESTORE(BUCKET_GATHERER_TAG, traverser.traverseSubLevel(std::bind( &CDataGatherer::restoreBucketGatherer, this, @@ -830,8 +761,7 @@ void CDataGatherer::createBucketGatherer(model_t::EAnalysisCategory gathererType const std::string& attributeFieldName, const std::string& valueFieldName, const TStrVec& influenceFieldNames, - core_t::TTime startTime, - unsigned int sampleCountOverride) { + core_t::TTime startTime) { switch (gathererType) { case model_t::E_EventRate: case model_t::E_PopulationEventRate: @@ -841,7 +771,6 @@ 
void CDataGatherer::createBucketGatherer(model_t::EAnalysisCategory gathererType break; case model_t::E_Metric: case model_t::E_PopulationMetric: - m_SampleCounts = std::make_unique(sampleCountOverride); m_BucketGatherer = std::make_unique( *this, summaryCountFieldName, personFieldName, attributeFieldName, valueFieldName, influenceFieldNames, startTime); diff --git a/lib/model/CEventRateBucketGatherer.cc b/lib/model/CEventRateBucketGatherer.cc index c30dd1585a..5ba9078088 100644 --- a/lib/model/CEventRateBucketGatherer.cc +++ b/lib/model/CEventRateBucketGatherer.cc @@ -1038,17 +1038,11 @@ bool CEventRateBucketGatherer::resetBucket(core_t::TTime bucketStart) { return this->CBucketGatherer::resetBucket(bucketStart); } -void CEventRateBucketGatherer::releaseMemory(core_t::TTime /*samplingCutoffTime*/) { +void CEventRateBucketGatherer::releaseMemory() { // Nothing to release } -void CEventRateBucketGatherer::sample(core_t::TTime /*time*/) { - // Nothing to sample -} - -void CEventRateBucketGatherer::featureData(core_t::TTime time, - core_t::TTime /*bucketLength*/, - TFeatureAnyPrVec& result) const { +void CEventRateBucketGatherer::featureData(core_t::TTime time, TFeatureAnyPrVec& result) const { result.clear(); if (!this->dataAvailable(time) || diff --git a/lib/model/CEventRateModel.cc b/lib/model/CEventRateModel.cc index faa16f0e21..901bea80a2 100644 --- a/lib/model/CEventRateModel.cc +++ b/lib/model/CEventRateModel.cc @@ -233,7 +233,7 @@ void CEventRateModel::sample(core_t::TTime startTime, LOG_TRACE(<< "Sampling [" << time << "," << time + bucketLength << ")"); gatherer.sampleNow(time); - gatherer.featureData(time, bucketLength, m_CurrentBucketStats.s_FeatureData); + gatherer.featureData(time, m_CurrentBucketStats.s_FeatureData); const CIndividualModel::TTimeVec& preSampleLastBucketTimes = this->lastBucketTimes(); CIndividualModel::TSizeTimeUMap lastBucketTimesMap; @@ -368,9 +368,7 @@ void CEventRateModel::sample(core_t::TTime startTime, }) 
.memoryCircuitBreaker(circuitBreaker); - if (model->addSamples(params, values) == maths::common::CModel::E_Reset) { - gatherer.resetSampleCount(pid); - } + model->addSamples(params, values); } } @@ -609,7 +607,7 @@ void CEventRateModel::clearPrunedResources(const TSizeVec& people, const TSizeVe // Stop collecting for these people and add them to the free list. gatherer.recyclePeople(people); if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { - gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), + gatherer.featureData(m_CurrentBucketStats.s_StartTime, m_CurrentBucketStats.s_FeatureData); } diff --git a/lib/model/CEventRateModelFactory.cc b/lib/model/CEventRateModelFactory.cc index 2ddb927d4e..93049d5b39 100644 --- a/lib/model/CEventRateModelFactory.cc +++ b/lib/model/CEventRateModelFactory.cc @@ -92,12 +92,11 @@ CEventRateModelFactory::makeModel(const SModelInitializationData& initData, CDataGatherer* CEventRateModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, - this->modelParams(), m_SummaryCountFieldName, - initData.s_PartitionFieldValue, m_PersonFieldName, - EMPTY_STRING /*AttributeFieldName*/, m_ValueFieldName, - m_InfluenceFieldNames, this->searchKey(), m_Features, - initData.s_StartTime, initData.s_SampleOverrideCount); + return new CDataGatherer(model_t::E_EventRate, m_SummaryMode, this->modelParams(), + m_SummaryCountFieldName, initData.s_PartitionFieldValue, + m_PersonFieldName, EMPTY_STRING /*AttributeFieldName*/, + m_ValueFieldName, m_InfluenceFieldNames, + this->searchKey(), m_Features, initData.s_StartTime); } CDataGatherer* diff --git a/lib/model/CEventRatePopulationModel.cc b/lib/model/CEventRatePopulationModel.cc index c7250262f3..e0d6db3694 100644 --- a/lib/model/CEventRatePopulationModel.cc +++ b/lib/model/CEventRatePopulationModel.cc @@ -334,7 +334,7 @@ void 
CEventRatePopulationModel::sampleBucketStatistics(core_t::TTime startTime, this->applyFilter(model_t::E_XF_Over, false, this->personFilter(), personCounts); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); for (auto& featureData_ : featureData) { model_t::EFeature feature = featureData_.first; TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature]; @@ -363,7 +363,7 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, gatherer.sampleNow(time); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); this->CPopulationModel::sample(time, time + bucketLength, resourceMonitor); const TTimeVec& preSampleAttributeLastBucketTimes = this->attributeLastBucketTimes(); @@ -528,10 +528,7 @@ void CEventRatePopulationModel::sample(core_t::TTime startTime, LOG_TRACE(<< "Model unexpectedly null"); continue; } - if (model->addSamples(params, attribute.second.s_Values) == - maths::common::CModel::E_Reset) { - gatherer.resetSampleCount(cid); - } + model->addSamples(params, attribute.second.s_Values); } } @@ -569,8 +566,7 @@ void CEventRatePopulationModel::prune(std::size_t maximumAge) { if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(m_CurrentBucketStats.s_StartTime, - gatherer.bucketLength(), featureData); + gatherer.featureData(m_CurrentBucketStats.s_StartTime, featureData); for (auto& feature : featureData) { m_CurrentBucketStats.s_FeatureData[feature.first].swap(feature.second); } diff --git a/lib/model/CEventRatePopulationModelFactory.cc b/lib/model/CEventRatePopulationModelFactory.cc index 39042ac74f..e8b19521a6 100644 --- a/lib/model/CEventRatePopulationModelFactory.cc +++ b/lib/model/CEventRatePopulationModelFactory.cc @@ -97,7 +97,7 @@ 
CEventRatePopulationModelFactory::makeDataGatherer(const SGathererInitialization this->modelParams(), m_SummaryCountFieldName, initData.s_PartitionFieldValue, m_PersonFieldName, m_AttributeFieldName, m_ValueFieldName, m_InfluenceFieldNames, - this->searchKey(), m_Features, initData.s_StartTime, 0); + this->searchKey(), m_Features, initData.s_StartTime); } CDataGatherer* diff --git a/lib/model/CGathererTools.cc b/lib/model/CGathererTools.cc deleted file mode 100644 index 378e0ddd2a..0000000000 --- a/lib/model/CGathererTools.cc +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -namespace ml { -namespace model { - -namespace { - -const std::string CLASSIFIER_TAG("a"); -const std::string LAST_TIME_TAG("b"); -const std::string BUCKET_SUM_QUEUE_TAG("c"); -const std::string INFLUENCER_BUCKET_SUM_QUEUE_TAG("e"); - -// Nested tags -const std::string SUM_SAMPLE_TAG("a"); -const std::string SUM_MAP_KEY_TAG("b"); -const std::string SUM_MAP_VALUE_TAG("c"); - -//! \brief Manages persistence of bucket sums. 
-struct SSumSerializer { - using TSampleVec = std::vector; - - void operator()(const TSampleVec& sample, core::CStatePersistInserter& inserter) const { - inserter.insertValue(SUM_SAMPLE_TAG, core::CPersistUtils::toString( - sample, CSample::SToString())); - } - bool operator()(TSampleVec& sample, core::CStateRestoreTraverser& traverser) const { - if (traverser.name() != SUM_SAMPLE_TAG || - core::CPersistUtils::fromString( - traverser.value(), CSample::SFromString(), sample) == false) { - LOG_ERROR(<< "Invalid sample in: " << traverser.value()); - return false; - } - return true; - } -}; - -//! \brief Manages persistence of influence bucket sums. -struct SInfluencerSumSerializer { - using TOptionalStr = std::optional; - using TOptionalStrDoubleUMap = boost::unordered_map; - using TOptionalStrDoubleUMapCItr = TOptionalStrDoubleUMap::const_iterator; - using TStrCRef = std::reference_wrapper; - using TStrCRefDoublePr = std::pair; - using TStrCRefDoublePrVec = std::vector; - - void operator()(const TOptionalStrDoubleUMap& map, - core::CStatePersistInserter& inserter) const { - TStrCRefDoublePrVec ordered; - ordered.reserve(map.size()); - for (TOptionalStrDoubleUMapCItr i = map.begin(); i != map.end(); ++i) { - ordered.emplace_back(TStrCRef(*i->first), i->second); - } - std::sort(ordered.begin(), ordered.end(), maths::common::COrderings::SFirstLess()); - for (std::size_t i = 0; i < ordered.size(); ++i) { - inserter.insertValue(SUM_MAP_KEY_TAG, ordered[i].first); - inserter.insertValue(SUM_MAP_VALUE_TAG, ordered[i].second, - core::CIEEE754::E_SinglePrecision); - } - } - - bool operator()(TOptionalStrDoubleUMap& map, core::CStateRestoreTraverser& traverser) const { - std::string key; - do { - const std::string& name = traverser.name(); - if (name == SUM_MAP_KEY_TAG) { - key = traverser.value(); - } else if (name == SUM_MAP_VALUE_TAG) { - if (core::CStringUtils::stringToType(traverser.value(), map[key]) == false) { - LOG_ERROR(<< "Invalid sum in " << traverser.value()); - 
return false; - } - } - } while (traverser.next()); - return true; - } -}; - -} // unnamed:: - -CGathererTools::CArrivalTimeGatherer::CArrivalTimeGatherer() - : m_LastTime(FIRST_TIME) { -} - -CGathererTools::TOptionalDouble CGathererTools::CArrivalTimeGatherer::featureData() const { - return maths::common::CBasicStatistics::count(m_Value) > 0.0 - ? TOptionalDouble(maths::common::CBasicStatistics::mean(m_Value)) - : TOptionalDouble(); -} - -void CGathererTools::CArrivalTimeGatherer::startNewBucket() { - m_Value = TAccumulator(); -} - -void CGathererTools::CArrivalTimeGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - // Because we always serialize immediately after processing a bucket - // we will have already used the bucket value and samples so these - // don't need to be serialized. - inserter.insertValue(LAST_TIME_TAG, m_LastTime); -} - -bool CGathererTools::CArrivalTimeGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - if (name == LAST_TIME_TAG) { - if (core::CStringUtils::stringToType(traverser.value(), m_LastTime) == false) { - LOG_ERROR(<< "Invalid last time in " << traverser.value()); - continue; - } - } - } while (traverser.next()); - - return true; -} - -std::uint64_t CGathererTools::CArrivalTimeGatherer::checksum() const { - return maths::common::CChecksum::calculate(static_cast(m_LastTime), m_Value); -} - -std::string CGathererTools::CArrivalTimeGatherer::print() const { - std::ostringstream o; - if (maths::common::CBasicStatistics::count(m_Value) > 0.0) { - o << maths::common::CBasicStatistics::mean(m_Value); - } else { - o << "-"; - } - o << " (" << m_LastTime << ")"; - return o.str(); -} - -const core_t::TTime CGathererTools::CArrivalTimeGatherer::FIRST_TIME( - std::numeric_limits::min()); - -CGathererTools::CSumGatherer::CSumGatherer(const SModelParams& params, - std::size_t /*dimension*/, - core_t::TTime startTime, - core_t::TTime 
bucketLength, - TStrVecCItr beginInfluencers, - TStrVecCItr endInfluencers) - : m_Classifier(), m_BucketSums(params.s_LatencyBuckets, bucketLength, startTime), - m_InfluencerBucketSums(std::distance(beginInfluencers, endInfluencers), - TOptionalStrDoubleUMapQueue(params.s_LatencyBuckets + 3, - bucketLength, - startTime, - TOptionalStrDoubleUMap(1))) { -} - -std::size_t CGathererTools::CSumGatherer::dimension() const { - return 1; -} - -SMetricFeatureData -CGathererTools::CSumGatherer::featureData(core_t::TTime time, - core_t::TTime /*bucketLength*/, - const TSampleVec& emptySample) const { - using TStrCRef = std::reference_wrapper; - using TDouble1VecDoublePr = std::pair; - using TStrCRefDouble1VecDoublePrPr = std::pair; - using TStrCRefDouble1VecDoublePrPrVec = std::vector; - using TStrCRefDouble1VecDoublePrPrVecVec = std::vector; - - const TSampleVec* sum = &m_BucketSums.get(time); - if (sum->empty()) { - sum = &emptySample; - } - TStrCRefDouble1VecDoublePrPrVecVec influenceValues(m_InfluencerBucketSums.size()); - for (std::size_t i = 0; i < m_InfluencerBucketSums.size(); ++i) { - const TOptionalStrDoubleUMap& influencerStats = - m_InfluencerBucketSums[i].get(time); - influenceValues[i].reserve(influencerStats.size()); - for (const auto& stat : influencerStats) { - influenceValues[i].emplace_back( - TStrCRef(*stat.first), TDouble1VecDoublePr(TDouble1Vec{stat.second}, 1.0)); - } - } - - if (!sum->empty()) { - return {(*sum)[0].time(), - (*sum)[0].value(), - (*sum)[0].varianceScale(), - (*sum)[0].count(), - influenceValues, - m_Classifier.isInteger() && - maths::common::CIntegerTools::isInteger(((*sum)[0].value())[0]), - m_Classifier.isNonNegative(), - *sum}; - } - return {m_Classifier.isInteger(), m_Classifier.isNonNegative(), *sum}; -} - -bool CGathererTools::CSumGatherer::sample(core_t::TTime /*time*/, unsigned int /*sampleCount*/) { - return false; -} - -void CGathererTools::CSumGatherer::startNewBucket(core_t::TTime time) { - TSampleVec& sum = 
m_BucketSums.earliest(); - if (!sum.empty()) { - m_Classifier.add(model_t::E_IndividualSumByBucketAndPerson, sum[0].value(), 1); - } - m_BucketSums.push(TSampleVec(), time); - for (std::size_t i = 0; i < m_InfluencerBucketSums.size(); ++i) { - m_InfluencerBucketSums[i].push(TOptionalStrDoubleUMap(1), time); - } -} - -void CGathererTools::CSumGatherer::resetBucket(core_t::TTime bucketStart) { - m_BucketSums.get(bucketStart).clear(); - for (std::size_t i = 0; i < m_InfluencerBucketSums.size(); ++i) { - m_InfluencerBucketSums[i].get(bucketStart).clear(); - } -} - -void CGathererTools::CSumGatherer::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - inserter.insertLevel(CLASSIFIER_TAG, - std::bind(&CDataClassifier::acceptPersistInserter, - &m_Classifier, std::placeholders::_1)); - if (m_BucketSums.size() > 0) { - inserter.insertLevel( - BUCKET_SUM_QUEUE_TAG, - std::bind(TSampleVecQueue::CSerializer(), - std::cref(m_BucketSums), std::placeholders::_1)); - } - for (std::size_t i = 0; i < m_InfluencerBucketSums.size(); ++i) { - inserter.insertLevel( - INFLUENCER_BUCKET_SUM_QUEUE_TAG, - std::bind( - TOptionalStrDoubleUMapQueue::CSerializer(), - std::cref(m_InfluencerBucketSums[i]), std::placeholders::_1)); - } -} - -bool CGathererTools::CSumGatherer::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - std::size_t i = 0; - do { - const std::string& name = traverser.name(); - if (name == CLASSIFIER_TAG) { - if (traverser.traverseSubLevel( - std::bind(&CDataClassifier::acceptRestoreTraverser, - &m_Classifier, std::placeholders::_1)) == false) { - LOG_ERROR(<< "Invalid classifier in " << traverser.value()); - continue; - } - } else if (name == BUCKET_SUM_QUEUE_TAG) { - if (traverser.traverseSubLevel(std::bind( - TSampleVecQueue::CSerializer(), - std::ref(m_BucketSums), std::placeholders::_1)) == false) { - LOG_ERROR(<< "Invalid bucket queue in " << traverser.value()); - return false; - } - } else if (name == INFLUENCER_BUCKET_SUM_QUEUE_TAG) 
{ - if (i < m_InfluencerBucketSums.size() && - traverser.traverseSubLevel(std::bind( - TOptionalStrDoubleUMapQueue::CSerializer( - TOptionalStrDoubleUMap(1)), - std::ref(m_InfluencerBucketSums[i++]), std::placeholders::_1)) == false) { - LOG_ERROR(<< "Invalid bucket queue in " << traverser.value()); - return false; - } - } - } while (traverser.next()); - - return true; -} - -std::uint64_t CGathererTools::CSumGatherer::checksum() const { - std::uint64_t seed = static_cast(m_Classifier.isInteger()); - seed = maths::common::CChecksum::calculate(seed, m_Classifier.isNonNegative()); - seed = maths::common::CChecksum::calculate(seed, m_BucketSums); - return maths::common::CChecksum::calculate(seed, m_InfluencerBucketSums); -} - -void CGathererTools::CSumGatherer::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { - mem->setName("CSumGatherer"); - core::memory_debug::dynamicSize("m_BucketSums", m_BucketSums, mem); - core::memory_debug::dynamicSize("m_InfluencerBucketSums", m_InfluencerBucketSums, mem); -} - -std::size_t CGathererTools::CSumGatherer::memoryUsage() const { - return core::memory::dynamicSize(m_BucketSums) + - core::memory::dynamicSize(m_InfluencerBucketSums); -} - -std::string CGathererTools::CSumGatherer::print() const { - std::ostringstream result; - result << m_Classifier.isInteger() << ' ' << m_BucketSums.print() << ' ' - << core::CContainerPrinter::print(m_InfluencerBucketSums); - return result.str(); -} - -bool CGathererTools::CSumGatherer::isRedundant(core_t::TTime /*samplingCutoffTime*/) const { - for (const auto& bucket : m_BucketSums) { - if (bucket.empty() == false) { - return false; - } - } - return true; -} -} -} diff --git a/lib/model/CMakeLists.txt b/lib/model/CMakeLists.txt index c53eec9fb0..f1f3d22a22 100644 --- a/lib/model/CMakeLists.txt +++ b/lib/model/CMakeLists.txt @@ -43,7 +43,6 @@ ml_add_library(MlModel SHARED CFeatureData.cc CForecastDataSink.cc CForecastModelPersist.cc - CGathererTools.cc CHierarchicalResults.cc 
CHierarchicalResultsAggregator.cc CHierarchicalResultsNormalizer.cc @@ -72,7 +71,6 @@ ml_add_library(MlModel SHARED CRuleCondition.cc CRuleScope.cc CSample.cc - CSampleCounts.cc CSearchKey.cc CSimpleCountDetector.cc CTokenListCategory.cc diff --git a/lib/model/CMetricBucketGatherer.cc b/lib/model/CMetricBucketGatherer.cc index 80951cb98d..76c65a96a8 100644 --- a/lib/model/CMetricBucketGatherer.cc +++ b/lib/model/CMetricBucketGatherer.cc @@ -21,10 +21,8 @@ #include #include -#include +#include #include -#include -#include #include #include @@ -50,33 +48,23 @@ using TStrCRefStrCRefPr = std::pair; using TStrCRefStrCRefPrUInt64Map = std::map; using TSampleVec = std::vector; -using TSizeMeanGathererUMap = boost::unordered_map; +using TSizeSumGathererUMap = boost::unordered_map; +using TSizeSizeSumGathererUMapUMap = boost::unordered_map; +using TSizeMeanGathererUMap = boost::unordered_map; using TSizeSizeMeanGathererUMapUMap = boost::unordered_map; -using TSizeMedianGathererUMap = - boost::unordered_map; +using TSizeMedianGathererUMap = boost::unordered_map; using TSizeSizeMedianGathererUMapUMap = boost::unordered_map; -using TSizeMinGathererUMap = boost::unordered_map; +using TSizeMinGathererUMap = boost::unordered_map; using TSizeSizeMinGathererUMapUMap = boost::unordered_map; -using TSizeMaxGathererUMap = boost::unordered_map; +using TSizeMaxGathererUMap = boost::unordered_map; using TSizeSizeMaxGathererUMapUMap = boost::unordered_map; -using TSizeVarianceGathererUMap = - boost::unordered_map; +using TSizeVarianceGathererUMap = boost::unordered_map; using TSizeSizeVarianceGathererUMapUMap = boost::unordered_map; -using TSizeSumGathererUMap = boost::unordered_map; -using TSizeSizeSumGathererUMapUMap = boost::unordered_map; using TSizeMultivariateMeanGathererUMap = - boost::unordered_map; + boost::unordered_map; using TSizeSizeMultivariateMeanGathererUMapUMap = boost::unordered_map; -using TSizeMultivariateMinGathererUMap = - boost::unordered_map; -using 
TSizeSizeMultivariateMinGathererUMapUMap = - boost::unordered_map; -using TSizeMultivariateMaxGathererUMap = - boost::unordered_map; -using TSizeSizeMultivariateMaxGathererUMapUMap = - boost::unordered_map; using TSizeFeatureDataPr = std::pair; using TSizeFeatureDataPrVec = std::vector; using TSizeSizePrFeatureDataPr = std::pair; @@ -103,8 +91,6 @@ const std::string MIN_TAG("f"); const std::string MAX_TAG("g"); const std::string SUM_TAG("h"); const std::string MULTIVARIATE_MEAN_TAG("i"); -const std::string MULTIVARIATE_MIN_TAG("j"); -const std::string MULTIVARIATE_MAX_TAG("k"); const std::string MEDIAN_TAG("l"); const std::string VARIANCE_TAG("m"); const std::string EMPTY_STRING; @@ -155,14 +141,6 @@ template<> struct SDataType { using Type = TSizeSizeMultivariateMeanGathererUMapUMap; }; -template<> -struct SDataType { - using Type = TSizeSizeMultivariateMinGathererUMapUMap; -}; -template<> -struct SDataType { - using Type = TSizeSizeMultivariateMaxGathererUMapUMap; -}; template struct SMaybeConst {}; template @@ -185,8 +163,6 @@ void registerMemoryCallbacks(VISITOR& visitor) { visitor.template registerCallback(); visitor.template registerCallback(); visitor.template registerCallback(); - visitor.template registerCallback(); - visitor.template registerCallback(); } //! Register the callbacks for computing the size of feature data gatherers. 
@@ -234,12 +210,6 @@ bool applyFunc(ITR begin, ITR end, const F& f) { case model_t::E_MultivariateMean: applyFunc(i, f); break; - case model_t::E_MultivariateMin: - applyFunc(i, f); - break; - case model_t::E_MultivariateMax: - applyFunc(i, f); - break; } } catch (const std::exception& e) { LOG_ERROR(<< "Apply failed for " << category << ": " << e.what()); @@ -298,12 +268,6 @@ class CPersistFeatureData { case model_t::E_MultivariateMean: return MULTIVARIATE_MEAN_TAG + core::CStringUtils::typeToString(category.second); - case model_t::E_MultivariateMin: - return MULTIVARIATE_MIN_TAG + - core::CStringUtils::typeToString(category.second); - case model_t::E_MultivariateMax: - return MULTIVARIATE_MAX_TAG + - core::CStringUtils::typeToString(category.second); } return EMPTY_STRING; } @@ -410,7 +374,8 @@ class CRestoreFeatureData { //! \brief Responsible for restoring individual gatherers. class CDoNewRestore { public: - CDoNewRestore(std::size_t dimension) : m_Dimension(dimension) {} + explicit CDoNewRestore(std::size_t dimension) + : m_Dimension(dimension) {} template bool operator()(core::CStateRestoreTraverser& traverser, @@ -486,10 +451,12 @@ class CRestoreFeatureData { << traverser.value()); return false; } - T initial(gatherer.dataGatherer().params(), m_Dimension, + T initial{gatherer.dataGatherer().params().s_LatencyBuckets, + m_Dimension, gatherer.currentBucketStartTime(), - gatherer.bucketLength(), gatherer.beginInfluencers(), - gatherer.endInfluencers()); + gatherer.bucketLength(), + gatherer.beginInfluencers(), + gatherer.endInfluencers()}; if (traverser.traverseSubLevel( std::bind(&T::acceptRestoreTraverser, &initial, std::placeholders::_1)) == false) { @@ -510,14 +477,14 @@ class CRestoreFeatureData { //! \brief Responsible for restoring individual gatherers. 
class CDoOldRestore { public: - CDoOldRestore(std::size_t dimension) : m_Dimension(dimension) {} + explicit CDoOldRestore(std::size_t dimension) + : m_Dimension(dimension) {} template bool operator()(core::CStateRestoreTraverser& traverser, const CMetricBucketGatherer& gatherer, TSizeSizeTUMapUMap& result) const { - bool isPopulation = gatherer.dataGatherer().isPopulation(); - if (isPopulation) { + if (gatherer.dataGatherer().isPopulation()) { this->restorePopulation(traverser, gatherer, result); } else { this->restoreIndividual(traverser, gatherer, result); @@ -533,10 +500,12 @@ class CRestoreFeatureData { do { const std::string& name = traverser.name(); if (name == DATA_TAG) { - T initial(gatherer.dataGatherer().params(), m_Dimension, + T initial{gatherer.dataGatherer().params().s_LatencyBuckets, + m_Dimension, gatherer.currentBucketStartTime(), - gatherer.bucketLength(), gatherer.beginInfluencers(), - gatherer.endInfluencers()); + gatherer.bucketLength(), + gatherer.beginInfluencers(), + gatherer.endInfluencers()}; if (traverser.traverseSubLevel( std::bind(&T::acceptRestoreTraverser, &initial, std::placeholders::_1)) == false) { @@ -575,10 +544,12 @@ class CRestoreFeatureData { return false; } - T initial(gatherer.dataGatherer().params(), m_Dimension, + T initial{gatherer.dataGatherer().params().s_LatencyBuckets, + m_Dimension, gatherer.currentBucketStartTime(), - gatherer.bucketLength(), gatherer.beginInfluencers(), - gatherer.endInfluencers()); + gatherer.bucketLength(), + gatherer.beginInfluencers(), + gatherer.endInfluencers()}; if (traverser.traverseSubLevel( std::bind(&T::acceptRestoreTraverser, &initial, std::placeholders::_1)) == false) { @@ -649,38 +620,6 @@ struct SRemoveAttributes { } }; -//! Sample the metric statistics. 
-struct SDoSample { -public: - template - void operator()(const TCategorySizePr& /*category*/, - TSizeSizeTUMapUMap& data, - core_t::TTime time, - const CMetricBucketGatherer& gatherer, - CSampleCounts& sampleCounts) const { - for (const auto& count : gatherer.bucketCounts(time)) { - std::size_t pid = CDataGatherer::extractPersonId(count); - std::size_t cid = CDataGatherer::extractAttributeId(count); - std::size_t activeId = gatherer.dataGatherer().isPopulation() ? cid : pid; - auto cidEntry = data.find(cid); - if (cidEntry == data.end()) { - LOG_ERROR(<< "No gatherer for attribute " - << gatherer.dataGatherer().attributeName(cid) << " of person " - << gatherer.dataGatherer().personName(pid)); - } else { - auto pidEntry = cidEntry->second.find(pid); - if (pidEntry == cidEntry->second.end()) { - LOG_ERROR(<< "No gatherer for attribute " - << gatherer.dataGatherer().attributeName(cid) << " of person " - << gatherer.dataGatherer().personName(pid)); - } else if (pidEntry->second.sample(time, sampleCounts.count(activeId))) { - sampleCounts.updateSampleVariance(activeId); - } - } - } - } -}; - //! Stably hashes the collection of data gatherers. 
struct SHash { public: @@ -720,24 +659,20 @@ struct SExtractFeatureData { const CMetricBucketGatherer& gatherer, model_t::EFeature feature, core_t::TTime time, - core_t::TTime bucketLength, TFeatureAnyPrVec& result) const { if (gatherer.dataGatherer().isPopulation()) { result.emplace_back(feature, TSizeSizePrFeatureDataPrVec()); this->featureData( - data, gatherer, time, bucketLength, this->isSum(feature), + data, gatherer, time, this->isSum(feature), *std::any_cast(&result.back().second)); } else { result.emplace_back(feature, TSizeFeatureDataPrVec()); this->featureData( - data, gatherer, time, bucketLength, this->isSum(feature), + data, gatherer, time, this->isSum(feature), *std::any_cast(&result.back().second)); } } -private: - static const TSampleVec ZERO_SAMPLE; - private: bool isSum(model_t::EFeature feature) const { return feature == model_t::E_IndividualSumByBucketAndPerson || @@ -749,7 +684,6 @@ struct SExtractFeatureData { void featureData(const TSizeSizeTUMapUMap& data, const CMetricBucketGatherer& gatherer, core_t::TTime time, - core_t::TTime bucketLength, bool isSum, U& result) const { result.clear(); @@ -761,9 +695,9 @@ struct SExtractFeatureData { std::size_t pid = pidEntry.first; if (gatherer.hasExplicitNullsOnly( time, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID) == false) { - this->featureData(pidEntry.second, gatherer, pid, + this->featureData(pidEntry.second, pid, model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID, - time, bucketLength, result); + time, result); } } } @@ -785,9 +719,7 @@ struct SExtractFeatureData { << gatherer.dataGatherer().personName(pid)); continue; } - - this->featureData(pidEntry->second, gatherer, pid, cid, time, - bucketLength, result); + this->featureData(pidEntry->second, pid, cid, time, result); } } std::sort(result.begin(), result.end(), maths::common::COrderings::SFirstLess()); @@ -795,59 +727,31 @@ struct SExtractFeatureData { //! 
Individual model specialization template - void featureData(const T& data, - const CMetricBucketGatherer& gatherer, + void featureData(const T& gatherer, std::size_t pid, std::size_t /*cid*/, core_t::TTime time, - core_t::TTime bucketLength, TSizeFeatureDataPrVec& result) const { - result.emplace_back( - pid, this->featureData(data, time, bucketLength, - gatherer.dataGatherer().effectiveSampleCount(pid))); + result.emplace_back(pid, gatherer.featureData(time)); } //! Population model specialization template - void featureData(const T& data, - const CMetricBucketGatherer& gatherer, + void featureData(const T& gatherer, std::size_t pid, std::size_t cid, core_t::TTime time, - core_t::TTime bucketLength, TSizeSizePrFeatureDataPrVec& result) const { - result.emplace_back( - TSizeSizePr(pid, cid), - this->featureData(data, time, bucketLength, - gatherer.dataGatherer().effectiveSampleCount(cid))); - } - - SMetricFeatureData featureData(const CGathererTools::CSumGatherer& data, - core_t::TTime time, - core_t::TTime bucketLength, - double /*effectiveSampleCount*/) const { - return data.featureData(time, bucketLength, ZERO_SAMPLE); - } - - template - inline SMetricFeatureData featureData(const T& data, - core_t::TTime time, - core_t::TTime bucketLength, - double effectiveSampleCount) const { - return data.featureData(time, bucketLength, effectiveSampleCount); + result.emplace_back(TSizeSizePr(pid, cid), gatherer.featureData(time)); } }; -const TSampleVec - SExtractFeatureData::ZERO_SAMPLE(1, CSample(0, TDoubleVec(1, 0.0), 1.0, 1.0)); - //! Adds a value to the specified data gatherers. 
struct SAddValue { struct SStatistic { core_t::TTime s_Time; const CEventData::TDouble1VecArray* s_Values; unsigned int s_Count; - unsigned int s_SampleCount; const TOptionalStrVec* s_Influences; }; @@ -858,17 +762,17 @@ struct SAddValue { std::size_t cid, const CMetricBucketGatherer& gatherer, const SStatistic& stat) const { - auto& entry = + auto& statGatherer = data[cid] .emplace(boost::unordered::piecewise_construct, boost::make_tuple(pid), boost::make_tuple( - std::cref(gatherer.dataGatherer().params()), + gatherer.dataGatherer().params().s_LatencyBuckets, category.second, gatherer.currentBucketStartTime(), gatherer.bucketLength(), gatherer.beginInfluencers(), gatherer.endInfluencers())) .first->second; - entry.add(stat.s_Time, (*stat.s_Values)[category.first], stat.s_Count, - stat.s_SampleCount, *stat.s_Influences); + statGatherer.add(stat.s_Time, (*stat.s_Values)[category.first], + stat.s_Count, *stat.s_Influences); } }; @@ -906,13 +810,11 @@ struct SResetBucket { struct SReleaseMemory { public: template - void operator()(const TCategorySizePr& /*category*/, - TSizeSizeTUMapUMap& data, - core_t::TTime samplingCutoffTime) const { + void operator()(const TCategorySizePr& /*category*/, TSizeSizeTUMapUMap& data) const { for (auto& cidEntry : data) { auto& pidMap = cidEntry.second; for (auto i = pidMap.begin(); i != pidMap.end(); /**/) { - if (i->second.isRedundant(samplingCutoffTime)) { + if (i->second.isRedundant()) { i = pidMap.erase(i); } else { ++i; @@ -1054,30 +956,6 @@ bool CMetricBucketGatherer::acceptRestoreTraverserInternal(core::CStateRestoreTr LOG_ERROR(<< "Invalid multivariate mean data in " << traverser.value()); return false; } - } else if (name.find(MULTIVARIATE_MIN_TAG) != std::string::npos) { - std::size_t dimension; - if (core::CStringUtils::stringToType( - name.substr(MULTIVARIATE_MIN_TAG.length()), dimension) == false) { - LOG_ERROR(<< "Invalid dimension in " << name); - return false; - } - CRestoreFeatureData restore; - if 
(restore(traverser, dimension, isCurrentVersion, *this, m_FeatureData) == false) { - LOG_ERROR(<< "Invalid multivariate min data in " << traverser.value()); - return false; - } - } else if (name.find(MULTIVARIATE_MAX_TAG) != std::string::npos) { - std::size_t dimension; - if (core::CStringUtils::stringToType( - name.substr(MULTIVARIATE_MAX_TAG.length()), dimension) == false) { - LOG_ERROR(<< "Invalid dimension in " << name); - return false; - } - CRestoreFeatureData restore; - if (restore(traverser, dimension, isCurrentVersion, *this, m_FeatureData) == false) { - LOG_ERROR(<< "Invalid multivariate max data in " << traverser.value()); - return false; - } } return true; @@ -1372,24 +1250,14 @@ bool CMetricBucketGatherer::resetBucket(core_t::TTime bucketStart) { return true; } -void CMetricBucketGatherer::releaseMemory(core_t::TTime samplingCutoffTime) { +void CMetricBucketGatherer::releaseMemory() { applyFunc(m_FeatureData, [&, releaseMemory = SReleaseMemory{} ](const auto& category, auto& data) { - releaseMemory(category, data, samplingCutoffTime); + releaseMemory(category, data); }); } -void CMetricBucketGatherer::sample(core_t::TTime time) { - if (m_DataGatherer.sampleCounts()) { - applyFunc(m_FeatureData, [&, sample = SDoSample{} ](const auto& category, auto& data) { - sample(category, data, time, *this, *m_DataGatherer.sampleCounts()); - }); - } -} - -void CMetricBucketGatherer::featureData(core_t::TTime time, - core_t::TTime bucketLength, - TFeatureAnyPrVec& result) const { +void CMetricBucketGatherer::featureData(core_t::TTime time, TFeatureAnyPrVec& result) const { result.clear(); if (!this->dataAvailable(time) || @@ -1412,8 +1280,8 @@ void CMetricBucketGatherer::featureData(core_t::TTime time, applyFunc(begin, end, [&, extractFeatureData = SExtractFeatureData{} ]( const auto& category_, const auto& data) { - extractFeatureData(category_, data, *this, feature, - time, bucketLength, result); + extractFeatureData(category_, data, *this, + feature, time, 
result); }); } else { LOG_ERROR(<< "No data for category " << model_t::print(category)); @@ -1424,12 +1292,7 @@ void CMetricBucketGatherer::featureData(core_t::TTime time, } } -void CMetricBucketGatherer::resize(std::size_t pid, std::size_t cid) { - if (m_DataGatherer.sampleCounts()) { - m_DataGatherer.sampleCounts()->resize(m_DataGatherer.isPopulation() ? cid : pid); - } else { - LOG_ERROR(<< "Invalid sample counts for gatherer"); - } +void CMetricBucketGatherer::resize(std::size_t /*pid*/, std::size_t /*cid*/) { } void CMetricBucketGatherer::addValue(std::size_t pid, @@ -1446,14 +1309,6 @@ void CMetricBucketGatherer::addValue(std::size_t pid, stat.s_Time = time; stat.s_Values = &values; stat.s_Count = static_cast(count); - if (m_DataGatherer.sampleCounts()) { - stat.s_SampleCount = m_DataGatherer.sampleCounts()->count( - m_DataGatherer.isPopulation() ? cid : pid); - } else { - LOG_ERROR(<< "Invalid sample counts for gatherer"); - stat.s_SampleCount = 0.0; - } - stat.s_Influences = &influences; applyFunc(m_FeatureData, [&, addValue = SAddValue{} ](const auto& category, auto& data) { addValue(category, data, pid, cid, *this, stat); @@ -1484,19 +1339,6 @@ void CMetricBucketGatherer::startNewBucket(core_t::TTime time, bool skipUpdates) .first->second[0] += CDataGatherer::extractData(count); } } - double alpha = std::exp(-m_DataGatherer.params().s_DecayRate); - - for (auto& count : counts) { - std::sort(count.second.begin(), count.second.end()); - std::size_t n = count.second.size() / 2; - double median = - count.second.size() % 2 == 0 - ? 
static_cast(count.second[n - 1] + count.second[n]) / 2.0 - : static_cast(count.second[n]); - m_DataGatherer.sampleCounts()->updateMeanNonZeroBucketCount( - count.first, median, alpha); - } - m_DataGatherer.sampleCounts()->refresh(m_DataGatherer); } } applyFunc(m_FeatureData, @@ -1576,12 +1418,6 @@ void CMetricBucketGatherer::initializeFeatureData() { case model_t::E_MultivariateMean: initializeFeatureDataInstance(dimension, m_FeatureData); break; - case model_t::E_MultivariateMin: - initializeFeatureDataInstance(dimension, m_FeatureData); - break; - case model_t::E_MultivariateMax: - initializeFeatureDataInstance(dimension, m_FeatureData); - break; } } else { LOG_ERROR(<< "Unexpected feature = " diff --git a/lib/model/CMetricModel.cc b/lib/model/CMetricModel.cc index b6b0c8716c..57e4c2610b 100644 --- a/lib/model/CMetricModel.cc +++ b/lib/model/CMetricModel.cc @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -206,7 +205,7 @@ void CMetricModel::sample(core_t::TTime startTime, LOG_TRACE(<< "Sampling [" << time << "," << time + bucketLength << ")"); gatherer.sampleNow(time); - gatherer.featureData(time, bucketLength, m_CurrentBucketStats.s_FeatureData); + gatherer.featureData(time, m_CurrentBucketStats.s_FeatureData); const TTimeVec& preSampleLastBucketTimes = this->lastBucketTimes(); TSizeTimeUMap lastBucketTimesMap; @@ -233,7 +232,7 @@ void CMetricModel::sample(core_t::TTime startTime, for (const auto& data_ : data) { std::size_t pid = data_.first; - const CGathererTools::TSampleVec& samples = data_.second.s_Samples; + const auto& samples = data_.second.s_Samples; maths::common::CModel* model = this->model(feature, pid); if (model == nullptr) { @@ -261,14 +260,6 @@ void CMetricModel::sample(core_t::TTime startTime, continue; } - const TOptionalSample& bucket = data_.second.s_BucketValue; - if (model_t::isSampled(feature) && bucket != std::nullopt) { - values.assign(1, core::make_triple( - bucket->time(), 
TDouble2Vec(bucket->value(dimension)), - model_t::INDIVIDUAL_ANALYSIS_ATTRIBUTE_ID)); - model->addBucketValue(values); - } - // For sparse data we reduce the impact of samples from empty buckets. // In effect, we smoothly transition to modeling only values from non-empty // buckets as the data becomes sparse. @@ -277,12 +268,7 @@ void CMetricModel::sample(core_t::TTime startTime, continue; } - std::size_t n = samples.size(); - double countWeight = - (this->params().s_MaximumUpdatesPerBucket > 0.0 && n > 0 - ? this->params().s_MaximumUpdatesPerBucket / static_cast(n) - : 1.0) * - this->learnRate(feature) * initialCountWeight; + double countWeight = this->learnRate(feature) * initialCountWeight; double outlierWeightDerate = this->derate(pid, sampleTime); // Note we need to scale the amount of data we'll "age out" of the residual // model in one bucket by the empty bucket weight so the posterior doesn't @@ -298,10 +284,12 @@ void CMetricModel::sample(core_t::TTime startTime, << ", scaled count weight = " << scaledCountWeight << ", scaled interval = " << scaledInterval); - values.resize(n); - trendWeights.resize(n, maths_t::CUnitWeights::unit(dimension)); - priorWeights.resize(n, maths_t::CUnitWeights::unit(dimension)); - for (std::size_t i = 0; i < n; ++i) { + values.resize(samples.size()); + trendWeights.resize(samples.size(), + maths_t::CUnitWeights::unit(dimension)); + priorWeights.resize(samples.size(), + maths_t::CUnitWeights::unit(dimension)); + for (std::size_t i = 0; i < samples.size(); ++i) { core_t::TTime ithSampleTime = samples[i].time(); TDouble2Vec ithSampleValue(samples[i].value(dimension)); double countVarianceScale = samples[i].varianceScale(); @@ -344,9 +332,7 @@ void CMetricModel::sample(core_t::TTime startTime, }) .memoryCircuitBreaker(circuitBreaker); - if (model->addSamples(params, values) == maths::common::CModel::E_Reset) { - gatherer.resetSampleCount(pid); - } + model->addSamples(params, values); } } @@ -553,7 +539,7 @@ void 
CMetricModel::clearPrunedResources(const TSizeVec& people, const TSizeVec& // Stop collecting for these people and add them to the free list. gatherer.recyclePeople(people); if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { - gatherer.featureData(m_CurrentBucketStats.s_StartTime, gatherer.bucketLength(), + gatherer.featureData(m_CurrentBucketStats.s_StartTime, m_CurrentBucketStats.s_FeatureData); } diff --git a/lib/model/CMetricModelFactory.cc b/lib/model/CMetricModelFactory.cc index e627bfc028..25853bb9d0 100644 --- a/lib/model/CMetricModelFactory.cc +++ b/lib/model/CMetricModelFactory.cc @@ -92,12 +92,11 @@ CMetricModelFactory::makeModel(const SModelInitializationData& initData, CDataGatherer* CMetricModelFactory::makeDataGatherer(const SGathererInitializationData& initData) const { - return new CDataGatherer(model_t::E_Metric, m_SummaryMode, - this->modelParams(), m_SummaryCountFieldName, - initData.s_PartitionFieldValue, m_PersonFieldName, - EMPTY_STRING /*AttributeFieldName*/, m_ValueFieldName, - m_InfluenceFieldNames, this->searchKey(), m_Features, - initData.s_StartTime, initData.s_SampleOverrideCount); + return new CDataGatherer(model_t::E_Metric, m_SummaryMode, this->modelParams(), + m_SummaryCountFieldName, initData.s_PartitionFieldValue, + m_PersonFieldName, EMPTY_STRING /*AttributeFieldName*/, + m_ValueFieldName, m_InfluenceFieldNames, + this->searchKey(), m_Features, initData.s_StartTime); } CDataGatherer* diff --git a/lib/model/CMetricPopulationModel.cc b/lib/model/CMetricPopulationModel.cc index 5faf99f579..c3e72ec221 100644 --- a/lib/model/CMetricPopulationModel.cc +++ b/lib/model/CMetricPopulationModel.cc @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -190,7 +189,8 @@ void CMetricPopulationModel::acceptPersistInserter(core::CStatePersistInserter& } bool CMetricPopulationModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - std::size_t i = 0u, j = 0; + std::size_t i = 0; + 
std::size_t j = 0; do { const std::string& name = traverser.name(); RESTORE(POPULATION_STATE_TAG, @@ -277,7 +277,7 @@ bool CMetricPopulationModel::bucketStatsAvailable(core_t::TTime time) const { void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, core_t::TTime endTime, CResourceMonitor& resourceMonitor) { - CDataGatherer& gatherer = this->dataGatherer(); + const CDataGatherer& gatherer = this->dataGatherer(); core_t::TTime bucketLength = gatherer.bucketLength(); if (!gatherer.dataAvailable(startTime)) { return; @@ -294,7 +294,7 @@ void CMetricPopulationModel::sampleBucketStatistics(core_t::TTime startTime, this->applyFilter(model_t::E_XF_Over, false, this->personFilter(), personCounts); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); for (auto& featureData_ : featureData) { model_t::EFeature feature = featureData_.first; TSizeSizePrFeatureDataPrVec& data = m_CurrentBucketStats.s_FeatureData[feature]; @@ -324,7 +324,7 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, gatherer.sampleNow(time); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); const TTimeVec& preSampleAttributeLastBucketTimes = this->attributeLastBucketTimes(); TSizeTimeUMap attributeLastBucketTimesMap; @@ -345,8 +345,6 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, gatherer.personNonZeroCounts(time, personCounts); this->applyFilter(model_t::E_XF_Over, true, this->personFilter(), personCounts); - const TTimeVec& attributeLastBucketTimes = this->attributeLastBucketTimes(); - for (auto& featureData_ : featureData) { model_t::EFeature feature = featureData_.first; std::size_t dimension = model_t::dimension(feature); @@ -362,8 +360,7 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, // Set up fuzzy de-duplication. 
for (const auto& data_ : data) { std::size_t cid = CDataGatherer::extractAttributeId(data_); - const CGathererTools::TSampleVec& samples = - CDataGatherer::extractData(data_).s_Samples; + const auto& samples = CDataGatherer::extractData(data_).s_Samples; for (const auto& sample : samples) { duplicates[cid].add(TDouble2Vec(sample.value(dimension))); } @@ -408,14 +405,9 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, continue; } - const TOptionalSample& bucket = - CDataGatherer::extractData(data_).s_BucketValue; - const CGathererTools::TSampleVec& samples = - CDataGatherer::extractData(data_).s_Samples; + const auto& samples = CDataGatherer::extractData(data_).s_Samples; bool isInteger = CDataGatherer::extractData(data_).s_IsInteger; bool isNonNegative = CDataGatherer::extractData(data_).s_IsNonNegative; - core_t::TTime cutoff = attributeLastBucketTimes[cid] - - this->params().s_SamplingAgeCutoff; LOG_TRACE(<< "Adding " << CDataGatherer::extractData(data_) << " for person = " << gatherer.personName(pid) << " and attribute = " << gatherer.attributeName(cid)); @@ -424,29 +416,12 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, attribute.s_IsInteger &= isInteger; attribute.s_IsNonNegative &= isNonNegative; - if (model_t::isSampled(feature) && bucket) { - attribute.s_BucketValues.emplace_back( - bucket->time(), TDouble2Vec(bucket->value(dimension)), pid); - } - - std::size_t n = std::count_if(samples.begin(), samples.end(), - [cutoff](const CSample& sample) { - return sample.time() >= cutoff; - }); - double updatesPerBucket = this->params().s_MaximumUpdatesPerBucket; double countWeight = initialCountWeight * this->sampleRateWeight(pid, cid) * - this->learnRate(feature) * - (updatesPerBucket > 0.0 && n > 0 - ? 
updatesPerBucket / static_cast(n) - : 1.0); + this->learnRate(feature); LOG_TRACE(<< "countWeight = " << countWeight); for (const auto& sample : samples) { - if (sample.time() < cutoff) { - continue; - } - double countVarianceScale = sample.varianceScale(); TDouble2Vec value(sample.value(dimension)); std::size_t duplicate = duplicates[cid].duplicate(sample.time(), value); @@ -511,10 +486,7 @@ void CMetricPopulationModel::sample(core_t::TTime startTime, LOG_TRACE(<< "Model unexpectedly null"); return; } - if (model->addSamples(params, attribute.second.s_Values) == - maths::common::CModel::E_Reset) { - gatherer.resetSampleCount(cid); - } + model->addSamples(params, attribute.second.s_Values); } } @@ -551,8 +523,7 @@ void CMetricPopulationModel::prune(std::size_t maximumAge) { if (gatherer.dataAvailable(m_CurrentBucketStats.s_StartTime)) { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(m_CurrentBucketStats.s_StartTime, - gatherer.bucketLength(), featureData); + gatherer.featureData(m_CurrentBucketStats.s_StartTime, featureData); for (auto& feature : featureData) { m_CurrentBucketStats.s_FeatureData[feature.first].swap(feature.second); } diff --git a/lib/model/CMetricPopulationModelFactory.cc b/lib/model/CMetricPopulationModelFactory.cc index a758937dd3..94a7ac0b31 100644 --- a/lib/model/CMetricPopulationModelFactory.cc +++ b/lib/model/CMetricPopulationModelFactory.cc @@ -95,9 +95,8 @@ CMetricPopulationModelFactory::makeDataGatherer(const SGathererInitializationDat return new CDataGatherer(model_t::E_PopulationMetric, m_SummaryMode, this->modelParams(), m_SummaryCountFieldName, initData.s_PartitionFieldValue, m_PersonFieldName, - m_AttributeFieldName, m_ValueFieldName, - m_InfluenceFieldNames, this->searchKey(), m_Features, - initData.s_StartTime, initData.s_SampleOverrideCount); + m_AttributeFieldName, m_ValueFieldName, m_InfluenceFieldNames, + this->searchKey(), m_Features, initData.s_StartTime); } CDataGatherer* diff --git 
a/lib/model/CModelDetailsView.cc b/lib/model/CModelDetailsView.cc index ba7f437ba2..e1a4e0becc 100644 --- a/lib/model/CModelDetailsView.cc +++ b/lib/model/CModelDetailsView.cc @@ -97,7 +97,7 @@ void CModelDetailsView::modelPlotForByFieldId(core_t::TTime time, time, seasonalWeight); maths_t::setSeasonalVarianceScale(seasonalWeight, weights); maths_t::setCountVarianceScale( - TDouble2Vec(dimension, this->countVarianceScale(feature, byFieldId, time)), weights); + TDouble2Vec(dimension, this->countVarianceScale()), weights); TDouble1VecDouble1VecPr support(model_t::support(feature)); TDouble2Vec supportLower(support.first); @@ -223,9 +223,7 @@ const CAnomalyDetectorModel& CEventRateModelDetailsView::base() const { return *m_Model; } -double CEventRateModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, - std::size_t /*byFieldId*/, - core_t::TTime /*time*/) const { +double CEventRateModelDetailsView::countVarianceScale() const { return 1.0; } @@ -251,9 +249,7 @@ const CAnomalyDetectorModel& CEventRatePopulationModelDetailsView::base() const return *m_Model; } -double CEventRatePopulationModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, - std::size_t /*byFieldId*/, - core_t::TTime /*time*/) const { +double CEventRatePopulationModelDetailsView::countVarianceScale() const { return 1.0; } @@ -277,16 +273,8 @@ const CAnomalyDetectorModel& CMetricModelDetailsView::base() const { return *m_Model; } -double CMetricModelDetailsView::countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const { - TOptionalUInt64 count = m_Model->currentBucketCount(byFieldId, time); - if (!count) { - return 1.0; - } - return model_t::varianceScale(feature, - m_Model->dataGatherer().effectiveSampleCount(byFieldId), - static_cast(*count)); +double CMetricModelDetailsView::countVarianceScale() const { + return 1.0; } ////////// CMetricPopulationModelDetailsView Implementation ////////// @@ -310,16 +298,8 @@ const 
CAnomalyDetectorModel& CMetricPopulationModelDetailsView::base() const { return *m_Model; } -double CMetricPopulationModelDetailsView::countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const { - TOptionalUInt64 count = m_Model->currentBucketCount(byFieldId, time); - if (!count) { - return 1.0; - } - return model_t::varianceScale(feature, - m_Model->dataGatherer().effectiveSampleCount(byFieldId), - static_cast(*count)); +double CMetricPopulationModelDetailsView::countVarianceScale() const { + return 1.0; } } } diff --git a/lib/model/CModelFactory.cc b/lib/model/CModelFactory.cc index 70737d5563..b9ac2a061c 100644 --- a/lib/model/CModelFactory.cc +++ b/lib/model/CModelFactory.cc @@ -207,10 +207,6 @@ CModelFactory::defaultInfluenceCalculators(const std::string& influencerName, return result; } -void CModelFactory::sampleCountFactor(std::size_t sampleCountFactor) { - m_ModelParams.s_SampleCountFactor = sampleCountFactor; -} - void CModelFactory::excludeFrequent(model_t::EExcludeFrequent excludeFrequent) { m_ModelParams.s_ExcludeFrequent = excludeFrequent; } @@ -239,10 +235,6 @@ void CModelFactory::initialDecayRateMultiplier(double multiplier) { m_ModelParams.s_InitialDecayRateMultiplier = multiplier; } -void CModelFactory::maximumUpdatesPerBucket(double maximumUpdatesPerBucket) { - m_ModelParams.s_MaximumUpdatesPerBucket = maximumUpdatesPerBucket; -} - void CModelFactory::pruneWindowScaleMinimum(double factor) { m_ModelParams.s_PruneWindowScaleMinimum = factor; } @@ -370,17 +362,13 @@ CModelFactory::SModelInitializationData::SModelInitializationData(const TDataGat : s_DataGatherer(dataGatherer) { } -CModelFactory::SGathererInitializationData::SGathererInitializationData( - core_t::TTime startTime, - const std::string& partitionFieldValue, - unsigned int sampleOverrideCount) - : s_StartTime(startTime), s_PartitionFieldValue(partitionFieldValue), - s_SampleOverrideCount(sampleOverrideCount) { 
+CModelFactory::SGathererInitializationData::SGathererInitializationData(core_t::TTime startTime, + const std::string& partitionFieldValue) + : s_StartTime(startTime), s_PartitionFieldValue(partitionFieldValue) { } CModelFactory::SGathererInitializationData::SGathererInitializationData(core_t::TTime startTime) - : s_StartTime(startTime), s_PartitionFieldValue(EMPTY_STRING), - s_SampleOverrideCount(0u) { + : s_StartTime(startTime), s_PartitionFieldValue(EMPTY_STRING) { } } } diff --git a/lib/model/CModelPlotData.cc b/lib/model/CModelPlotData.cc index 49577d762f..28588b424b 100644 --- a/lib/model/CModelPlotData.cc +++ b/lib/model/CModelPlotData.cc @@ -30,12 +30,11 @@ CModelPlotData::CModelPlotData(core_t::TTime time, } CModelPlotData::SByFieldData::SByFieldData() - : s_LowerBound(0.0), s_UpperBound(0.0), s_Median(0.0), s_ValuesPerOverField() { + : s_LowerBound(0.0), s_UpperBound(0.0), s_Median(0.0) { } CModelPlotData::SByFieldData::SByFieldData(double lowerBound, double upperBound, double median) - : s_LowerBound(lowerBound), s_UpperBound(upperBound), s_Median(median), - s_ValuesPerOverField() { + : s_LowerBound(lowerBound), s_UpperBound(upperBound), s_Median(median) { } const std::string& CModelPlotData::partitionFieldName() const { diff --git a/lib/model/CSampleCounts.cc b/lib/model/CSampleCounts.cc deleted file mode 100644 index 957d93ce2a..0000000000 --- a/lib/model/CSampleCounts.cc +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. 
You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include - -namespace ml { -namespace model { - -namespace { -const std::string SAMPLE_COUNT_TAG("b"); -const std::string MEAN_NON_ZERO_BUCKET_COUNT_TAG("c"); -const std::string EFFECTIVE_SAMPLE_VARIANCE_TAG("d"); - -const double NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT(3.0); -const double NUMBER_BUCKETS_TO_REFRESH_SAMPLE_COUNT(30.0); - -using TStrCRef = std::reference_wrapper; -using TStrCRefUInt64Map = - std::map; -} - -CSampleCounts::CSampleCounts(unsigned int sampleCountOverride) - : m_SampleCountOverride(sampleCountOverride) { -} - -CSampleCounts::CSampleCounts(bool isForPersistence, const CSampleCounts& other) - : m_SampleCountOverride(other.m_SampleCountOverride), - m_SampleCounts(other.m_SampleCounts), - m_MeanNonZeroBucketCounts(other.m_MeanNonZeroBucketCounts), - m_EffectiveSampleVariances(other.m_EffectiveSampleVariances) { - if (!isForPersistence) { - LOG_ABORT(<< "This constructor only creates clones for persistence"); - } -} - -CSampleCounts* CSampleCounts::cloneForPersistence() const { - return new CSampleCounts(true, *this); -} - -void CSampleCounts::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - // Note m_SampleCountOverride is only for unit tests at present, - // hence not persisted or restored. 
- - core::CPersistUtils::persist(SAMPLE_COUNT_TAG, m_SampleCounts, inserter); - core::CPersistUtils::persist(MEAN_NON_ZERO_BUCKET_COUNT_TAG, - m_MeanNonZeroBucketCounts, inserter); - core::CPersistUtils::persist(EFFECTIVE_SAMPLE_VARIANCE_TAG, - m_EffectiveSampleVariances, inserter); -} - -bool CSampleCounts::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { - do { - const std::string& name = traverser.name(); - RESTORE(SAMPLE_COUNT_TAG, core::CPersistUtils::restore(name, m_SampleCounts, traverser)) - RESTORE(MEAN_NON_ZERO_BUCKET_COUNT_TAG, - core::CPersistUtils::restore(name, m_MeanNonZeroBucketCounts, traverser)) - RESTORE(EFFECTIVE_SAMPLE_VARIANCE_TAG, - core::CPersistUtils::restore(name, m_EffectiveSampleVariances, traverser)) - } while (traverser.next()); - return true; -} - -unsigned int CSampleCounts::count(std::size_t id) const { - return m_SampleCountOverride > 0 - ? m_SampleCountOverride - : id < m_SampleCounts.size() ? m_SampleCounts[id] : 0; -} - -double CSampleCounts::effectiveSampleCount(std::size_t id) const { - if (id < m_EffectiveSampleVariances.size()) { - // This uses the fact that variance ~ 1 / count. - double count = - maths::common::CBasicStatistics::count(m_EffectiveSampleVariances[id]); - double mean = maths::common::CBasicStatistics::mean(m_EffectiveSampleVariances[id]); - return count > 0.0 ? 
1.0 / mean : this->count(id); - } - return 0.0; -} - -void CSampleCounts::resetSampleCount(const CDataGatherer& gatherer, std::size_t id) { - if (m_SampleCountOverride > 0) { - return; - } - - if (id >= m_MeanNonZeroBucketCounts.size()) { - LOG_ERROR(<< "Bad identifier " << id); - return; - } - - const TMeanAccumulator& count_ = m_MeanNonZeroBucketCounts[id]; - if (maths::common::CBasicStatistics::count(count_) >= NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT) { - unsigned sampleCountThreshold = 0; - const CDataGatherer::TFeatureVec& features = gatherer.features(); - for (const auto& feature : features) { - sampleCountThreshold = std::max(sampleCountThreshold, - model_t::minimumSampleCount(feature)); - } - double count = maths::common::CBasicStatistics::mean(count_); - m_SampleCounts[id] = std::max(sampleCountThreshold, - static_cast(count + 0.5)); - LOG_DEBUG(<< "Setting sample count to " << m_SampleCounts[id] << " for " - << this->name(gatherer, id)); - } -} - -void CSampleCounts::refresh(const CDataGatherer& gatherer) { - if (m_SampleCountOverride > 0) { - return; - } - - unsigned sampleCountThreshold = 0; - const CDataGatherer::TFeatureVec& features = gatherer.features(); - for (const auto& feature : features) { - sampleCountThreshold = - std::max(sampleCountThreshold, model_t::minimumSampleCount(feature)); - } - - for (std::size_t id = 0; id < m_MeanNonZeroBucketCounts.size(); ++id) { - const TMeanAccumulator& count_ = m_MeanNonZeroBucketCounts[id]; - if (m_SampleCounts[id] > 0) { - if (maths::common::CBasicStatistics::count(count_) >= - NUMBER_BUCKETS_TO_REFRESH_SAMPLE_COUNT) { - double count = maths::common::CBasicStatistics::mean(count_); - double scale = count / static_cast(m_SampleCounts[id]); - if (scale < maths::common::MINIMUM_ACCURATE_VARIANCE_SCALE || - scale > maths::common::MAXIMUM_ACCURATE_VARIANCE_SCALE) { - unsigned int oldCount = m_SampleCounts[id]; - unsigned int newCount = std::max( - sampleCountThreshold, static_cast(count + 0.5)); - LOG_TRACE(<< 
"Sample count " << oldCount << " is too far from the bucket mean " - << count << " count, resetting to " << newCount << ". This may cause temporary instability" - << " for " << this->name(gatherer, id) << " (" - << id << "). (Mean count " << count_ << ")"); - m_SampleCounts[id] = newCount; - SUPPRESS_USAGE_WARNING(oldCount); - } - } - } else if (maths::common::CBasicStatistics::count(count_) >= - NUMBER_BUCKETS_TO_ESTIMATE_SAMPLE_COUNT) { - double count = maths::common::CBasicStatistics::mean(count_); - m_SampleCounts[id] = std::max(sampleCountThreshold, - static_cast(count + 0.5)); - LOG_TRACE(<< "Setting sample count to " << m_SampleCounts[id] - << " for " << this->name(gatherer, id) << " (" << id - << "). (Mean count " << count_ << ")"); - } - } -} - -void CSampleCounts::updateSampleVariance(std::size_t id) { - m_EffectiveSampleVariances[id].add(1.0 / static_cast(this->count(id))); -} - -void CSampleCounts::updateMeanNonZeroBucketCount(std::size_t id, double count, double alpha) { - m_MeanNonZeroBucketCounts[id].add(count); - m_MeanNonZeroBucketCounts[id].age(alpha); - m_EffectiveSampleVariances[id].age(alpha); -} - -void CSampleCounts::recycle(const TSizeVec& idsToRemove) { - for (std::size_t i = 0; i < idsToRemove.size(); ++i) { - std::size_t id = idsToRemove[i]; - if (id >= m_SampleCounts.size()) { - continue; - } - m_SampleCounts[id] = 0; - m_MeanNonZeroBucketCounts[id] = TMeanAccumulator(); - m_EffectiveSampleVariances[id] = TMeanAccumulator(); - } - LOG_TRACE(<< "m_SampleCounts = " << m_SampleCounts); - LOG_TRACE(<< "m_MeanNonZeroBucketCounts = " << m_MeanNonZeroBucketCounts); - LOG_TRACE(<< "m_EffectiveSampleVariances = " << m_EffectiveSampleVariances); -} - -void CSampleCounts::remove(std::size_t lowestIdToRemove) { - if (lowestIdToRemove < m_SampleCounts.size()) { - m_SampleCounts.erase(m_SampleCounts.begin() + lowestIdToRemove, - m_SampleCounts.end()); - m_MeanNonZeroBucketCounts.erase(m_MeanNonZeroBucketCounts.begin() + lowestIdToRemove, - 
m_MeanNonZeroBucketCounts.end()); - m_EffectiveSampleVariances.erase(m_EffectiveSampleVariances.begin() + lowestIdToRemove, - m_EffectiveSampleVariances.end()); - LOG_TRACE(<< "m_SampleCounts = " << m_SampleCounts); - LOG_TRACE(<< "m_MeanNonZeroBucketCounts = " << m_MeanNonZeroBucketCounts); - LOG_TRACE(<< "m_EffectiveSampleVariances = " << m_EffectiveSampleVariances); - } -} - -void CSampleCounts::resize(std::size_t id) { - if (id >= m_SampleCounts.size()) { - m_SampleCounts.resize(id + 1); - m_MeanNonZeroBucketCounts.resize(id + 1); - m_EffectiveSampleVariances.resize(id + 1); - } -} - -std::uint64_t CSampleCounts::checksum(const CDataGatherer& gatherer) const { - TStrCRefUInt64Map hashes; - for (std::size_t id = 0; id < m_SampleCounts.size(); ++id) { - if (gatherer.isPopulation() ? gatherer.isAttributeActive(id) - : gatherer.isPersonActive(id)) { - std::uint64_t& hash = hashes[TStrCRef(this->name(gatherer, id))]; - hash = maths::common::CChecksum::calculate(hash, m_SampleCounts[id]); - hash = maths::common::CChecksum::calculate(hash, m_MeanNonZeroBucketCounts[id]); - hash = maths::common::CChecksum::calculate(hash, m_EffectiveSampleVariances[id]); - } - } - LOG_TRACE(<< "hashes = " << hashes); - return maths::common::CChecksum::calculate(0, hashes); -} - -void CSampleCounts::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { - mem->setName("CSampleCounts"); - core::memory_debug::dynamicSize("m_SampleCounts", m_SampleCounts, mem); - core::memory_debug::dynamicSize("m_MeanNonZeroBucketCounts", - m_MeanNonZeroBucketCounts, mem); - core::memory_debug::dynamicSize("m_EffectiveSampleVariances", - m_EffectiveSampleVariances, mem); -} - -std::size_t CSampleCounts::memoryUsage() const { - std::size_t mem = core::memory::dynamicSize(m_SampleCounts); - mem += core::memory::dynamicSize(m_MeanNonZeroBucketCounts); - mem += core::memory::dynamicSize(m_EffectiveSampleVariances); - return mem; -} - -void CSampleCounts::clear() { - m_SampleCounts.clear(); - 
m_MeanNonZeroBucketCounts.clear(); - m_EffectiveSampleVariances.clear(); -} - -const std::string& CSampleCounts::name(const CDataGatherer& gatherer, std::size_t id) const { - return gatherer.isPopulation() ? gatherer.attributeName(id) : gatherer.personName(id); -} - -} // model -} // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 2fab1d1c2a..c16ab2ef0f 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -398,14 +398,12 @@ bool isMedianFeature(EFeature feature) { bool isMinFeature(EFeature feature) { EMetricCategory category; - return metricCategory(feature, category) && - (category == E_Min || category == E_MultivariateMin); + return metricCategory(feature, category) && (category == E_Min); } bool isMaxFeature(EFeature feature) { EMetricCategory category; - return metricCategory(feature, category) && - (category == E_Max || category == E_MultivariateMax); + return metricCategory(feature, category) && (category == E_Max); } bool isVarianceFeature(EFeature feature) { @@ -424,126 +422,6 @@ double varianceScale(EFeature feature, double sampleCount, double count) { : 1.0; } -bool isSampled(EFeature feature) { - switch (feature) { - CASE_INDIVIDUAL_COUNT: - return false; - - case E_IndividualMeanByPerson: - case E_IndividualLowMeanByPerson: - case E_IndividualHighMeanByPerson: - case E_IndividualMedianByPerson: - case E_IndividualLowMedianByPerson: - case E_IndividualHighMedianByPerson: - case E_IndividualMinByPerson: - case E_IndividualMaxByPerson: - case E_IndividualVarianceByPerson: - case E_IndividualLowVarianceByPerson: - case E_IndividualHighVarianceByPerson: - case E_IndividualMeanVelocityByPerson: - case E_IndividualMinVelocityByPerson: - case E_IndividualMaxVelocityByPerson: - case E_IndividualMeanLatLongByPerson: - return true; - case E_IndividualSumByBucketAndPerson: - case E_IndividualLowSumByBucketAndPerson: - case E_IndividualHighSumByBucketAndPerson: - case E_IndividualNonNullSumByBucketAndPerson: - case 
E_IndividualLowNonNullSumByBucketAndPerson: - case E_IndividualHighNonNullSumByBucketAndPerson: - case E_IndividualSumVelocityByPerson: - return false; - - CASE_POPULATION_COUNT: - return false; - - case E_PopulationMeanByPersonAndAttribute: - case E_PopulationLowMeanByPersonAndAttribute: - case E_PopulationHighMeanByPersonAndAttribute: - case E_PopulationMedianByPersonAndAttribute: - case E_PopulationLowMedianByPersonAndAttribute: - case E_PopulationHighMedianByPersonAndAttribute: - case E_PopulationMinByPersonAndAttribute: - case E_PopulationMaxByPersonAndAttribute: - case E_PopulationVarianceByPersonAndAttribute: - case E_PopulationLowVarianceByPersonAndAttribute: - case E_PopulationHighVarianceByPersonAndAttribute: - case E_PopulationMeanVelocityByPersonAndAttribute: - case E_PopulationMinVelocityByPersonAndAttribute: - case E_PopulationMaxVelocityByPersonAndAttribute: - case E_PopulationMeanLatLongByPersonAndAttribute: - return true; - case E_PopulationSumByBucketPersonAndAttribute: - case E_PopulationLowSumByBucketPersonAndAttribute: - case E_PopulationHighSumByBucketPersonAndAttribute: - case E_PopulationSumVelocityByPersonAndAttribute: - return false; - } - return false; -} - -unsigned minimumSampleCount(EFeature feature) { - switch (feature) { - CASE_INDIVIDUAL_COUNT: - return 1; - - case E_IndividualMeanByPerson: - case E_IndividualMinByPerson: - case E_IndividualMaxByPerson: - case E_IndividualSumByBucketAndPerson: - case E_IndividualLowMeanByPerson: - case E_IndividualHighMeanByPerson: - case E_IndividualLowSumByBucketAndPerson: - case E_IndividualHighSumByBucketAndPerson: - case E_IndividualNonNullSumByBucketAndPerson: - case E_IndividualLowNonNullSumByBucketAndPerson: - case E_IndividualHighNonNullSumByBucketAndPerson: - case E_IndividualMeanLatLongByPerson: - case E_IndividualMaxVelocityByPerson: - case E_IndividualMinVelocityByPerson: - case E_IndividualMeanVelocityByPerson: - case E_IndividualSumVelocityByPerson: - case E_IndividualMedianByPerson: 
- case E_IndividualLowMedianByPerson: - case E_IndividualHighMedianByPerson: - return 1; - - // Population variance needs a minimum population size - case E_IndividualVarianceByPerson: - case E_IndividualLowVarianceByPerson: - case E_IndividualHighVarianceByPerson: - return 3; - - CASE_POPULATION_COUNT: - return 1; - - case E_PopulationMeanByPersonAndAttribute: - case E_PopulationMedianByPersonAndAttribute: - case E_PopulationLowMedianByPersonAndAttribute: - case E_PopulationHighMedianByPersonAndAttribute: - case E_PopulationMinByPersonAndAttribute: - case E_PopulationMaxByPersonAndAttribute: - case E_PopulationSumByBucketPersonAndAttribute: - case E_PopulationLowMeanByPersonAndAttribute: - case E_PopulationHighMeanByPersonAndAttribute: - case E_PopulationLowSumByBucketPersonAndAttribute: - case E_PopulationHighSumByBucketPersonAndAttribute: - case E_PopulationMeanLatLongByPersonAndAttribute: - case E_PopulationMaxVelocityByPersonAndAttribute: - case E_PopulationMinVelocityByPersonAndAttribute: - case E_PopulationMeanVelocityByPersonAndAttribute: - case E_PopulationSumVelocityByPersonAndAttribute: - return 1; - - // Population variance needs a minimum population size - case E_PopulationVarianceByPersonAndAttribute: - case E_PopulationLowVarianceByPersonAndAttribute: - case E_PopulationHighVarianceByPersonAndAttribute: - return 3; - } - return 1; -} - double offsetCountToZero(EFeature feature, double count) { switch (feature) { case E_IndividualNonZeroCountByBucketAndPerson: @@ -1656,10 +1534,6 @@ std::string print(EMetricCategory category) { return "'sum'"; case E_MultivariateMean: return "'multivariate mean'"; - case E_MultivariateMin: - return "'multivariate minimum'"; - case E_MultivariateMax: - return "'multivariate maximum'"; case E_Median: return "'median'"; case E_Variance: diff --git a/lib/model/SModelParams.cc b/lib/model/SModelParams.cc index c025170550..e5e31f0843 100644 --- a/lib/model/SModelParams.cc +++ b/lib/model/SModelParams.cc @@ -48,11 +48,7 @@ 
SModelParams::SModelParams(core_t::TTime bucketLength) CAnomalyDetectorModelConfig::DEFAULT_MULTIVARIATE_COMPONENT_DELIMITER), s_ExcludeFrequent(model_t::E_XF_None), s_ExcludePersonFrequency(0.1), s_ExcludeAttributeFrequency(0.1), - s_MaximumUpdatesPerBucket(CAnomalyDetectorModelConfig::DEFAULT_MAXIMUM_UPDATES_PER_BUCKET), s_LatencyBuckets(CAnomalyDetectorModelConfig::DEFAULT_LATENCY_BUCKETS), - s_SampleCountFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_NO_LATENCY), - s_SampleQueueGrowthFactor(CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_QUEUE_GROWTH_FACTOR), - s_SamplingAgeCutoff(SAMPLING_AGE_CUTOFF_DEFAULT), s_PruneWindowScaleMinimum(CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MINIMUM), s_PruneWindowScaleMaximum(CAnomalyDetectorModelConfig::DEFAULT_PRUNE_WINDOW_SCALE_MAXIMUM), s_DetectionRules(EMPTY_RULES), s_ScheduledEvents(EMPTY_SCHEDULED_EVENTS), @@ -63,12 +59,9 @@ SModelParams::SModelParams(core_t::TTime bucketLength) void SModelParams::configureLatency(core_t::TTime latency, core_t::TTime bucketLength) { s_LatencyBuckets = (latency + bucketLength - 1) / bucketLength; - if (s_LatencyBuckets > 0) { - s_SampleCountFactor = CAnomalyDetectorModelConfig::DEFAULT_SAMPLE_COUNT_FACTOR_WITH_LATENCY; - if (s_LatencyBuckets > 50) { - LOG_WARN(<< "There are a large number of buckets in the latency window. " - "Please ensure sufficient resources are available for this job."); - } + if (s_LatencyBuckets > 50) { + LOG_WARN(<< "There are a large number of buckets in the latency window. 
" + "Please ensure sufficient resources are available for this job."); } } @@ -101,11 +94,8 @@ std::uint64_t SModelParams::checksum(std::uint64_t seed) const { seed = maths::common::CChecksum::calculate(seed, s_ExcludeFrequent); seed = maths::common::CChecksum::calculate(seed, s_ExcludePersonFrequency); seed = maths::common::CChecksum::calculate(seed, s_ExcludeAttributeFrequency); - seed = maths::common::CChecksum::calculate(seed, s_MaximumUpdatesPerBucket); seed = maths::common::CChecksum::calculate(seed, s_InfluenceCutoff); seed = maths::common::CChecksum::calculate(seed, s_LatencyBuckets); - seed = maths::common::CChecksum::calculate(seed, s_SampleCountFactor); - seed = maths::common::CChecksum::calculate(seed, s_SampleQueueGrowthFactor); seed = maths::common::CChecksum::calculate(seed, s_PruneWindowScaleMinimum); seed = maths::common::CChecksum::calculate(seed, s_PruneWindowScaleMaximum); seed = maths::common::CChecksum::calculate(seed, s_CorrelationModelsOverhead); @@ -113,8 +103,7 @@ std::uint64_t SModelParams::checksum(std::uint64_t seed) const { seed = maths::common::CChecksum::calculate(seed, s_MinimumSignificantCorrelation); //seed = maths::common::CChecksum::calculate(seed, s_DetectionRules); //seed = maths::common::CChecksum::calculate(seed, s_ScheduledEvents); - seed = maths::common::CChecksum::calculate(seed, s_MinimumToFuzzyDeduplicate); - return maths::common::CChecksum::calculate(seed, s_SamplingAgeCutoff); + return maths::common::CChecksum::calculate(seed, s_MinimumToFuzzyDeduplicate); } } } diff --git a/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc b/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc index 1a8624f7a8..bc21163664 100644 --- a/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc +++ b/lib/model/unittest/CAnomalyDetectorModelConfigTest.cc @@ -65,14 +65,6 @@ BOOST_AUTO_TEST_CASE(testNormal) { 2.0, config.factory(1, POPULATION_COUNT)->modelParams().s_InitialDecayRateMultiplier); BOOST_REQUIRE_EQUAL( 2.0, config.factory(1, 
POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier); - BOOST_REQUIRE_EQUAL( - 0.0, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket); - BOOST_REQUIRE_EQUAL( - 0.0, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket); - BOOST_REQUIRE_EQUAL( - 0.0, config.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket); - BOOST_REQUIRE_EQUAL( - 0.0, config.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket); BOOST_REQUIRE_EQUAL(0.1, config.factory(1, INDIVIDUAL_COUNT)->minimumModeFraction()); BOOST_REQUIRE_EQUAL(0.1, config.factory(1, INDIVIDUAL_METRIC)->minimumModeFraction()); BOOST_REQUIRE_EQUAL(0.01, config.factory(1, POPULATION_COUNT)->minimumModeFraction()); @@ -81,14 +73,6 @@ BOOST_AUTO_TEST_CASE(testNormal) { BOOST_REQUIRE_EQUAL(10, config.factory(1, INDIVIDUAL_METRIC)->componentSize()); BOOST_REQUIRE_EQUAL(10, config.factory(1, POPULATION_COUNT)->componentSize()); BOOST_REQUIRE_EQUAL(10, config.factory(1, POPULATION_METRIC)->componentSize()); - BOOST_REQUIRE_EQUAL( - 20, config.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor); - BOOST_REQUIRE_EQUAL( - 20, config.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor); - BOOST_REQUIRE_EQUAL( - 20, config.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor); - BOOST_REQUIRE_EQUAL( - 20, config.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor); TDoubleVec params; for (std::size_t i = 0; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) { for (std::size_t j = 0; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) { @@ -184,18 +168,6 @@ BOOST_AUTO_TEST_CASE(testErrors) { BOOST_REQUIRE_EQUAL( config2.factory(1, POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier, config1.factory(1, POPULATION_METRIC)->modelParams().s_InitialDecayRateMultiplier); - BOOST_REQUIRE_EQUAL( - config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket, - config1.factory(1, 
INDIVIDUAL_COUNT)->modelParams().s_MaximumUpdatesPerBucket); - BOOST_REQUIRE_EQUAL( - config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket, - config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_MaximumUpdatesPerBucket); - BOOST_REQUIRE_EQUAL( - config2.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket, - config1.factory(1, POPULATION_COUNT)->modelParams().s_MaximumUpdatesPerBucket); - BOOST_REQUIRE_EQUAL( - config2.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket, - config1.factory(1, POPULATION_METRIC)->modelParams().s_MaximumUpdatesPerBucket); BOOST_REQUIRE_EQUAL(config2.factory(1, INDIVIDUAL_COUNT)->minimumModeFraction(), config1.factory(1, INDIVIDUAL_COUNT)->minimumModeFraction()); BOOST_REQUIRE_EQUAL(config2.factory(1, INDIVIDUAL_METRIC)->minimumModeFraction(), @@ -212,18 +184,6 @@ BOOST_AUTO_TEST_CASE(testErrors) { config1.factory(1, POPULATION_COUNT)->componentSize()); BOOST_REQUIRE_EQUAL(config2.factory(1, POPULATION_METRIC)->componentSize(), config1.factory(1, POPULATION_METRIC)->componentSize()); - BOOST_REQUIRE_EQUAL( - config2.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor, - config1.factory(1, INDIVIDUAL_COUNT)->modelParams().s_SampleCountFactor); - BOOST_REQUIRE_EQUAL( - config2.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor, - config1.factory(1, INDIVIDUAL_METRIC)->modelParams().s_SampleCountFactor); - BOOST_REQUIRE_EQUAL( - config2.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor, - config1.factory(1, POPULATION_COUNT)->modelParams().s_SampleCountFactor); - BOOST_REQUIRE_EQUAL( - config2.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor, - config1.factory(1, POPULATION_METRIC)->modelParams().s_SampleCountFactor); for (std::size_t i = 0; i < model_t::NUMBER_AGGREGATION_STYLES; ++i) { for (std::size_t j = 0; j < model_t::NUMBER_AGGREGATION_PARAMS; ++j) { BOOST_REQUIRE_EQUAL(config2.aggregationStyleParam( diff --git 
a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc index 09c408790e..3e2a286b59 100644 --- a/lib/model/unittest/CDetectionRuleTest.cc +++ b/lib/model/unittest/CDetectionRuleTest.cc @@ -76,7 +76,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenScope, CTestFixture) { CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, partitionFieldValue, personFieldName, attributeFieldName, EMPTY_STRING, - TStrVec{}, key, features, startTime, 0)); + TStrVec{}, key, features, startTime)); std::string person1("p1"); bool added = false; @@ -315,7 +315,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenNumericalActualCondition, CTestFixture) { features.push_back(model_t::E_IndividualMeanByPerson); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime)); std::string person1("p1"); bool addedPerson = false; @@ -431,7 +431,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenNumericalTypicalCondition, CTestFixture) { features.push_back(model_t::E_IndividualMeanByPerson); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime)); std::string person1("p1"); bool addedPerson = false; @@ -509,7 +509,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenNumericalDiffAbsCondition, CTestFixture) { features.push_back(model_t::E_IndividualMeanByPerson); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, 
TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime)); std::string person1("p1"); bool addedPerson = false; @@ -617,7 +617,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenNoActualValueAvailable, CTestFixture) { features.push_back(model_t::E_IndividualMeanByPerson); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime)); std::string person1("p1"); bool addedPerson = false; @@ -656,7 +656,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenDifferentSeriesAndIndividualModel, CTestFi std::string personFieldName("series"); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, - EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime)); std::string person1("p1"); bool addedPerson = false; @@ -706,7 +706,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenDifferentSeriesAndPopulationModel, CTestFi CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, attributeFieldName, EMPTY_STRING, - TStrVec{}, key, features, startTime, 0)); + TStrVec{}, key, features, startTime)); std::string person1("p1"); bool added = false; @@ -771,7 +771,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenMultipleConditions, CTestFixture) { std::string personFieldName("series"); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, - EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, 
TStrVec{}, key, features, startTime)); std::string person1("p1"); bool addedPerson = false; @@ -877,7 +877,7 @@ BOOST_FIXTURE_TEST_CASE(testApplyGivenTimeCondition, CTestFixture) { "", personFieldName, EMPTY_STRING, partitionFieldName); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, - EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime)); CMockModel model(params, gathererPtr, influenceCalculators); CRuleCondition conditionGte; @@ -919,7 +919,7 @@ BOOST_FIXTURE_TEST_CASE(testRuleActions, CTestFixture) { "", personFieldName, EMPTY_STRING, partitionFieldName); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, personFieldName, - EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime)); CMockModel model(params, gathererPtr, influenceCalculators); CRuleCondition conditionGte; diff --git a/lib/model/unittest/CEventRateDataGathererTest.cc b/lib/model/unittest/CEventRateDataGathererTest.cc index 43cd562c2f..4fec909e60 100644 --- a/lib/model/unittest/CEventRateDataGathererTest.cc +++ b/lib/model/unittest/CEventRateDataGathererTest.cc @@ -203,7 +203,7 @@ void testInfluencerPerFeature(model_t::EFeature feature, influencerFieldNames.push_back("IF1"); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, valueField, - influencerFieldNames, key, features, startTime, 0); + influencerFieldNames, key, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, resourceMonitor, "p", valueField, 1)); @@ -234,7 +234,7 @@ void testInfluencerPerFeature(model_t::EFeature feature, LOG_DEBUG(<< "Processing 
bucket [" << time << ", " << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); @@ -306,7 +306,7 @@ BOOST_FIXTURE_TEST_CASE(testLatencyPersist, CTestFixture) { features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, - "file", {}, key, features, startTime, 0); + "file", {}, key, features, startTime); TSizeVec fields; fields.push_back(2); fields.push_back(1); @@ -324,7 +324,7 @@ BOOST_FIXTURE_TEST_CASE(testLatencyPersist, CTestFixture) { features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, - "file", influencers, key, features, startTime, 0); + "file", influencers, key, features, startTime); TSizeVec fields; fields.push_back(2); fields.push_back(3); @@ -341,7 +341,7 @@ BOOST_FIXTURE_TEST_CASE(testLatencyPersist, CTestFixture) { features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); TSizeVec fields; fields.push_back(2); @@ -356,9 +356,9 @@ BOOST_FIXTURE_TEST_CASE(testLatencyPersist, CTestFixture) { TStrVec influencers; influencers.push_back("user"); features.push_back(model_t::E_IndividualNonZeroCountByBucketAndPerson); - CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, - EMPTY_STRING, "program", EMPTY_STRING, EMPTY_STRING, - influencers, key, features, startTime, 0); + CDataGatherer gatherer(model_t::E_EventRate, 
model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, "program", EMPTY_STRING, + EMPTY_STRING, influencers, key, features, startTime); TSizeVec fields; fields.push_back(2); fields.push_back(3); @@ -411,7 +411,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) { features.push_back(model_t::E_IndividualMinByPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p")); @@ -445,7 +445,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) { << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -470,7 +470,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) { features.push_back(model_t::E_IndividualTotalBucketCountByPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; @@ -481,7 +481,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) { << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(2, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson, @@ -510,7 +510,7 @@ 
BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) { features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; @@ -521,7 +521,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) { << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, @@ -581,7 +581,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p1")); BOOST_REQUIRE_EQUAL(1, addPerson(gatherer, m_ResourceMonitor, "p2")); @@ -594,7 +594,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -635,7 +635,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { BOOST_TEST_REQUIRE(!gatherer.personId("p2", pid)); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime + 4 
* bucketLength, bucketLength, featureData); + gatherer.featureData(startTime + 4 * bucketLength, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -649,7 +649,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p1")); BOOST_REQUIRE_EQUAL(1, addPerson(gatherer, m_ResourceMonitor, "p2")); BOOST_REQUIRE_EQUAL(2, addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -666,7 +666,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { TSizeUInt64PrVec personCounts; TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -693,7 +693,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { BOOST_TEST_REQUIRE(gatherer.personId("p5", pid)); BOOST_REQUIRE_EQUAL(4, pid); - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -719,7 +719,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { SModelParams params(bucketLength); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, 
{}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p1")); BOOST_REQUIRE_EQUAL(1, addPerson(gatherer, m_ResourceMonitor, "p2")); BOOST_REQUIRE_EQUAL(2, addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -746,7 +746,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, key, features, startTime, 0); + {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(expectedGatherer, m_ResourceMonitor, "p3")); BOOST_REQUIRE_EQUAL(1, addPerson(expectedGatherer, m_ResourceMonitor, "p4")); BOOST_REQUIRE_EQUAL(2, addPerson(expectedGatherer, m_ResourceMonitor, "p5")); @@ -776,7 +776,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, key, features, startTime, 0); + {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(expectedGatherer, m_ResourceMonitor, "p3")); BOOST_REQUIRE_EQUAL(1, addPerson(expectedGatherer, m_ResourceMonitor, "p6")); BOOST_REQUIRE_EQUAL(2, addPerson(expectedGatherer, m_ResourceMonitor, "p7")); @@ -803,7 +803,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { CDataGatherer expectedGatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, key, features, startTime, 0); + {}, key, features, startTime); LOG_DEBUG(<< "checksum = " << gatherer.checksum()); LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum()); @@ -862,7 +862,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderFinalResult, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, 
EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); addPerson(gatherer, m_ResourceMonitor, "p"); core_t::TTime time = startTime; @@ -873,7 +873,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderFinalResult, CTestFixture) { << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -899,7 +899,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderFinalResult, CTestFixture) { features.push_back(model_t::E_IndividualTotalBucketCountByPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p")); core_t::TTime time = startTime; @@ -910,7 +910,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderFinalResult, CTestFixture) { << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(2, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualNonZeroCountByBucketAndPerson, @@ -939,7 +939,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderFinalResult, CTestFixture) { features.push_back(model_t::E_IndividualIndicatorOfBucketPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, 
m_ResourceMonitor, "p")); core_t::TTime time = startTime; @@ -950,7 +950,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderFinalResult, CTestFixture) { << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualIndicatorOfBucketPerson, @@ -992,105 +992,105 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrderInterimResult, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); addPerson(gatherer, m_ResourceMonitor, "p"); TFeatureSizeFeatureDataPrVecPrVec featureData; // Bucket 1 only addArrival(gatherer, m_ResourceMonitor, data[0], "p"); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[1], "p"); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[2], "p"); - 
gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[3], "p"); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 1, 2 & 3 addArrival(gatherer, m_ResourceMonitor, data[4], "p"); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[5], 
"p"); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1800, bucketLength, featureData); + gatherer.featureData(1800, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(2400, bucketLength, featureData); + gatherer.featureData(2400, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[6], "p"); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1800, bucketLength, featureData); + gatherer.featureData(1800, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(2400, bucketLength, featureData); + gatherer.featureData(2400, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); // Bucket 3, 4 & 5 addArrival(gatherer, m_ResourceMonitor, data[7], "p"); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1800, bucketLength, featureData); + gatherer.featureData(1800, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(2400, bucketLength, featureData); + gatherer.featureData(2400, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); } @@ -1138,7 +1138,7 @@ 
BOOST_FIXTURE_TEST_CASE(testMultipleSeriesOutOfOrderFinalResult, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p1")); BOOST_REQUIRE_EQUAL(1, addPerson(gatherer, m_ResourceMonitor, "p2")); @@ -1151,7 +1151,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeriesOutOfOrderFinalResult, CTestFixture) { << time + bucketLength << ")"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -1191,7 +1191,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeriesOutOfOrderFinalResult, CTestFixture) { BOOST_TEST_REQUIRE(!gatherer.personId("p2", pid)); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime + 4 * bucketLength, bucketLength, featureData); + gatherer.featureData(startTime + 4 * bucketLength, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -1205,7 +1205,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeriesOutOfOrderFinalResult, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p1")); BOOST_REQUIRE_EQUAL(1, addPerson(gatherer, m_ResourceMonitor, "p2")); 
BOOST_REQUIRE_EQUAL(2, addPerson(gatherer, m_ResourceMonitor, "p3")); @@ -1222,7 +1222,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeriesOutOfOrderFinalResult, CTestFixture) { TSizeUInt64PrVec personCounts; TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -1248,7 +1248,7 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeriesOutOfOrderFinalResult, CTestFixture) { BOOST_TEST_REQUIRE(gatherer.personId("p5", pid)); BOOST_REQUIRE_EQUAL(4, pid); - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_IndividualCountByBucketAndPerson, @@ -1274,7 +1274,7 @@ BOOST_FIXTURE_TEST_CASE(testArrivalBeforeLatencyWindowIsIgnored, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); addPerson(gatherer, m_ResourceMonitor, "p"); addArrival(gatherer, m_ResourceMonitor, data[0], "p"); @@ -1282,18 +1282,18 @@ BOOST_FIXTURE_TEST_CASE(testArrivalBeforeLatencyWindowIsIgnored, CTestFixture) { TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(0, featureData.size()); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, 
featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 0)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1800, bucketLength, featureData); + gatherer.featureData(1800, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); } @@ -1317,7 +1317,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); addPerson(gatherer, m_ResourceMonitor, "p"); for (std::size_t i = 0; i < std::size(data); ++i) { @@ -1326,29 +1326,29 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 3)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.resetBucket(600); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 0)]"), 
core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1)]"), core::CContainerPrinter::print(featureData[0].second)); } @@ -1372,7 +1372,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); addPerson(gatherer, m_ResourceMonitor, "p1"); addPerson(gatherer, m_ResourceMonitor, "p2"); addPerson(gatherer, m_ResourceMonitor, "p3"); @@ -1385,29 +1385,29 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 3), (1, 3), (2, 3)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), core::CContainerPrinter::print(featureData[0].second)); gatherer.resetBucket(600); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 2), (1, 2), (2, 2)]"), core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 0), (1, 0), (2, 0)]"), 
core::CContainerPrinter::print(featureData[0].second)); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(std::string("[(0, 1), (1, 1), (2, 1)]"), core::CContainerPrinter::print(featureData[0].second)); } @@ -1423,7 +1423,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenBucketNotAvailable, CTestFixture) { features.push_back(model_t::E_IndividualCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); addPerson(gatherer, m_ResourceMonitor, "p"); addArrival(gatherer, m_ResourceMonitor, 1200, "p"); @@ -1779,7 +1779,7 @@ BOOST_FIXTURE_TEST_CASE(testDistinctStrings, CTestFixture) { features.push_back(model_t::E_IndividualUniqueCountByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, "P", EMPTY_STRING, - "V", {"INF"}, key, features, startTime, 0); + "V", {"INF"}, key, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); BOOST_REQUIRE_EQUAL(0, addPerson(gatherer, m_ResourceMonitor, "p", "v", 1)); @@ -1831,7 +1831,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { features.push_back(model_t::E_IndividualTimeOfDayByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, "person", EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); @@ -1851,7 +1851,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); 
BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 86400), @@ -1861,7 +1861,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 100, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 50), @@ -1872,7 +1872,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 86400), @@ -1882,7 +1882,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 200, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 100), @@ -1893,7 +1893,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 86400), @@ -1903,7 +1903,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 
300, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 150), @@ -1916,7 +1916,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 200, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 100), @@ -1927,7 +1927,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 400, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 200), @@ -1953,7 +1953,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { features.push_back(model_t::E_IndividualTimeOfWeekByBucketAndPerson); CDataGatherer gatherer(model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, "person", EMPTY_STRING, - EMPTY_STRING, {}, key, features, startTime, 0); + EMPTY_STRING, {}, key, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); @@ -1973,7 +1973,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, 
featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 604800), @@ -1983,7 +1983,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 100, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 50), @@ -1994,7 +1994,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 604800), @@ -2004,7 +2004,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 200, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 100), @@ -2015,7 +2015,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 604800), @@ -2025,7 +2025,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 300, person); 
TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 150), @@ -2038,7 +2038,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 200, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 100), @@ -2049,7 +2049,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 400, person); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 200), @@ -2075,7 +2075,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { features.push_back(model_t::E_PopulationTimeOfWeekByBucketPersonAndAttribute); CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "att", EMPTY_STRING, {}, key, features, startTime, 0); + "att", EMPTY_STRING, {}, key, features, startTime); BOOST_TEST_REQUIRE(gatherer.isPopulation()); @@ -2096,7 +2096,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); 
BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 604800), @@ -2106,7 +2106,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 100, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 50), @@ -2117,7 +2117,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 604800), @@ -2127,7 +2127,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 200, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 100), @@ -2138,7 +2138,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 604800), @@ -2148,7 +2148,7 @@ 
BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 300, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 150), @@ -2161,7 +2161,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 200, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 100), @@ -2172,7 +2172,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 400, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 604800) + 200), @@ -2199,7 +2199,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { features.push_back(model_t::E_PopulationTimeOfDayByBucketPersonAndAttribute); CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "att", EMPTY_STRING, {}, key, features, startTime, 0); + "att", EMPTY_STRING, {}, key, features, startTime); BOOST_TEST_REQUIRE(gatherer.isPopulation()); @@ -2220,7 +2220,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person, attribute); 
TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 86400), @@ -2230,7 +2230,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 100, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 50), @@ -2241,7 +2241,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 86400), @@ -2251,7 +2251,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 200, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 100), @@ -2262,7 +2262,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 0, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); 
BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t(time % 86400), @@ -2272,7 +2272,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 300, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 150), @@ -2285,7 +2285,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 200, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 100), @@ -2296,7 +2296,7 @@ BOOST_FIXTURE_TEST_CASE(testDiurnalFeatures, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, time + 400, person, attribute); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(time, bucketLength, featureData); + gatherer.featureData(time, featureData); BOOST_REQUIRE_EQUAL(1, featureData.size()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(std::uint64_t((time % 86400) + 200), diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index 0c13ed8b75..77ef0c2e0a 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -195,7 +195,7 @@ class CTestFixture : public CModelTestFixtureBase { const std::string& summaryCountField = EMPTY_STRING) { this->makeModelT(params, features, startTime, model_t::E_EventRateOnline, m_Gatherer, - m_Model, {}, 
summaryCountField); + m_Model, summaryCountField); for (std::size_t i = 0; i < numberPeople; ++i) { BOOST_REQUIRE_EQUAL( @@ -2095,7 +2095,7 @@ BOOST_FIXTURE_TEST_CASE(testExplicitNulls, CTestFixture) { CModelFactory::TModelPtr modelSkipGap_; this->makeModelT(params, features, startTime, model_t::E_EventRateOnline, gathererSkipGap, - modelSkipGap_, {}, summaryCountField); + modelSkipGap_, summaryCountField); auto* modelSkipGap = dynamic_cast(modelSkipGap_.get()); // The idea here is to compare a model that has a gap skipped against a model @@ -2125,7 +2125,7 @@ BOOST_FIXTURE_TEST_CASE(testExplicitNulls, CTestFixture) { CModelFactory::TModelPtr modelExNullGap_; this->makeModelT(params, features, startTime, model_t::E_EventRateOnline, gathererExNull, - modelExNullGap_, {}, summaryCountField); + modelExNullGap_, summaryCountField); auto* modelExNullGap = dynamic_cast(modelExNullGap_.get()); // p1: |1,"",null|1|1|null|null|1| @@ -2395,15 +2395,14 @@ BOOST_FIXTURE_TEST_CASE(testSummaryCountZeroRecordsAreIgnored, CTestFixture) { CModelFactory::TModelPtr modelWithZerosPtr; this->makeModelT( params, {model_t::E_IndividualCountByBucketAndPerson}, startTime, - model_t::E_EventRateOnline, gathererWithZeros, modelWithZerosPtr, {}, - summaryCountField); + model_t::E_EventRateOnline, gathererWithZeros, modelWithZerosPtr, summaryCountField); CEventRateModel& modelWithZeros = static_cast(*modelWithZerosPtr); CModelFactory::TDataGathererPtr gathererNoZeros; CModelFactory::TModelPtr modelNoZerosPtr; this->makeModelT( params, {model_t::E_IndividualCountByBucketAndPerson}, startTime, - model_t::E_EventRateOnline, gathererNoZeros, modelNoZerosPtr, {}, summaryCountField); + model_t::E_EventRateOnline, gathererNoZeros, modelNoZerosPtr, summaryCountField); CEventRateModel& modelNoZeros = static_cast(*modelNoZerosPtr); // The idea here is to compare a model that has records with summary count of zero diff --git a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc 
b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc index d08d5c1b11..7d610e6f66 100644 --- a/lib/model/unittest/CEventRatePopulationDataGathererTest.cc +++ b/lib/model/unittest/CEventRatePopulationDataGathererTest.cc @@ -239,7 +239,7 @@ BOOST_FIXTURE_TEST_CASE(testAttributeCounts, CTestFixture) { SModelParams params(bucketLength); CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, searchKey, features, startTime, 0); + EMPTY_STRING, {}, searchKey, features, startTime); BOOST_TEST_REQUIRE(dataGatherer.isPopulation()); BOOST_REQUIRE_EQUAL(startTime, dataGatherer.currentBucketStartTime()); @@ -284,15 +284,15 @@ BOOST_FIXTURE_TEST_CASE(testAttributeCounts, CTestFixture) { BOOST_TEST_REQUIRE(!dataGatherer.dataAvailable(time - 1)); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - dataGatherer.featureData(time, bucketLength, featureData); + dataGatherer.featureData(time, featureData); const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); BOOST_REQUIRE_EQUAL(expectedAttributePeople.size(), peoplePerAttribute.size()); TSizeSizePrFeatureDataPrVec expectedPeoplePerAttribute; for (std::size_t j = 0; j < peoplePerAttribute.size(); ++j) { - expectedPeoplePerAttribute.push_back(TSizeSizePrFeatureDataPr( - std::make_pair(size_t(0), j), expectedAttributePeople[j].size())); + expectedPeoplePerAttribute.push_back( + {{std::size_t(0), j}, expectedAttributePeople[j].size()}); } BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedPeoplePerAttribute), core::CContainerPrinter::print(peoplePerAttribute)); @@ -351,7 +351,7 @@ BOOST_FIXTURE_TEST_CASE(testAttributeIndicator, CTestFixture) { SModelParams params(bucketLength); CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, searchKey, features, startTime, 0); + 
EMPTY_STRING, {}, searchKey, features, startTime); core_t::TTime time = startTime; for (std::size_t i = 0; i < numberBuckets; ++i, time += bucketLength) { @@ -376,7 +376,7 @@ BOOST_FIXTURE_TEST_CASE(testAttributeIndicator, CTestFixture) { BOOST_TEST_REQUIRE(!dataGatherer.dataAvailable(time - 1)); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - dataGatherer.featureData(time, bucketLength, featureData); + dataGatherer.featureData(time, featureData); const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); @@ -410,7 +410,7 @@ BOOST_FIXTURE_TEST_CASE(testUniqueValueCounts, CTestFixture) { SModelParams params(bucketLength); CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "value", {}, searchKey, features, startTime, 0); + "value", {}, searchKey, features, startTime); core_t::TTime time = startTime; for (std::size_t i = 0; i < numberBuckets; ++i, time += bucketLength) { @@ -443,7 +443,7 @@ BOOST_FIXTURE_TEST_CASE(testUniqueValueCounts, CTestFixture) { BOOST_TEST_REQUIRE(!dataGatherer.dataAvailable(time - 1)); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - dataGatherer.featureData(time, bucketLength, featureData); + dataGatherer.featureData(time, featureData); const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); @@ -478,7 +478,7 @@ BOOST_FIXTURE_TEST_CASE(testCompressedLength, CTestFixture) { SModelParams params(bucketLength); CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - "value", {}, searchKey, features, startTime, 0); + "value", {}, searchKey, features, startTime); core_t::TTime time = startTime; for (std::size_t i = 0; i < numberBuckets; ++i, time += bucketLength) { @@ -503,7 +503,7 @@ BOOST_FIXTURE_TEST_CASE(testCompressedLength, CTestFixture) { 
BOOST_TEST_REQUIRE(!dataGatherer.dataAvailable(time - 1)); TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - dataGatherer.featureData(time, bucketLength, featureData); + dataGatherer.featureData(time, featureData); const TSizeSizePrFeatureDataPrVec& peoplePerAttribute = extractPeoplePerAttribute(featureData); @@ -567,7 +567,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { SModelParams params(bucketLength); CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, searchKey, features, startTime, 0); + EMPTY_STRING, {}, searchKey, features, startTime); core_t::TTime bucketStart = startTime; for (std::size_t i = 0; i < numberBuckets; ++i, bucketStart += bucketLength) { TMessageVec messages; @@ -617,7 +617,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { LOG_DEBUG(<< "Expected"); TStrFeatureDataPrVec expected; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart - bucketLength, bucketLength, featureData); + gatherer.featureData(bucketStart - bucketLength, featureData); for (std::size_t i = 0; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0; j < data.size(); ++j) { @@ -660,7 +660,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { LOG_DEBUG(<< "Actual"); TStrFeatureDataPrVec actual; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart - bucketLength, bucketLength, featureData); + gatherer.featureData(bucketStart - bucketLength, featureData); for (std::size_t i = 0; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0; j < data.size(); ++j) { @@ -689,7 +689,7 @@ BOOST_FIXTURE_TEST_CASE(testRemoveAttributes, CTestFixture) { SModelParams params(bucketLength); CDataGatherer gatherer(model_t::E_PopulationEventRate, model_t::E_None, 
params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, searchKey, features, startTime, 0); + EMPTY_STRING, {}, searchKey, features, startTime); TMessageVec messages; generateTestMessages(rng, startTime, bucketLength, messages); @@ -726,7 +726,7 @@ BOOST_FIXTURE_TEST_CASE(testRemoveAttributes, CTestFixture) { LOG_DEBUG(<< "Expected"); TStrFeatureDataPrVec expected; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart, bucketLength, featureData); + gatherer.featureData(bucketStart, featureData); for (std::size_t i = 0; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0; j < data.size(); ++j) { @@ -758,7 +758,7 @@ BOOST_FIXTURE_TEST_CASE(testRemoveAttributes, CTestFixture) { LOG_DEBUG(<< "Actual"); TStrFeatureDataPrVec actual; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart, bucketLength, featureData); + gatherer.featureData(bucketStart, featureData); for (std::size_t i = 0; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0; j < data.size(); ++j) { @@ -795,7 +795,7 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { CDataGatherer origDataGatherer(model_t::E_PopulationEventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, searchKey, features, startTime, 0); + {}, searchKey, features, startTime); TMessageVec messages; generateTestMessages(rng, startTime, bucketLength, messages); @@ -844,10 +844,10 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { CDataGatherer::TFeatureVec features; features.push_back(model_t::E_PopulationInfoContentByBucketPersonAndAttribute); SModelParams params(bucketLength); - CDataGatherer dataGatherer(model_t::E_PopulationEventRate, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, "value", {}, - 
searchKey, features, startTime, 0); + CDataGatherer dataGatherer(model_t::E_PopulationEventRate, + model_t::E_None, params, EMPTY_STRING, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + "value", {}, searchKey, features, startTime); core_t::TTime time = startTime; for (std::size_t i = 0; i < numberBuckets; ++i, time += bucketLength) { diff --git a/lib/model/unittest/CGathererToolsTest.cc b/lib/model/unittest/CGathererToolsTest.cc deleted file mode 100644 index 70a1195b30..0000000000 --- a/lib/model/unittest/CGathererToolsTest.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. 
- */ - -#include -#include - -#include - -BOOST_AUTO_TEST_SUITE(CGathererToolsTest) - -using namespace ml; -using namespace model; - -namespace { -const CGathererTools::CSumGatherer::TStrVec EMPTY_STR_VEC; -const CGathererTools::CSumGatherer::TOptionalStrVec EMPTY_STR_PTR_VEC; -} - -BOOST_AUTO_TEST_CASE(testSumGathererIsRedundant) { - using TDouble1Vec = CGathererTools::CSumGatherer::TDouble1Vec; - - core_t::TTime bucketLength(100); - SModelParams modelParams(bucketLength); - modelParams.s_LatencyBuckets = 3; - CGathererTools::CSumGatherer sumGatherer( - modelParams, 0, 100, bucketLength, EMPTY_STR_VEC.begin(), EMPTY_STR_VEC.end()); - - sumGatherer.add(100, TDouble1Vec{1.0}, 1, 0, EMPTY_STR_PTR_VEC); - sumGatherer.startNewBucket(100); - sumGatherer.add(200, TDouble1Vec{1.0}, 1, 0, EMPTY_STR_PTR_VEC); - sumGatherer.startNewBucket(200); - sumGatherer.add(300, TDouble1Vec{1.0}, 1, 0, EMPTY_STR_PTR_VEC); - sumGatherer.startNewBucket(300); - sumGatherer.add(400, TDouble1Vec{1.0}, 1, 0, EMPTY_STR_PTR_VEC); - sumGatherer.startNewBucket(400); - - BOOST_TEST_REQUIRE(sumGatherer.isRedundant(400) == false); - - sumGatherer.startNewBucket(500); - BOOST_TEST_REQUIRE(sumGatherer.isRedundant(500) == false); - sumGatherer.startNewBucket(600); - BOOST_TEST_REQUIRE(sumGatherer.isRedundant(600) == false); - sumGatherer.startNewBucket(700); - BOOST_TEST_REQUIRE(sumGatherer.isRedundant(700)); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/CHierarchicalResultsTest.cc b/lib/model/unittest/CHierarchicalResultsTest.cc index 91b4c57f9e..6b8bcf471e 100644 --- a/lib/model/unittest/CHierarchicalResultsTest.cc +++ b/lib/model/unittest/CHierarchicalResultsTest.cc @@ -1470,7 +1470,7 @@ BOOST_AUTO_TEST_CASE(testWriter) { model_t::E_EventRate, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, model_t::TFeatureVec{model_t::E_IndividualCountByBucketAndPerson}, - modelConfig.bucketLength(), 0)); + 
modelConfig.bucketLength())); model::CEventData dummy; dataGatherer->addArrival(TStrCPtrVec(1, &EMPTY_STRING), dummy, resourceMonitor); dummy.clear(); diff --git a/lib/model/unittest/CMakeLists.txt b/lib/model/unittest/CMakeLists.txt index 47a28b792a..939c150563 100644 --- a/lib/model/unittest/CMakeLists.txt +++ b/lib/model/unittest/CMakeLists.txt @@ -29,7 +29,6 @@ set (SRCS CEventRatePopulationModelTest.cc CForecastModelPersistTest.cc CFunctionTypesTest.cc - CGathererToolsTest.cc CHierarchicalResultsTest.cc CHierarchicalResultsLevelSetTest.cc CInterimBucketCorrectorTest.cc @@ -41,6 +40,7 @@ set (SRCS CMetricModelTest.cc CMetricPopulationDataGathererTest.cc CMetricPopulationModelTest.cc + CMetricStatGathererTest.cc CModelDetailsViewTest.cc CModelMemoryTest.cc CModelTestFixtureBase.cc @@ -50,7 +50,6 @@ set (SRCS CResourceLimitTest.cc CResourceMonitorTest.cc CRuleConditionTest.cc - CSampleQueueTest.cc CSearchKeyTest.cc CTokenListCategoryTest.cc CTokenListDataCategorizerBaseTest.cc diff --git a/lib/model/unittest/CMetricAnomalyDetectorTest.cc b/lib/model/unittest/CMetricAnomalyDetectorTest.cc index 7e46dd0fd7..306295f63e 100644 --- a/lib/model/unittest/CMetricAnomalyDetectorTest.cc +++ b/lib/model/unittest/CMetricAnomalyDetectorTest.cc @@ -44,6 +44,7 @@ using namespace ml; namespace { +using TTimeVec = std::vector; using TTimeTimePr = std::pair; using TTimeTimePrVec = std::vector; using TDoubleVec = std::vector; @@ -85,14 +86,14 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor { return; } - if (!this->shouldWriteResult(m_Limits, results, node, pivot)) { + if (!shouldWriteResult(m_Limits, results, node, pivot)) { return; } - if (this->isSimpleCount(node)) { + if (isSimpleCount(node)) { return; } - if (!this->isLeaf(node)) { + if (!isLeaf(node)) { return; } @@ -143,7 +144,7 @@ class CResultWriter : public ml::model::CHierarchicalResultsVisitor { TDoubleVec m_AnomalyRates; }; -const double CResultWriter::HIGH_ANOMALY_SCORE(0.35); +const double 
CResultWriter::HIGH_ANOMALY_SCORE(1.3); void importData(core_t::TTime firstTime, core_t::TTime lastTime, @@ -222,12 +223,9 @@ const std::string EMPTY_STRING; } BOOST_AUTO_TEST_CASE(testAnomalies) { - // The test data has one genuine anomaly in the interval - // [1360617335, 1360617481]. The rest of the samples are - // Gaussian with mean 30 and standard deviation 5. The - // arrival rate it Poisson distributed with constant mean - // in each of the 24 hour periods. However, the rate varies - // from hour to hour. In particular, the mean rates are: + // The test data has one genuine anomaly in the interval [1360617335, 1360617481]. + // The rest of the samples are Gaussian with mean 30 and standard deviation 5. + // The arrival rate is Poisson with rate varying periodically as follows: // // Interval | Mean // ------------------+-------- @@ -258,13 +256,9 @@ BOOST_AUTO_TEST_CASE(testAnomalies) { static const core_t::TTime FIRST_TIME(1360540800); static const core_t::TTime LAST_TIME(FIRST_TIME + 86400); - static const core_t::TTime BUCKET_LENGTHS[] = {120, 150, 180, 210, 240, - 300, 450, 600, 900, 1200}; - static const TTimeTimePr ANOMALOUS_INTERVALS[] = { - TTimeTimePr(1360576852, 1360578629), TTimeTimePr(1360617335, 1360617481)}; - - double highRateNoise = 0.0; - double lowRateNoise = 0.0; + static const TTimeVec BUCKET_LENGTHS{120, 180, 240, 300}; + static const TTimeTimePrVec ANOMALOUS_INTERVALS{{1360576852, 1360578629}, + {1360617335, 1360617481}}; for (auto bucketLength : BUCKET_LENGTHS) { model::CAnomalyDetectorModelConfig modelConfig = @@ -305,7 +299,7 @@ BOOST_AUTO_TEST_CASE(testAnomalies) { double noise = std::accumulate(anomalyFactors.begin(), anomalyFactors.end(), 0.0); LOG_DEBUG(<< "S/N = " << (signal / noise)); - BOOST_TEST_REQUIRE(signal / noise > 33.0); + BOOST_TEST_REQUIRE(signal / noise > 25.0); } // Find the high/low rate partition point. 
@@ -318,24 +312,7 @@ BOOST_AUTO_TEST_CASE(testAnomalies) { maxStep = j; } } - double partitionRate = 0.0; - if (maxStep < orderedAnomalyRates.size()) { - partitionRate = 0.5 * (orderedAnomalyRates[maxStep] + - orderedAnomalyRates[maxStep - 1]); - } - LOG_DEBUG(<< "partition rate = " << partitionRate); - - // Compute the ratio of noise in the two rate channels. - for (std::size_t j = 0; j < anomalyFactors.size(); ++j) { - (anomalyRates[j] > partitionRate ? highRateNoise : lowRateNoise) += - anomalyFactors[j]; - } } - - LOG_DEBUG(<< "high rate noise = " << highRateNoise << ", low rate noise = " << lowRateNoise); - - // We don't have significantly more noise in the low rate channel. - BOOST_TEST_REQUIRE(lowRateNoise / highRateNoise < 1.5); } BOOST_AUTO_TEST_CASE(testPersist) { diff --git a/lib/model/unittest/CMetricDataGathererTest.cc b/lib/model/unittest/CMetricDataGathererTest.cc index 2a6609ec88..505a6e58df 100644 --- a/lib/model/unittest/CMetricDataGathererTest.cc +++ b/lib/model/unittest/CMetricDataGathererTest.cc @@ -17,9 +17,11 @@ #include #include +#include + #include #include -#include +#include #include #include #include @@ -31,20 +33,26 @@ #include #include +#include + +namespace { BOOST_AUTO_TEST_SUITE(CMetricDataGathererTest) using namespace ml; using namespace model; -namespace { using TDoubleVec = std::vector; +using TDoubleVecVec = std::vector; using TSizeVec = std::vector; +using TTimeVec = std::vector; +using TTimeVecVec = std::vector; using TSizeSizePr = std::pair; using TFeatureVec = std::vector; using TSizeUInt64Pr = std::pair; using TSizeUInt64PrVec = std::vector; using TStrVec = std::vector; +using TStrVecVec = std::vector; using TSizeFeatureDataPr = std::pair; using TSizeFeatureDataPrVec = std::vector; using TFeatureSizeFeatureDataPrVecPr = std::pair; @@ -201,7 +209,6 @@ void testPersistence(const SModelParams& params, const CDataGatherer& origGather } BOOST_REQUIRE_EQUAL(origXml, newXml); } -} class CTestFixture { protected: @@ -209,347 
+216,214 @@ class CTestFixture { }; BOOST_FIXTURE_TEST_CASE(testSingleSeries, CTestFixture) { - // Test that the various statistics come back as we suspect. - - const core_t::TTime startTime = 0; - const core_t::TTime bucketLength = 600; - - TTimeDoublePr bucket1[] = { - TTimeDoublePr(1, 1.0), TTimeDoublePr(15, 2.1), - TTimeDoublePr(180, 0.9), TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), TTimeDoublePr(550, 2.0)}; - TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), - TTimeDoublePr(1199, 1.8)}; - TTimeDoublePr bucket3[] = {TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5)}; - TTimeDoublePr bucket4[] = {TTimeDoublePr(1900, 3.5)}; - TTimeDoublePr bucket5[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), - TTimeDoublePr(2490, 3.8)}; - { - TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - features.push_back(model_t::E_IndividualSumByBucketAndPerson); - features.push_back(model_t::E_IndividualCountByBucketAndPerson); - SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 2u); - BOOST_TEST_REQUIRE(!gatherer.isPopulation()); - BOOST_REQUIRE_EQUAL(0, addPerson("p", gatherer, m_ResourceMonitor)); - - BOOST_REQUIRE_EQUAL(4, gatherer.numberFeatures()); - for (std::size_t i = 0; i < 4; ++i) { - BOOST_REQUIRE_EQUAL(features[i], gatherer.feature(i)); - } - - BOOST_REQUIRE_EQUAL(1, gatherer.numberActivePeople()); - BOOST_REQUIRE_EQUAL(1, gatherer.numberByFieldValues()); - BOOST_REQUIRE_EQUAL(std::string("p"), gatherer.personName(0)); - BOOST_REQUIRE_EQUAL(std::string("-"), gatherer.personName(1)); - std::size_t pid; - BOOST_TEST_REQUIRE(gatherer.personId("p", pid)); - BOOST_REQUIRE_EQUAL(0, pid); - BOOST_TEST_REQUIRE(!gatherer.personId("a.n.other 
p", pid)); + // Test mean, min, max and sum gathering. + + const core_t::TTime startTime{0}; + const core_t::TTime bucketLength{600}; + TTimeDoublePrVec bucket1{{1, 1.0}, {15, 2.1}, {180, 0.9}, + {190, 1.5}, {400, 1.5}, {550, 2.0}}; + TTimeDoublePrVec bucket2{{600, 2.0}, {799, 2.2}, {1199, 1.8}}; + + TFeatureVec features{model_t::E_IndividualMeanByPerson, + model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson, + model_t::E_IndividualSumByBucketAndPerson, + model_t::E_IndividualCountByBucketAndPerson}; // Should be ignored. + SModelParams params(bucketLength); + CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, KEY, features, startTime); + BOOST_TEST_REQUIRE(!gatherer.isPopulation()); + BOOST_REQUIRE_EQUAL(0, addPerson("p", gatherer, m_ResourceMonitor)); - { - addArrival(gatherer, m_ResourceMonitor, bucket1[0].first, "p", - bucket1[0].second); - TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); - LOG_DEBUG(<< "featureData = " << featureData); - BOOST_REQUIRE_EQUAL( - 1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL( - 1.0, featureData[1].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL( - 1.0, featureData[2].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL( - 1.0, featureData[3].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL(true, featureData[0].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL(true, featureData[1].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL(true, featureData[2].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL(true, featureData[3].second[0].second.s_IsInteger); - } + BOOST_REQUIRE_EQUAL(4, gatherer.numberFeatures()); + for (std::size_t i = 0; i < 4; ++i) { + BOOST_REQUIRE_EQUAL(features[i], gatherer.feature(i)); + } - for (std::size_t i = 1; i < std::size(bucket1); ++i) { - 
addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", - bucket1[i].second); - } - { - TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.sampleNow(startTime); - gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), - bucketLength, featureData); - LOG_DEBUG(<< "featureData = " << featureData); - BOOST_TEST_REQUIRE(!featureData.empty()); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 1.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-14); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 0.9, featureData[1].second[0].second.s_BucketValue->value()[0], 1e-14); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 2.1, featureData[2].second[0].second.s_BucketValue->value()[0], 1e-14); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 9.0, featureData[3].second[0].second.s_BucketValue->value()[0], 1e-14); - BOOST_REQUIRE_EQUAL(false, featureData[0].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL(false, featureData[1].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL(false, featureData[2].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL(true, featureData[3].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL( - std::string("[(8 [1.55] 1 2), (185 [1.2] 1 2), (475 [1.75] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(8 [1] 1 2), (185 [0.9] 1 2), (475 [1.5] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(8 [2.1] 1 2), (185 [1.5] 1 2), (475 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[(0 [9] 1 6)]"), - core::CContainerPrinter::print( - featureData[3].second[0].second.s_Samples)); - testPersistence(params, gatherer); - } + BOOST_REQUIRE_EQUAL(1, gatherer.numberActivePeople()); + BOOST_REQUIRE_EQUAL(1, gatherer.numberByFieldValues()); + BOOST_REQUIRE_EQUAL(std::string("p"), gatherer.personName(0)); + BOOST_REQUIRE_EQUAL(std::string("-"), 
gatherer.personName(1)); + std::size_t pid; + BOOST_TEST_REQUIRE(gatherer.personId("p", pid)); + BOOST_REQUIRE_EQUAL(0, pid); + BOOST_TEST_REQUIRE(!gatherer.personId("a.n.other p", pid)); - gatherer.timeNow(startTime + bucketLength); - for (const auto& value : bucket2) { - addArrival(gatherer, m_ResourceMonitor, value.first, "p", value.second); - } - { - TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.sampleNow(startTime + bucketLength); - gatherer.featureData(startTime + bucketLength, bucketLength, featureData); - BOOST_TEST_REQUIRE(!featureData.empty()); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 2.0, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-14); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 1.8, featureData[1].second[0].second.s_BucketValue->value()[0], 1e-14); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 2.2, featureData[2].second[0].second.s_BucketValue->value()[0], 1e-14); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 6.0, featureData[3].second[0].second.s_BucketValue->value()[0], 1e-14); - BOOST_REQUIRE_EQUAL(true, featureData[3].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL(std::string("[(700 [2.1] 1 2)]"), - core::CContainerPrinter::print( - featureData[0].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[(700 [2] 1 2)]"), - core::CContainerPrinter::print( - featureData[1].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[(700 [2.2] 1 2)]"), - core::CContainerPrinter::print( - featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[(600 [6] 1 3)]"), - core::CContainerPrinter::print( - featureData[3].second[0].second.s_Samples)); - testPersistence(params, gatherer); - } + { + addArrival(gatherer, m_ResourceMonitor, bucket1[0].first, "p", + bucket1[0].second); + TFeatureSizeFeatureDataPrVecPrVec featureData; + gatherer.featureData(startTime, featureData); + LOG_DEBUG(<< "featureData = " << featureData); + BOOST_REQUIRE_EQUAL(1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); + 
BOOST_REQUIRE_EQUAL(1.0, featureData[1].second[0].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(1.0, featureData[2].second[0].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(1.0, featureData[3].second[0].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(true, featureData[0].second[0].second.s_IsInteger); + BOOST_REQUIRE_EQUAL(true, featureData[1].second[0].second.s_IsInteger); + BOOST_REQUIRE_EQUAL(true, featureData[2].second[0].second.s_IsInteger); + BOOST_REQUIRE_EQUAL(true, featureData[3].second[0].second.s_IsInteger); } - // Test capture of sample measurement count. + for (std::size_t i = 1; i < bucket1.size(); ++i) { + addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", + bucket1[i].second); + } { - TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - features.push_back(model_t::E_IndividualSumByBucketAndPerson); - SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 0); - BOOST_REQUIRE_EQUAL(0, addPerson("p", gatherer, m_ResourceMonitor)); - - TTimeDoublePrVecVec buckets; - buckets.push_back(TTimeDoublePrVec(std::begin(bucket1), std::end(bucket1))); - buckets.push_back(TTimeDoublePrVec(std::begin(bucket2), std::end(bucket2))); - buckets.push_back(TTimeDoublePrVec(std::begin(bucket3), std::end(bucket3))); - buckets.push_back(TTimeDoublePrVec(std::begin(bucket4), std::end(bucket4))); - buckets.push_back(TTimeDoublePrVec(std::begin(bucket5), std::end(bucket5))); - - for (std::size_t i = 0; i < buckets.size(); ++i) { - LOG_DEBUG(<< "Processing bucket " << i); - gatherer.timeNow(startTime + i * bucketLength); - const TTimeDoublePrVec& bucket = buckets[i]; - for (std::size_t j = 0; j < bucket.size(); ++j) { - addArrival(gatherer, 
m_ResourceMonitor, bucket[j].first, "p", - bucket[j].second); - } - } - - BOOST_REQUIRE_EQUAL(4.0, gatherer.effectiveSampleCount(0)); TFeatureSizeFeatureDataPrVecPrVec featureData; - core_t::TTime featureBucketStart = core_t::TTime(startTime + 4 * bucketLength); - gatherer.sampleNow(featureBucketStart); - gatherer.featureData(featureBucketStart, bucketLength, featureData); + gatherer.sampleNow(startTime); + gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), featureData); + LOG_DEBUG(<< "featureData = " << featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_CLOSE_ABSOLUTE( - 3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + 1.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-14); BOOST_REQUIRE_CLOSE_ABSOLUTE( - 3.2, featureData[1].second[0].second.s_BucketValue->value()[0], 1e-14); + 0.9, featureData[1].second[0].second.s_BucketValue->value()[0], 1e-14); BOOST_REQUIRE_CLOSE_ABSOLUTE( - 3.8, featureData[2].second[0].second.s_BucketValue->value()[0], 1e-14); + 2.1, featureData[2].second[0].second.s_BucketValue->value()[0], 1e-14); BOOST_REQUIRE_CLOSE_ABSOLUTE( - 10.5, featureData[3].second[0].second.s_BucketValue->value()[0], 1e-14); + 9.0, featureData[3].second[0].second.s_BucketValue->value()[0], 1e-14); BOOST_REQUIRE_EQUAL(false, featureData[0].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[1].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[2].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[3].second[0].second.s_IsInteger); + BOOST_REQUIRE_EQUAL( + "[(223 [1.5] 1 6)]", + core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL( + "[(180 [0.9] 1 1)]", + core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL( + "[(15 [2.1] 1 1)]", + core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL( + "[(300 [9] 1 
6)]", + core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + testPersistence(params, gatherer); + } + gatherer.timeNow(startTime + bucketLength); + for (const auto& value : bucket2) { + addArrival(gatherer, m_ResourceMonitor, value.first, "p", value.second); + } + { + TFeatureSizeFeatureDataPrVecPrVec featureData; + gatherer.sampleNow(startTime + bucketLength); + gatherer.featureData(startTime + bucketLength, featureData); + BOOST_TEST_REQUIRE(!featureData.empty()); + BOOST_REQUIRE_CLOSE_ABSOLUTE( + 2.0, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-14); + BOOST_REQUIRE_CLOSE_ABSOLUTE( + 1.8, featureData[1].second[0].second.s_BucketValue->value()[0], 1e-14); + BOOST_REQUIRE_CLOSE_ABSOLUTE( + 2.2, featureData[2].second[0].second.s_BucketValue->value()[0], 1e-14); + BOOST_REQUIRE_CLOSE_ABSOLUTE( + 6.0, featureData[3].second[0].second.s_BucketValue->value()[0], 1e-14); BOOST_REQUIRE_EQUAL( - std::string("[(2323 [3.5] 1 4)]"), + "[(866 [2] 1 3)]", core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2323 [3.2] 1 4)]"), + "[(1199 [1.8] 1 1)]", core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2323 [3.8] 1 4)]"), + "[(799 [2.2] 1 1)]", core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2400 [10.5] 1 3)]"), + "[(900 [6] 1 3)]", core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + testPersistence(params, gatherer); } } BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { - // Test that the various statistics come back as we suspect - // for multiple people. - - const core_t::TTime startTime = 0; - const core_t::TTime bucketLength = 600; + // Test mean, min, max and sum gathering for multiple time series. 
- TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - features.push_back(model_t::E_IndividualSumByBucketAndPerson); + const core_t::TTime startTime{0}; + const core_t::TTime bucketLength{600}; + TFeatureVec features{model_t::E_IndividualMeanByPerson, + model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson, + model_t::E_IndividualSumByBucketAndPerson}; SModelParams params(bucketLength); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 0); + EMPTY_STRING, {}, KEY, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson("p1", gatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(1, addPerson("p2", gatherer, m_ResourceMonitor)); - TTimeDoublePr bucket11[] = { - TTimeDoublePr(1, 1.0), TTimeDoublePr(15, 2.1), - TTimeDoublePr(180, 0.9), TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), TTimeDoublePr(550, 2.0)}; - TTimeDoublePr bucket12[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(799, 2.2), - TTimeDoublePr(1199, 1.8)}; - TTimeDoublePr bucket13[] = {TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5)}; - TTimeDoublePr bucket14[] = {TTimeDoublePr(1900, 3.5)}; - TTimeDoublePr bucket15[] = {TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 3.2), - TTimeDoublePr(2490, 3.8)}; - TTimeDoublePrVecVec buckets1; - buckets1.push_back(TTimeDoublePrVec(std::begin(bucket11), std::end(bucket11))); - buckets1.push_back(TTimeDoublePrVec(std::begin(bucket12), std::end(bucket12))); - buckets1.push_back(TTimeDoublePrVec(std::begin(bucket13), std::end(bucket13))); - buckets1.push_back(TTimeDoublePrVec(std::begin(bucket14), std::end(bucket14))); - buckets1.push_back(TTimeDoublePrVec(std::begin(bucket15), std::end(bucket15))); - - TTimeDoublePr bucket21[] = { - TTimeDoublePr(1, 1.0), TTimeDoublePr(5, 1.0), - TTimeDoublePr(15, 2.1), 
TTimeDoublePr(25, 2.0), - TTimeDoublePr(180, 0.9), TTimeDoublePr(190, 1.5), - TTimeDoublePr(400, 1.5), TTimeDoublePr(550, 2.0)}; - TTimeDoublePr bucket22[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(605, 2.0), - TTimeDoublePr(609, 2.0), TTimeDoublePr(799, 2.2), - TTimeDoublePr(1199, 1.8)}; - TTimeDoublePr bucket23[] = { - TTimeDoublePr(1200, 2.1), TTimeDoublePr(1250, 2.5), - TTimeDoublePr(1255, 2.2), TTimeDoublePr(1256, 2.4), - TTimeDoublePr(1300, 2.2), TTimeDoublePr(1400, 2.5)}; - TTimeDoublePr bucket24[] = {TTimeDoublePr(1900, 3.5), TTimeDoublePr(1950, 3.5)}; - TTimeDoublePr bucket25[] = { - TTimeDoublePr(2420, 3.5), TTimeDoublePr(2480, 2.9), - TTimeDoublePr(2490, 3.9), TTimeDoublePr(2500, 3.4), - TTimeDoublePr(2550, 4.1), TTimeDoublePr(2600, 3.8)}; - TTimeDoublePrVecVec buckets2; - buckets2.push_back(TTimeDoublePrVec(std::begin(bucket21), std::end(bucket21))); - buckets2.push_back(TTimeDoublePrVec(std::begin(bucket22), std::end(bucket22))); - buckets2.push_back(TTimeDoublePrVec(std::begin(bucket23), std::end(bucket23))); - buckets2.push_back(TTimeDoublePrVec(std::begin(bucket24), std::end(bucket24))); - buckets2.push_back(TTimeDoublePrVec(std::begin(bucket25), std::end(bucket25))); - - for (std::size_t i = 0; i < 5; ++i) { + TTimeDoublePrVecVec buckets1{ + {{1, 1.0}, {15, 2.1}, {180, 0.9}, {190, 1.5}, {400, 1.5}, {550, 2.0}}, + {{600, 2.0}, {799, 2.2}, {1199, 1.8}}}; + + TTimeDoublePrVecVec buckets2{ + {{1, 1.0}, {5, 1.0}, {15, 2.1}, {25, 2.0}, {180, 0.9}, {190, 1.5}, {400, 1.5}, {550, 2.0}}, + {{600, 2.2}, {605, 2.2}, {609, 2.2}, {799, 2.4}, {1199, 2.0}}}; + + for (std::size_t i = 0; i < 2; ++i) { LOG_DEBUG(<< "Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); - - const TTimeDoublePrVec& bucket1 = buckets1[i]; - for (std::size_t j = 0; j < bucket1.size(); ++j) { - addArrival(gatherer, m_ResourceMonitor, bucket1[j].first, "p1", - bucket1[j].second); + for (auto[time, value] : buckets1[i]) { + addArrival(gatherer, m_ResourceMonitor, time, 
"p1", value); } - - const TTimeDoublePrVec& bucket2 = buckets2[i]; - TMeanAccumulator a; - for (std::size_t j = 0; j < bucket2.size(); ++j) { - addArrival(gatherer, m_ResourceMonitor, bucket2[j].first, "p2", - bucket2[j].second); - a.add(bucket2[j].second); + for (auto[time, value] : buckets2[i]) { + addArrival(gatherer, m_ResourceMonitor, time, "p2", value); } } - BOOST_REQUIRE_CLOSE_ABSOLUTE(4.0, gatherer.effectiveSampleCount(0), 1e-10); - BOOST_REQUIRE_CLOSE_ABSOLUTE(6.0, gatherer.effectiveSampleCount(1), 1e-10); - TSizeUInt64PrVec nonZeroCounts; - gatherer.personNonZeroCounts(startTime + 4 * bucketLength, nonZeroCounts); - BOOST_REQUIRE_EQUAL(std::string("[(0, 3), (1, 6)]"), - core::CContainerPrinter::print(nonZeroCounts)); + gatherer.personNonZeroCounts(startTime + bucketLength, nonZeroCounts); + BOOST_REQUIRE_EQUAL("[(0, 3), (1, 5)]", core::CContainerPrinter::print(nonZeroCounts)); TFeatureSizeFeatureDataPrVecPrVec featureData; - core_t::TTime featureBucketStart = core_t::TTime(startTime + 4 * bucketLength); - gatherer.sampleNow(featureBucketStart); - gatherer.featureData(featureBucketStart, bucketLength, featureData); + gatherer.sampleNow(startTime + bucketLength); + gatherer.featureData(startTime + bucketLength, featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_EQUAL(2, featureData[0].second.size()); BOOST_REQUIRE_EQUAL(2, featureData[1].second.size()); BOOST_REQUIRE_EQUAL(2, featureData[2].second.size()); BOOST_REQUIRE_EQUAL(2, featureData[3].second.size()); - BOOST_REQUIRE_CLOSE_ABSOLUTE( - 3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - BOOST_REQUIRE_EQUAL(3.2, featureData[1].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL(3.8, featureData[2].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL(10.5, featureData[3].second[0].second.s_BucketValue->value()[0]); + 2.0, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + BOOST_REQUIRE_EQUAL(1.8, 
featureData[1].second[0].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(2.2, featureData[2].second[0].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(6.0, featureData[3].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(false, featureData[0].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[1].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[2].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[3].second[0].second.s_IsInteger); + BOOST_REQUIRE_EQUAL("[(866 [2] 1 3)]", core::CContainerPrinter::print( + featureData[0].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2323 [3.5] 1 4)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(2323 [3.2] 1 4)]"), + "[(1199 [1.8] 1 1)]", core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2323 [3.8] 1 4)]"), + "[(799 [2.2] 1 1)]", core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(2400 [10.5] 1 3)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(900 [6] 1 3)]", core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); BOOST_REQUIRE_CLOSE_ABSOLUTE( - 3.6, featureData[0].second[1].second.s_BucketValue->value()[0], 1e-10); - BOOST_REQUIRE_EQUAL(2.9, featureData[1].second[1].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL(4.1, featureData[2].second[1].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL(21.6, featureData[3].second[1].second.s_BucketValue->value()[0]); + 2.2, featureData[0].second[1].second.s_BucketValue->value()[0], 1e-10); + BOOST_REQUIRE_EQUAL(2.0, featureData[1].second[1].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(2.4, featureData[2].second[1].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(11.0, 
featureData[3].second[1].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(false, featureData[0].second[1].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[1].second[1].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[2].second[1].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[3].second[1].second.s_IsInteger); BOOST_REQUIRE_EQUAL( - std::string("[(2290 [3.45] 1 6)]"), + "[(762 [2.2] 1 5)]", core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2290 [2.9] 1 6)]"), + "[(1199 [2] 1 1)]", core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2290 [3.9] 1 6)]"), + "[(799 [2.4] 1 1)]", core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2400 [21.6] 1 6)]"), + "[(900 [11] 1 5)]", core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); testPersistence(params, gatherer); @@ -571,12 +445,9 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(0, gatherer.numberOverFieldValues()); gatherer.personNonZeroCounts(startTime + 4 * bucketLength, nonZeroCounts); - BOOST_REQUIRE_EQUAL(std::string("[(1, 6)]"), - core::CContainerPrinter::print(nonZeroCounts)); + BOOST_REQUIRE_EQUAL("[(1, 5)]", core::CContainerPrinter::print(nonZeroCounts)); - BOOST_REQUIRE_CLOSE_ABSOLUTE(6.0, gatherer.effectiveSampleCount(1), 1e-10); - - gatherer.featureData(core_t::TTime(startTime + 4 * bucketLength), bucketLength, featureData); + gatherer.featureData(startTime + bucketLength, featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_EQUAL(1, featureData[0].second.size()); @@ -584,113 +455,42 @@ BOOST_FIXTURE_TEST_CASE(testMultipleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(1, featureData[2].second.size()); BOOST_REQUIRE_CLOSE_ABSOLUTE( - 3.6, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); - 
BOOST_REQUIRE_EQUAL(2.9, featureData[1].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL(4.1, featureData[2].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL(21.6, featureData[3].second[0].second.s_BucketValue->value()[0]); + 2.2, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); + BOOST_REQUIRE_EQUAL(2.0, featureData[1].second[0].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(2.4, featureData[2].second[0].second.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(11.0, featureData[3].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(false, featureData[0].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[1].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[2].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[3].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL( - std::string("[(2290 [3.45] 1 6)]"), + "[(762 [2.2] 1 5)]", core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2290 [2.9] 1 6)]"), + "[(1199 [2] 1 1)]", core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2290 [3.9] 1 6)]"), + "[(799 [2.4] 1 1)]", core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(2400 [21.6] 1 6)]"), + "[(900 [11] 1 5)]", core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); } -BOOST_FIXTURE_TEST_CASE(testSampleCount, CTestFixture) { - // Test that we set sensible sample counts for each person. - - // Person 1 has constant update rate of 4 values per bucket. - // Person 2 has variable rate with mean of 2 values per bucket. 
- - const core_t::TTime startTime = 0; - const core_t::TTime bucketLength = 600; - const std::size_t numberBuckets = 3; - - TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - SModelParams params(bucketLength); - CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 0); - - std::size_t pid1 = addPerson("p1", gatherer, m_ResourceMonitor); - std::size_t pid2 = addPerson("p2", gatherer, m_ResourceMonitor); - - test::CRandomNumbers rng; - - for (std::size_t i = 0; i < numberBuckets; ++i) { - LOG_DEBUG(<< "Processing bucket " << i); - gatherer.timeNow(startTime + i * bucketLength); - - { - LOG_DEBUG(<< "count p1 = 6"); - addArrival(gatherer, m_ResourceMonitor, - startTime + i * bucketLength + 20, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, - startTime + i * bucketLength + 40, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, - startTime + i * bucketLength + 60, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, - startTime + i * bucketLength + 80, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, - startTime + i * bucketLength + 100, "p1", 1.0); - addArrival(gatherer, m_ResourceMonitor, - startTime + i * bucketLength + 120, "p1", 1.0); - } - { - TDoubleVec count; - rng.generateUniformSamples(1.0, 5.0, 1, count); - LOG_DEBUG(<< "count p2 = " << std::floor(count[0])); - for (std::size_t j = 0; j < static_cast(count[0]); ++j) { - addArrival(gatherer, m_ResourceMonitor, - startTime + i * bucketLength + 100 * (j + 1), "p2", 1.0); - } - } - } - gatherer.timeNow(startTime + numberBuckets * bucketLength); - - LOG_DEBUG(<< "p1 sample count = " << gatherer.effectiveSampleCount(pid1)); - LOG_DEBUG(<< "p2 sample count = " << gatherer.effectiveSampleCount(pid2)); - BOOST_REQUIRE_CLOSE_ABSOLUTE(6.0, 
gatherer.effectiveSampleCount(pid1), 1e-5); - BOOST_REQUIRE_CLOSE_ABSOLUTE(2.0, gatherer.effectiveSampleCount(pid2), 1.0 + 1e-5); - - for (std::size_t i = numberBuckets; i < 100; ++i) { - gatherer.timeNow(startTime + i * bucketLength); - addArrival(gatherer, m_ResourceMonitor, - startTime + i * bucketLength + 10, "p1", 1.0); - } - LOG_DEBUG(<< "p1 sample count = " << gatherer.effectiveSampleCount(pid1)); - BOOST_REQUIRE_CLOSE_ABSOLUTE(2.0, gatherer.effectiveSampleCount(pid1), 0.5); -} - BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { // Test various combinations of removed people. - const core_t::TTime startTime = 0; - const core_t::TTime bucketLength = 3600; + const core_t::TTime startTime{0}; + const core_t::TTime bucketLength{3600}; - TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - features.push_back(model_t::E_IndividualSumByBucketAndPerson); + TFeatureVec features{model_t::E_IndividualMeanByPerson, + model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson, + model_t::E_IndividualSumByBucketAndPerson}; SModelParams params(bucketLength); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 0); + EMPTY_STRING, {}, KEY, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson("p1", gatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(1, addPerson("p2", gatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(2, addPerson("p3", gatherer, m_ResourceMonitor)); @@ -700,28 +500,24 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { BOOST_REQUIRE_EQUAL(6, addPerson("p7", gatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(7, addPerson("p8", gatherer, m_ResourceMonitor)); - core_t::TTime times[][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, - {10, 20, 100, 0, 0, 0, 0, 0}, - {110, 120, 150, 170, 200, 0, 0, 
0}, - {210, 220, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {400, 410, 480, 510, 530, 0, 0, 0}, - {1040, 1100, 1080, 1200, 1300, 1311, 2100, 0}, - {2200, 2500, 2600, 2610, 2702, 2731, 2710, 2862}, - }; - double values[][8] = { - {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - {1.0, 2.0, 1.1, 0.0, 0.0, 0.0, 0.0, 0.0}, - {2.0, 5.0, 6.0, 1.0, 0.2, 0.0, 0.0, 0.0}, - {2.1, 2.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - {4.0, 1.0, 8.0, 1.0, 0.3, 0.0, 0.0, 0.0}, - {4.0, 1.0, 8.0, 1.0, 0.3, 1.1, 10.3, 0.0}, - {2.0, 5.0, 6.0, 1.0, 0.2, 3.1, 7.1, 6.2}, - }; - for (std::size_t i = 0; i < std::size(values); ++i) { - for (std::size_t j = 0; j < std::size(values[i]); ++j) { + TTimeVecVec times{{0, 0, 0, 0, 0, 0, 0, 0}, + {10, 20, 100, 0, 0, 0, 0, 0}, + {110, 120, 150, 170, 200, 0, 0, 0}, + {210, 220, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {400, 410, 480, 510, 530, 0, 0, 0}, + {1040, 1100, 1080, 1200, 1300, 1311, 2100, 0}, + {2200, 2500, 2600, 2610, 2702, 2731, 2710, 2862}}; + TDoubleVecVec values{{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {1.0, 2.0, 1.1, 0.0, 0.0, 0.0, 0.0, 0.0}, + {2.0, 5.0, 6.0, 1.0, 0.2, 0.0, 0.0, 0.0}, + {2.1, 2.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}, + {4.0, 1.0, 8.0, 1.0, 0.3, 0.0, 0.0, 0.0}, + {4.0, 1.0, 8.0, 1.0, 0.3, 1.1, 10.3, 0.0}, + {2.0, 5.0, 6.0, 1.0, 0.2, 3.1, 7.1, 6.2}}; + for (std::size_t i = 0; i < values.size(); ++i) { + for (std::size_t j = 0; j < values[i].size(); ++j) { if (values[i][j] > 0.0) { addArrival(gatherer, m_ResourceMonitor, startTime + times[i][j], gatherer.personName(i), values[i][j]); @@ -735,10 +531,9 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { peopleToRemove.push_back(1); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, KEY, features, startTime, 0); + CDataGatherer 
expectedGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, KEY, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson("p3", expectedGatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(1, addPerson("p4", expectedGatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(2, addPerson("p5", expectedGatherer, m_ResourceMonitor)); @@ -768,10 +563,9 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { peopleToRemove.push_back(7); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, KEY, features, startTime, 0); + CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, KEY, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson("p3", expectedGatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(1, addPerson("p6", expectedGatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(2, addPerson("p7", expectedGatherer, m_ResourceMonitor)); @@ -798,10 +592,9 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { peopleToRemove.push_back(6); gatherer.recyclePeople(peopleToRemove); - CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, - params, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, KEY, features, startTime, 0); + CDataGatherer expectedGatherer(model_t::E_Metric, model_t::E_None, params, + EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, + EMPTY_STRING, {}, KEY, features, startTime); LOG_DEBUG(<< "checksum = " << gatherer.checksum()); LOG_DEBUG(<< "expected checksum = " << expectedGatherer.checksum()); @@ -822,31 +615,27 @@ BOOST_FIXTURE_TEST_CASE(testSum, CTestFixture) { // Test sum and non-zero sum work as expected. 
const core_t::TTime bucketLength = 600; - const std::size_t bucketCounts[] = {2, 5, 2, 1, 0, 0, 4, 8, 0, 1}; + const TSizeVec bucketCounts{2, 5, 2, 1, 0, 0, 4, 8, 0, 1}; const core_t::TTime startTime = 0; test::CRandomNumbers rng; - TFeatureVec sumFeatures; - sumFeatures.push_back(model_t::E_IndividualSumByBucketAndPerson); + TFeatureVec sumFeatures{model_t::E_IndividualSumByBucketAndPerson}; SModelParams params(bucketLength); CDataGatherer sum(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, KEY, sumFeatures, startTime, 0); + {}, KEY, sumFeatures, startTime); BOOST_REQUIRE_EQUAL(0, addPerson("p1", sum, m_ResourceMonitor)); - TFeatureVec nonZeroSumFeatures; - nonZeroSumFeatures.push_back(model_t::E_IndividualNonNullSumByBucketAndPerson); + TFeatureVec nonNullSumFeatures{model_t::E_IndividualNonNullSumByBucketAndPerson}; - CDataGatherer nonZeroSum(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, + CDataGatherer nonNullSum(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - {}, KEY, nonZeroSumFeatures, startTime, 0); - BOOST_REQUIRE_EQUAL(0, addPerson("p1", nonZeroSum, m_ResourceMonitor)); + EMPTY_STRING, {}, KEY, nonNullSumFeatures, startTime); + BOOST_REQUIRE_EQUAL(0, addPerson("p1", nonNullSum, m_ResourceMonitor)); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0; i < std::size(bucketCounts); ++i) { - std::size_t count = bucketCounts[i]; - + for (auto count : bucketCounts) { TDoubleVec times; rng.generateUniformSamples(0.0, static_cast(bucketLength - 0.1), count, times); std::sort(times.begin(), times.end()); @@ -859,7 +648,7 @@ BOOST_FIXTURE_TEST_CASE(testSum, CTestFixture) { addArrival(sum, m_ResourceMonitor, bucketStart + static_cast(times[j]), "p1", values[j]); - addArrival(nonZeroSum, m_ResourceMonitor, + addArrival(nonNullSum, m_ResourceMonitor, bucketStart + static_cast(times[j]), "p1", values[j]); expected += 
doubleToStringToDouble(values[j]); @@ -868,7 +657,7 @@ BOOST_FIXTURE_TEST_CASE(testSum, CTestFixture) { LOG_DEBUG(<< "bucket: count = " << count << ", sum = " << expected); { TFeatureSizeFeatureDataPrVecPrVec data; - sum.featureData(bucketStart, bucketLength, data); + sum.featureData(bucketStart, data); BOOST_REQUIRE_EQUAL(1, data.size()); for (std::size_t j = 0; j < data.size(); ++j) { const TSizeFeatureDataPrVec& featureData = data[j].second; @@ -885,7 +674,7 @@ BOOST_FIXTURE_TEST_CASE(testSum, CTestFixture) { } { TFeatureSizeFeatureDataPrVecPrVec data; - nonZeroSum.featureData(bucketStart, bucketLength, data); + nonNullSum.featureData(bucketStart, data); BOOST_REQUIRE_EQUAL(1, data.size()); for (std::size_t j = 0; j < data.size(); ++j) { const TSizeFeatureDataPrVec& featureData = data[j].second; @@ -907,7 +696,7 @@ BOOST_FIXTURE_TEST_CASE(testSum, CTestFixture) { bucketStart += bucketLength; sum.timeNow(bucketStart); - nonZeroSum.timeNow(bucketStart); + nonNullSum.timeNow(bucketStart); } } @@ -918,33 +707,24 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrder, CTestFixture) { const core_t::TTime bucketLength = 600; SModelParams params(bucketLength); params.s_LatencyBuckets = 1; - params.s_SampleCountFactor = 1; - params.s_SampleQueueGrowthFactor = 0.1; - TTimeDoublePr bucket1[] = {TTimeDoublePr(1, 1.0), TTimeDoublePr(15, 2.1), - TTimeDoublePr(180, 0.9), TTimeDoublePr(400, 1.5), - TTimeDoublePr(550, 2.0)}; - TTimeDoublePr bucket2[] = {TTimeDoublePr(600, 2.0), TTimeDoublePr(190, 1.5), - TTimeDoublePr(799, 2.2), TTimeDoublePr(1199, 1.8)}; + TTimeDoublePrVec bucket1{{1, 1.0}, {15, 2.1}, {180, 0.9}, {400, 1.5}, {550, 2.0}}; + TTimeDoublePrVec bucket2{{600, 2.0}, {190, 1.5}, {799, 2.2}, {1199, 1.8}}; { - TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - features.push_back(model_t::E_IndividualSumByBucketAndPerson); - 
features.push_back(model_t::E_IndividualCountByBucketAndPerson); + TFeatureVec features{model_t::E_IndividualMeanByPerson, + model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson, + model_t::E_IndividualSumByBucketAndPerson, + model_t::E_IndividualCountByBucketAndPerson}; CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 2u); + EMPTY_STRING, {}, KEY, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); BOOST_REQUIRE_EQUAL(0, addPerson("p", gatherer, m_ResourceMonitor)); - BOOST_REQUIRE_EQUAL(4, gatherer.numberFeatures()); for (std::size_t i = 0; i < 4; ++i) { BOOST_REQUIRE_EQUAL(features[i], gatherer.feature(i)); } - BOOST_REQUIRE_EQUAL(1, gatherer.numberActivePeople()); BOOST_REQUIRE_EQUAL(1, gatherer.numberByFieldValues()); BOOST_REQUIRE_EQUAL(std::string("p"), gatherer.personName(0)); @@ -958,7 +738,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrder, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, bucket1[0].first, "p", bucket1[0].second); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); BOOST_REQUIRE_EQUAL( 1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL( @@ -973,14 +753,13 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrder, CTestFixture) { BOOST_REQUIRE_EQUAL(true, featureData[3].second[0].second.s_IsInteger); } - for (std::size_t i = 1; i < std::size(bucket1); ++i) { + for (std::size_t i = 1; i < bucket1.size(); ++i) { addArrival(gatherer, m_ResourceMonitor, bucket1[i].first, "p", bucket1[i].second); } { TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), - bucketLength, featureData); + gatherer.featureData(startTime + bucketLength - 1, featureData); LOG_DEBUG(<< "featureData = " << 
featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_EQUAL( @@ -995,16 +774,16 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrder, CTestFixture) { BOOST_REQUIRE_EQUAL(false, featureData[1].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[2].second[0].second.s_IsInteger); BOOST_REQUIRE_EQUAL(false, featureData[3].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL(std::string("[]"), + BOOST_REQUIRE_EQUAL("[(229 [1.5] 1 5)]", core::CContainerPrinter::print( featureData[0].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[]"), + BOOST_REQUIRE_EQUAL("[(180 [0.9] 1 1)]", core::CContainerPrinter::print( featureData[1].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[]"), + BOOST_REQUIRE_EQUAL("[(15 [2.1] 1 1)]", core::CContainerPrinter::print( featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[(0 [7.5] 1 5)]"), + BOOST_REQUIRE_EQUAL("[(300 [7.5] 1 5)]", core::CContainerPrinter::print( featureData[3].second[0].second.s_Samples)); testPersistence(params, gatherer); @@ -1017,7 +796,7 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrder, CTestFixture) { { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.sampleNow(startTime); - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_EQUAL( 1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); @@ -1027,17 +806,16 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrder, CTestFixture) { 2.1, featureData[2].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL( 9.0, featureData[3].second[0].second.s_BucketValue->value()[0]); - BOOST_REQUIRE_EQUAL(true, featureData[3].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL( - std::string("[(8 [1.55] 1 2), (257 [1.3] 0.666667 3)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[(8 
[1] 1 2), (257 [0.9] 1 3)]"), + BOOST_REQUIRE_EQUAL("[(223 [1.5] 1 6)]", + core::CContainerPrinter::print( + featureData[0].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(180 [0.9] 1 1)]", core::CContainerPrinter::print( featureData[1].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[(8 [2.1] 1 2), (257 [1.5] 1 3)]"), + BOOST_REQUIRE_EQUAL("[(15 [2.1] 1 1)]", core::CContainerPrinter::print( featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL(std::string("[(0 [9] 1 6)]"), + BOOST_REQUIRE_EQUAL("[(300 [9] 1 6)]", core::CContainerPrinter::print( featureData[3].second[0].second.s_Samples)); testPersistence(params, gatherer); @@ -1045,26 +823,21 @@ BOOST_FIXTURE_TEST_CASE(testSingleSeriesOutOfOrder, CTestFixture) { } } -BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { +BOOST_FIXTURE_TEST_CASE(testResetBucketSingleSeries, CTestFixture) { const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; SModelParams params(bucketLength); params.s_LatencyBuckets = 2; - params.s_SampleCountFactor = 1; - params.s_SampleQueueGrowthFactor = 0.1; - - TTimeDoublePr data[] = {TTimeDoublePr(1, 1.0), TTimeDoublePr(550, 2.0), - TTimeDoublePr(600, 3.0), TTimeDoublePr(700, 4.0), - TTimeDoublePr(1000, 5.0), TTimeDoublePr(1200, 6.0)}; - - TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - features.push_back(model_t::E_IndividualSumByBucketAndPerson); + + TTimeDoublePrVec data{{1, 1.0}, {550, 2.0}, {600, 3.0}, + {700, 4.0}, {1000, 5.0}, {1200, 6.0}}; + + TFeatureVec features{model_t::E_IndividualMeanByPerson, + model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson, + model_t::E_IndividualSumByBucketAndPerson}; CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, 
features, startTime, 2u); + EMPTY_STRING, {}, KEY, features, startTime); addPerson("p", gatherer, m_ResourceMonitor); for (const auto& value : data) { @@ -1074,7 +847,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { TFeatureSizeFeatureDataPrVecPrVec featureData; TSizeSizePr pidCidPr(0, 0); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(1.0, featureData[1].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(2.0, featureData[2].second[0].second.s_BucketValue->value()[0]); @@ -1082,7 +855,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(std::uint64_t(2), gatherer.bucketCounts(0).find(pidCidPr)->second); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(4.0, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(3.0, featureData[1].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(5.0, featureData[2].second[0].second.s_BucketValue->value()[0]); @@ -1090,7 +863,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(std::uint64_t(3), gatherer.bucketCounts(600).find(pidCidPr)->second); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(6.0, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(6.0, featureData[1].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(6.0, featureData[2].second[0].second.s_BucketValue->value()[0]); @@ -1102,7 +875,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, 610, "p", 2.0); addArrival(gatherer, m_ResourceMonitor, 620, "p", 3.0); - gatherer.featureData(0, bucketLength, 
featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(1.0, featureData[1].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(2.0, featureData[2].second[0].second.s_BucketValue->value()[0]); @@ -1110,7 +883,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(std::uint64_t(2), gatherer.bucketCounts(0).find(pidCidPr)->second); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(2.5, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(2.0, featureData[1].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(3.0, featureData[2].second[0].second.s_BucketValue->value()[0]); @@ -1118,7 +891,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(std::uint64_t(2), gatherer.bucketCounts(0).find(pidCidPr)->second); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(6.0, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(6.0, featureData[1].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(6.0, featureData[2].second[0].second.s_BucketValue->value()[0]); @@ -1127,58 +900,47 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenSingleSeries, CTestFixture) { gatherer.bucketCounts(1200).find(pidCidPr)->second); gatherer.sampleNow(0); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL( - std::string("[(276 [1.5] 1 2)]"), + "[(276 [1.5] 1 2)]", core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(276 [1] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(276 [2] 1 2)]"), - 
core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(0 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(1 [1] 1 1)]", core::CContainerPrinter::print( + featureData[1].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(550 [2] 1 1)]", core::CContainerPrinter::print( + featureData[2].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(300 [3] 1 2)]", core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); gatherer.sampleNow(600); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL( - std::string("[(615 [2.5] 1 2)]"), + "[(615 [2.5] 1 2)]", core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(615 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(615 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(600 [5] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(610 [2] 1 1)]", core::CContainerPrinter::print( + featureData[1].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(620 [3] 1 1)]", core::CContainerPrinter::print( + featureData[2].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(900 [5] 1 2)]", core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); } -BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { +BOOST_FIXTURE_TEST_CASE(testResetBucketMultipleSeries, CTestFixture) { const core_t::TTime startTime = 0; const core_t::TTime bucketLength = 600; SModelParams params(bucketLength); params.s_LatencyBuckets = 2; - params.s_SampleCountFactor = 1; - params.s_SampleQueueGrowthFactor = 0.1; - - TTimeDoublePr 
data[] = {TTimeDoublePr(1, 1.0), TTimeDoublePr(550, 2.0), - TTimeDoublePr(600, 3.0), TTimeDoublePr(700, 4.0), - TTimeDoublePr(1000, 5.0), TTimeDoublePr(1200, 6.0)}; - - TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - features.push_back(model_t::E_IndividualSumByBucketAndPerson); + + TTimeDoublePrVec data{{1, 1.0}, {550, 2.0}, {600, 3.0}, + {700, 4.0}, {1000, 5.0}, {1200, 6.0}}; + + TFeatureVec features{model_t::E_IndividualMeanByPerson, + model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson, + model_t::E_IndividualSumByBucketAndPerson}; CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 2u); + EMPTY_STRING, {}, KEY, features, startTime); addPerson("p1", gatherer, m_ResourceMonitor); addPerson("p2", gatherer, m_ResourceMonitor); addPerson("p3", gatherer, m_ResourceMonitor); @@ -1195,7 +957,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { TSizeSizePr pidCidPr1(1, 0); TSizeSizePr pidCidPr2(2, 0); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(1.5, featureData[0].second[1].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(1.5, featureData[0].second[2].second.s_BucketValue->value()[0]); @@ -1215,7 +977,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(std::uint64_t(2), gatherer.bucketCounts(0).find(pidCidPr2)->second); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(4.0, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(4.0, 
featureData[0].second[1].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(4.0, featureData[0].second[2].second.s_BucketValue->value()[0]); @@ -1235,7 +997,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(std::uint64_t(3), gatherer.bucketCounts(600).find(pidCidPr2)->second); - gatherer.featureData(1200, bucketLength, featureData); + gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(6.0, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(6.0, featureData[0].second[1].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(6.0, featureData[0].second[2].second.s_BucketValue->value()[0]); @@ -1261,7 +1023,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { addArrival(gatherer, m_ResourceMonitor, 620, gatherer.personName(pid), 3.0); } - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL(1.5, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(1.5, featureData[0].second[1].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(1.5, featureData[0].second[2].second.s_BucketValue->value()[0]); @@ -1281,7 +1043,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(std::uint64_t(2), gatherer.bucketCounts(0).find(pidCidPr2)->second); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL(2.5, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(2.5, featureData[0].second[1].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(2.5, featureData[0].second[2].second.s_BucketValue->value()[0]); @@ -1301,7 +1063,7 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { BOOST_REQUIRE_EQUAL(std::uint64_t(2), gatherer.bucketCounts(600).find(pidCidPr2)->second); - gatherer.featureData(1200, bucketLength, featureData); 
+ gatherer.featureData(1200, featureData); BOOST_REQUIRE_EQUAL(6.0, featureData[0].second[0].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(6.0, featureData[0].second[1].second.s_BucketValue->value()[0]); BOOST_REQUIRE_EQUAL(6.0, featureData[0].second[2].second.s_BucketValue->value()[0]); @@ -1322,89 +1084,71 @@ BOOST_FIXTURE_TEST_CASE(testResetBucketGivenMultipleSeries, CTestFixture) { gatherer.bucketCounts(1200).find(pidCidPr2)->second); gatherer.sampleNow(0); - gatherer.featureData(0, bucketLength, featureData); + gatherer.featureData(0, featureData); BOOST_REQUIRE_EQUAL( - std::string("[(276 [1.5] 1 2)]"), + "[(276 [1.5] 1 2)]", core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(276 [1.5] 1 2)]"), + "[(276 [1.5] 1 2)]", core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(276 [1.5] 1 2)]"), + "[(276 [1.5] 1 2)]", core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(276 [1] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(276 [1] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(276 [1] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(276 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(276 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(276 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(0 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); - 
BOOST_REQUIRE_EQUAL( - std::string("[(0 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(0 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(1 [1] 1 1)]", core::CContainerPrinter::print( + featureData[1].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(1 [1] 1 1)]", core::CContainerPrinter::print( + featureData[1].second[1].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(1 [1] 1 1)]", core::CContainerPrinter::print( + featureData[1].second[2].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(550 [2] 1 1)]", core::CContainerPrinter::print( + featureData[2].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(550 [2] 1 1)]", core::CContainerPrinter::print( + featureData[2].second[1].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(550 [2] 1 1)]", core::CContainerPrinter::print( + featureData[2].second[2].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(300 [3] 1 2)]", core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(300 [3] 1 2)]", core::CContainerPrinter::print( + featureData[3].second[1].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(300 [3] 1 2)]", core::CContainerPrinter::print( + featureData[3].second[2].second.s_Samples)); gatherer.sampleNow(600); - gatherer.featureData(600, bucketLength, featureData); + gatherer.featureData(600, featureData); BOOST_REQUIRE_EQUAL( - std::string("[(615 [2.5] 1 2)]"), + "[(615 [2.5] 1 2)]", core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(615 [2.5] 1 2)]"), + "[(615 [2.5] 1 2)]", core::CContainerPrinter::print(featureData[0].second[1].second.s_Samples)); BOOST_REQUIRE_EQUAL( - std::string("[(615 [2.5] 1 2)]"), + "[(615 [2.5] 1 2)]", core::CContainerPrinter::print(featureData[0].second[2].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(615 [2] 1 2)]"), - 
core::CContainerPrinter::print(featureData[1].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(615 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[1].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(615 [2] 1 2)]"), - core::CContainerPrinter::print(featureData[1].second[2].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(615 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(615 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[1].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(615 [3] 1 2)]"), - core::CContainerPrinter::print(featureData[2].second[2].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(600 [5] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[0].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(600 [5] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[1].second.s_Samples)); - BOOST_REQUIRE_EQUAL( - std::string("[(600 [5] 1 2)]"), - core::CContainerPrinter::print(featureData[3].second[2].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(610 [2] 1 1)]", core::CContainerPrinter::print( + featureData[1].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(610 [2] 1 1)]", core::CContainerPrinter::print( + featureData[1].second[1].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(610 [2] 1 1)]", core::CContainerPrinter::print( + featureData[1].second[2].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(620 [3] 1 1)]", core::CContainerPrinter::print( + featureData[2].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(620 [3] 1 1)]", core::CContainerPrinter::print( + featureData[2].second[1].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(620 [3] 1 1)]", core::CContainerPrinter::print( + featureData[2].second[2].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(900 [5] 1 2)]", core::CContainerPrinter::print( + featureData[3].second[0].second.s_Samples)); + 
BOOST_REQUIRE_EQUAL("[(900 [5] 1 2)]", core::CContainerPrinter::print( + featureData[3].second[1].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(900 [5] 1 2)]", core::CContainerPrinter::print( + featureData[3].second[2].second.s_Samples)); } BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { - using TTimeDoubleStrStrTuple = - boost::tuple; + using TTimeDoubleStrStrTuple = std::tuple; + using TTimeDoubleStrStrTupleVec = std::vector; using TDoubleDoublePr = std::pair; using TStrDoubleDoublePrPr = std::pair; using TStrDoubleDoublePrPrVec = std::vector; @@ -1413,84 +1157,78 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { const core_t::TTime bucketLength = 600; SModelParams params(bucketLength); params.s_LatencyBuckets = 2; - params.s_SampleCountFactor = 1; - params.s_SampleQueueGrowthFactor = 0.1; - - std::string influencerNames_[] = {"i1", "i2"}; - std::string influencerValues[][3] = {{"i11", "i12", "i13"}, {"i21", "i22", "i23"}}; - - TTimeDoubleStrStrTuple data[] = { - TTimeDoubleStrStrTuple(1, 1.0, influencerValues[0][0], influencerValues[1][0]), // Bucket 1 - TTimeDoubleStrStrTuple(150, 5.0, influencerValues[0][1], influencerValues[1][1]), - TTimeDoubleStrStrTuple(150, 3.0, influencerValues[0][2], influencerValues[1][2]), - TTimeDoubleStrStrTuple(550, 2.0, influencerValues[0][0], influencerValues[1][0]), - TTimeDoubleStrStrTuple(551, 2.1, influencerValues[0][1], influencerValues[1][1]), - TTimeDoubleStrStrTuple(552, 4.0, influencerValues[0][2], influencerValues[1][2]), - TTimeDoubleStrStrTuple(554, 2.3, influencerValues[0][2], influencerValues[1][2]), - TTimeDoubleStrStrTuple(600, 3.0, influencerValues[0][1], influencerValues[1][0]), // Bucket 2 - TTimeDoubleStrStrTuple(660, 3.0, influencerValues[0][0], influencerValues[1][2]), - TTimeDoubleStrStrTuple(690, 7.1, influencerValues[0][1], ""), - TTimeDoubleStrStrTuple(700, 4.0, influencerValues[0][0], influencerValues[1][2]), - TTimeDoubleStrStrTuple(800, 2.1, influencerValues[0][2], 
influencerValues[1][0]), - TTimeDoubleStrStrTuple(900, 2.5, influencerValues[0][1], influencerValues[1][0]), - TTimeDoubleStrStrTuple(1000, 5.0, influencerValues[0][1], influencerValues[1][0]), - TTimeDoubleStrStrTuple(1200, 6.4, "", influencerValues[1][2]), // Bucket 3 - TTimeDoubleStrStrTuple(1210, 6.0, "", influencerValues[1][2]), - TTimeDoubleStrStrTuple(1240, 7.0, "", influencerValues[1][1]), - TTimeDoubleStrStrTuple(1600, 11.0, "", influencerValues[1][0]), + + TStrVec influencerNames{"i1", "i2"}; + TStrVecVec influencerValues{{"i11", "i12", "i13"}, {"i21", "i22", "i23"}}; + + TTimeDoubleStrStrTupleVec data{ + {1, 1.0, influencerValues[0][0], influencerValues[1][0]}, // Bucket 1 + {150, 5.0, influencerValues[0][1], influencerValues[1][1]}, + {150, 3.0, influencerValues[0][2], influencerValues[1][2]}, + {550, 2.0, influencerValues[0][0], influencerValues[1][0]}, + {551, 2.1, influencerValues[0][1], influencerValues[1][1]}, + {552, 4.0, influencerValues[0][2], influencerValues[1][2]}, + {554, 2.3, influencerValues[0][2], influencerValues[1][2]}, + {600, 3.0, influencerValues[0][1], influencerValues[1][0]}, // Bucket 2 + {660, 3.0, influencerValues[0][0], influencerValues[1][2]}, + {690, 7.1, influencerValues[0][1], ""}, + {700, 4.0, influencerValues[0][0], influencerValues[1][2]}, + {800, 2.1, influencerValues[0][2], influencerValues[1][0]}, + {900, 2.5, influencerValues[0][1], influencerValues[1][0]}, + {1000, 5.0, influencerValues[0][1], influencerValues[1][0]}, + {1200, 6.4, "", influencerValues[1][2]}, // Bucket 3 + {1210, 6.0, "", influencerValues[1][2]}, + {1240, 7.0, "", influencerValues[1][1]}, + {1600, 11.0, "", influencerValues[1][0]}, - TTimeDoubleStrStrTuple(1800, 11.0, "", "") // Sentinel + {1800, 11.0, "", ""} // Sentinel }; - std::string expectedStatistics[] = { - "[(i11, (1.5, 2)), (i12, (3.55, 2)), (i13, (3.1, 3)), (i21, (1.5, 2)), (i22, (3.55, 2)), (i23, (3.1, 3))]", + TStrVec expectedStatistics{ + "[(i11, (1.5, 2)), (i12, (3.55, 2)), (i13, (3.1, 3)), (i21, (1.5, 2)), (i22, (3.55, 
2)), (i23, (3.1, 3))]", // Bucket 1 "[(i11, (1.5, 2)), (i12, (3.55, 2)), (i13, (3.1, 3)), (i21, (1.5, 2)), (i22, (3.55, 2)), (i23, (3.1, 3))]", "[(i11, (1, 1)), (i12, (2.1, 1)), (i13, (2.3, 1)), (i21, (1, 1)), (i22, (2.1, 1)), (i23, (2.3, 1))]", "[(i11, (1, 1)), (i12, (2.1, 1)), (i13, (2.3, 1)), (i21, (1, 1)), (i22, (2.1, 1)), (i23, (2.3, 1))]", "[(i11, (2, 1)), (i12, (5, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (5, 1)), (i23, (4, 1))]", "[(i11, (2, 1)), (i12, (5, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (5, 1)), (i23, (4, 1))]", - "[(i11, (3, 1)), (i12, (7.1, 1)), (i13, (9.3, 1)), (i21, (3, 1)), (i22, (7.1, 1)), (i23, (9.3, 1))]", - "[(i11, (3, 1)), (i12, (7.1, 1)), (i13, (9.3, 1)), (i21, (3, 1)), (i22, (7.1, 1)), (i23, (9.3, 1))]", - "[(i11, (3.5, 2)), (i12, (4.4, 4)), (i13, (2.1, 1)), (i21, (3.15, 4)), (i23, (3.5, 2))]", + "[(i11, (3, 2)), (i12, (7.1, 2)), (i13, (9.3, 3)), (i21, (3, 2)), (i22, (7.1, 2)), (i23, (9.3, 3))]", + "[(i11, (3, 2)), (i12, (7.1, 2)), (i13, (9.3, 3)), (i21, (3, 2)), (i22, (7.1, 2)), (i23, (9.3, 3))]", + "[(i11, (3.5, 2)), (i12, (4.4, 4)), (i13, (2.1, 1)), (i21, (3.15, 4)), (i23, (3.5, 2))]", // Bucket 2 "[(i11, (3.5, 2)), (i12, (4.4, 4)), (i13, (2.1, 1)), (i21, (3.15, 4)), (i23, (3.5, 2))]", "[(i11, (3, 1)), (i12, (2.5, 1)), (i13, (2.1, 1)), (i21, (2.1, 1)), (i23, (3, 1))]", "[(i11, (3, 1)), (i12, (2.5, 1)), (i13, (2.1, 1)), (i21, (2.1, 1)), (i23, (3, 1))]", "[(i11, (4, 1)), (i12, (7.1, 1)), (i13, (2.1, 1)), (i21, (5, 1)), (i23, (4, 1))]", "[(i11, (4, 1)), (i12, (7.1, 1)), (i13, (2.1, 1)), (i21, (5, 1)), (i23, (4, 1))]", - "[(i11, (7, 1)), (i12, (17.6, 1)), (i13, (2.1, 1)), (i21, (12.6, 1)), (i23, (7, 1))]", - "[(i11, (7, 1)), (i12, (17.6, 1)), (i13, (2.1, 1)), (i21, (12.6, 1)), (i23, (7, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", + "[(i11, (7, 2)), (i12, (17.6, 4)), (i13, (2.1, 1)), (i21, (12.6, 4)), (i23, (7, 2))]", + "[(i11, (7, 2)), (i12, (17.6, 4)), (i13, (2.1, 1)), (i21, (12.6, 4)), (i23, (7, 2))]", + "[(i21, (11, 
1)), (i22, (7, 1)), (i23, (6.2, 2))]", // Bucket 3 "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]"}; - const std::string* expected = expectedStatistics; - - TFeatureVec features; - features.push_back(model_t::E_IndividualMeanByPerson); - features.push_back(model_t::E_IndividualMinByPerson); - features.push_back(model_t::E_IndividualMaxByPerson); - features.push_back(model_t::E_IndividualSumByBucketAndPerson); - TStrVec influencerNames(std::begin(influencerNames_), std::end(influencerNames_)); + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 2))]", + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 2))]"}; + + TFeatureVec features{model_t::E_IndividualMeanByPerson, + model_t::E_IndividualMinByPerson, model_t::E_IndividualMaxByPerson, + model_t::E_IndividualSumByBucketAndPerson}; CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, KEY, features, startTime, 2); + influencerNames, KEY, features, startTime); addPerson("p1", gatherer, m_ResourceMonitor, influencerNames.size()); addPerson("p2", gatherer, m_ResourceMonitor, influencerNames.size()); core_t::TTime bucketStart = startTime; + auto expected = expectedStatistics.begin(); for (std::size_t i = 0; i < std::size(data); ++i) { - if (data[i].get<0>() >= bucketStart + bucketLength) { + if (std::get<0>(data[i]) >= bucketStart + bucketLength) { LOG_DEBUG(<< "*** processing bucket ***"); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart, bucketLength, featureData); + gatherer.featureData(bucketStart, featureData); for (std::size_t j = 0; j < featureData.size(); ++j) { 
model_t::EFeature feature = featureData[j].first; LOG_DEBUG(<< "feature = " << model_t::print(feature)); - const TSizeFeatureDataPrVec& data_ = featureData[j].second; for (std::size_t k = 0; k < data_.size(); ++k) { TStrDoubleDoublePrPrVec statistics; @@ -1522,15 +1260,15 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { bucketStart += bucketLength; } for (std::size_t pid = 0; pid < gatherer.numberActivePeople(); ++pid) { - addArrival(gatherer, m_ResourceMonitor, data[i].get<0>(), - gatherer.personName(pid), data[i].get<1>(), - data[i].get<2>(), data[i].get<3>()); + addArrival(gatherer, m_ResourceMonitor, std::get<0>(data[i]), + gatherer.personName(pid), std::get<1>(data[i]), + std::get<2>(data[i]), std::get<3>(data[i])); } } } BOOST_FIXTURE_TEST_CASE(testMultivariate, CTestFixture) { - using TTimeDoubleDoubleTuple = boost::tuple; + using TTimeDoubleDoubleTuple = std::tuple; using TTimeDoubleDoubleTupleVec = std::vector; using TTimeDoubleDoubleTupleVecVec = std::vector; @@ -1542,21 +1280,13 @@ BOOST_FIXTURE_TEST_CASE(testMultivariate, CTestFixture) { SModelParams params(bucketLength); params.s_MultivariateComponentDelimiter = DELIMITER; - TTimeDoubleDoubleTuple bucket1[] = {TTimeDoubleDoubleTuple(1, 1.0, 1.0), - TTimeDoubleDoubleTuple(15, 2.1, 2.0), - TTimeDoubleDoubleTuple(180, 0.9, 0.8), - TTimeDoubleDoubleTuple(190, 1.5, 1.4), - TTimeDoubleDoubleTuple(400, 1.5, 1.4), - TTimeDoubleDoubleTuple(550, 2.0, 1.8)}; - TTimeDoubleDoubleTuple bucket2[] = {TTimeDoubleDoubleTuple(600, 2.0, 1.8), - TTimeDoubleDoubleTuple(799, 2.2, 2.0), - TTimeDoubleDoubleTuple(1199, 1.8, 1.6)}; - TTimeDoubleDoubleTuple bucket3[] = {TTimeDoubleDoubleTuple(1200, 2.1, 2.0), - TTimeDoubleDoubleTuple(1250, 2.5, 2.4)}; - TTimeDoubleDoubleTuple bucket4[] = {TTimeDoubleDoubleTuple(1900, 3.5, 3.2)}; - TTimeDoubleDoubleTuple bucket5[] = {TTimeDoubleDoubleTuple(2420, 3.5, 3.2), - TTimeDoubleDoubleTuple(2480, 3.2, 3.0), - TTimeDoubleDoubleTuple(2490, 3.8, 3.8)}; + 
TTimeDoubleDoubleTupleVec bucket1{{1, 1.0, 1.0}, {15, 2.1, 2.0}, + {180, 0.9, 0.8}, {190, 1.5, 1.4}, + {400, 1.5, 1.4}, {550, 2.0, 1.8}}; + TTimeDoubleDoubleTupleVec bucket2{{600, 2.0, 1.8}, {799, 2.2, 2.0}, {1199, 1.8, 1.6}}; + TTimeDoubleDoubleTupleVec bucket3{{1200, 2.1, 2.0}, {1250, 2.5, 2.4}}; + TTimeDoubleDoubleTupleVec bucket4{{1900, 3.5, 3.2}}; + TTimeDoubleDoubleTupleVec bucket5{{2420, 3.5, 3.2}, {2480, 3.2, 3.0}, {2490, 3.8, 3.8}}; { TFeatureVec features; @@ -1564,7 +1294,7 @@ BOOST_FIXTURE_TEST_CASE(testMultivariate, CTestFixture) { TStrVec influencerNames; CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, KEY, features, startTime, 2u); + influencerNames, KEY, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); BOOST_REQUIRE_EQUAL(0, addPerson("p", gatherer, m_ResourceMonitor)); BOOST_REQUIRE_EQUAL(1, gatherer.numberFeatures()); @@ -1580,10 +1310,10 @@ BOOST_FIXTURE_TEST_CASE(testMultivariate, CTestFixture) { BOOST_TEST_REQUIRE(!gatherer.personId("a.n.other p", pid)); { - addArrival(gatherer, m_ResourceMonitor, bucket1[0].get<0>(), "p", - bucket1[0].get<1>(), bucket1[0].get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, std::get<0>(bucket1[0]), "p", + std::get<1>(bucket1[0]), std::get<2>(bucket1[0]), DELIMITER); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL( 1.0, featureData[0].second[0].second.s_BucketValue->value()[0]); @@ -1592,15 +1322,14 @@ BOOST_FIXTURE_TEST_CASE(testMultivariate, CTestFixture) { BOOST_REQUIRE_EQUAL(true, featureData[0].second[0].second.s_IsInteger); } - for (std::size_t i = 1; i < std::size(bucket1); ++i) { - addArrival(gatherer, m_ResourceMonitor, bucket1[i].get<0>(), "p", - bucket1[i].get<1>(), bucket1[i].get<2>(), 
DELIMITER); + for (std::size_t i = 1; i < bucket1.size(); ++i) { + addArrival(gatherer, m_ResourceMonitor, std::get<0>(bucket1[i]), "p", + std::get<1>(bucket1[i]), std::get<2>(bucket1[i]), DELIMITER); } { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.sampleNow(startTime); - gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), - bucketLength, featureData); + gatherer.featureData(core_t::TTime(startTime + bucketLength - 1), featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_CLOSE_ABSOLUTE( @@ -1608,28 +1337,28 @@ BOOST_FIXTURE_TEST_CASE(testMultivariate, CTestFixture) { BOOST_REQUIRE_CLOSE_ABSOLUTE( 1.4, featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); BOOST_REQUIRE_EQUAL(false, featureData[0].second[0].second.s_IsInteger); - BOOST_REQUIRE_EQUAL( - std::string("[(8 [1.55, 1.5] 1 2), (185 [1.2, 1.1] 1 2), (475 [1.75, 1.6] 1 2)]"), - core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); + BOOST_REQUIRE_EQUAL("[(223 [1.5, 1.4] 1 6)]", + core::CContainerPrinter::print( + featureData[0].second[0].second.s_Samples)); testPersistence(params, gatherer); } gatherer.timeNow(startTime + bucketLength); for (const auto& value : bucket2) { - addArrival(gatherer, m_ResourceMonitor, value.get<0>(), "p", - value.get<1>(), value.get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, std::get<0>(value), "p", + std::get<1>(value), std::get<2>(value), DELIMITER); } { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.sampleNow(startTime + bucketLength); - gatherer.featureData(startTime + bucketLength, bucketLength, featureData); + gatherer.featureData(startTime + bucketLength, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_CLOSE_ABSOLUTE( 2.0, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); BOOST_REQUIRE_CLOSE_ABSOLUTE( 1.8, 
featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); - BOOST_REQUIRE_EQUAL(std::string("[(700 [2.1, 1.9] 1 2)]"), + BOOST_REQUIRE_EQUAL("[(866 [2, 1.8] 1 3)]", core::CContainerPrinter::print( featureData[0].second[0].second.s_Samples)); testPersistence(params, gatherer); @@ -1637,20 +1366,20 @@ BOOST_FIXTURE_TEST_CASE(testMultivariate, CTestFixture) { gatherer.timeNow(startTime + 2 * bucketLength); for (const auto& value : bucket3) { - addArrival(gatherer, m_ResourceMonitor, value.get<0>(), "p", - value.get<1>(), value.get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, std::get<0>(value), "p", + std::get<1>(value), std::get<2>(value), DELIMITER); } { TFeatureSizeFeatureDataPrVecPrVec featureData; gatherer.sampleNow(startTime + 2 * bucketLength); - gatherer.featureData(startTime + 2 * bucketLength, bucketLength, featureData); + gatherer.featureData(startTime + 2 * bucketLength, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_CLOSE_ABSOLUTE( 2.3, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); BOOST_REQUIRE_CLOSE_ABSOLUTE( 2.2, featureData[0].second[0].second.s_BucketValue->value()[1], 1e-10); - BOOST_REQUIRE_EQUAL(std::string("[(1200 [1.95, 1.8] 1 2)]"), + BOOST_REQUIRE_EQUAL("[(1225 [2.3, 2.2] 1 2)]", core::CContainerPrinter::print( featureData[0].second[0].second.s_Samples)); } @@ -1662,79 +1391,41 @@ BOOST_FIXTURE_TEST_CASE(testMultivariate, CTestFixture) { features.push_back(model_t::E_IndividualMeanLatLongByPerson); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 0); + EMPTY_STRING, {}, KEY, features, startTime); BOOST_REQUIRE_EQUAL(0, addPerson("p", gatherer, m_ResourceMonitor)); TTimeDoubleDoubleTupleVecVec buckets; - buckets.push_back(TTimeDoubleDoubleTupleVec(std::begin(bucket1), std::end(bucket1))); - 
buckets.push_back(TTimeDoubleDoubleTupleVec(std::begin(bucket2), std::end(bucket2))); - buckets.push_back(TTimeDoubleDoubleTupleVec(std::begin(bucket3), std::end(bucket3))); - buckets.push_back(TTimeDoubleDoubleTupleVec(std::begin(bucket4), std::end(bucket4))); - buckets.push_back(TTimeDoubleDoubleTupleVec(std::begin(bucket5), std::end(bucket5))); + buckets.emplace_back(std::begin(bucket1), std::end(bucket1)); + buckets.emplace_back(std::begin(bucket2), std::end(bucket2)); + buckets.emplace_back(std::begin(bucket3), std::end(bucket3)); + buckets.emplace_back(std::begin(bucket4), std::end(bucket4)); + buckets.emplace_back(std::begin(bucket5), std::end(bucket5)); for (std::size_t i = 0; i < buckets.size(); ++i) { LOG_DEBUG(<< "Processing bucket " << i); gatherer.timeNow(startTime + i * bucketLength); const TTimeDoubleDoubleTupleVec& bucket = buckets[i]; for (std::size_t j = 0; j < bucket.size(); ++j) { - addArrival(gatherer, m_ResourceMonitor, bucket[j].get<0>(), "p", - bucket[j].get<1>(), bucket[j].get<2>(), DELIMITER); + addArrival(gatherer, m_ResourceMonitor, std::get<0>(bucket[j]), "p", + std::get<1>(bucket[j]), std::get<2>(bucket[j]), DELIMITER); } } - BOOST_REQUIRE_EQUAL(4.0, gatherer.effectiveSampleCount(0)); TFeatureSizeFeatureDataPrVecPrVec featureData; core_t::TTime featureBucketStart = core_t::TTime(startTime + 4 * bucketLength); gatherer.sampleNow(featureBucketStart); - gatherer.featureData(featureBucketStart, bucketLength, featureData); + gatherer.featureData(featureBucketStart, featureData); BOOST_TEST_REQUIRE(!featureData.empty()); BOOST_REQUIRE_CLOSE_ABSOLUTE( 3.5, featureData[0].second[0].second.s_BucketValue->value()[0], 1e-10); BOOST_REQUIRE_EQUAL(false, featureData[0].second[0].second.s_IsInteger); LOG_DEBUG(<< "featureData = " << featureData); BOOST_REQUIRE_EQUAL( - std::string("[(2323 [3.5, 3.3] 1 4)]"), + "[(2463 [3.5, 3.333333] 1 3)]", core::CContainerPrinter::print(featureData[0].second[0].second.s_Samples)); } } 
-BOOST_FIXTURE_TEST_CASE(testStatisticsPersist, CTestFixture) { - CGathererTools::TMeanGatherer::TMetricPartialStatistic stat(1); - stat.add(TDoubleVec(1, 44.4), 1299196740, 1); - stat.add(TDoubleVec(1, 5.5), 1299196741, 1); - stat.add(TDoubleVec(1, 0.6), 1299196742, 1); - - std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - stat.persist(inserter); - inserter.toXml(origXml); - } - - core_t::TTime origTime = stat.time(); - std::string restoredXml; - std::string restoredPrint; - core_t::TTime restoredTime; - { - core::CRapidXmlParser parser; - BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - CGathererTools::TMeanGatherer::TMetricPartialStatistic restored(1); - traverser.traverseSubLevel( - std::bind(&CGathererTools::TMeanGatherer::TMetricPartialStatistic::restore, - std::ref(restored), std::placeholders::_1)); - - restoredTime = restored.time(); - { - core::CRapidXmlStatePersistInserter inserter("root"); - restored.persist(inserter); - inserter.toXml(restoredXml); - } - } - BOOST_REQUIRE_EQUAL(origXml, restoredXml); - BOOST_REQUIRE_EQUAL(origTime, restoredTime); -} - BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { core_t::TTime startTime = 100000; const core_t::TTime bucketLength = 1000; @@ -1748,11 +1439,10 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { SModelParams params(bucketLength); { - TFeatureVec features; - features.push_back(model_t::E_IndividualVarianceByPerson); + TFeatureVec features{model_t::E_IndividualVarianceByPerson}; CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, KEY, features, startTime, 2u); + EMPTY_STRING, {}, KEY, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); BOOST_REQUIRE_EQUAL(0, addPerson(person, gatherer, m_ResourceMonitor)); @@ -1763,7 +1453,7 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { 
addArrivals(gatherer, m_ResourceMonitor, startTime, 10, person, values); gatherer.sampleNow(startTime); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); // Expect only 1 feature BOOST_REQUIRE_EQUAL(1, featureData.size()); TFeatureSizeFeatureDataPrVecPr fsfd = featureData[0]; @@ -1781,10 +1471,9 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { addArrivals(gatherer, m_ResourceMonitor, startTime, 100, person, values); gatherer.sampleNow(startTime); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); LOG_DEBUG(<< "featureData = " << featureData); - CSample::TDouble1Vec v = - featureData[0].second[0].second.s_BucketValue->value(); + auto v = featureData[0].second[0].second.s_BucketValue->value(); double expectedMean = 0; double expectedVariance = variance(values, expectedMean); BOOST_REQUIRE_CLOSE_ABSOLUTE(v[0], expectedVariance, 0.0001); @@ -1798,7 +1487,7 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { addArrivals(gatherer, m_ResourceMonitor, startTime, 100, person, values); gatherer.sampleNow(startTime); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); LOG_DEBUG(<< "featureData = " << featureData); BOOST_TEST_REQUIRE(!featureData[0].second[0].second.s_BucketValue); } @@ -1814,7 +1503,7 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { influencerFieldNames.push_back("j"); CDataGatherer gatherer(model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerFieldNames, KEY, features, startTime, 2u); + influencerFieldNames, KEY, features, startTime); BOOST_TEST_REQUIRE(!gatherer.isPopulation()); BOOST_REQUIRE_EQUAL(0, addPerson(person, gatherer, m_ResourceMonitor, 
influencerFieldNames.size())); @@ -1843,7 +1532,7 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { gatherer.sampleNow(startTime); TFeatureSizeFeatureDataPrVecPrVec featureData; - gatherer.featureData(startTime, bucketLength, featureData); + gatherer.featureData(startTime, featureData); TFeatureSizeFeatureDataPrVecPr fsfd = featureData[0]; BOOST_REQUIRE_EQUAL(model_t::E_IndividualVarianceByPerson, fsfd.first); @@ -1885,15 +1574,18 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { BOOST_REQUIRE_CLOSE_ABSOLUTE(ivs1.second.first[0], i1ExpectedVariance, 0.0001); BOOST_REQUIRE_CLOSE_ABSOLUTE(ivs1.second.first[1], i1ExpectedMean, 0.0001); - // The order of ivs2 and ivs3 seems to be backwards... - const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr& ivs2 = ivs[1][1]; + const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr& ivs2 = ivs[1][0]; + LOG_DEBUG(<< "Comparing inf2 " << inf2 << " and ivs2.first.get() " + << ivs2.first.get()); BOOST_REQUIRE_EQUAL(inf2, ivs2.first.get()); BOOST_REQUIRE_CLOSE_ABSOLUTE(3.0, ivs2.second.second, 0.0001); BOOST_REQUIRE_EQUAL(2, ivs2.second.first.size()); BOOST_REQUIRE_CLOSE_ABSOLUTE(ivs2.second.first[0], i2ExpectedVariance, 0.0001); BOOST_REQUIRE_CLOSE_ABSOLUTE(ivs2.second.first[1], i2ExpectedMean, 0.0001); - const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr& ivs3 = ivs[1][0]; + const SMetricFeatureData::TStrCRefDouble1VecDoublePrPr& ivs3 = ivs[1][1]; + LOG_DEBUG(<< "Comparing inf3 " << inf3 << " and ivs3.first.get() " + << ivs3.first.get()); BOOST_REQUIRE_EQUAL(inf3, ivs3.first.get()); BOOST_REQUIRE_CLOSE_ABSOLUTE(1.0, ivs3.second.second, 0.0001); BOOST_REQUIRE_EQUAL(2, ivs3.second.first.size()); @@ -1904,3 +1596,4 @@ BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { } BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/model/unittest/CMetricModelTest.cc b/lib/model/unittest/CMetricModelTest.cc index 457b88cc93..d57826bc2d 100644 --- a/lib/model/unittest/CMetricModelTest.cc +++ b/lib/model/unittest/CMetricModelTest.cc @@ -55,7
+55,7 @@ namespace ml { namespace model { class CIndividualModelTestHelper { public: - static void setFeature(ml::model::CIndividualModel& model) { + static void setFeature(CIndividualModel& model) { auto& feature = model.m_FeatureModels[0]; feature.s_Models.emplace_back(feature.s_NewModel->clone(0)); } @@ -63,18 +63,14 @@ class CIndividualModelTestHelper { } } +namespace { + BOOST_AUTO_TEST_SUITE(CMetricModelTest) using namespace ml; using namespace model; -namespace { - -using TMinAccumulator = maths::common::CBasicStatistics::SMin::TAccumulator; -using TMaxAccumulator = maths::common::CBasicStatistics::SMax::TAccumulator; - const CModelTestFixtureBase::TSizeDoublePr1Vec NO_CORRELATES; -} class CTestFixture : public CModelTestFixtureBase { public: @@ -82,448 +78,21 @@ class CTestFixture : public CModelTestFixtureBase { model_t::EFeature feature, std::size_t pid, core_t::TTime time) { - const CMetricModel::TFeatureData* data = model.featureData(feature, pid, time); + const auto* data = model.featureData(feature, pid, time); if (data == nullptr) { - return TDouble1Vec(); + return {}; } return data->s_BucketValue ? data->s_BucketValue->value() : TDouble1Vec(); } void makeModel(const SModelParams& params, const model_t::TFeatureVec& features, - core_t::TTime startTime, - TOptionalUInt sampleCount = TOptionalUInt()) { - this->makeModelT(params, features, startTime, - model_t::E_MetricOnline, - m_Gatherer, m_Model, sampleCount); + core_t::TTime startTime) { + this->makeModelT( + params, features, startTime, model_t::E_MetricOnline, m_Gatherer, m_Model); } }; -BOOST_FIXTURE_TEST_CASE(testSample, CTestFixture) { - core_t::TTime startTime{45}; - core_t::TTime bucketLength{5}; - SModelParams params(bucketLength); - params.s_InitialDecayRateMultiplier = 1.0; - params.s_MaximumUpdatesPerBucket = 0.0; - - // Check basic sampling. 
- { - TTimeDoublePrVec data{{49, 1.5}, {60, 1.3}, {61, 1.3}, {62, 1.6}, - {65, 1.7}, {66, 1.33}, {68, 1.5}, {84, 1.58}, - {87, 1.69}, {157, 1.6}, {164, 1.66}, {199, 1.28}, - {202, 1.2}, {204, 1.5}}; - - TUIntVec sampleCounts{2, 1}; - TUIntVec expectedSampleCounts{2, 1}; - std::size_t i{0}; - for (auto& sampleCount : sampleCounts) { - model_t::TFeatureVec features{model_t::E_IndividualMeanByPerson, - model_t::E_IndividualMinByPerson, - model_t::E_IndividualMaxByPerson}; - - this->makeModel(params, features, startTime, sampleCount); - auto& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); - - // Bucket values. - std::uint64_t expectedCount{0}; - TMeanAccumulator baselineMeanError; - TMeanAccumulator expectedMean; - TMeanAccumulator expectedBaselineMean; - TMinAccumulator expectedMin; - TMaxAccumulator expectedMax; - - // Sampled values. - TMeanAccumulator expectedSampleTime; - TMeanAccumulator expectedMeanSample; - TMinAccumulator expectedMinSample; - TMaxAccumulator expectedMaxSample; - TDouble1Vec expectedSampleTimes; - TDouble1Vec expectedMeanSamples; - TDouble1Vec expectedMinSamples; - TDouble1Vec expectedMaxSamples; - std::size_t numberSamples{0}; - - TMathsModelPtr expectedMeanModel = m_Factory->defaultFeatureModel( - model_t::E_IndividualMeanByPerson, bucketLength, 0.4, true); - TMathsModelPtr expectedMinModel = m_Factory->defaultFeatureModel( - model_t::E_IndividualMinByPerson, bucketLength, 0.4, true); - TMathsModelPtr expectedMaxModel = m_Factory->defaultFeatureModel( - model_t::E_IndividualMaxByPerson, bucketLength, 0.4, true); - - std::size_t j{0}; - core_t::TTime time{startTime}; - for (;;) { - if (j < data.size() && data[j].first < time + bucketLength) { - LOG_DEBUG(<< "Adding " << data[j].second << " at " - << data[j].first); - - this->addArrival(SMessage(data[j].first, "p", data[j].second), m_Gatherer); - - ++expectedCount; - expectedMean.add(data[j].second); - expectedMin.add(data[j].second); - 
expectedMax.add(data[j].second); - - expectedSampleTime.add(static_cast(data[j].first)); - expectedMeanSample.add(data[j].second); - expectedMinSample.add(data[j].second); - expectedMaxSample.add(data[j].second); - - ++j; - - if (j % expectedSampleCounts[i] == 0) { - ++numberSamples; - expectedSampleTimes.push_back( - maths::common::CBasicStatistics::mean(expectedSampleTime)); - expectedMeanSamples.push_back( - maths::common::CBasicStatistics::mean(expectedMeanSample)); - expectedMinSamples.push_back(expectedMinSample[0]); - expectedMaxSamples.push_back(expectedMaxSample[0]); - expectedSampleTime = TMeanAccumulator(); - expectedMeanSample = TMeanAccumulator(); - expectedMinSample = TMinAccumulator(); - expectedMaxSample = TMaxAccumulator(); - } - } else { - LOG_DEBUG(<< "Sampling [" << time << ", " << time + bucketLength << ")"); - - model.sample(time, time + bucketLength, m_ResourceMonitor); - if (maths::common::CBasicStatistics::count(expectedMean) > 0.0) { - expectedBaselineMean.add( - maths::common::CBasicStatistics::mean(expectedMean)); - } - if (numberSamples > 0) { - LOG_DEBUG(<< "Adding mean samples = " << expectedMeanSamples - << ", min samples = " << expectedMinSamples - << ", max samples = " << expectedMaxSamples); - - maths::common::CModelAddSamplesParams::TDouble2VecWeightsAryVec weights( - numberSamples, maths_t::CUnitWeights::unit(1)); - maths::common::CModelAddSamplesParams params_; - params_.isInteger(false) - .isNonNegative(true) - .propagationInterval(1.0) - .trendWeights(weights) - .priorWeights(weights) - .firstValueTime(startTime); - - maths::common::CModel::TTimeDouble2VecSizeTrVec expectedMeanSamples_; - maths::common::CModel::TTimeDouble2VecSizeTrVec expectedMinSamples_; - maths::common::CModel::TTimeDouble2VecSizeTrVec expectedMaxSamples_; - for (std::size_t k = 0; k < numberSamples; ++k) { - // We round to the nearest integer time (note this has to match - // the behaviour of CMetricPartialStatistic::time). 
- core_t::TTime sampleTime{static_cast( - expectedSampleTimes[k] + 0.5)}; - expectedMeanSamples_.emplace_back( - sampleTime, TDouble2Vec{expectedMeanSamples[k]}, 0); - expectedMinSamples_.emplace_back( - sampleTime, TDouble2Vec{expectedMinSamples[k]}, 0); - expectedMaxSamples_.emplace_back( - sampleTime, TDouble2Vec{expectedMaxSamples[k]}, 0); - } - expectedMeanModel->addSamples(params_, expectedMeanSamples_); - expectedMinModel->addSamples(params_, expectedMinSamples_); - expectedMaxModel->addSamples(params_, expectedMaxSamples_); - numberSamples = 0; - expectedSampleTimes.clear(); - expectedMeanSamples.clear(); - expectedMinSamples.clear(); - expectedMaxSamples.clear(); - } - - model_t::CResultType type(model_t::CResultType::E_Unconditional | - model_t::CResultType::E_Final); - TOptionalUInt64 currentCount = model.currentBucketCount(0, time); - TDouble1Vec bucketMean = model.currentBucketValue( - model_t::E_IndividualMeanByPerson, 0, 0, time); - TDouble1Vec baselineMean = model.baselineBucketMean( - model_t::E_IndividualMeanByPerson, 0, 0, type, NO_CORRELATES, time); - - LOG_DEBUG(<< "bucket count = " << currentCount); - LOG_DEBUG(<< "current bucket mean = " << bucketMean << ", expected baseline bucket mean = " - << maths::common::CBasicStatistics::mean(expectedBaselineMean) - << ", baseline bucket mean = " << baselineMean); - - BOOST_TEST_REQUIRE(currentCount.has_value()); - BOOST_REQUIRE_EQUAL(expectedCount, *currentCount); - - TDouble1Vec mean = - maths::common::CBasicStatistics::count(expectedMean) > 0.0 - ? TDouble1Vec(1, maths::common::CBasicStatistics::mean(expectedMean)) - : TDouble1Vec(); - TDouble1Vec min = expectedMin.count() > 0 - ? TDouble1Vec(1, expectedMin[0]) - : TDouble1Vec(); - TDouble1Vec max = expectedMax.count() > 0 - ? 
TDouble1Vec(1, expectedMax[0]) - : TDouble1Vec(); - - BOOST_TEST_REQUIRE(mean == bucketMean); - if (!baselineMean.empty()) { - baselineMeanError.add(std::fabs( - baselineMean[0] - maths::common::CBasicStatistics::mean( - expectedBaselineMean))); - } - - BOOST_TEST_REQUIRE(mean == featureData(model, model_t::E_IndividualMeanByPerson, - 0, time)); - BOOST_TEST_REQUIRE(min == featureData(model, model_t::E_IndividualMinByPerson, - 0, time)); - BOOST_TEST_REQUIRE(max == featureData(model, model_t::E_IndividualMaxByPerson, - 0, time)); - - BOOST_REQUIRE_EQUAL(expectedMeanModel->checksum(), - model.details() - ->model(model_t::E_IndividualMeanByPerson, 0) - ->checksum()); - BOOST_REQUIRE_EQUAL(expectedMinModel->checksum(), - model.details() - ->model(model_t::E_IndividualMinByPerson, 0) - ->checksum()); - BOOST_REQUIRE_EQUAL(expectedMaxModel->checksum(), - model.details() - ->model(model_t::E_IndividualMaxByPerson, 0) - ->checksum()); - - // Test persistence. (We check for idempotency.) - std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - model.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - CModelFactory::TModelPtr restoredModel( - m_Factory->makeModel(m_Gatherer, traverser)); - - // The XML representation of the new filter should be the same as the original - std::string newXml; - { - ml::core::CRapidXmlStatePersistInserter inserter("root"); - restoredModel->acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - - std::uint64_t origChecksum = model.checksum(false); - LOG_DEBUG(<< "original checksum = " << origChecksum); - std::uint64_t restoredChecksum = restoredModel->checksum(false); - LOG_DEBUG(<< "restored checksum = " << restoredChecksum); - BOOST_REQUIRE_EQUAL(origChecksum, restoredChecksum); - 
BOOST_REQUIRE_EQUAL(origXml, newXml); - - expectedCount = 0; - expectedMean = TMeanAccumulator(); - expectedMin = TMinAccumulator(); - expectedMax = TMaxAccumulator(); - - if (j >= data.size()) { - break; - } - - time += bucketLength; - } - } - LOG_DEBUG(<< "baseline mean error = " - << maths::common::CBasicStatistics::mean(baselineMeanError)); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(baselineMeanError) < 0.25); - - ++i; - } - } -} - -BOOST_FIXTURE_TEST_CASE(testMultivariateSample, CTestFixture) { - using TVector2 = maths::common::CVectorNx1; - using TMean2Accumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; - using TTimeDouble2AryPr = std::pair>; - using TTimeDouble2AryPrVec = std::vector; - - core_t::TTime startTime(45); - core_t::TTime bucketLength(5); - SModelParams params(bucketLength); - params.s_InitialDecayRateMultiplier = 1.0; - params.s_MaximumUpdatesPerBucket = 0.0; - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricModelFactory factory(params, interimBucketCorrector); - - TTimeDouble2AryPrVec data{ - {49, {1.5, 1.1}}, {60, {1.3, 1.2}}, {61, {1.3, 2.1}}, - {62, {1.6, 1.5}}, {65, {1.7, 1.4}}, {66, {1.33, 1.6}}, - {68, {1.5, 1.37}}, {84, {1.58, 1.42}}, {87, {1.6, 1.6}}, - {157, {1.6, 1.6}}, {164, {1.66, 1.55}}, {199, {1.28, 1.4}}, - {202, {1.3, 1.1}}, {204, {1.5, 1.8}}}; - - TUIntVec sampleCounts{2, 1}; - TUIntVec expectedSampleCounts{2, 1}; - - std::size_t i{0}; - for (auto& sampleCount : sampleCounts) { - LOG_DEBUG(<< "*** sample count = " << sampleCount << " ***"); - - this->makeModel(params, {model_t::E_IndividualMeanLatLongByPerson}, - startTime, sampleCount); - auto& model = static_cast(*m_Model); - BOOST_REQUIRE_EQUAL(0, this->addPerson("p", m_Gatherer)); - - // Bucket values. - std::uint64_t expectedCount{0}; - TMean2Accumulator baselineLatLongError; - TMean2Accumulator expectedLatLong; - TMean2Accumulator expectedBaselineLatLong; - - // Sampled values. 
- TMean2Accumulator expectedLatLongSample; - std::size_t numberSamples{0}; - TDoubleVecVec expectedLatLongSamples; - TMultivariatePriorPtr expectedPrior = - factory.defaultMultivariatePrior(model_t::E_IndividualMeanLatLongByPerson); - - std::size_t j{0}; - core_t::TTime time{startTime}; - for (;;) { - if (j < data.size() && data[j].first < time + bucketLength) { - LOG_DEBUG(<< "Adding " << data[j].second[0] << "," - << data[j].second[1] << " at " << data[j].first); - - this->addArrival( - SMessage(data[j].first, "p", {}, - TDoubleDoublePr(data[j].second[0], data[j].second[1])), - m_Gatherer); - - ++expectedCount; - expectedLatLong.add(TVector2(data[j].second)); - expectedLatLongSample.add(TVector2(data[j].second)); - - if (++j % expectedSampleCounts[i] == 0) { - ++numberSamples; - expectedLatLongSamples.push_back(TDoubleVec( - maths::common::CBasicStatistics::mean(expectedLatLongSample) - .begin(), - maths::common::CBasicStatistics::mean(expectedLatLongSample) - .end())); - expectedLatLongSample = TMean2Accumulator(); - } - } else { - LOG_DEBUG(<< "Sampling [" << time << ", " << time + bucketLength << ")"); - model.sample(time, time + bucketLength, m_ResourceMonitor); - - if (maths::common::CBasicStatistics::count(expectedLatLong) > 0.0) { - expectedBaselineLatLong.add( - maths::common::CBasicStatistics::mean(expectedLatLong)); - } - if (numberSamples > 0) { - std::sort(expectedLatLongSamples.begin(), - expectedLatLongSamples.end()); - LOG_DEBUG(<< "Adding mean samples = " << expectedLatLongSamples); - expectedPrior->dataType(maths_t::E_ContinuousData); - expectedPrior->addSamples( - expectedLatLongSamples, - maths_t::TDouble10VecWeightsAry1Vec( - expectedLatLongSamples.size(), - maths_t::CUnitWeights::unit(2))); - expectedPrior->propagateForwardsByTime(1.0); - numberSamples = 0; - expectedLatLongSamples.clear(); - } - - model_t::CResultType type(model_t::CResultType::E_Unconditional | - model_t::CResultType::E_Final); - TOptionalUInt64 count = 
model.currentBucketCount(0, time); - TDouble1Vec bucketLatLong = model.currentBucketValue( - model_t::E_IndividualMeanLatLongByPerson, 0, 0, time); - TDouble1Vec baselineLatLong = - model.baselineBucketMean(model_t::E_IndividualMeanLatLongByPerson, - 0, 0, type, NO_CORRELATES, time); - TDouble1Vec featureLatLong = featureData( - model, model_t::E_IndividualMeanLatLongByPerson, 0, time); - const auto& prior = - dynamic_cast( - model.details()->model(model_t::E_IndividualMeanLatLongByPerson, 0)) - ->residualModel(); - - LOG_DEBUG(<< "bucket count = " << count); - LOG_DEBUG(<< "current = " << bucketLatLong << ", expected baseline = " - << maths::common::CBasicStatistics::mean(expectedBaselineLatLong) - << ", actual baseline = " << baselineLatLong); - - BOOST_TEST_REQUIRE(count.has_value()); - BOOST_REQUIRE_EQUAL(expectedCount, *count); - - TDouble1Vec latLong; - if (maths::common::CBasicStatistics::count(expectedLatLong) > 0.0) { - latLong.push_back( - maths::common::CBasicStatistics::mean(expectedLatLong)(0)); - latLong.push_back( - maths::common::CBasicStatistics::mean(expectedLatLong)(1)); - } - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(latLong), - core::CContainerPrinter::print(bucketLatLong)); - if (!baselineLatLong.empty()) { - baselineLatLongError.add(maths::common::fabs( - TVector2(baselineLatLong) - - maths::common::CBasicStatistics::mean(expectedBaselineLatLong))); - } - - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(latLong), - core::CContainerPrinter::print(featureLatLong)); - BOOST_REQUIRE_EQUAL(expectedPrior->checksum(), prior.checksum()); - - // Test persistence. (We check for idempotency.) 
- std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - model.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - - // Restore the XML into a new filter - core::CRapidXmlParser parser; - BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - CModelFactory::TModelPtr restoredModel(factory.makeModel(m_Gatherer, traverser)); - - // The XML representation of the new filter should be the same as the original - std::string newXml; - { - ml::core::CRapidXmlStatePersistInserter inserter("root"); - restoredModel->acceptPersistInserter(inserter); - inserter.toXml(newXml); - } - - std::uint64_t origChecksum = model.checksum(false); - LOG_DEBUG(<< "original checksum = " << origChecksum); - std::uint64_t restoredChecksum = restoredModel->checksum(false); - LOG_DEBUG(<< "restored checksum = " << restoredChecksum); - BOOST_REQUIRE_EQUAL(origChecksum, restoredChecksum); - BOOST_REQUIRE_EQUAL(origXml, newXml); - - expectedCount = 0; - expectedLatLong = TMean2Accumulator(); - - if (j >= data.size()) { - break; - } - - time += bucketLength; - } - } - LOG_DEBUG(<< "baseline mean error = " - << maths::common::CBasicStatistics::mean(baselineLatLongError)); - BOOST_TEST_REQUIRE( - maths::common::CBasicStatistics::mean(baselineLatLongError)(0) < 0.25); - BOOST_TEST_REQUIRE( - maths::common::CBasicStatistics::mean(baselineLatLongError)(1) < 0.25); - - ++i; - } -} - BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForMetric, CTestFixture) { core_t::TTime startTime{0}; core_t::TTime bucketLength{10}; @@ -533,7 +102,7 @@ BOOST_FIXTURE_TEST_CASE(testProbabilityCalculationForMetric, CTestFixture) { double mean{5.0}; double variance{2.0}; std::size_t anomalousBucket{12}; - double anomaly{5 * std::sqrt(variance)}; + double anomaly{5.0 * std::sqrt(variance)}; SModelParams params(bucketLength); model_t::TFeatureVec features{model_t::E_IndividualMeanByPerson, @@ -1010,9 +579,9 @@ 
BOOST_FIXTURE_TEST_CASE(testInfluence, CTestFixture) { {}, {}, {}, - {}, - {}, - {}, + {core::make_triple(std::string{"i3"}, 0.99, 1.0)}, + {core::make_triple(std::string{"i2"}, 0.99, 1.0)}, + {core::make_triple(std::string{"i1"}, 0.9, 1.0)}, {core::make_triple(std::string{"i1"}, 0.9, 1.0)}, {core::make_triple(std::string{"i1"}, 0.8, 0.9)}, {}, @@ -1085,16 +654,18 @@ BOOST_FIXTURE_TEST_CASE(testInfluence, CTestFixture) { {"i1", "i2", "i3", "i4", "i5", "i6"}, {"i2"}}; TStrDoubleDoubleTrVecVec influences{ - {}, - {}, - {}, - {}, {}, {}, {}, {}, {core::make_triple(std::string{"i1"}, 0.9, 1.0), core::make_triple(std::string{"i3"}, 0.9, 1.0)}, + {core::make_triple(std::string{"i3"}, 1.0, 1.0)}, + {core::make_triple(std::string{"i1"}, 0.9, 1.0), + core::make_triple(std::string{"i2"}, 0.9, 1.0)}, + {core::make_triple(std::string{"i1"}, 0.9, 1.0), + core::make_triple(std::string{"i2"}, 0.9, 1.0)}, + {}, {core::make_triple(std::string{"i1"}, 0.9, 1.0)}, {core::make_triple(std::string{"i5"}, 0.9, 1.0)}, {}}; @@ -1853,8 +1424,9 @@ BOOST_FIXTURE_TEST_CASE(testSummaryCountZeroRecordsAreIgnored, CTestFixture) { BOOST_REQUIRE_EQUAL(model_t::E_MetricOnline, modelNoZerosPtr->category()); auto& modelNoZeros = static_cast(*modelNoZerosPtr.get()); - // The idea here is to compare a model that has records with summary count of zero - // against a model that has no records at all where the first model had the zero-count records. + // The idea here is to compare a model that has records with summary + // count of zero against a model that has no records at all where the + // first model had the zero-count records. 
core_t::TTime now = 100; core_t::TTime end = now + 50 * bucketLength; @@ -2234,8 +1806,9 @@ BOOST_FIXTURE_TEST_CASE(testIgnoreSamplingGivenDetectionRules, CTestFixture) { std::size_t endTime = startTime + bucketLength; - // Add a few buckets to both models (this seems to be necessary to ensure subsequent calls to 'sample' - // actually result in samples being added to the model) + // Add a few buckets to both models (this seems to be necessary to ensure + // subsequent calls to 'sample' actually result in samples being added to + // the model). for (std::size_t j = 0; j < 3; ++j) { for (std::size_t i = 0; i < bucketLength; i++) { this->addArrival(SMessage(startTime + i, "p1", 1.0), gathererNoSkip); @@ -2245,7 +1818,7 @@ BOOST_FIXTURE_TEST_CASE(testIgnoreSamplingGivenDetectionRules, CTestFixture) { endTime += bucketLength; } - // Add a bucket to both models + // Add a bucket to both models. for (std::size_t i = 0; i < bucketLength; i++) { this->addArrival(SMessage(startTime + i, "p1", 1.0), gathererNoSkip); this->addArrival(SMessage(startTime + i, "p1", 1.0), gathererWithSkip); @@ -2256,8 +1829,8 @@ BOOST_FIXTURE_TEST_CASE(testIgnoreSamplingGivenDetectionRules, CTestFixture) { endTime += bucketLength; BOOST_REQUIRE_EQUAL(modelWithSkip->checksum(), modelNoSkip->checksum()); - // Add data to both models - // the model with the detection rule will apply a small weighting to the sample + // Add data to both models the model with the detection rule will apply a small + // weighting to the sample. 
for (std::size_t i = 0; i < bucketLength; i++) { this->addArrival(SMessage(startTime + i, "p1", 110.0), gathererNoSkip); this->addArrival(SMessage(startTime + i, "p1", 110.0), gathererWithSkip); @@ -2272,7 +1845,7 @@ BOOST_FIXTURE_TEST_CASE(testIgnoreSamplingGivenDetectionRules, CTestFixture) { startTime = endTime; endTime += bucketLength; - // Add more data to both models, for which the detection rule will not apply + // Add more data to both models, for which the detection rule will not apply. for (std::size_t i = 0; i < bucketLength; i++) { this->addArrival(SMessage(startTime + i, "p1", 2.0), gathererNoSkip); this->addArrival(SMessage(startTime + i, "p1", 2.0), gathererWithSkip); @@ -2284,7 +1857,8 @@ BOOST_FIXTURE_TEST_CASE(testIgnoreSamplingGivenDetectionRules, CTestFixture) { // added to the model with the detector rule. BOOST_TEST_REQUIRE(modelWithSkip->checksum() != modelNoSkip->checksum()); - // The underlying models should also differ due to the different weighting applied to the samples. + // The underlying models should also differ due to the different weighting applied + // to the samples. CAnomalyDetectorModel::TModelDetailsViewUPtr modelWithSkipView = modelWithSkip->details(); CAnomalyDetectorModel::TModelDetailsViewUPtr modelNoSkipView = modelNoSkip->details(); @@ -2299,7 +1873,7 @@ BOOST_FIXTURE_TEST_CASE(testIgnoreSamplingGivenDetectionRules, CTestFixture) { std::uint64_t noSkipChecksum = mathsModelNoSkip->checksum(); BOOST_TEST_REQUIRE(withSkipChecksum != noSkipChecksum); - // Check the last value times of the underlying models are the same + // Check the last value times of the underlying models are the same. 
const auto* timeSeriesModel = dynamic_cast( modelNoSkipView->model(model_t::E_IndividualMeanByPerson, 0)); @@ -2309,7 +1883,7 @@ BOOST_FIXTURE_TEST_CASE(testIgnoreSamplingGivenDetectionRules, CTestFixture) { BOOST_TEST_REQUIRE(trendModel != nullptr); core_t::TTime modelNoSkipTime = trendModel->lastValueTime(); - // The last times of model with a skip should be the same + // The last times of model with a skip should be the same. timeSeriesModel = dynamic_cast( modelWithSkipView->model(model_t::E_IndividualMeanByPerson, 0)); BOOST_TEST_REQUIRE(timeSeriesModel); @@ -2343,25 +1917,23 @@ class MyFakeModel : public ml::maths::common::CModelStub { }; BOOST_FIXTURE_TEST_CASE(testLatLongNotMalformed, CTestFixture) { - // This test ensures that the latitudes and longitudes generated by the model are within the - // expected range. + // This test ensures that the latitudes and longitudes generated by the model + // are within the expected range. - // initialize the model + // Initialize the model. core_t::TTime startTime{45}; core_t::TTime bucketLength{5}; model_t::TFeatureVec features{model_t::E_IndividualMeanLatLongByPerson}; SModelParams params(bucketLength); params.s_InitialDecayRateMultiplier = 1.0; - params.s_MaximumUpdatesPerBucket = 0.0; - size_t sampleCount{1}; - this->makeModel(params, features, startTime, sampleCount); + this->makeModel(params, features, startTime); ml::model::CAnomalyDetectorModel::TFeatureMultivariatePriorSPtrPrVec newFeatureCorelateModelPriors; ml::model::CAnomalyDetectorModel::TFeatureCorrelationsPtrPrVec featureCorrelatesModels; ml::model::CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators; - // generate random numbers for latitudes and longitudes in the range [-360, 360] + // Generate random numbers for latitudes and longitudes in the range [-360, 360]. 
test::CRandomNumbers rng; int numberOfTrials{100}; std::vector latitudes; @@ -2398,3 +1970,4 @@ BOOST_FIXTURE_TEST_CASE(testLatLongNotMalformed, CTestFixture) { } BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/model/unittest/CMetricPopulationDataGathererTest.cc b/lib/model/unittest/CMetricPopulationDataGathererTest.cc index dea04a2880..0f135af24f 100644 --- a/lib/model/unittest/CMetricPopulationDataGathererTest.cc +++ b/lib/model/unittest/CMetricPopulationDataGathererTest.cc @@ -18,11 +18,11 @@ #include #include +#include #include #include #include #include -#include #include #include @@ -35,15 +35,16 @@ #include #include +namespace { + BOOST_AUTO_TEST_SUITE(CMetricPopulationDataGathererTest) using namespace ml; using namespace model; -namespace { - using TDoubleVec = std::vector; using TStrVec = std::vector; +using TStrVecVec = std::vector; using TStrStrPr = std::pair; using TStrStrPrDoubleMap = std::map; using TOptionalStr = std::optional; @@ -71,12 +72,6 @@ struct SMessage { }; using TMessageVec = std::vector; -TStrVec vec(const std::string& s1, const std::string& s2) { - TStrVec result(1, s1); - result.push_back(s2); - return result; -} - void generateTestMessages(const core_t::TTime& startTime, TMessageVec& result) { const std::size_t numberMessages = 100000; const std::size_t numberPeople = 40; @@ -140,8 +135,6 @@ bool isSpace(const char x) { const CSearchKey searchKey; const std::string EMPTY_STRING; -} // unnamed:: - class CTestFixture { protected: CResourceMonitor m_ResourceMonitor; @@ -179,7 +172,7 @@ BOOST_FIXTURE_TEST_CASE(testMean, CTestFixture) { << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; - gatherer.featureData(bucketStart, bucketLength, tmp); + gatherer.featureData(bucketStart, tmp); BOOST_REQUIRE_EQUAL(features.size(), tmp.size()); BOOST_REQUIRE_EQUAL(features[0], tmp[0].first); @@ -218,9 +211,8 @@ BOOST_FIXTURE_TEST_CASE(testMin, CTestFixture) { // Test that we correctly sample the bucket minimums. 
using TMinAccumulator = - maths::common::CBasicStatistics::COrderStatisticsStack; + maths::common::CBasicStatistics::COrderStatisticsStack; using TStrStrPrMinAccumulatorMap = std::map; - using TStrStrPrMinAccumulatorMapCItr = TStrStrPrMinAccumulatorMap::const_iterator; const core_t::TTime startTime = 1373932800; const core_t::TTime bucketLength = 3600; @@ -246,7 +238,7 @@ BOOST_FIXTURE_TEST_CASE(testMin, CTestFixture) { << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; - gatherer.featureData(bucketStart, bucketLength, tmp); + gatherer.featureData(bucketStart, tmp); BOOST_REQUIRE_EQUAL(features.size(), tmp.size()); BOOST_REQUIRE_EQUAL(features[0], tmp[0].first); @@ -262,8 +254,7 @@ BOOST_FIXTURE_TEST_CASE(testMin, CTestFixture) { } TStrStrPrDoubleMap expectedMins; - for (TStrStrPrMinAccumulatorMapCItr itr = accumulators.begin(); - itr != accumulators.end(); ++itr) { + for (auto itr = accumulators.begin(); itr != accumulators.end(); ++itr) { expectedMins[itr->first] = itr->second[0]; } @@ -312,7 +303,7 @@ BOOST_FIXTURE_TEST_CASE(testMax, CTestFixture) { << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; - gatherer.featureData(bucketStart, bucketLength, tmp); + gatherer.featureData(bucketStart, tmp); BOOST_REQUIRE_EQUAL(features.size(), tmp.size()); BOOST_REQUIRE_EQUAL(features[0], tmp[0].first); @@ -373,7 +364,7 @@ BOOST_FIXTURE_TEST_CASE(testSum, CTestFixture) { << bucketStart + bucketLength << ")"); TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; - gatherer.featureData(bucketStart, bucketLength, tmp); + gatherer.featureData(bucketStart, tmp); BOOST_REQUIRE_EQUAL(features.size(), tmp.size()); BOOST_REQUIRE_EQUAL(features[0], tmp[0].first); @@ -401,82 +392,15 @@ BOOST_FIXTURE_TEST_CASE(testSum, CTestFixture) { } } -BOOST_FIXTURE_TEST_CASE(testSampleCount, CTestFixture) { - // Test that we set sensible sample counts for each attribute. 
- - const core_t::TTime startTime = 1373932800; - const core_t::TTime bucketLength = 3600; - SModelParams params(bucketLength); - const std::string attribute("c1"); - const std::string person("p1"); - const std::size_t numberBuckets = 40; - const std::size_t personMessageCount[] = { - 11, 11, 11, 11, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, - 110, 110, 110, 97, 97, 97, 97, 97, 97, 97, 97, 97}; - const double expectedSampleCounts[] = { - 0.0, 0.0, 0.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, - 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.3597, 11.7164, - 12.0701, 12.421, 12.7689, 13.114, 13.4562, 13.7957, 14.1325, 14.4665}; - const double tolerance = 5e-4; - - TMessageVec messages; - for (std::size_t bucket = 0; bucket < numberBuckets; ++bucket) { - core_t::TTime bucketStart = startTime + static_cast(bucket) * bucketLength; - - std::size_t n = personMessageCount[bucket]; - for (std::size_t i = 0; i < n; ++i) { - core_t::TTime time = bucketStart + bucketLength * - static_cast(i) / - static_cast(n); - messages.push_back(SMessage(time, person, attribute, 1.0)); - } - } - - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationMeanByPersonAndAttribute}); - CModelFactory::SGathererInitializationData initData(startTime); - CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer& gatherer(*gathererPtr); - - std::size_t bucket = 0; - for (std::size_t i = 0; i < messages.size(); ++i) { - core_t::TTime bucketStart = startTime + static_cast(bucket) * bucketLength; - - if (messages[i].s_Time >= bucketStart + bucketLength) { - gatherer.sampleNow(bucketStart); - LOG_DEBUG(<< gatherer.effectiveSampleCount(0)); - 
BOOST_REQUIRE_CLOSE_ABSOLUTE(expectedSampleCounts[bucket], - gatherer.effectiveSampleCount(0), tolerance); - ++bucket; - } - - addArrival(messages[i], gatherer, m_ResourceMonitor); - } - - core_t::TTime bucketStart = startTime + static_cast(bucket) * bucketLength; - gatherer.sampleNow(bucketStart); - BOOST_REQUIRE_CLOSE_ABSOLUTE(expectedSampleCounts[bucket], - gatherer.effectiveSampleCount(0), tolerance); -} - BOOST_FIXTURE_TEST_CASE(testFeatureData, CTestFixture) { // Test we correctly sample the mean, minimum and maximum statistics. using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; + using TMinAccumulator = maths::common::CBasicStatistics::SMin::TAccumulator; + using TMaxAccumulator = maths::common::CBasicStatistics::SMax::TAccumulator; using TStrStrPrMeanAccumulatorMap = std::map; - using TStrStrPrMeanAccumulatorMapCItr = TStrStrPrMeanAccumulatorMap::const_iterator; - using TMinAccumulator = - maths::common::CBasicStatistics::COrderStatisticsStack; using TStrStrPrMinAccumulatorMap = std::map; - using TStrStrPrMinAccumulatorMapCItr = TStrStrPrMinAccumulatorMap::const_iterator; - using TMaxAccumulator = - maths::common::CBasicStatistics::COrderStatisticsStack>; using TStrStrPrMaxAccumulatorMap = std::map; - using TStrStrPrMaxAccumulatorMapCItr = TStrStrPrMaxAccumulatorMap::const_iterator; using TStrStrPrDoubleVecMap = std::map; const core_t::TTime startTime = 1373932800; @@ -499,14 +423,9 @@ BOOST_FIXTURE_TEST_CASE(testFeatureData, CTestFixture) { CDataGatherer& gatherer(*gathererPtr); TStrStrPrMeanAccumulatorMap bucketMeanAccumulators; - TStrStrPrMeanAccumulatorMap sampleMeanAccumulators; - TStrStrPrDoubleVecMap expectedMeanSamples; TStrStrPrMinAccumulatorMap bucketMinAccumulators; - TStrStrPrMinAccumulatorMap sampleMinAccumulators; - TStrStrPrDoubleVecMap expectedMinSamples; TStrStrPrMaxAccumulatorMap bucketMaxAccumulators; - TStrStrPrMaxAccumulatorMap sampleMaxAccumulators; - TStrStrPrDoubleVecMap expectedMaxSamples; + 
core_t::TTime bucketStart = startTime; for (std::size_t i = 0; i < messages.size(); ++i) { if (messages[i].s_Time >= bucketStart + bucketLength) { @@ -515,138 +434,71 @@ BOOST_FIXTURE_TEST_CASE(testFeatureData, CTestFixture) { gatherer.sampleNow(bucketStart); - TFeatureSizeSizePrFeatureDataPrVecPrVec tmp; - gatherer.featureData(bucketStart, bucketLength, tmp); - BOOST_REQUIRE_EQUAL(static_cast(3), tmp.size()); - + TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; + gatherer.featureData(bucketStart, featureData); + BOOST_REQUIRE_EQUAL(static_cast(3), featureData.size()); BOOST_REQUIRE_EQUAL(model_t::E_PopulationMeanByPersonAndAttribute, - tmp[0].first); - TStrStrPrDoubleMap means; - TStrStrPrDoubleVecMap meanSamples; - for (std::size_t j = 0; j < tmp[0].second.size(); ++j) { - const TSizeSizePrFeatureDataPr& data = tmp[0].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), - gatherer.attributeName(data.first.second)); - if (data.second.s_BucketValue) { - means[key] = data.second.s_BucketValue->value()[0]; - } - TDoubleVec& samples = meanSamples[key]; - for (std::size_t k = 0; - k < core::unwrap_ref(data.second.s_Samples).size(); ++k) { - samples.push_back( - core::unwrap_ref(data.second.s_Samples)[k].value()[0]); - } - } - + featureData[0].first); BOOST_REQUIRE_EQUAL(model_t::E_PopulationMinByPersonAndAttribute, - tmp[1].first); - TStrStrPrDoubleMap mins; - TStrStrPrDoubleVecMap minSamples; - for (std::size_t j = 0; j < tmp[1].second.size(); ++j) { - const TSizeSizePrFeatureDataPr& data = tmp[1].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), - gatherer.attributeName(data.first.second)); - if (data.second.s_BucketValue) { - mins[key] = data.second.s_BucketValue->value()[0]; - } - TDoubleVec& samples = minSamples[key]; - for (std::size_t k = 0; - k < core::unwrap_ref(data.second.s_Samples).size(); ++k) { - samples.push_back( - core::unwrap_ref(data.second.s_Samples)[k].value()[0]); - } - } - + featureData[1].first); 
BOOST_REQUIRE_EQUAL(model_t::E_PopulationMaxByPersonAndAttribute, - tmp[2].first); - TStrStrPrDoubleMap maxs; - TStrStrPrDoubleVecMap maxSamples; - for (std::size_t j = 0; j < tmp[2].second.size(); ++j) { - const TSizeSizePrFeatureDataPr& data = tmp[2].second[j]; - TStrStrPr key(gatherer.personName(data.first.first), - gatherer.attributeName(data.first.second)); - if (data.second.s_BucketValue) { - maxs[key] = data.second.s_BucketValue->value()[0]; + featureData[2].first); + + for (std::size_t j = 0; j < 3; ++j) { + TStrStrPrDoubleMap bucketValues; + TStrStrPrDoubleVecMap sampleValues; + for (const auto& data : featureData[j].second) { + TStrStrPr key{gatherer.personName(data.first.first), + gatherer.attributeName(data.first.second)}; + if (data.second.s_BucketValue) { + bucketValues[key] = data.second.s_BucketValue->value()[0]; + } + for (const auto& sample : data.second.s_Samples) { + sampleValues[key].push_back(sample.value()[0]); + } } - TDoubleVec& samples = maxSamples[key]; - for (std::size_t k = 0; - k < core::unwrap_ref(data.second.s_Samples).size(); ++k) { - samples.push_back( - core::unwrap_ref(data.second.s_Samples)[k].value()[0]); + TStrStrPrDoubleMap expectedBucketValues; + TStrStrPrDoubleVecMap expectedSampleValues; + switch (j) { + case 0: + for (const auto & [ key, value ] : bucketMeanAccumulators) { + expectedBucketValues[key] = + maths::common::CBasicStatistics::mean(value); + expectedSampleValues[key].push_back( + maths::common::CBasicStatistics::mean(value)); + } + break; + case 1: + for (const auto & [ key, value ] : bucketMinAccumulators) { + expectedBucketValues[key] = value[0]; + expectedSampleValues[key].push_back(value[0]); + } + break; + case 2: + for (const auto & [ key, value ] : bucketMaxAccumulators) { + expectedBucketValues[key] = value[0]; + expectedSampleValues[key].push_back(value[0]); + } + break; } + BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedBucketValues), + core::CContainerPrinter::print(bucketValues)); + 
BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedSampleValues), + core::CContainerPrinter::print(sampleValues)); } - TStrStrPrDoubleMap expectedMeans; - for (TStrStrPrMeanAccumulatorMapCItr itr = bucketMeanAccumulators.begin(); - itr != bucketMeanAccumulators.end(); ++itr) { - expectedMeans[itr->first] = - maths::common::CBasicStatistics::mean(itr->second); - } - - TStrStrPrDoubleMap expectedMins; - for (TStrStrPrMinAccumulatorMapCItr itr = bucketMinAccumulators.begin(); - itr != bucketMinAccumulators.end(); ++itr) { - expectedMins[itr->first] = itr->second[0]; - } - - TStrStrPrDoubleMap expectedMaxs; - for (TStrStrPrMaxAccumulatorMapCItr itr = bucketMaxAccumulators.begin(); - itr != bucketMaxAccumulators.end(); ++itr) { - expectedMaxs[itr->first] = itr->second[0]; - } - - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedMeans), - core::CContainerPrinter::print(means)); - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedMins), - core::CContainerPrinter::print(mins)); - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedMaxs), - core::CContainerPrinter::print(maxs)); - - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedMeanSamples), - core::CContainerPrinter::print(meanSamples)); - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedMinSamples), - core::CContainerPrinter::print(minSamples)); - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedMaxSamples), - core::CContainerPrinter::print(maxSamples)); - bucketStart += bucketLength; bucketMeanAccumulators.clear(); - expectedMeanSamples.clear(); bucketMinAccumulators.clear(); - expectedMinSamples.clear(); bucketMaxAccumulators.clear(); - expectedMaxSamples.clear(); } addArrival(messages[i], gatherer, m_ResourceMonitor); - TStrStrPr key(messages[i].s_Person, messages[i].s_Attribute); + TStrStrPr key(messages[i].s_Person, messages[i].s_Attribute); bucketMeanAccumulators[key].add(messages[i].s_Value); bucketMinAccumulators[key].add(messages[i].s_Value); 
bucketMaxAccumulators[key].add(messages[i].s_Value); - expectedMeanSamples.insert(TStrStrPrDoubleVecMap::value_type(key, TDoubleVec())); - expectedMinSamples.insert(TStrStrPrDoubleVecMap::value_type(key, TDoubleVec())); - expectedMaxSamples.insert(TStrStrPrDoubleVecMap::value_type(key, TDoubleVec())); - - std::size_t cid; - BOOST_TEST_REQUIRE(gatherer.attributeId(messages[i].s_Attribute, cid)); - - double sampleCount = gatherer.effectiveSampleCount(cid); - if (sampleCount > 0.0) { - sampleMeanAccumulators[key].add(messages[i].s_Value); - sampleMinAccumulators[key].add(messages[i].s_Value); - sampleMaxAccumulators[key].add(messages[i].s_Value); - if (maths::common::CBasicStatistics::count(sampleMeanAccumulators[key]) == - std::floor(sampleCount + 0.5)) { - expectedMeanSamples[key].push_back( - maths::common::CBasicStatistics::mean(sampleMeanAccumulators[key])); - expectedMinSamples[key].push_back(sampleMinAccumulators[key][0]); - expectedMaxSamples[key].push_back(sampleMaxAccumulators[key][0]); - sampleMeanAccumulators[key] = TMeanAccumulator(); - sampleMinAccumulators[key] = TMinAccumulator(); - sampleMaxAccumulators[key] = TMaxAccumulator(); - } - } } } @@ -672,7 +524,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute); CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, searchKey, features, startTime, 0); + EMPTY_STRING, {}, searchKey, features, startTime); TMessageVec messages; generateTestMessages(startTime, messages); @@ -728,7 +580,7 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { TStrFeatureDataPrVec expectedFeatureData; { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart, bucketLength, featureData); + gatherer.featureData(bucketStart, featureData); for (std::size_t i = 0; i < featureData.size(); ++i) { const 
TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0; j < data.size(); ++j) { @@ -770,14 +622,14 @@ BOOST_FIXTURE_TEST_CASE(testRemovePeople, CTestFixture) { TStrFeatureDataPrVec actualFeatureData; { TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart, bucketLength, featureData); + gatherer.featureData(bucketStart, featureData); for (std::size_t i = 0; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0; j < data.size(); ++j) { std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); - actualFeatureData.push_back(TStrFeatureDataPr(key, data[j].second)); + actualFeatureData.emplace_back(key, data[j].second); LOG_TRACE(<< " " << key); LOG_TRACE(<< " " << data[j].second.print()); } @@ -807,7 +659,7 @@ BOOST_FIXTURE_TEST_CASE(testRemoveAttributes, CTestFixture) { features.push_back(model_t::E_PopulationSumByBucketPersonAndAttribute); CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, {}, searchKey, features, startTime, 0); + EMPTY_STRING, {}, searchKey, features, startTime); TMessageVec messages; generateTestMessages(startTime, messages); @@ -825,21 +677,19 @@ BOOST_FIXTURE_TEST_CASE(testRemoveAttributes, CTestFixture) { // Remove attributes 0, 2, 3, 9 (i.e. last). 
TSizeVec attributesToRemove; - attributesToRemove.push_back(0u); - attributesToRemove.push_back(2u); - attributesToRemove.push_back(3u); - attributesToRemove.push_back(9u); + attributesToRemove.push_back(0); + attributesToRemove.push_back(2); + attributesToRemove.push_back(3); + attributesToRemove.push_back(9); std::size_t numberAttributes = gatherer.numberActiveAttributes(); BOOST_REQUIRE_EQUAL(numberAttributes, gatherer.numberByFieldValues()); TStrVec expectedAttributeNames; TSizeVec expectedAttributeIds; - TDoubleVec expectedSampleCounts; for (std::size_t i = 0; i < numberAttributes; ++i) { if (!std::binary_search(attributesToRemove.begin(), attributesToRemove.end(), i)) { expectedAttributeNames.push_back(gatherer.attributeName(i)); expectedAttributeIds.push_back(i); - expectedSampleCounts.push_back(gatherer.effectiveSampleCount(i)); } else { LOG_DEBUG(<< "Removing " << gatherer.attributeName(i)); } @@ -850,7 +700,7 @@ BOOST_FIXTURE_TEST_CASE(testRemoveAttributes, CTestFixture) { LOG_TRACE(<< "Expected"); TStrFeatureDataPrVec expected; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart, bucketLength, featureData); + gatherer.featureData(bucketStart, featureData); for (std::size_t i = 0; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0; j < data.size(); ++j) { @@ -859,7 +709,7 @@ BOOST_FIXTURE_TEST_CASE(testRemoveAttributes, CTestFixture) { std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); - expected.push_back(TStrFeatureDataPr(key, data[j].second)); + expected.emplace_back(key, data[j].second); LOG_TRACE(<< " " << key); LOG_TRACE(<< " " << data[j].second.print()); } @@ -879,26 +729,20 @@ BOOST_FIXTURE_TEST_CASE(testRemoveAttributes, CTestFixture) { } numberAttributes = gatherer.numberActiveAttributes(); - TDoubleVec actualSampleCounts; - for 
(std::size_t i = 0; i < numberAttributes; ++i) { - actualSampleCounts.push_back(gatherer.effectiveSampleCount(expectedAttributeIds[i])); - } - BOOST_REQUIRE_EQUAL(core::CContainerPrinter::print(expectedSampleCounts), - core::CContainerPrinter::print(actualSampleCounts)); std::string actualFeatureData; { LOG_TRACE(<< "Actual"); TStrFeatureDataPrVec actual; TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart, bucketLength, featureData); + gatherer.featureData(bucketStart, featureData); for (std::size_t i = 0; i < featureData.size(); ++i) { const TSizeSizePrFeatureDataPrVec& data = featureData[i].second; for (std::size_t j = 0; j < data.size(); ++j) { std::string key = model_t::print(featureData[i].first) + " " + gatherer.personName(data[j].first.first) + " " + gatherer.attributeName(data[j].first.second); - actual.push_back(TStrFeatureDataPr(key, data[j].second)); + actual.emplace_back(key, data[j].second); LOG_TRACE(<< " " << key); LOG_TRACE(<< " " << data[j].second.print()); } @@ -919,32 +763,31 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { SModelParams params(bucketLength); params.s_DecayRate = 0.001; - std::string influencerNames_[] = {"i1", "i2"}; - std::string influencerValues[][3] = {{"i11", "i12", "i13"}, {"i21", "i22", "i23"}}; - - SMessage data[] = { - SMessage(1, "p1", "", 1.0, vec(influencerValues[0][0], influencerValues[1][0])), // Bucket 1 - SMessage(150, "p1", "", 5.0, vec(influencerValues[0][1], influencerValues[1][1])), - SMessage(150, "p1", "", 3.0, vec(influencerValues[0][2], influencerValues[1][2])), - SMessage(550, "p2", "", 2.0, vec(influencerValues[0][0], influencerValues[1][0])), - SMessage(551, "p2", "", 2.1, vec(influencerValues[0][1], influencerValues[1][1])), - SMessage(552, "p2", "", 4.0, vec(influencerValues[0][2], influencerValues[1][2])), - SMessage(554, "p2", "", 2.2, vec(influencerValues[0][2], influencerValues[1][2])), - SMessage(600, "p1", "", 3.0, 
vec(influencerValues[0][1], influencerValues[1][0])), // Bucket 2 - SMessage(660, "p2", "", 3.0, vec(influencerValues[0][0], influencerValues[1][2])), - SMessage(690, "p1", "", 7.3, vec(influencerValues[0][1], "")), - SMessage(700, "p2", "", 4.0, vec(influencerValues[0][0], influencerValues[1][2])), - SMessage(800, "p1", "", 2.2, vec(influencerValues[0][2], influencerValues[1][0])), - SMessage(900, "p2", "", 2.5, vec(influencerValues[0][1], influencerValues[1][0])), - SMessage(1000, "p1", "", 5.0, vec(influencerValues[0][1], influencerValues[1][0])), - SMessage(1200, "p2", "", 6.4, vec("", influencerValues[1][2])), // Bucket 3 - SMessage(1210, "p2", "", 6.0, vec("", influencerValues[1][2])), - SMessage(1240, "p2", "", 7.0, vec("", influencerValues[1][1])), - SMessage(1600, "p2", "", 11.0, vec("", influencerValues[1][0])), - SMessage(1800, "p1", "", 11.0, vec("", "")) // Sentinel + TStrVecVec influencerValues{{"i11", "i12", "i13"}, {"i21", "i22", "i23"}}; + + TMessageVec data{ + SMessage(1, "p1", "", 1.0, {influencerValues[0][0], influencerValues[1][0]}), // Bucket 1 + SMessage(150, "p1", "", 5.0, {influencerValues[0][1], influencerValues[1][1]}), + SMessage(150, "p1", "", 3.0, {influencerValues[0][2], influencerValues[1][2]}), + SMessage(550, "p2", "", 2.0, {influencerValues[0][0], influencerValues[1][0]}), + SMessage(551, "p2", "", 2.1, {influencerValues[0][1], influencerValues[1][1]}), + SMessage(552, "p2", "", 4.0, {influencerValues[0][2], influencerValues[1][2]}), + SMessage(554, "p2", "", 2.2, {influencerValues[0][2], influencerValues[1][2]}), + SMessage(600, "p1", "", 3.0, {influencerValues[0][1], influencerValues[1][0]}), // Bucket 2 + SMessage(660, "p2", "", 3.0, {influencerValues[0][0], influencerValues[1][2]}), + SMessage(690, "p1", "", 7.3, {influencerValues[0][1], ""}), + SMessage(700, "p2", "", 4.0, {influencerValues[0][0], influencerValues[1][2]}), + SMessage(800, "p1", "", 2.2, {influencerValues[0][2], influencerValues[1][0]}), + SMessage(900, "p2", 
"", 2.5, {influencerValues[0][1], influencerValues[1][0]}), + SMessage(1000, "p1", "", 5.0, {influencerValues[0][1], influencerValues[1][0]}), + SMessage(1200, "p2", "", 6.4, {"", influencerValues[1][2]}), // Bucket 3 + SMessage(1210, "p2", "", 6.0, {"", influencerValues[1][2]}), + SMessage(1240, "p2", "", 7.0, {"", influencerValues[1][1]}), + SMessage(1600, "p2", "", 11.0, {"", influencerValues[1][0]}), + SMessage(1800, "p1", "", 11.0, {"", ""}) // Sentinel }; - std::string expectedStatistics[] = { + TStrVec expectedStatistics{ "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (3.1, 2)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (3.1, 2))]", "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", @@ -952,57 +795,44 @@ BOOST_FIXTURE_TEST_CASE(testInfluenceStatistics, CTestFixture) { "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (4, 1)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (4, 1))]", "[(i11, (1, 1)), (i12, (5, 1)), (i13, (3, 1)), (i21, (1, 1)), (i22, (5, 1)), (i23, (3, 1))]", - "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (6.2, 1)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (6.2, 1))]", + "[(i11, (2, 1)), (i12, (2.1, 1)), (i13, (6.2, 2)), (i21, (2, 1)), (i22, (2.1, 1)), (i23, (6.2, 2))]", "[(i12, (5.1, 3)), (i13, (2.2, 1)), (i21, (3.4, 3))]", "[(i11, (3.5, 2)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (3.5, 2))]", "[(i12, (3, 1)), (i13, (2.2, 1)), (i21, (2.2, 1))]", "[(i11, (3, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (3, 1))]", "[(i12, (7.3, 1)), (i13, (2.2, 1)), (i21, (5, 1))]", "[(i11, (4, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (4, 1))]", - "[(i12, (15.3, 1)), (i13, (2.2, 1)), (i21, (10.2, 1))]", - "[(i11, (7, 1)), (i12, (2.5, 1)), (i21, (2.5, 1)), (i23, (7, 1))]", + "[(i12, (15.3, 3)), (i13, (2.2, 1)), (i21, (10.2, 3))]", + "[(i11, (7, 2)), (i12, (2.5, 1)), 
(i21, (2.5, 1)), (i23, (7, 2))]", "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.2, 2))]", "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6, 1))]", "[(i21, (11, 1)), (i22, (7, 1)), (i23, (6.4, 1))]", - "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 1))]"}; - const std::string* expected = expectedStatistics; + "[(i21, (11, 1)), (i22, (7, 1)), (i23, (12.4, 2))]"}; - model_t::TFeatureVec features; - features.push_back(model_t::E_PopulationMeanByPersonAndAttribute); - features.push_back(model_t::E_PopulationMinByPersonAndAttribute); - features.push_back(model_t::E_PopulationMaxByPersonAndAttribute); - features.push_back(model_t::E_PopulationHighSumByBucketPersonAndAttribute); - TStrVec influencerNames(std::begin(influencerNames_), std::end(influencerNames_)); - CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, + model_t::TFeatureVec features{model_t::E_PopulationMeanByPersonAndAttribute, + model_t::E_PopulationMinByPersonAndAttribute, + model_t::E_PopulationMaxByPersonAndAttribute, + model_t::E_PopulationHighSumByBucketPersonAndAttribute}; + TStrVec influencerNames{"i1", "i2"}; + CDataGatherer gatherer(model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - influencerNames, searchKey, features, startTime, 2); + EMPTY_STRING, influencerNames, searchKey, features, startTime); core_t::TTime bucketStart = startTime; - for (std::size_t i = 0; i < std::size(data); ++i) { + auto expected = expectedStatistics.begin(); + for (std::size_t i = 0; i < data.size(); ++i) { if (data[i].s_Time >= bucketStart + bucketLength) { LOG_DEBUG(<< "*** processing bucket ***"); - TFeatureSizeSizePrFeatureDataPrVecPrVec featureData; - gatherer.featureData(bucketStart, bucketLength, featureData); - for (std::size_t j = 0; j < featureData.size(); ++j) { - model_t::EFeature feature = featureData[j].first; + TFeatureSizeSizePrFeatureDataPrVecPrVec featuresData; + gatherer.featureData(bucketStart, featuresData); + for 
(const auto & [ feature, featureData ] : featuresData) { LOG_DEBUG(<< "feature = " << model_t::print(feature)); - - const TSizeSizePrFeatureDataPrVec& data_ = featureData[j].second; - for (std::size_t k = 0; k < data_.size(); ++k) { + for (const auto& data_ : featureData) { TStrDoubleDoublePrPrVec statistics; - for (std::size_t m = 0; - m < data_[k].second.s_InfluenceValues.size(); ++m) { - for (std::size_t n = 0; - n < data_[k].second.s_InfluenceValues[m].size(); ++n) { - statistics.push_back(TStrDoubleDoublePrPr( - data_[k].second.s_InfluenceValues[m][n].first, - TDoubleDoublePr( - data_[k] - .second.s_InfluenceValues[m][n] - .second.first[0], - data_[k] - .second.s_InfluenceValues[m][n] - .second.second))); + for (const auto& influenceValue : data_.second.s_InfluenceValues) { + for (const auto & [ influence, value ] : influenceValue) { + statistics.emplace_back( + influence, std::make_pair(value.first[0], value.second)); } } std::sort(statistics.begin(), statistics.end(), @@ -1035,7 +865,7 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { CDataGatherer origDataGatherer(model_t::E_PopulationMetric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, {}, - searchKey, features, startTime, 0); + searchKey, features, startTime); TMessageVec messages; generateTestMessages(startTime, messages); @@ -1091,55 +921,5 @@ BOOST_FIXTURE_TEST_CASE(testPersistence, CTestFixture) { BOOST_REQUIRE_EQUAL(origXml, newXml); } -BOOST_FIXTURE_TEST_CASE(testReleaseMemory, CTestFixture) { - const core_t::TTime startTime = 1373932800; - const core_t::TTime bucketLength = 3600; - - SModelParams params(bucketLength); - params.s_LatencyBuckets = 3; - auto interimBucketCorrector = std::make_shared(bucketLength); - CMetricPopulationModelFactory factory(params, interimBucketCorrector); - factory.features({model_t::E_PopulationMeanByPersonAndAttribute}); - CModelFactory::SGathererInitializationData initData(startTime); - 
CModelFactory::TDataGathererPtr gathererPtr(factory.makeDataGatherer(initData)); - CDataGatherer& gatherer(*gathererPtr); - BOOST_TEST_REQUIRE(gatherer.isPopulation()); - - core_t::TTime bucketStart = startTime; - // Add a few buckets with count of 10 so that sample count gets estimated - for (std::size_t i = 0; i < 10; ++i) { - // Add 10 events - for (std::size_t j = 0; j < 10; ++j) { - addArrival(SMessage(bucketStart, "p1", "", 10.0), gatherer, m_ResourceMonitor); - addArrival(SMessage(bucketStart, "p2", "", 10.0), gatherer, m_ResourceMonitor); - } - gatherer.sampleNow(bucketStart - params.s_LatencyBuckets * bucketLength); - bucketStart += bucketLength; - } - - // Add a bucket with not enough data to sample for p2 - for (std::size_t j = 0; j < 10; ++j) { - addArrival(SMessage(bucketStart, "p1", "", 10.0), gatherer, m_ResourceMonitor); - } - addArrival(SMessage(bucketStart, "p2", "", 10.0), gatherer, m_ResourceMonitor); - gatherer.sampleNow(bucketStart - params.s_LatencyBuckets * bucketLength); - bucketStart += bucketLength; - - std::size_t mem = gatherer.memoryUsage(); - - // Add 48 + 1 buckets ( > 2 days) to force incomplete samples out of consideration for p2 - for (std::size_t i = 0; i < 49 + 1; ++i) { - for (std::size_t j = 0; j < 10; ++j) { - addArrival(SMessage(bucketStart, "p1", "", 10.0), gatherer, m_ResourceMonitor); - } - gatherer.sampleNow(bucketStart - params.s_LatencyBuckets * bucketLength); - bucketStart += bucketLength; - gatherer.releaseMemory(bucketStart - params.s_SamplingAgeCutoff); - if (i <= 40) { - BOOST_TEST_REQUIRE(gatherer.memoryUsage() >= mem - 1000); - } - } - BOOST_TEST_REQUIRE(gatherer.memoryUsage() < mem - 1000); -} - BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/model/unittest/CMetricPopulationModelTest.cc b/lib/model/unittest/CMetricPopulationModelTest.cc index 30b77a9112..efd9c473b6 100644 --- a/lib/model/unittest/CMetricPopulationModelTest.cc +++ b/lib/model/unittest/CMetricPopulationModelTest.cc @@ -51,16 +51,15 @@ #include 
#include +namespace { + BOOST_AUTO_TEST_SUITE(CMetricPopulationModelTest) using namespace ml; using namespace model; -namespace { - -using TMinAccumulator = maths::common::CBasicStatistics::COrderStatisticsStack; -using TMaxAccumulator = - maths::common::CBasicStatistics::COrderStatisticsStack>; +using TMinAccumulator = maths::common::CBasicStatistics::SMin::TAccumulator; +using TMaxAccumulator = maths::common::CBasicStatistics::SMax::TAccumulator; struct SValuesAndWeights { maths::common::CModel::TTimeDouble2VecSizeTrVec s_Values; maths::common::CModelAddSamplesParams::TDouble2VecWeightsAryVec s_TrendWeights; @@ -69,7 +68,6 @@ struct SValuesAndWeights { const std::size_t numberAttributes{5}; const std::size_t numberPeople{10}; -} class CTestFixture : public CModelTestFixtureBase { public: @@ -318,233 +316,6 @@ BOOST_FIXTURE_TEST_CASE(testBasicAccessors, CTestFixture) { } } -BOOST_FIXTURE_TEST_CASE(testMinMaxAndMean, CTestFixture) { - // Check the correct data is read from the gatherer into the model on sample. 
- - using TSizeTimeUMap = boost::unordered_map; - using TSizeValueAndWeightsMap = std::map; - using TSizeSizeValueAndWeightsMapMap = std::map; - using TSizeSizePrDoubleVecMap = std::map; - using TSizeSizePrMeanAccumulatorUMap = std::map; - using TSizeSizePrMinAccumulatorMap = std::map; - using TSizeSizePrMaxAccumulatorMap = std::map; - using TMathsModelPtr = std::shared_ptr; - using TSizeMathsModelPtrMap = std::map; - - core_t::TTime startTime{1367280000}; - const core_t::TTime bucketLength{3600}; - - TMessageVec messages; - generateTestMessages(1, startTime, bucketLength, messages); - - SModelParams params(bucketLength); - params.s_InitialDecayRateMultiplier = 1.0; - params.s_MaximumUpdatesPerBucket = 0.0; - - model_t::TFeatureVec features{model_t::E_PopulationMeanByPersonAndAttribute, - model_t::E_PopulationMinByPersonAndAttribute, - model_t::E_PopulationMaxByPersonAndAttribute}; - - this->makeModel(params, features, startTime); - auto* model = dynamic_cast(m_Model.get()); - BOOST_TEST_REQUIRE(model); - - CModelFactory::TFeatureMathsModelPtrPrVec models{ - m_Factory->defaultFeatureModels(features, bucketLength, 1.0, false)}; - BOOST_REQUIRE_EQUAL(features.size(), models.size()); - BOOST_REQUIRE_EQUAL(features[0], models[0].first); - BOOST_REQUIRE_EQUAL(features[1], models[1].first); - BOOST_REQUIRE_EQUAL(features[2], models[2].first); - - TSizeTimeUMap attributeFirstValueTimes; - TSizeSizePrMeanAccumulatorUMap sampleTimes; - TSizeSizePrMeanAccumulatorUMap sampleMeans; - TSizeSizePrMinAccumulatorMap sampleMins; - TSizeSizePrMaxAccumulatorMap sampleMaxs; - TSizeSizePrDoubleVecMap expectedSampleTimes; - TSizeSizePrDoubleVecMap expectedSamples[3]; - TSizeMathsModelPtrMap expectedPopulationModels[3]; - bool isNonNegative = true; - - for (const auto& message : messages) { - if (message.s_Time >= startTime + bucketLength) { - model->sample(startTime, startTime + bucketLength, m_ResourceMonitor); - - TSizeSizeValueAndWeightsMapMap populationWeightedSamples; - for 
(std::size_t feature = 0; feature < features.size(); ++feature) { - for (const auto& samples_ : expectedSamples[feature]) { - std::size_t pid = samples_.first.first; - std::size_t cid = samples_.first.second; - attributeFirstValueTimes.emplace(cid, startTime); - auto& attribute = populationWeightedSamples[feature][cid]; - TMathsModelPtr& attributeModel = expectedPopulationModels[feature][cid]; - if (attributeModel == nullptr) { - attributeModel = m_Factory->defaultFeatureModel( - features[feature], bucketLength, 1.0, false); - } - for (std::size_t j = 0; j < samples_.second.size(); ++j) { - // We round to the nearest integer time (note this has to - // match the behaviour of CMetricPartialStatistic::time). - core_t::TTime time_ = static_cast( - expectedSampleTimes[{pid, cid}][j] + 0.5); - TDouble2Vec sample{samples_.second[j]}; - attribute.s_Values.emplace_back(time_, sample, pid); - attribute.s_TrendWeights.push_back( - maths_t::CUnitWeights::unit(1)); - attribute.s_ResidualWeights.push_back( - maths_t::CUnitWeights::unit(1)); - double countWeight{model->sampleRateWeight(pid, cid)}; - attributeModel->countWeights( - time_, sample, countWeight, countWeight, 1.0, 1.0, - attribute.s_TrendWeights.back(), - attribute.s_ResidualWeights.back()); - } - } - } - - for (auto& feature : populationWeightedSamples) { - for (auto& attribute : feature.second) { - maths::common::COrderings::simultaneousSort( - attribute.second.s_Values, attribute.second.s_TrendWeights, - attribute.second.s_ResidualWeights); - maths::common::CModelAddSamplesParams params_; - params_.isInteger(false) - .isNonNegative(isNonNegative) - .propagationInterval(1.0) - .trendWeights(attribute.second.s_TrendWeights) - .priorWeights(attribute.second.s_ResidualWeights) - .firstValueTime(attributeFirstValueTimes[attribute.first]); - expectedPopulationModels[feature.first][attribute.first]->addSamples( - params_, attribute.second.s_Values); - } - } - - for (std::size_t feature = 0; feature < features.size(); 
++feature) { - for (std::size_t cid = 0; cid < numberAttributes; ++cid) { - if (expectedPopulationModels[feature].count(cid) > 0) { - BOOST_REQUIRE_EQUAL( - expectedPopulationModels[feature][cid]->checksum(), - model->details()->model(features[feature], cid)->checksum()); - } - } - } - - expectedSampleTimes.clear(); - expectedSamples[0].clear(); - expectedSamples[1].clear(); - expectedSamples[2].clear(); - startTime += bucketLength; - } - - CEventData eventData = this->addArrival(message, m_Gatherer); - std::size_t pid = *eventData.personId(); - std::size_t cid = *eventData.attributeId(); - isNonNegative &= (*message.s_Dbl1Vec)[0] < 0.0; - - double sampleCount = m_Gatherer->sampleCount(cid); - if (sampleCount > 0.0) { - TSizeSizePr key{pid, cid}; - sampleTimes[key].add(static_cast(message.s_Time)); - sampleMeans[key].add((*message.s_Dbl1Vec)[0]); - sampleMins[key].add((*message.s_Dbl1Vec)[0]); - sampleMaxs[key].add((*message.s_Dbl1Vec)[0]); - if (maths::common::CBasicStatistics::count(sampleTimes[key]) == sampleCount) { - expectedSampleTimes[key].push_back( - maths::common::CBasicStatistics::mean(sampleTimes[key])); - expectedSamples[0][key].push_back( - maths::common::CBasicStatistics::mean(sampleMeans[key])); - expectedSamples[1][key].push_back(sampleMins[key][0]); - expectedSamples[2][key].push_back(sampleMaxs[key][0]); - sampleTimes[key] = TMeanAccumulator(); - sampleMeans[key] = TMeanAccumulator(); - sampleMins[key] = TMinAccumulator(); - sampleMaxs[key] = TMaxAccumulator(); - } - } - } -} - -BOOST_FIXTURE_TEST_CASE(testVarp, CTestFixture) { - core_t::TTime startTime{3600}; - core_t::TTime bucketLength{3600}; - SModelParams params(bucketLength); - - model_t::TFeatureVec features{model_t::E_PopulationVarianceByPersonAndAttribute}; - - m_InterimBucketCorrector = std::make_shared(bucketLength); - m_Factory.reset(new CMetricPopulationModelFactory(params, m_InterimBucketCorrector)); - m_Factory->features({model_t::E_PopulationVarianceByPersonAndAttribute}); - 
m_Factory->fieldNames("", "P", "", "V", TStrVec{1, "I"}); - - this->makeModel(params, features, startTime); - - CMetricPopulationModel& model = - static_cast(*m_Model.get()); - - TDoubleStrPrVec b1{{1.0, "i1"}, {1.1, "i1"}, {1.01, "i2"}, {1.02, "i2"}}; - TDoubleStrPrVec b2{{10.0, "i1"}}; - TDoubleStrPrVec b3{{4.3, "i1"}, {4.4, "i1"}, {4.6, "i1"}, {4.2, "i1"}, {4.8, "i3"}}; - TDoubleStrPrVec b4{{3.2, "i3"}, {3.3, "i3"}}; - TDoubleStrPrVec b5{{20.1, "i2"}, {20.8, "i1"}, {20.9, "i1"}}; - TDoubleStrPrVec b6{{4.1, "i1"}, {4.2, "i2"}, {3.9, "i2"}, {4.2, "i2"}}; - TDoubleStrPrVec b7{{0.1, "i1"}, {0.3, "i1"}, {0.2, "i3"}}; - TDoubleStrPrVec b8{{12.5, "i1"}, {12.3, "i2"}}; - TDoubleStrPrVec b9{{6.9, "i1"}, {7.0, "i2"}, {7.1, "i3"}, - {6.6, "i4"}, {7.1, "i5"}, {6.7, "i6"}}; - // This last bucket is much more improbable, with influencer i2 being responsible - TDoubleStrPrVec b10{{0.3, "i2"}, {15.4, "i2"}, {77.62, "i2"}, - {112.999, "i2"}, {5.1, "i1"}, {5.1, "i1"}, - {5.1, "i1"}, {5.1, "i1"}, {5.1, "i1"}}; - - SAnnotatedProbability annotatedProbability; - - core_t::TTime time = startTime; - processBucket(time, bucketLength, b1, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); - - time += bucketLength; - processBucket(time, bucketLength, b2, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); - - time += bucketLength; - processBucket(time, bucketLength, b3, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); - - time += bucketLength; - processBucket(time, bucketLength, b4, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); - - time += bucketLength; - processBucket(time, bucketLength, b5, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); - - time += bucketLength; - processBucket(time, bucketLength, b6, 
m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); - - time += bucketLength; - processBucket(time, bucketLength, b7, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); - - time += bucketLength; - processBucket(time, bucketLength, b8, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability > 0.8); - - time += bucketLength; - processBucket(time, bucketLength, b9, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability < 0.85); - - time += bucketLength; - processBucket(time, bucketLength, b10, m_Gatherer, model, annotatedProbability); - BOOST_TEST_REQUIRE(annotatedProbability.s_Probability < 0.1); - BOOST_REQUIRE_EQUAL(1, annotatedProbability.s_Influences.size()); - BOOST_REQUIRE_EQUAL(std::string("I"), - *annotatedProbability.s_Influences[0].first.first); - BOOST_REQUIRE_EQUAL(std::string("i2"), - *annotatedProbability.s_Influences[0].first.second); - BOOST_REQUIRE_CLOSE_ABSOLUTE(1.0, annotatedProbability.s_Influences[0].second, 0.00001); -} - BOOST_FIXTURE_TEST_CASE(testComputeProbability, CTestFixture) { maths::common::CSampling::CScopeMockRandomNumberGenerator scopeMockRng; @@ -1334,3 +1105,4 @@ BOOST_FIXTURE_TEST_CASE(testIgnoreSamplingGivenDetectionRules, CTestFixture) { } BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/model/unittest/CMetricStatGathererTest.cc b/lib/model/unittest/CMetricStatGathererTest.cc new file mode 100644 index 0000000000..513fd80969 --- /dev/null +++ b/lib/model/unittest/CMetricStatGathererTest.cc @@ -0,0 +1,401 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. 
Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include +#include +#include +#include + +#include + +#include + +#include + +#include + +#include +#include +#include +#include + +namespace { + +BOOST_AUTO_TEST_SUITE(CMetricStatGathererTest) + +using namespace ml; +using namespace model; +using TStrVec = std::vector; + +template +std::string printInfluencers(const DATA& featureData) { + auto influencers = featureData.s_InfluenceValues; + for (auto& influencer : influencers) { + std::sort(influencer.begin(), influencer.end(), + maths::common::COrderings::SFirstLess{}); + } + std::ostringstream result; + result << ml::core::CScopePrintContainers{} << influencers; + return result.str(); +} + +BOOST_AUTO_TEST_CASE(testSumGatherer) { + + core_t::TTime time{1672531200}; + TStrVec empty; + + auto assertFirstBucketStats = [&](const TSumGatherer& gatherer) { + auto data = gatherer.featureData(time + 599); + BOOST_REQUIRE_EQUAL(true, data.s_IsNonNegative); + BOOST_REQUIRE_EQUAL(false, data.s_IsInteger); + BOOST_REQUIRE_EQUAL(1, data.s_Samples.size()); + BOOST_REQUIRE_EQUAL(true, data.s_BucketValue != std::nullopt); + BOOST_REQUIRE_EQUAL(1672531500, data.s_BucketValue->time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_BucketValue->varianceScale()); + BOOST_REQUIRE_EQUAL(5.4, data.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(4, data.s_BucketValue->count()); + BOOST_REQUIRE_EQUAL(1672531500, data.s_Samples[0].time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].varianceScale()); + BOOST_REQUIRE_EQUAL(5.4, data.s_Samples[0].value()[0]); + BOOST_REQUIRE_EQUAL(4, data.s_Samples[0].count()); + }; + + auto assertSecondBucketStats = [&](const TSumGatherer& gatherer) { + auto 
data = gatherer.featureData(time + 1199); + BOOST_REQUIRE_EQUAL(true, data.s_IsNonNegative); + BOOST_REQUIRE_EQUAL(false, data.s_IsInteger); + BOOST_REQUIRE_EQUAL(1, data.s_Samples.size()); + BOOST_REQUIRE_EQUAL(true, data.s_BucketValue != std::nullopt); + BOOST_REQUIRE_EQUAL(1672532100, data.s_BucketValue->time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_BucketValue->varianceScale()); + BOOST_REQUIRE_EQUAL(4.4, data.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(3, data.s_BucketValue->count()); + BOOST_REQUIRE_EQUAL(1672532100, data.s_Samples[0].time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].varianceScale()); + BOOST_REQUIRE_EQUAL(4.4, data.s_Samples[0].value()[0]); + BOOST_REQUIRE_EQUAL(3, data.s_Samples[0].count()); + }; + + for (std::size_t i = 1; i <= 2; ++i) { + TSumGatherer gatherer{i, 1, time, 600, empty.begin(), empty.end()}; + + gatherer.add(time + 30, {1.3}, 1, {}); + gatherer.add(time + 70, {3.1}, 2, {}); + gatherer.add(time + 122, {1.0}, 1, {}); + + assertFirstBucketStats(gatherer); + BOOST_REQUIRE_EQUAL( + true, gatherer.featureData(time + 599).s_InfluenceValues.empty()); + + gatherer.startNewBucket(time + 600); + gatherer.add(time + 630, {0.3}, 1, {}); + gatherer.add(time + 670, {2.1}, 1, {}); + gatherer.add(time + 722, {2.0}, 1, {}); + + assertSecondBucketStats(gatherer); + BOOST_REQUIRE_EQUAL( + true, gatherer.featureData(time + 1199).s_InfluenceValues.empty()); + if (i == 2) { + assertFirstBucketStats(gatherer); + BOOST_REQUIRE_EQUAL( + true, gatherer.featureData(time + 599).s_InfluenceValues.empty()); + } + } + + for (std::size_t i = 1; i <= 2; ++i) { + TStrVec influencers{"i1", "i2"}; + TSumGatherer gatherer{ + i, 1, time, 600, influencers.begin(), influencers.end()}; + + gatherer.add(time + 30, {1.3}, 1, {std::string{"i11"}, std::string{"i21"}}); + gatherer.add(time + 70, {3.1}, 2, {std::string{"i11"}, std::string{"i22"}}); + gatherer.add(time + 122, {1.0}, 1, {std::string{"i12"}, std::string{"i21"}}); + + assertFirstBucketStats(gatherer); 
+ BOOST_REQUIRE_EQUAL("[[(i11, ([4.4], 3)), (i12, ([1], 1))], [(i21, ([2.3], 2)), (i22, ([3.1], 2))]]", + printInfluencers(gatherer.featureData(time + 599))); + + gatherer.startNewBucket(time + 600); + gatherer.add(time + 630, {0.3}, 1, {std::string{"i11"}, std::string{"i21"}}); + gatherer.add(time + 670, {2.1}, 1, {std::string{"i11"}, std::string{"i22"}}); + gatherer.add(time + 722, {2.0}, 1, {std::string{"i12"}, std::string{"i21"}}); + + assertSecondBucketStats(gatherer); + BOOST_REQUIRE_EQUAL("[[(i11, ([2.4], 2)), (i12, ([2], 1))], [(i21, ([2.3], 2)), (i22, ([2.1], 1))]]", + printInfluencers(gatherer.featureData(time + 1199))); + if (i == 2) { + assertFirstBucketStats(gatherer); + BOOST_REQUIRE_EQUAL("[[(i11, ([4.4], 3)), (i12, ([1], 1))], [(i21, ([2.3], 2)), (i22, ([3.1], 2))]]", + printInfluencers(gatherer.featureData(time + 599))); + } + + std::string xml; + core::CRapidXmlStatePersistInserter inserter("root"); + gatherer.acceptPersistInserter(inserter); + inserter.toXml(xml); + LOG_TRACE(<< xml); + core::CRapidXmlParser parser; + BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(xml)); + core::CRapidXmlStateRestoreTraverser traverser(parser); + TSumGatherer restoredGatherer{ + i, 1, 1672531200, 600, influencers.begin(), influencers.end()}; + BOOST_TEST_REQUIRE(traverser.traverseSubLevel([&](auto& traverser_) { + return restoredGatherer.acceptRestoreTraverser(traverser_); + })); + BOOST_REQUIRE_EQUAL(gatherer.checksum(), restoredGatherer.checksum()); + } +} + +BOOST_AUTO_TEST_CASE(testMeanGatherer) { + + core_t::TTime time{1672531200}; + + auto assertFirstBucketStats = [&](const TMeanGatherer& gatherer) { + auto data = gatherer.featureData(time + 599); + BOOST_REQUIRE_EQUAL(true, data.s_IsNonNegative); + BOOST_REQUIRE_EQUAL(false, data.s_IsInteger); + BOOST_REQUIRE_EQUAL(1, data.s_Samples.size()); + BOOST_REQUIRE_EQUAL(true, data.s_BucketValue != std::nullopt); + BOOST_REQUIRE_EQUAL(1672531273, data.s_BucketValue->time()); + BOOST_REQUIRE_EQUAL(1.0, 
data.s_BucketValue->varianceScale()); + BOOST_REQUIRE_CLOSE(2.125, data.s_BucketValue->value()[0], 1e-6); + BOOST_REQUIRE_EQUAL(4, data.s_BucketValue->count()); + BOOST_REQUIRE_EQUAL(1672531273, data.s_Samples[0].time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].varianceScale()); + BOOST_REQUIRE_CLOSE(2.125, data.s_Samples[0].value()[0], 1e-6); + BOOST_REQUIRE_EQUAL(4, data.s_Samples[0].count()); + BOOST_REQUIRE_EQUAL("[[(i11, ([2.5], 3)), (i12, ([1], 1))], [(i21, ([1.15], 2)), (i22, ([3.1], 2))]]", + printInfluencers(gatherer.featureData(time + 599))); + }; + + auto assertSecondBucketStats = [&](const TMeanGatherer& gatherer) { + auto data = gatherer.featureData(time + 1199); + BOOST_REQUIRE_EQUAL(true, data.s_IsNonNegative); + BOOST_REQUIRE_EQUAL(false, data.s_IsInteger); + BOOST_REQUIRE_EQUAL(1, data.s_Samples.size()); + BOOST_REQUIRE_EQUAL(true, data.s_BucketValue != std::nullopt); + BOOST_REQUIRE_EQUAL(1672531874, data.s_BucketValue->time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_BucketValue->varianceScale()); + BOOST_REQUIRE_EQUAL(1.1, data.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(3, data.s_BucketValue->count()); + BOOST_REQUIRE_EQUAL(1672531874, data.s_Samples[0].time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].varianceScale()); + BOOST_REQUIRE_EQUAL(1.1, data.s_Samples[0].value()[0]); + BOOST_REQUIRE_EQUAL(3, data.s_Samples[0].count()); + BOOST_REQUIRE_EQUAL("[[(i11, ([0.65], 2)), (i12, ([2], 1))], [(i21, ([1.15], 2)), (i22, ([1], 1))]]", + printInfluencers(gatherer.featureData(time + 1199))); + }; + + for (std::size_t i = 1; i <= 2; ++i) { + TStrVec influencers{"i1", "i2"}; + TMeanGatherer gatherer{ + i, 1, time, 600, influencers.begin(), influencers.end()}; + + gatherer.add(time + 30, {1.3}, 1, {std::string{"i11"}, std::string{"i21"}}); + gatherer.add(time + 70, {3.1}, 2, {std::string{"i11"}, std::string{"i22"}}); + gatherer.add(time + 122, {1.0}, 1, {std::string{"i12"}, std::string{"i21"}}); + + assertFirstBucketStats(gatherer); + + 
gatherer.startNewBucket(time + 600); + gatherer.add(time + 630, {0.3}, 1, {std::string{"i11"}, std::string{"i21"}}); + gatherer.add(time + 670, {1.0}, 1, {std::string{"i11"}, std::string{"i22"}}); + gatherer.add(time + 722, {2.0}, 1, {std::string{"i12"}, std::string{"i21"}}); + + assertSecondBucketStats(gatherer); + if (i == 2) { + assertFirstBucketStats(gatherer); + } + + std::string xml; + core::CRapidXmlStatePersistInserter inserter("root"); + gatherer.acceptPersistInserter(inserter); + inserter.toXml(xml); + LOG_TRACE(<< xml); + core::CRapidXmlParser parser; + BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(xml)); + core::CRapidXmlStateRestoreTraverser traverser(parser); + TMeanGatherer restoredGatherer{ + i, 1, 1672531200, 600, influencers.begin(), influencers.end()}; + BOOST_TEST_REQUIRE(traverser.traverseSubLevel([&](auto& traverser_) { + return restoredGatherer.acceptRestoreTraverser(traverser_); + })); + BOOST_REQUIRE_EQUAL(gatherer.checksum(), restoredGatherer.checksum()); + } +} + +BOOST_AUTO_TEST_CASE(testMinGatherer) { + + core_t::TTime time{1672531200}; + + auto assertFirstBucketStats = [&](const TMinGatherer& gatherer) { + auto data = gatherer.featureData(time + 599); + BOOST_REQUIRE_EQUAL(true, data.s_IsNonNegative); + BOOST_REQUIRE_EQUAL(false, data.s_IsInteger); + BOOST_REQUIRE_EQUAL(1, data.s_Samples.size()); + BOOST_REQUIRE_EQUAL(true, data.s_BucketValue != std::nullopt); + BOOST_REQUIRE_EQUAL(1672531322, data.s_BucketValue->time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_BucketValue->varianceScale()); + BOOST_REQUIRE_EQUAL(1.0, data.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(1, data.s_BucketValue->count()); + BOOST_REQUIRE_EQUAL(1672531322, data.s_Samples[0].time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].varianceScale()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].value()[0]); + BOOST_REQUIRE_EQUAL(1, data.s_Samples[0].count()); + BOOST_REQUIRE_EQUAL("[[(i11, ([1.3], 1)), (i12, ([1], 1))], [(i21, ([1], 1)), (i22, ([3.1], 1))]]", + 
printInfluencers(gatherer.featureData(time + 599))); + }; + + auto assertSecondBucketStats = [&](const TMinGatherer& gatherer) { + auto data = gatherer.featureData(time + 1199); + BOOST_REQUIRE_EQUAL(true, data.s_IsNonNegative); + BOOST_REQUIRE_EQUAL(false, data.s_IsInteger); + BOOST_REQUIRE_EQUAL(1, data.s_Samples.size()); + BOOST_REQUIRE_EQUAL(true, data.s_BucketValue != std::nullopt); + BOOST_REQUIRE_EQUAL(1672531830, data.s_BucketValue->time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_BucketValue->varianceScale()); + BOOST_REQUIRE_EQUAL(0.3, data.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(1, data.s_BucketValue->count()); + BOOST_REQUIRE_EQUAL(1672531830, data.s_Samples[0].time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].varianceScale()); + BOOST_REQUIRE_EQUAL(0.3, data.s_Samples[0].value()[0]); + BOOST_REQUIRE_EQUAL(1, data.s_Samples[0].count()); + BOOST_REQUIRE_EQUAL("[[(i11, ([0.3], 1)), (i12, ([2], 1))], [(i21, ([0.3], 1)), (i22, ([1], 1))]]", + printInfluencers(gatherer.featureData(time + 1199))); + }; + + for (std::size_t i = 1; i <= 2; ++i) { + TStrVec influencers{"i1", "i2"}; + TMinGatherer gatherer{ + i, 1, time, 600, influencers.begin(), influencers.end()}; + + gatherer.add(time + 30, {1.3}, 1, {std::string{"i11"}, std::string{"i21"}}); + gatherer.add(time + 70, {3.1}, 2, {std::string{"i11"}, std::string{"i22"}}); + gatherer.add(time + 122, {1.0}, 1, {std::string{"i12"}, std::string{"i21"}}); + + assertFirstBucketStats(gatherer); + + gatherer.startNewBucket(time + 600); + gatherer.add(time + 630, {0.3}, 1, {std::string{"i11"}, std::string{"i21"}}); + gatherer.add(time + 670, {1.0}, 1, {std::string{"i11"}, std::string{"i22"}}); + gatherer.add(time + 722, {2.0}, 1, {std::string{"i12"}, std::string{"i21"}}); + + assertSecondBucketStats(gatherer); + if (i == 2) { + assertFirstBucketStats(gatherer); + } + + std::string xml; + core::CRapidXmlStatePersistInserter inserter("root"); + gatherer.acceptPersistInserter(inserter); + inserter.toXml(xml); + 
LOG_TRACE(<< xml); + core::CRapidXmlParser parser; + BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(xml)); + core::CRapidXmlStateRestoreTraverser traverser(parser); + TMinGatherer restoredGatherer{ + i, 1, 1672531200, 600, influencers.begin(), influencers.end()}; + BOOST_TEST_REQUIRE(traverser.traverseSubLevel([&](auto& traverser_) { + return restoredGatherer.acceptRestoreTraverser(traverser_); + })); + BOOST_REQUIRE_EQUAL(gatherer.checksum(), restoredGatherer.checksum()); + } +} + +BOOST_AUTO_TEST_CASE(testMaxGatherer) { + + core_t::TTime time{1672531200}; + + auto assertFirstBucketStats = [&](const TMaxGatherer& gatherer) { + auto data = gatherer.featureData(time + 599); + BOOST_REQUIRE_EQUAL(true, data.s_IsNonNegative); + BOOST_REQUIRE_EQUAL(false, data.s_IsInteger); + BOOST_REQUIRE_EQUAL(1, data.s_Samples.size()); + BOOST_REQUIRE_EQUAL(true, data.s_BucketValue != std::nullopt); + BOOST_REQUIRE_EQUAL(1672531270, data.s_BucketValue->time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_BucketValue->varianceScale()); + BOOST_REQUIRE_EQUAL(3.1, data.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(1, data.s_BucketValue->count()); + BOOST_REQUIRE_EQUAL(1672531270, data.s_Samples[0].time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].varianceScale()); + BOOST_REQUIRE_EQUAL(3.1, data.s_Samples[0].value()[0]); + BOOST_REQUIRE_EQUAL(1, data.s_Samples[0].count()); + BOOST_REQUIRE_EQUAL("[[(i11, ([3.1], 1)), (i12, ([1], 1))], [(i21, ([1.3], 1)), (i22, ([3.1], 1))]]", + printInfluencers(gatherer.featureData(time + 599))); + }; + + auto assertSecondBucketStats = [&](const TMaxGatherer& gatherer) { + auto data = gatherer.featureData(time + 1199); + BOOST_REQUIRE_EQUAL(true, data.s_IsNonNegative); + BOOST_REQUIRE_EQUAL(false, data.s_IsInteger); + BOOST_REQUIRE_EQUAL(1, data.s_Samples.size()); + BOOST_REQUIRE_EQUAL(true, data.s_BucketValue != std::nullopt); + BOOST_REQUIRE_EQUAL(1672531922, data.s_BucketValue->time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_BucketValue->varianceScale()); 
+ BOOST_REQUIRE_EQUAL(2.0, data.s_BucketValue->value()[0]); + BOOST_REQUIRE_EQUAL(1, data.s_BucketValue->count()); + BOOST_REQUIRE_EQUAL(1672531922, data.s_Samples[0].time()); + BOOST_REQUIRE_EQUAL(1.0, data.s_Samples[0].varianceScale()); + BOOST_REQUIRE_EQUAL(2.0, data.s_Samples[0].value()[0]); + BOOST_REQUIRE_EQUAL(1, data.s_Samples[0].count()); + BOOST_REQUIRE_EQUAL("[[(i11, ([1], 1)), (i12, ([2], 1))], [(i21, ([2], 1)), (i22, ([1], 1))]]", + printInfluencers(gatherer.featureData(time + 1199))); + }; + + for (std::size_t i = 1; i <= 2; ++i) { + TStrVec influencers{"i1", "i2"}; + TMaxGatherer gatherer{ + i, 1, time, 600, influencers.begin(), influencers.end()}; + + gatherer.add(time + 30, {1.3}, 1, {std::string{"i11"}, std::string{"i21"}}); + gatherer.add(time + 70, {3.1}, 2, {std::string{"i11"}, std::string{"i22"}}); + gatherer.add(time + 122, {1.0}, 1, {std::string{"i12"}, std::string{"i21"}}); + + assertFirstBucketStats(gatherer); + + gatherer.startNewBucket(time + 600); + gatherer.add(time + 630, {0.3}, 1, {std::string{"i11"}, std::string{"i21"}}); + gatherer.add(time + 670, {1.0}, 1, {std::string{"i11"}, std::string{"i22"}}); + gatherer.add(time + 722, {2.0}, 1, {std::string{"i12"}, std::string{"i21"}}); + + assertSecondBucketStats(gatherer); + if (i == 2) { + assertFirstBucketStats(gatherer); + } + + std::string xml; + core::CRapidXmlStatePersistInserter inserter("root"); + gatherer.acceptPersistInserter(inserter); + inserter.toXml(xml); + LOG_TRACE(<< xml); + core::CRapidXmlParser parser; + BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(xml)); + core::CRapidXmlStateRestoreTraverser traverser(parser); + TMaxGatherer restoredGatherer{ + i, 1, 1672531200, 600, influencers.begin(), influencers.end()}; + BOOST_TEST_REQUIRE(traverser.traverseSubLevel([&](auto& traverser_) { + return restoredGatherer.acceptRestoreTraverser(traverser_); + })); + BOOST_REQUIRE_EQUAL(gatherer.checksum(), restoredGatherer.checksum()); + } +} + 
+/*BOOST_AUTO_TEST_CASE(testMedianGatherer) { +} + +BOOST_AUTO_TEST_CASE(testVarianceGatherer) { +} + +BOOST_AUTO_TEST_CASE(testMultivariateMeanGatherer) { +}*/ + +BOOST_AUTO_TEST_SUITE_END() +} diff --git a/lib/model/unittest/CModelDetailsViewTest.cc b/lib/model/unittest/CModelDetailsViewTest.cc index 275d1d446f..3df5d389db 100644 --- a/lib/model/unittest/CModelDetailsViewTest.cc +++ b/lib/model/unittest/CModelDetailsViewTest.cc @@ -63,7 +63,7 @@ BOOST_FIXTURE_TEST_CASE(testModelPlot, CTestFixture) { gatherer = std::make_shared( model_t::analysisCategory(features[0]), model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, "p", EMPTY_STRING, EMPTY_STRING, - TStrVec{}, key, features, 0, 0); + TStrVec{}, key, features, 0); std::string person11{"p11"}; std::string person12{"p12"}; std::string person21{"p21"}; diff --git a/lib/model/unittest/CModelTestFixtureBase.h b/lib/model/unittest/CModelTestFixtureBase.h index c78a96b365..9b4f635e21 100644 --- a/lib/model/unittest/CModelTestFixtureBase.h +++ b/lib/model/unittest/CModelTestFixtureBase.h @@ -226,7 +226,6 @@ class CModelTestFixtureBase { ml::model_t::EModelType modelType, ml::model::CModelFactory::TDataGathererPtr& gatherer, ml::model::CModelFactory::TModelPtr& model, - TOptionalUInt sampleCount = TOptionalUInt(), const std::string& summaryCountField = EMPTY_STRING) { if (m_InterimBucketCorrector == nullptr) { m_InterimBucketCorrector = @@ -240,9 +239,6 @@ class CModelTestFixtureBase { m_Factory->features(features); } ml::model::CModelFactory::SGathererInitializationData initData(startTime); - if (sampleCount) { - initData.s_SampleOverrideCount = *sampleCount; - } gatherer.reset(m_Factory->makeDataGatherer(initData)); model.reset(m_Factory->makeModel({gatherer})); diff --git a/lib/model/unittest/CResourceLimitTest.cc b/lib/model/unittest/CResourceLimitTest.cc index 6d9253d49d..2231cc23b0 100644 --- a/lib/model/unittest/CResourceLimitTest.cc +++ b/lib/model/unittest/CResourceLimitTest.cc @@ -591,7 +591,7 @@ 
BOOST_FIXTURE_TEST_CASE(testLargeAllocations, CTestFixture) { {false, 70, 3000, 2700, 2900, model_t::E_EventRateOnline}, {true, 70, 5000, 4500, 4700, model_t::E_EventRateOnline}, {false, 100, 4000, 3400, 3700, model_t::E_MetricOnline}, - {true, 100, 7000, 5900, 6100, model_t::E_MetricOnline}}; + {true, 100, 7000, 5900, 6300, model_t::E_MetricOnline}}; for (auto& param : params) { doTestLargeAllocations(param); diff --git a/lib/model/unittest/CRuleConditionTest.cc b/lib/model/unittest/CRuleConditionTest.cc index 49b587ba40..febc97188a 100644 --- a/lib/model/unittest/CRuleConditionTest.cc +++ b/lib/model/unittest/CRuleConditionTest.cc @@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE(testTimeContition) { features.push_back(model_t::E_IndividualMeanByPerson); CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared( model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING, - EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0)); + EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime)); CMockModel model(params, gathererPtr, influenceCalculators); diff --git a/lib/model/unittest/CSampleQueueTest.cc b/lib/model/unittest/CSampleQueueTest.cc deleted file mode 100644 index 02f20dce75..0000000000 --- a/lib/model/unittest/CSampleQueueTest.cc +++ /dev/null @@ -1,1181 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. 
- */ - -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include - -BOOST_AUTO_TEST_SUITE(CSampleQueueTest) - -using namespace ml; -using namespace model; - -using TDoubleVec = std::vector; -using TSampleVec = std::vector; -using TMeanAccumulator = maths::common::CBasicStatistics::SSampleMean::TAccumulator; -using TTestSampleQueue = CSampleQueue; - -BOOST_AUTO_TEST_CASE(testSampleToString) { - CSample sample(10, {3.0}, 0.8, 1.0); - - BOOST_REQUIRE_EQUAL(std::string("10;0.800000012;1;3"), CSample::SToString()(sample)); -} - -BOOST_AUTO_TEST_CASE(testSampleFromString) { - CSample sample; - - BOOST_TEST_REQUIRE(CSample::SFromString()("15;7e-1;3;2.0", sample)); - - BOOST_REQUIRE_EQUAL(core_t::TTime(15), sample.time()); - BOOST_REQUIRE_EQUAL(2.0, sample.value()[0]); - BOOST_REQUIRE_EQUAL(0.7, sample.varianceScale()); - BOOST_REQUIRE_EQUAL(3.0, sample.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenQueueIsEmptyShouldCreateNewSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(1, {1.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(1, queue.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(1), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(1), queue[0].s_End); - BOOST_REQUIRE_EQUAL(core_t::TTime(1), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(1.0, queue[0].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenQueueIsFullShouldResize) { - std::size_t sampleCountFactor(1); - std::size_t latencyBuckets(1); - double growthFactor(0.5); - core_t::TTime bucketLength(10); - unsigned int sampleCount(1); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, 
bucketLength); - - queue.add(1, {1.0}, 1, sampleCount); - BOOST_REQUIRE_EQUAL(1, queue.size()); - BOOST_REQUIRE_EQUAL(1, queue.capacity()); - - queue.add(2, {2.0}, 1, sampleCount); - BOOST_REQUIRE_EQUAL(2, queue.size()); - BOOST_REQUIRE_EQUAL(2, queue.capacity()); - - queue.add(3, {3.0}, 1, sampleCount); - BOOST_REQUIRE_EQUAL(3, queue.size()); - BOOST_REQUIRE_EQUAL(3, queue.capacity()); - - queue.add(4, {4.0}, 1, sampleCount); - BOOST_REQUIRE_EQUAL(4, queue.size()); - BOOST_REQUIRE_EQUAL(4, queue.capacity()); - - queue.add(5, {5.0}, 1, sampleCount); - BOOST_REQUIRE_EQUAL(5, queue.size()); - BOOST_REQUIRE_EQUAL(6, queue.capacity()); - - queue.add(6, {6.0}, 1, sampleCount); - BOOST_REQUIRE_EQUAL(6, queue.size()); - BOOST_REQUIRE_EQUAL(6, queue.capacity()); - - queue.add(7, {7.0}, 1, sampleCount); - BOOST_REQUIRE_EQUAL(7, queue.size()); - BOOST_REQUIRE_EQUAL(9, queue.capacity()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(0, {1.0}, 1, sampleCount); - - queue.add(3, {2.5}, 2, sampleCount); - - BOOST_REQUIRE_EQUAL(1, queue.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[0].s_End); - BOOST_REQUIRE_EQUAL(2.0, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(2), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(3.0, queue[0].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsInOrderAndCloseToNonFullLatestSubSampleButDifferentBucket) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, 
growthFactor, bucketLength); - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue.latestEnd()); - - queue.add(9, {1.0}, 1, sampleCount); - queue.add(10, {2.5}, 2, sampleCount); - - BOOST_REQUIRE_EQUAL(2, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(10), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(10), queue[0].s_End); - BOOST_REQUIRE_EQUAL(2.5, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(10), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[0].s_Statistic.count()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_End); - BOOST_REQUIRE_EQUAL(1.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[1].s_Statistic.count()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(10), queue.latestEnd()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsInOrderAndCloseToFullLatestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(0, {1.0}, 5, sampleCount); - - queue.add(3, {2.5}, 2, sampleCount); - - BOOST_REQUIRE_EQUAL(2, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[0].s_End); - BOOST_REQUIRE_EQUAL(2.5, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[0].s_Statistic.count()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[1].s_End); - BOOST_REQUIRE_EQUAL(1.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(5.0, queue[1].s_Statistic.count()); -} - 
-BOOST_AUTO_TEST_CASE(testAddGivenTimeIsInOrderAndFarFromLatestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(0, {1.0}, 1, sampleCount); - - queue.add(5, {2.5}, 2, sampleCount); - - BOOST_REQUIRE_EQUAL(2, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[0].s_End); - BOOST_REQUIRE_EQUAL(2.5, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[0].s_Statistic.count()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[1].s_End); - BOOST_REQUIRE_EQUAL(1.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsWithinFullLatestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(0, {1.0}, 2, sampleCount); - queue.add(4, {1.0}, 3, sampleCount); - - queue.add(3, {7.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(1, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(4), queue[0].s_End); - BOOST_REQUIRE_EQUAL(2.0, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(6.0, queue[0].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndFarBeforeEarliestSubSample) { - std::size_t sampleCountFactor(2); - 
std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(8, {1.0}, 5, sampleCount); - queue.add(15, {1.0}, 3, sampleCount); - - queue.add(3, {7.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[2].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[2].s_End); - BOOST_REQUIRE_EQUAL(7.0, queue[2].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[2].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[2].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloseBeforeFullEarliestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(8, {1.0}, 5, sampleCount); - queue.add(15, {1.0}, 3, sampleCount); - - queue.add(5, {7.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[2].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[2].s_End); - BOOST_REQUIRE_EQUAL(7.0, queue[2].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[2].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[2].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(9, {1.0}, 4, sampleCount); - queue.add(15, {1.0}, 3, sampleCount); - - queue.add(6, {6.0}, 1, sampleCount); - - 
BOOST_REQUIRE_EQUAL(2, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(6), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_End); - BOOST_REQUIRE_EQUAL(2.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(8), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(5.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloseBeforeNonFullEarliestSubSampleButDifferentBucket) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(11, {1.0}, 4, sampleCount); - - queue.add(9, {6.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(2, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(11), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(11), queue[0].s_End); - BOOST_REQUIRE_EQUAL(1.0, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(11), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(4.0, queue[0].s_Statistic.count()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_End); - BOOST_REQUIRE_EQUAL(6.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndWithinSomeSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 1, sampleCount); - queue.add(6, {2.0}, 1, sampleCount); - queue.add(8, {4.0}, 1, sampleCount); - queue.add(12, {1.0}, 1, sampleCount); - - queue.add(7, {6.0}, 1, sampleCount); - - 
BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(6), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(8), queue[1].s_End); - BOOST_REQUIRE_EQUAL(4.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(7), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(3.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatest) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 1, sampleCount); - queue.add(5, {1.0}, 1, sampleCount); - queue.add(10, {4.0}, 1, sampleCount); - queue.add(15, {1.0}, 1, sampleCount); - - queue.add(12, {6.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(4, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(10), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(12), queue[1].s_End); - BOOST_REQUIRE_EQUAL(5.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(11), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloserToSubSampleBeforeLatestButDifferentBucket) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 1, sampleCount); - queue.add(8, {4.0}, 1, sampleCount); - queue.add(15, {1.0}, 1, sampleCount); - - queue.add(10, {6.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(4, queue.size()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloserToPreviousOfNonFullSubSamples) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double 
growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 1, sampleCount); - queue.add(8, {2.0}, 1, sampleCount); - queue.add(15, {3.0}, 1, sampleCount); - - queue.add(3, {7.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[2].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[2].s_End); - BOOST_REQUIRE_EQUAL(4.0, queue[2].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(2), queue[2].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[2].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloserToNextOfNonFullSubSamples) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 1, sampleCount); - queue.add(8, {2.0}, 1, sampleCount); - queue.add(15, {3.0}, 1, sampleCount); - - queue.add(5, {7.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(8), queue[1].s_End); - BOOST_REQUIRE_EQUAL(4.5, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(7), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloserToPreviousOfFullSubSamples) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 5, sampleCount); - queue.add(8, {2.0}, 5, sampleCount); - queue.add(15, {3.0}, 1, 
sampleCount); - - queue.add(3, {7.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[2].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[2].s_End); - BOOST_REQUIRE_EQUAL(2.0, queue[2].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(1), queue[2].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(6.0, queue[2].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloserToNextOfFullSubSamples) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 5, sampleCount); - queue.add(8, {2.0}, 5, sampleCount); - queue.add(15, {3.0}, 5, sampleCount); - - queue.add(5, {8.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(8), queue[1].s_End); - BOOST_REQUIRE_EQUAL(3.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(8), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(6.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloserToPreviousSubSampleButOnlyNextHasSpace) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 5, sampleCount); - queue.add(5, {2.0}, 1, sampleCount); - queue.add(10, {3.0}, 1, sampleCount); - - queue.add(2, {7.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(2), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(5), queue[1].s_End); - BOOST_REQUIRE_EQUAL(4.5, 
queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(4), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndCloserToNextSubSampleButOnlyPreviousHasSpace) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 1, sampleCount); - queue.add(5, {2.0}, 5, sampleCount); - queue.add(10, {3.0}, 5, sampleCount); - - queue.add(3, {8.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[2].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[2].s_End); - BOOST_REQUIRE_EQUAL(4.5, queue[2].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(2), queue[2].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[2].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndFallsInBigEnoughGap) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 1, sampleCount); - queue.add(15, {2.0}, 1, sampleCount); - - queue.add(6, {8.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(3, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(6), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(6), queue[1].s_End); - BOOST_REQUIRE_EQUAL(8.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(6), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testAddGivenTimeIsHistoricalAndFallsInTooSmallGap) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(5); - double 
growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(0, {1.0}, 1, sampleCount); - queue.add(4, {1.0}, 1, sampleCount); - queue.add(9, {2.0}, 1, sampleCount); - - queue.add(6, {7.0}, 1, sampleCount); - - BOOST_REQUIRE_EQUAL(2, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(0), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(6), queue[1].s_End); - BOOST_REQUIRE_EQUAL(3.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(3.0, queue[1].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testCanSampleGivenEmptyQueue) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - BOOST_TEST_REQUIRE(queue.canSample(42) == false); -} - -BOOST_AUTO_TEST_CASE(testCanSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(24, {1.0}, 1, sampleCount); - queue.add(26, {1.0}, 1, sampleCount); - queue.add(45, {1.0}, 5, sampleCount); - - BOOST_TEST_REQUIRE(queue.canSample(0) == false); - BOOST_TEST_REQUIRE(queue.canSample(16) == false); - - BOOST_TEST_REQUIRE(queue.canSample(17)); - BOOST_TEST_REQUIRE(queue.canSample(40)); -} - -BOOST_AUTO_TEST_CASE(testSampleGivenExactlyOneSampleOfExactCountToBeCreated) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - 
TTestSampleQueue::TSampleVec samples; - queue.add(0, {1.0}, 5, sampleCount); - queue.add(6, {3.0}, 5, sampleCount); - queue.add(30, {5.0}, 1, sampleCount); - BOOST_TEST_REQUIRE(queue.canSample(0)); - - queue.sample(0, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - BOOST_REQUIRE_EQUAL(1, samples.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), samples[0].time()); - BOOST_REQUIRE_EQUAL(2.0, samples[0].value()[0]); - BOOST_REQUIRE_EQUAL(1.0, samples[0].varianceScale()); - - BOOST_REQUIRE_EQUAL(1, queue.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_End); - BOOST_REQUIRE_EQUAL(5.0, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[0].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testSampleGivenExactlyOneSampleOfOverCountToBeCreated) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - TTestSampleQueue::TSampleVec samples; - queue.add(0, {0.0}, 3, sampleCount); - queue.add(1, {1.0}, 1, sampleCount); - queue.add(6, {3.0}, 7, sampleCount); - queue.add(30, {5.0}, 1, sampleCount); - BOOST_TEST_REQUIRE(queue.canSample(0)); - - queue.sample(0, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - BOOST_REQUIRE_EQUAL(1, samples.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(4), samples[0].time()); - BOOST_REQUIRE_EQUAL(2.0, samples[0].value()[0]); - BOOST_TEST_REQUIRE(samples[0].varianceScale() < 1.0); - - BOOST_REQUIRE_EQUAL(1, queue.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_End); - BOOST_REQUIRE_EQUAL(5.0, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_Statistic.time()); - 
BOOST_REQUIRE_EQUAL(1.0, queue[0].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testSampleGivenOneSampleToBeCreatedAndRemainder) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - TTestSampleQueue::TSampleVec samples; - queue.add(0, {1.0}, 5, sampleCount); - queue.add(6, {3.0}, 5, sampleCount); - queue.add(7, {3.0}, 1, sampleCount); - queue.add(8, {5.0}, 1, sampleCount); - queue.add(40, {8.0}, 1, sampleCount); - BOOST_TEST_REQUIRE(queue.canSample(0)); - - queue.sample(0, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - BOOST_REQUIRE_EQUAL(1, samples.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(3), samples[0].time()); - BOOST_REQUIRE_EQUAL(2.0, samples[0].value()[0]); - BOOST_REQUIRE_EQUAL(1.0, samples[0].varianceScale()); - - BOOST_REQUIRE_EQUAL(2, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(7), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(8), queue[1].s_End); - BOOST_REQUIRE_EQUAL(4.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(8), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(2.0, queue[1].s_Statistic.count()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(40), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(40), queue[0].s_End); - BOOST_REQUIRE_EQUAL(8.0, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(40), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[0].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testSampleGivenTwoSamplesToBeCreatedAndRemainder) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - TTestSampleQueue::TSampleVec samples; - 
queue.add(0, {1.0}, 5, sampleCount); - queue.add(2, {4.0}, 5, sampleCount); - queue.add(7, {2.0}, 5, sampleCount); - queue.add(8, {5.0}, 5, sampleCount); - queue.add(9, {0.0}, 1, sampleCount); - queue.add(30, {8.0}, 1, sampleCount); - BOOST_TEST_REQUIRE(queue.canSample(0)); - - queue.sample(0, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - BOOST_REQUIRE_EQUAL(2, samples.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(1), samples[0].time()); - BOOST_REQUIRE_EQUAL(2.5, samples[0].value()[0]); - BOOST_REQUIRE_EQUAL(1.0, samples[0].varianceScale()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(8), samples[1].time()); - BOOST_REQUIRE_EQUAL(3.5, samples[1].value()[0]); - BOOST_REQUIRE_EQUAL(1.0, samples[1].varianceScale()); - - BOOST_REQUIRE_EQUAL(2, queue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_End); - BOOST_REQUIRE_EQUAL(0.0, queue[1].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(9), queue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[1].s_Statistic.count()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_End); - BOOST_REQUIRE_EQUAL(8.0, queue[0].s_Statistic.value()[0]); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, queue[0].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testSampleGivenNoSampleToBeCreated) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - TTestSampleQueue::TSampleVec samples; - queue.add(0, {1.0}, 4, sampleCount); - queue.add(30, {5.0}, 1, sampleCount); - BOOST_TEST_REQUIRE(queue.canSample(0)); - - queue.sample(0, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - BOOST_TEST_REQUIRE(samples.empty()); - - 
BOOST_REQUIRE_EQUAL(2, queue.size()); -} - -BOOST_AUTO_TEST_CASE(testSampleGivenUsingSubSamplesUpToCountExceedItMoreThanUsingOneLess) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - TTestSampleQueue::TSampleVec samples; - queue.add(0, {2.0}, 5, sampleCount); - queue.add(2, {2.0}, 3, sampleCount); - queue.add(10, {6.0}, 6, sampleCount); - queue.add(30, {8.0}, 1, sampleCount); - BOOST_TEST_REQUIRE(queue.canSample(0)); - - queue.sample(0, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - BOOST_REQUIRE_EQUAL(1, samples.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(1), samples[0].time()); - BOOST_REQUIRE_EQUAL(2.0, samples[0].value()[0]); - BOOST_REQUIRE_EQUAL(1.25, samples[0].varianceScale()); -} - -BOOST_AUTO_TEST_CASE(testResetBucketGivenEmptyQueue) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.resetBucket(10); - - BOOST_TEST_REQUIRE(queue.empty()); -} - -BOOST_AUTO_TEST_CASE(testResetBucketGivenBucketBeforeEarliestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(10, {1.0}, 5, sampleCount); - queue.add(15, {1.0}, 5, sampleCount); - queue.add(20, {1.0}, 5, sampleCount); - queue.add(24, {1.0}, 5, sampleCount); - queue.add(29, {1.0}, 5, sampleCount); - queue.add(30, {1.0}, 5, sampleCount); - - queue.resetBucket(0); - - BOOST_REQUIRE_EQUAL(6, queue.size()); -} - -BOOST_AUTO_TEST_CASE(testResetBucketGivenBucketAtEarliestSubSample) { - 
std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(10, {1.0}, 3, sampleCount); - queue.add(11, {1.0}, 2, sampleCount); - queue.add(15, {1.0}, 5, sampleCount); - queue.add(20, {1.0}, 5, sampleCount); - queue.add(24, {1.0}, 5, sampleCount); - queue.add(29, {1.0}, 5, sampleCount); - queue.add(30, {1.0}, 5, sampleCount); - - queue.resetBucket(10); - - BOOST_REQUIRE_EQUAL(4, queue.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(29), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(24), queue[2].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(20), queue[3].s_Start); -} - -BOOST_AUTO_TEST_CASE(testResetBucketGivenBucketInBetweenWithoutAnySubSamples) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(10, {1.0}, 5, sampleCount); - queue.add(15, {1.0}, 5, sampleCount); - queue.add(30, {1.0}, 5, sampleCount); - - queue.resetBucket(20); - - BOOST_REQUIRE_EQUAL(3, queue.size()); -} - -BOOST_AUTO_TEST_CASE(testResetBucketGivenBucketAtInBetweenSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(10, {1.0}, 5, sampleCount); - queue.add(15, {1.0}, 5, sampleCount); - queue.add(20, {1.0}, 5, sampleCount); - queue.add(24, {1.0}, 5, sampleCount); - queue.add(29, {1.0}, 5, sampleCount); - queue.add(30, {1.0}, 5, sampleCount); - - queue.resetBucket(20); - - 
BOOST_REQUIRE_EQUAL(3, queue.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(15), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(10), queue[2].s_Start); -} - -BOOST_AUTO_TEST_CASE(testResetBucketGivenBucketAtLatestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(10, {1.0}, 5, sampleCount); - queue.add(15, {1.0}, 5, sampleCount); - queue.add(20, {1.0}, 5, sampleCount); - queue.add(24, {1.0}, 5, sampleCount); - queue.add(29, {1.0}, 5, sampleCount); - queue.add(30, {1.0}, 5, sampleCount); - - queue.resetBucket(30); - - BOOST_REQUIRE_EQUAL(5, queue.size()); - BOOST_REQUIRE_EQUAL(core_t::TTime(29), queue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(24), queue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(20), queue[2].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(15), queue[3].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(10), queue[4].s_Start); -} - -BOOST_AUTO_TEST_CASE(testResetBucketGivenBucketAfterLatestSubSample) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - - queue.add(10, {1.0}, 5, sampleCount); - queue.add(15, {1.0}, 5, sampleCount); - queue.add(20, {1.0}, 5, sampleCount); - queue.add(24, {1.0}, 5, sampleCount); - queue.add(29, {1.0}, 5, sampleCount); - queue.add(30, {1.0}, 5, sampleCount); - - queue.resetBucket(40); - - BOOST_REQUIRE_EQUAL(6, queue.size()); -} - -BOOST_AUTO_TEST_CASE(testSubSamplesNeverSpanOverDifferentBuckets) { - std::size_t sampleCountFactor(10); - std::size_t latencyBuckets(3); - double growthFactor(0.1); - core_t::TTime 
bucketLength(600); - unsigned int sampleCount(45); - - core_t::TTime latency = (latencyBuckets + 1) * bucketLength; - std::size_t numberOfMeasurements = 5000; - - test::CRandomNumbers rng; - - core_t::TTime latestTime = bucketLength * (latencyBuckets + 1); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { - TDoubleVec testData; - rng.generateUniformSamples(static_cast(latestTime - latency), - static_cast(latestTime), 1, testData); - latestTime += - 60 + static_cast( - 40.0 * std::sin(boost::math::constants::two_pi() * - static_cast(latestTime % 86400) / 86400.0)); - core_t::TTime measurementTime = static_cast(testData[0]); - queue.add(measurementTime, {1.0}, 1u, sampleCount); - } - - for (std::size_t i = 0; i < queue.size(); ++i) { - core_t::TTime startBucket = - maths::common::CIntegerTools::floor(queue[i].s_Start, bucketLength); - core_t::TTime endBucket = - maths::common::CIntegerTools::floor(queue[i].s_End, bucketLength); - BOOST_REQUIRE_EQUAL(startBucket, endBucket); - } -} - -BOOST_AUTO_TEST_CASE(testPersistence) { - std::size_t sampleCountFactor(2); - std::size_t latencyBuckets(2); - double growthFactor(0.1); - core_t::TTime bucketLength(10); - unsigned int sampleCount(10); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - queue.add(0, {1.0}, 3, sampleCount); - queue.add(2, {3.5}, 2, sampleCount); - queue.add(30, {8.0}, 1, sampleCount); - - std::string origXml; - { - core::CRapidXmlStatePersistInserter inserter("root"); - queue.acceptPersistInserter(inserter); - inserter.toXml(origXml); - } - LOG_DEBUG(<< "XML:\n" << origXml); - - core::CRapidXmlParser parser; - BOOST_TEST_REQUIRE(parser.parseStringIgnoreCdata(origXml)); - core::CRapidXmlStateRestoreTraverser traverser(parser); - - TTestSampleQueue restoredQueue(1, sampleCountFactor, latencyBuckets, - growthFactor, bucketLength); 
- traverser.traverseSubLevel(std::bind(&TTestSampleQueue::acceptRestoreTraverser, - &restoredQueue, std::placeholders::_1)); - - BOOST_REQUIRE_EQUAL(2, restoredQueue.size()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(0), restoredQueue[1].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(2), restoredQueue[1].s_End); - BOOST_REQUIRE_CLOSE_ABSOLUTE(2.0, restoredQueue[1].s_Statistic.value()[0], 0.0001); - BOOST_REQUIRE_EQUAL(core_t::TTime(1), restoredQueue[1].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(5.0, restoredQueue[1].s_Statistic.count()); - - BOOST_REQUIRE_EQUAL(core_t::TTime(30), restoredQueue[0].s_Start); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), restoredQueue[0].s_End); - BOOST_REQUIRE_CLOSE_ABSOLUTE(8.0, restoredQueue[0].s_Statistic.value()[0], 0.0001); - BOOST_REQUIRE_EQUAL(core_t::TTime(30), restoredQueue[0].s_Statistic.time()); - BOOST_REQUIRE_EQUAL(1.0, restoredQueue[0].s_Statistic.count()); -} - -BOOST_AUTO_TEST_CASE(testQualityOfSamplesGivenConstantRate) { - std::size_t sampleCountFactor(5); - std::size_t latencyBuckets(3); - double growthFactor(0.1); - core_t::TTime bucketLength(600); - unsigned int sampleCount(30); - - core_t::TTime latency = (latencyBuckets + 1) * bucketLength; - std::size_t numberOfMeasurements = 5000; - std::size_t numberOfRuns = 100; - - test::CRandomNumbers rng; - - maths::common::CBasicStatistics::SSampleMean::TAccumulator meanQueueSize; - maths::common::CBasicStatistics::SSampleMean::TAccumulator meanMinVariance; - maths::common::CBasicStatistics::SSampleMean::TAccumulator meanMaxVariance; - - for (std::size_t runId = 0; runId < numberOfRuns; ++runId) { - TSampleVec samples; - core_t::TTime latestTime = bucketLength * (latencyBuckets + 1); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; - measurementId < numberOfMeasurements; ++measurementId) { - TDoubleVec testData; - rng.generateUniformSamples(static_cast(latestTime - latency), - static_cast(latestTime), 
1, testData); - latestTime += 60; - core_t::TTime measurementTime = static_cast(testData[0]); - queue.add(measurementTime, {1.0}, 1u, sampleCount); - } - meanQueueSize.add(static_cast(queue.size())); - queue.sample(latestTime, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - maths::common::CBasicStatistics::SSampleMeanVar::TAccumulator varianceStat; - maths::common::CBasicStatistics::COrderStatisticsStack varianceMin; - maths::common::CBasicStatistics::COrderStatisticsStack> varianceMax; - for (std::size_t i = 0; i < samples.size(); ++i) { - varianceStat.add(samples[i].varianceScale()); - varianceMin.add(samples[i].varianceScale()); - varianceMax.add(samples[i].varianceScale()); - } - varianceMin.sort(); - varianceMax.sort(); - meanMinVariance.add(varianceMin[0]); - meanMaxVariance.add(varianceMax[0]); - - LOG_TRACE(<< "Results for run: " << runId); - LOG_TRACE(<< "Mean variance scale = " - << maths::common::CBasicStatistics::mean(varianceStat)); - LOG_TRACE(<< "Variance of variance scale = " - << maths::common::CBasicStatistics::variance(varianceStat)); - LOG_TRACE(<< "Top min variance scale = " << varianceMin); - LOG_TRACE(<< "Top max variance scale = " << varianceMax); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(varianceStat) > 0.98); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(varianceStat) < 1.01); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::variance(varianceStat) < 0.0025); - BOOST_TEST_REQUIRE(varianceMin[0] > 0.85); - BOOST_TEST_REQUIRE(varianceMax[0] < 1.12); - } - LOG_DEBUG(<< "Mean queue size = " << maths::common::CBasicStatistics::mean(meanQueueSize)); - LOG_DEBUG(<< "Mean min variance = " - << maths::common::CBasicStatistics::mean(meanMinVariance)); - LOG_DEBUG(<< "Mean max variance = " - << maths::common::CBasicStatistics::mean(meanMaxVariance)); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanMinVariance) > 0.90); - 
BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanMaxVariance) < 1.1); -} - -BOOST_AUTO_TEST_CASE(testQualityOfSamplesGivenVariableRate) { - std::size_t sampleCountFactor(5); - std::size_t latencyBuckets(3); - double growthFactor(0.1); - core_t::TTime bucketLength(600); - unsigned int sampleCount(30); - - core_t::TTime latency = (latencyBuckets + 1) * bucketLength; - std::size_t numberOfMeasurements = 5000; - std::size_t numberOfRuns = 100; - - test::CRandomNumbers rng; - - maths::common::CBasicStatistics::SSampleMean::TAccumulator meanQueueSize; - maths::common::CBasicStatistics::SSampleMean::TAccumulator meanMinVariance; - maths::common::CBasicStatistics::SSampleMean::TAccumulator meanMaxVariance; - - for (std::size_t runId = 0; runId < numberOfRuns; ++runId) { - TSampleVec samples; - core_t::TTime latestTime = bucketLength * (latencyBuckets + 1); - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; - measurementId < numberOfMeasurements; ++measurementId) { - TDoubleVec testData; - rng.generateUniformSamples(static_cast(latestTime - latency), - static_cast(latestTime), 1, testData); - latestTime += - 60 + static_cast( - 40.0 * std::sin(boost::math::constants::two_pi() * - static_cast(latestTime % 86400) / 86400.0)); - core_t::TTime measurementTime = static_cast(testData[0]); - queue.add(measurementTime, {1.0}, 1u, sampleCount); - } - meanQueueSize.add(queue.size()); - queue.sample(latestTime, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - maths::common::CBasicStatistics::SSampleMeanVar::TAccumulator varianceStat; - maths::common::CBasicStatistics::COrderStatisticsStack varianceMin; - maths::common::CBasicStatistics::COrderStatisticsStack> varianceMax; - for (std::size_t i = 0; i < samples.size(); ++i) { - varianceStat.add(samples[i].varianceScale()); - varianceMin.add(samples[i].varianceScale()); - varianceMax.add(samples[i].varianceScale()); - } - 
varianceMin.sort(); - varianceMax.sort(); - meanMinVariance.add(varianceMin[0]); - meanMaxVariance.add(varianceMax[0]); - - LOG_TRACE(<< "Results for run: " << runId); - LOG_TRACE(<< "Mean variance scale = " - << maths::common::CBasicStatistics::mean(varianceStat)); - LOG_TRACE(<< "Variance of variance scale = " - << maths::common::CBasicStatistics::variance(varianceStat)); - LOG_TRACE(<< "Top min variance scale = " << varianceMin); - LOG_TRACE(<< "Top max variance scale = " << varianceMax); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(varianceStat) > 0.97); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(varianceStat) < 1.01); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::variance(varianceStat) < 0.0065); - BOOST_TEST_REQUIRE(varianceMin[0] > 0.74); - BOOST_TEST_REQUIRE(varianceMax[0] < 1.26); - } - LOG_DEBUG(<< "Mean queue size = " << maths::common::CBasicStatistics::mean(meanQueueSize)); - LOG_DEBUG(<< "Mean min variance = " - << maths::common::CBasicStatistics::mean(meanMinVariance)); - LOG_DEBUG(<< "Mean max variance = " - << maths::common::CBasicStatistics::mean(meanMaxVariance)); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanMinVariance) > 0.82); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(meanMaxVariance) < 1.16); -} - -BOOST_AUTO_TEST_CASE(testQualityOfSamplesGivenHighLatencyAndDataInReverseOrder) { - std::size_t sampleCountFactor(5); - std::size_t latencyBuckets(500); - double growthFactor(0.1); - core_t::TTime bucketLength(600); - unsigned int sampleCount(30); - - std::size_t numberOfMeasurements = 5000; - - test::CRandomNumbers rng; - - TSampleVec samples; - core_t::TTime latestTime = 60 * numberOfMeasurements; - core_t::TTime time = latestTime; - TTestSampleQueue queue(1, sampleCountFactor, latencyBuckets, growthFactor, bucketLength); - for (std::size_t measurementId = 0; measurementId < numberOfMeasurements; ++measurementId) { - queue.add(time, {1.0}, 1u, sampleCount); - time -= 60; - } 
- queue.add(360000, {1.0}, 1u, sampleCount); - queue.sample(latestTime, sampleCount, model_t::E_IndividualMeanByPerson, samples); - - maths::common::CBasicStatistics::SSampleMeanVar::TAccumulator varianceStat; - maths::common::CBasicStatistics::COrderStatisticsStack varianceMin; - maths::common::CBasicStatistics::COrderStatisticsStack> varianceMax; - for (std::size_t i = 0; i < samples.size(); ++i) { - varianceStat.add(samples[i].varianceScale()); - varianceMin.add(samples[i].varianceScale()); - varianceMax.add(samples[i].varianceScale()); - } - - LOG_DEBUG(<< "Mean variance scale = " - << maths::common::CBasicStatistics::mean(varianceStat)); - LOG_DEBUG(<< "Variance of variance scale = " - << maths::common::CBasicStatistics::variance(varianceStat)); - LOG_DEBUG(<< "Min variance scale = " << varianceMin[0]); - LOG_DEBUG(<< "Max variance scale = " << varianceMax[0]); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(varianceStat) >= 0.999); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::mean(varianceStat) <= 1.0); - BOOST_TEST_REQUIRE(maths::common::CBasicStatistics::variance(varianceStat) <= 0.0001); - BOOST_TEST_REQUIRE(varianceMin[0] > 0.96); - BOOST_TEST_REQUIRE(varianceMax[0] <= 1.0); -} - -BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/Mocks.cc b/lib/model/unittest/Mocks.cc index 13d2024c4f..5872593c28 100644 --- a/lib/model/unittest/Mocks.cc +++ b/lib/model/unittest/Mocks.cc @@ -242,9 +242,7 @@ const CAnomalyDetectorModel& CMockModelDetailsView::base() const { return *m_Model; } -double CMockModelDetailsView::countVarianceScale(model_t::EFeature /*feature*/, - std::size_t /*byFieldId*/, - core_t::TTime /*time*/) const { +double CMockModelDetailsView::countVarianceScale() const { return 1.0; } } diff --git a/lib/model/unittest/Mocks.h b/lib/model/unittest/Mocks.h index 97f046b068..5382477ba4 100644 --- a/lib/model/unittest/Mocks.h +++ b/lib/model/unittest/Mocks.h @@ -166,9 +166,7 @@ class CMockModelDetailsView : public 
CModelDetailsView { std::size_t byFieldId) const override; TTimeTimePr dataTimeInterval(std::size_t byFieldId) const override; const CAnomalyDetectorModel& base() const override; - double countVarianceScale(model_t::EFeature feature, - std::size_t byFieldId, - core_t::TTime time) const override; + double countVarianceScale() const override; private: //! The model.