From ba047a26aa048f4464e1f7db4c1ecb9aa582a5ef Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 30 Jan 2024 22:59:05 +0100 Subject: [PATCH 01/53] Add TODOs. --- .../common/include/mlrl/common/binning/feature_binning.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp index 060bf2ffcc..183bb3ee58 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp @@ -24,6 +24,7 @@ class IFeatureBinning { * thresholds that result from the boundaries of the bins, as well as to a vector that stores the indices of the * bins, individual values have been assigned to. */ + // TODO Remove struct Result final { public: @@ -51,6 +52,7 @@ class IFeatureBinning { * from the boundaries between the bins, as well as a vector that stores the indices of the * bins, individual values have been assigned to */ + // TODO Remove virtual Result createBins(FeatureVector& featureVector, uint32 numExamples) const = 0; }; From 602d255bebaa2eef00f1502f94bb38fe72e0aaf7 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 30 Jan 2024 23:04:43 +0100 Subject: [PATCH 02/53] The class IFeatureBinning now inherits from IFeatureType. 
--- .../mlrl/common/binning/feature_binning.hpp | 4 ++-- .../feature_binning_equal_frequency.cpp | 20 +++++++++++++++++++ .../binning/feature_binning_equal_width.cpp | 20 +++++++++++++++++++ .../binning/feature_binning_nominal.hpp | 20 +++++++++++++++++++ 4 files changed, 62 insertions(+), 2 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp index 183bb3ee58..468de92ac9 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp @@ -16,7 +16,7 @@ /** * Defines an interface for methods that assign feature values to bins. */ -class IFeatureBinning { +class IFeatureBinning : public IFeatureType { public: /** @@ -41,7 +41,7 @@ class IFeatureBinning { std::unique_ptr binIndicesPtr; }; - virtual ~IFeatureBinning() {} + virtual ~IFeatureBinning() override {} /** * Assigns the values in a given `FeatureVector` to bins. 
diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp index 8052e76139..206063ee7b 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp @@ -152,6 +152,26 @@ class EqualFrequencyFeatureBinning final : public IFeatureBinning { return result; } + + bool isOrdinal() const override { + return false; + } + + bool isNominal() const override { + return false; + } + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const FortranContiguousView& featureMatrix) const override { + // TODO Implement + return nullptr; + } + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const CscView& featureMatrix) const override { + // TODO Implement + return nullptr; + } }; /** diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp index 4a27901796..9bf22f9790 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp @@ -171,6 +171,26 @@ class EqualWidthFeatureBinning final : public IFeatureBinning { return result; } + + bool isOrdinal() const override { + return false; + } + + bool isNominal() const override { + return false; + } + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const FortranContiguousView& featureMatrix) const override { + // TODO Implement + return nullptr; + } + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const CscView& featureMatrix) const override { + // TODO Implement + return nullptr; + } }; /** diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_nominal.hpp 
b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_nominal.hpp index 33b49c5ac3..2de5cb6a48 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_nominal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_nominal.hpp @@ -65,6 +65,26 @@ class NominalFeatureBinning final : public IFeatureBinning { return result; } + + bool isOrdinal() const override { + return false; + } + + bool isNominal() const override { + return false; + } + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const FortranContiguousView& featureMatrix) const override { + // TODO Implement + return nullptr; + } + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const CscView& featureMatrix) const override { + // TODO Implement + return nullptr; + } }; /** From eb4752035c210fc640d26732e79cdd6adff00320 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 31 Jan 2024 00:13:54 +0100 Subject: [PATCH 03/53] The class ExactThresholds now expects an object of type IFeatureBinningFactory to be provided as a constructor argument. --- .../mlrl/common/binning/feature_binning.hpp | 3 ++- .../common/thresholds/thresholds_exact.hpp | 11 +++++++++-- .../common/binning/feature_binning_no.cpp | 13 ++++++++++++- .../common/thresholds/thresholds_exact.cpp | 19 +++++++++++++++---- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp index 468de92ac9..fe312abf11 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp @@ -67,7 +67,8 @@ class IFeatureBinningFactory { /** * Creates and returns a new object of type `IFeatureBinning`. 
* - * @return An unique pointer to an object of type `IFeatureBinning` that has been created + * @return An unique pointer to an object of type `IFeatureBinning` that has been created or a null pointer, if + * no feature binning should be used */ virtual std::unique_ptr create() const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp index 9cba03ce30..d99e7fc00c 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp @@ -3,6 +3,7 @@ */ #pragma once +#include "mlrl/common/binning/feature_binning.hpp" #include "mlrl/common/thresholds/thresholds.hpp" /** @@ -11,14 +12,20 @@ class ExactThresholdsFactory final : public IThresholdsFactory { private: + const std::unique_ptr featureBinningFactoryPtr_; + const uint32 numThreads_; public: /** - * @param numThreads The number of CPU threads to be used to update statistics in parallel. Must be at least 1 + * @param featureBinningFactoryPtr An unique pointer to an object of type `IFeatureBinningFactory` that allows + * to create implementations of the binning method to be used for assigning + * numerical feature values to bins + * @param numThreads The number of CPU threads to be used to update statistics in parallel. 
Must + * be at least 1 */ - ExactThresholdsFactory(uint32 numThreads); + ExactThresholdsFactory(std::unique_ptr featureBinningFactoryPtr, uint32 numThreads); std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp index 25bd5dfad8..7b9524ae6f 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp @@ -2,11 +2,22 @@ #include "mlrl/common/thresholds/thresholds_exact.hpp" +/** + * Allows to create instances of the type `IFeatureBinning` that do not actualy perform any feature binning. + */ +class NoFeatureBinningFactory final : public IFeatureBinningFactory { + public: + + std::unique_ptr create() const override { + return nullptr; + } +}; + NoFeatureBinningConfig::NoFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr) : multiThreadingConfigPtr_(multiThreadingConfigPtr) {} std::unique_ptr NoFeatureBinningConfig::createThresholdsFactory( const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumLabels()); - return std::make_unique(numThreads); + return std::make_unique(std::make_unique(), numThreads); } diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index b5fac8a6fc..7dd3a05d88 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -307,6 +307,8 @@ class ExactThresholds final : public AbstractThresholds { } }; + const IFeatureBinningFactory& featureBinningFactory_; + const uint32 numThreads_; std::unordered_map> cache_; @@ 
-320,11 +322,17 @@ class ExactThresholds final : public AbstractThresholds { * the types of individual features * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides access to * statistics about the labels of the training examples + * @param featureBinningFactory A reference to an object of type `IFeatureBinningFactory` that allows to create + * implementations of the binning method to be used for assigning numerical feature + * values to bins + * assign nominal feature values to bins * @param numThreads The number of CPU threads to be used to update statistics in parallel */ ExactThresholds(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider, uint32 numThreads) - : AbstractThresholds(featureMatrix, featureInfo, statisticsProvider), numThreads_(numThreads) {} + IStatisticsProvider& statisticsProvider, const IFeatureBinningFactory& featureBinningFactory, + uint32 numThreads) + : AbstractThresholds(featureMatrix, featureInfo, statisticsProvider), + featureBinningFactory_(featureBinningFactory), numThreads_(numThreads) {} std::unique_ptr createSubset(const EqualWeightVector& weights) override { IStatistics& statistics = statisticsProvider_.get(); @@ -348,10 +356,13 @@ class ExactThresholds final : public AbstractThresholds { } }; -ExactThresholdsFactory::ExactThresholdsFactory(uint32 numThreads) : numThreads_(numThreads) {} +ExactThresholdsFactory::ExactThresholdsFactory(std::unique_ptr featureBinningFactoryPtr, + uint32 numThreads) + : featureBinningFactoryPtr_(std::move(featureBinningFactoryPtr)), numThreads_(numThreads) {} std::unique_ptr ExactThresholdsFactory::create(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, IStatisticsProvider& statisticsProvider) const { - return std::make_unique(featureMatrix, featureInfo, statisticsProvider, numThreads_); + return std::make_unique(featureMatrix, featureInfo, statisticsProvider, 
*featureBinningFactoryPtr_, + numThreads_); } From a62837087ad2ab2c3ca0798166e796868c30d617 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 31 Jan 2024 23:45:50 +0100 Subject: [PATCH 04/53] Add class IThresholdsConfig. --- .../boosting/binning/feature_binning_auto.hpp | 18 +++---------- .../include/mlrl/boosting/learner.hpp | 3 +-- .../boosting/binning/feature_binning_auto.cpp | 11 +++----- .../mlrl/common/binning/feature_binning.hpp | 10 +++---- .../feature_binning_equal_frequency.hpp | 14 +++------- .../binning/feature_binning_equal_width.hpp | 14 +++------- .../common/binning/feature_binning_no.hpp | 16 ++--------- .../common/include/mlrl/common/learner.hpp | 24 ++++++++++++++--- .../mlrl/common/thresholds/thresholds.hpp | 23 ++++++++++++++++ .../common/thresholds/thresholds_exact.hpp | 27 +++++++++++++++++++ .../feature_binning_equal_frequency.cpp | 15 +++-------- .../binning/feature_binning_equal_width.cpp | 15 +++-------- .../common/binning/feature_binning_no.cpp | 10 ++----- .../common/src/mlrl/common/learner.cpp | 11 ++++++-- .../common/thresholds/thresholds_exact.cpp | 12 +++++++++ 15 files changed, 119 insertions(+), 104 deletions(-) diff --git a/cpp/subprojects/boosting/include/mlrl/boosting/binning/feature_binning_auto.hpp b/cpp/subprojects/boosting/include/mlrl/boosting/binning/feature_binning_auto.hpp index 0bc3a36d59..3beecaa631 100644 --- a/cpp/subprojects/boosting/include/mlrl/boosting/binning/feature_binning_auto.hpp +++ b/cpp/subprojects/boosting/include/mlrl/boosting/binning/feature_binning_auto.hpp @@ -4,7 +4,6 @@ #pragma once #include "mlrl/common/binning/feature_binning.hpp" -#include "mlrl/common/multi_threading/multi_threading.hpp" namespace boosting { @@ -12,24 +11,13 @@ namespace boosting { * Allows to configure a method that automatically decides whether feature binning should be used or not. 
*/ class AutomaticFeatureBinningConfig final : public IFeatureBinningConfig { - private: - - const std::unique_ptr& multiThreadingConfigPtr_; - public: /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - AutomaticFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr); - - /** - * @see `IFeatureBinningConfig::createThresholdsFactory` + * @see `IFeatureBinningConfig::createFeatureBinningFactory` */ - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; + std::unique_ptr createFeatureBinningFactory( + const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const override; }; } diff --git a/cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp b/cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp index 05fab90082..2a6cc53080 100644 --- a/cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp +++ b/cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp @@ -157,8 +157,7 @@ namespace boosting { virtual void useAutomaticFeatureBinning() { std::unique_ptr& featureBinningConfigPtr = this->getFeatureBinningConfigPtr(); - featureBinningConfigPtr = - std::make_unique(this->getParallelStatisticUpdateConfigPtr()); + featureBinningConfigPtr = std::make_unique(); } }; diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/binning/feature_binning_auto.cpp b/cpp/subprojects/boosting/src/mlrl/boosting/binning/feature_binning_auto.cpp index a32b5a9999..6131cc618c 100644 --- a/cpp/subprojects/boosting/src/mlrl/boosting/binning/feature_binning_auto.cpp +++ b/cpp/subprojects/boosting/src/mlrl/boosting/binning/feature_binning_auto.cpp @@ -5,17 +5,12 @@ namespace boosting { - AutomaticFeatureBinningConfig::AutomaticFeatureBinningConfig( - const std::unique_ptr& multiThreadingConfigPtr) - : 
multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - std::unique_ptr AutomaticFeatureBinningConfig::createThresholdsFactory( + std::unique_ptr AutomaticFeatureBinningConfig::createFeatureBinningFactory( const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { if (!featureMatrix.isSparse() && featureMatrix.getNumExamples() > 200000) { - return EqualWidthFeatureBinningConfig(multiThreadingConfigPtr_) - .createThresholdsFactory(featureMatrix, labelMatrix); + return EqualWidthFeatureBinningConfig().createFeatureBinningFactory(featureMatrix, labelMatrix); } else { - return NoFeatureBinningConfig(multiThreadingConfigPtr_).createThresholdsFactory(featureMatrix, labelMatrix); + return NoFeatureBinningConfig().createFeatureBinningFactory(featureMatrix, labelMatrix); } } diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp index fe312abf11..8c5e65fa3b 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp @@ -7,9 +7,9 @@ #include "mlrl/common/binning/bin_index_vector.hpp" #include "mlrl/common/binning/threshold_vector.hpp" #include "mlrl/common/input/feature_matrix.hpp" +#include "mlrl/common/input/feature_type.hpp" #include "mlrl/common/input/feature_vector.hpp" #include "mlrl/common/input/label_matrix.hpp" -#include "mlrl/common/thresholds/thresholds.hpp" #include @@ -82,14 +82,14 @@ class IFeatureBinningConfig { virtual ~IFeatureBinningConfig() {} /** - * Creates and returns a new object of type `IThresholdsFactory` according to the specified configuration. + * Creates and returns a new object of type `IFeatureBinningFactory` according to the specified configuration. 
* * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the feature * values of the training examples * @param labelMatrix A reference to an object of type `ILabelMatrix` that provides access to the labels of * the training examples - * @return An unique pointer to an object of type `IThresholdsFactory` that has been created + * @return An unique pointer to an object of type `IFeatureBinningFactory` that has been created */ - virtual std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const = 0; + virtual std::unique_ptr createFeatureBinningFactory( + const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_frequency.hpp b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_frequency.hpp index 27d5f30c59..1bd0026a3a 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_frequency.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_frequency.hpp @@ -5,7 +5,6 @@ #pragma once #include "mlrl/common/binning/feature_binning.hpp" -#include "mlrl/common/multi_threading/multi_threading.hpp" /** * Defines an interface for all classes that allow to configure a method that assigns numerical feature values to bins, @@ -82,16 +81,9 @@ class EqualFrequencyFeatureBinningConfig final : public IFeatureBinningConfig, uint32 maxBins_; - const std::unique_ptr& multiThreadingConfigPtr_; - public: - /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - EqualFrequencyFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr); + EqualFrequencyFeatureBinningConfig(); float32 getBinRatio() const override; @@ -105,6 +97,6 @@ class 
EqualFrequencyFeatureBinningConfig final : public IFeatureBinningConfig, IEqualFrequencyFeatureBinningConfig& setMaxBins(uint32 maxBins) override; - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; + std::unique_ptr createFeatureBinningFactory( + const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_width.hpp b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_width.hpp index 0b2e353a65..7e5275838c 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_width.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_width.hpp @@ -5,7 +5,6 @@ #pragma once #include "mlrl/common/binning/feature_binning.hpp" -#include "mlrl/common/multi_threading/multi_threading.hpp" /** * Defines an interface for all classes that allow to configure a method that assigns numerical feature values to bins, @@ -82,16 +81,9 @@ class EqualWidthFeatureBinningConfig final : public IFeatureBinningConfig, uint32 maxBins_; - const std::unique_ptr& multiThreadingConfigPtr_; - public: - /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - EqualWidthFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr); + EqualWidthFeatureBinningConfig(); float32 getBinRatio() const override; @@ -105,6 +97,6 @@ class EqualWidthFeatureBinningConfig final : public IFeatureBinningConfig, IEqualWidthFeatureBinningConfig& setMaxBins(uint32 maxBins) override; - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; + std::unique_ptr createFeatureBinningFactory( + const IFeatureMatrix& featureMatrix, const 
ILabelMatrix& labelMatrix) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_no.hpp b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_no.hpp index 3014ffede5..8cb6740ae1 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_no.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_no.hpp @@ -4,25 +4,13 @@ #pragma once #include "mlrl/common/binning/feature_binning.hpp" -#include "mlrl/common/multi_threading/multi_threading.hpp" /** * Allows to configure a method that does not actually perform any feature binning. */ class NoFeatureBinningConfig final : public IFeatureBinningConfig { - private: - - const std::unique_ptr& multiThreadingConfigPtr_; - public: - /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - NoFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr); - - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; + std::unique_ptr createFeatureBinningFactory( + const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/learner.hpp b/cpp/subprojects/common/include/mlrl/common/learner.hpp index b68841c47e..a28fd88450 100644 --- a/cpp/subprojects/common/include/mlrl/common/learner.hpp +++ b/cpp/subprojects/common/include/mlrl/common/learner.hpp @@ -192,6 +192,15 @@ class MLRLCOMMON_API IRuleLearner { */ virtual std::unique_ptr& getFeatureBinningConfigPtr() = 0; + /** + * Returns an unique pointer to the configuration of the class that provides access to the thresholds + * that may be used by the conditions of rules. 
+ * + * @return A reference to an unique pointer of type `IThresholdsConfig` that stores the configuration of + * the class that provides access to the thresholds that may be used by the conditions of rules + */ + virtual std::unique_ptr& getThresholdsConfigPtr() = 0; + /** * Returns an unique pointer to the configuration of the method for sampling labels. * @@ -503,8 +512,7 @@ class MLRLCOMMON_API IRuleLearner { virtual void useNoFeatureBinning() { std::unique_ptr& featureBinningConfigPtr = this->getFeatureBinningConfigPtr(); - featureBinningConfigPtr = - std::make_unique(this->getParallelStatisticUpdateConfigPtr()); + featureBinningConfigPtr = std::make_unique(); } }; @@ -528,7 +536,7 @@ class MLRLCOMMON_API IRuleLearner { std::unique_ptr& featureBinningConfigPtr = this->getFeatureBinningConfigPtr(); std::unique_ptr ptr = - std::make_unique(this->getParallelStatisticUpdateConfigPtr()); + std::make_unique(); IEqualWidthFeatureBinningConfig& ref = *ptr; featureBinningConfigPtr = std::move(ptr); return ref; @@ -555,7 +563,7 @@ class MLRLCOMMON_API IRuleLearner { std::unique_ptr& featureBinningConfigPtr = this->getFeatureBinningConfigPtr(); std::unique_ptr ptr = - std::make_unique(this->getParallelStatisticUpdateConfigPtr()); + std::make_unique(); IEqualFrequencyFeatureBinningConfig& ref = *ptr; featureBinningConfigPtr = std::move(ptr); return ref; @@ -1634,6 +1642,12 @@ class AbstractRuleLearner : virtual public IRuleLearner { */ std::unique_ptr featureBinningConfigPtr_; + /** + * An unique pointer that stores the configuration of the class that provides access to the thresholds + * that may be used by the conditions of rules. + */ + std::unique_ptr thresholdsConfigPtr_; + /** * An unique pointer that stores the configuration of the method for sampling labels. 
*/ @@ -1756,6 +1770,8 @@ class AbstractRuleLearner : virtual public IRuleLearner { std::unique_ptr& getFeatureBinningConfigPtr() override final; + std::unique_ptr& getThresholdsConfigPtr() override final; + std::unique_ptr& getLabelSamplingConfigPtr() override final; std::unique_ptr& getInstanceSamplingConfigPtr() override final; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp index 809c61d063..178dd9b0e9 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp @@ -5,6 +5,7 @@ #include "mlrl/common/input/feature_info.hpp" #include "mlrl/common/input/feature_matrix_column_wise.hpp" +#include "mlrl/common/input/label_matrix.hpp" #include "mlrl/common/sampling/weight_vector_bit.hpp" #include "mlrl/common/sampling/weight_vector_dense.hpp" #include "mlrl/common/sampling/weight_vector_equal.hpp" @@ -79,3 +80,25 @@ class IThresholdsFactory { const IFeatureInfo& featureInfo, IStatisticsProvider& statisticsProvider) const = 0; }; + +/** + * Defines an interface for all classes that allow to configure a class that provides access to thresholds that may be + * used by the conditions of rules. + */ +class IThresholdsConfig { + public: + + virtual ~IThresholdsConfig() {} + + /** + * Creates and returns a new object of type `IThresholdsFactory` according to the specified configuration. 
+ * + * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the feature + * values of the training examples + * @param labelMatrix A reference to an object of type `ILabelMatrix` that provides access to the labels of + * the training examples + * @return An unique pointer to an object of type `IThresholdsFactory` that has been created + */ + virtual std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, + const ILabelMatrix& labelMatrix) const = 0; +}; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp index d99e7fc00c..9a827df02c 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp @@ -4,6 +4,7 @@ #pragma once #include "mlrl/common/binning/feature_binning.hpp" +#include "mlrl/common/multi_threading/multi_threading.hpp" #include "mlrl/common/thresholds/thresholds.hpp" /** @@ -31,3 +32,29 @@ class ExactThresholdsFactory final : public IThresholdsFactory { const IFeatureInfo& featureInfo, IStatisticsProvider& statisticsProvider) const override; }; + +/** + * Allows to configure a class that provides access to thresholds that may be used by the conditions of rules. 
+ */ +class ExactThresholdsConfig final : public IThresholdsConfig { + private: + + const std::unique_ptr& featureBinningConfigPtr_; + + const std::unique_ptr& multiThreadingConfigPtr_; + + public: + + /** + * @param featureBinningConfigPtr A reference to an unique pointer that stores the configuration of the method + * that should be used for assigning numerical feature values to bins + * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the + * multi-threading behavior that should be used for the parallel update of + * statistics + */ + ExactThresholdsConfig(const std::unique_ptr& featureBinningConfigPtr, + const std::unique_ptr& multiThreadingConfigPtr); + + std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, + const ILabelMatrix& labelMatrix) const override; +}; diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp index 206063ee7b..2304861f3c 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp @@ -3,7 +3,6 @@ #include "feature_binning_nominal.hpp" #include "mlrl/common/binning/bin_index_vector_dense.hpp" #include "mlrl/common/binning/bin_index_vector_dok.hpp" -#include "mlrl/common/thresholds/thresholds_approximate.hpp" #include "mlrl/common/util/math.hpp" #include "mlrl/common/util/validation.hpp" @@ -204,9 +203,7 @@ class EqualFrequencyFeatureBinningFactory final : public IFeatureBinningFactory } }; -EqualFrequencyFeatureBinningConfig::EqualFrequencyFeatureBinningConfig( - const std::unique_ptr& multiThreadingConfigPtr) - : binRatio_(0.33f), minBins_(2), maxBins_(0), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} +EqualFrequencyFeatureBinningConfig::EqualFrequencyFeatureBinningConfig() : binRatio_(0.33f), minBins_(2), 
maxBins_(0) {} float32 EqualFrequencyFeatureBinningConfig::getBinRatio() const { return binRatio_; @@ -239,13 +236,7 @@ IEqualFrequencyFeatureBinningConfig& EqualFrequencyFeatureBinningConfig::setMaxB return *this; } -std::unique_ptr EqualFrequencyFeatureBinningConfig::createThresholdsFactory( +std::unique_ptr EqualFrequencyFeatureBinningConfig::createFeatureBinningFactory( const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - std::unique_ptr numericalFeatureBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - std::unique_ptr nominalFeatureBinningFactoryPtr = - std::make_unique(); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumLabels()); - return std::make_unique(std::move(numericalFeatureBinningFactoryPtr), - std::move(nominalFeatureBinningFactoryPtr), numThreads); + return std::make_unique(binRatio_, minBins_, maxBins_); } diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp index 9bf22f9790..a0a9924bd5 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp @@ -4,7 +4,6 @@ #include "mlrl/common/binning/bin_index_vector_dense.hpp" #include "mlrl/common/binning/bin_index_vector_dok.hpp" #include "mlrl/common/data/array.hpp" -#include "mlrl/common/thresholds/thresholds_approximate.hpp" #include "mlrl/common/util/math.hpp" #include "mlrl/common/util/validation.hpp" @@ -223,9 +222,7 @@ class EqualWidthFeatureBinningFactory final : public IFeatureBinningFactory { } }; -EqualWidthFeatureBinningConfig::EqualWidthFeatureBinningConfig( - const std::unique_ptr& multiThreadingConfigPtr) - : binRatio_(0.33f), minBins_(2), maxBins_(0), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} 
+EqualWidthFeatureBinningConfig::EqualWidthFeatureBinningConfig() : binRatio_(0.33f), minBins_(2), maxBins_(0) {} float32 EqualWidthFeatureBinningConfig::getBinRatio() const { return binRatio_; @@ -258,13 +255,7 @@ IEqualWidthFeatureBinningConfig& EqualWidthFeatureBinningConfig::setMaxBins(uint return *this; } -std::unique_ptr EqualWidthFeatureBinningConfig::createThresholdsFactory( +std::unique_ptr EqualWidthFeatureBinningConfig::createFeatureBinningFactory( const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - std::unique_ptr numericalFeatureBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - std::unique_ptr nominalFeatureBinningFactoryPtr = - std::make_unique(); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumLabels()); - return std::make_unique(std::move(numericalFeatureBinningFactoryPtr), - std::move(nominalFeatureBinningFactoryPtr), numThreads); + return std::make_unique(binRatio_, minBins_, maxBins_); } diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp index 7b9524ae6f..7b330b2f92 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp @@ -1,7 +1,5 @@ #include "mlrl/common/binning/feature_binning_no.hpp" -#include "mlrl/common/thresholds/thresholds_exact.hpp" - /** * Allows to create instances of the type `IFeatureBinning` that do not actualy perform any feature binning. 
*/ @@ -13,11 +11,7 @@ class NoFeatureBinningFactory final : public IFeatureBinningFactory { } }; -NoFeatureBinningConfig::NoFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr) - : multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - -std::unique_ptr NoFeatureBinningConfig::createThresholdsFactory( +std::unique_ptr NoFeatureBinningConfig::createFeatureBinningFactory( const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumLabels()); - return std::make_unique(std::make_unique(), numThreads); + return std::make_unique(); } diff --git a/cpp/subprojects/common/src/mlrl/common/learner.cpp b/cpp/subprojects/common/src/mlrl/common/learner.cpp index b81948daa9..f692ecce66 100644 --- a/cpp/subprojects/common/src/mlrl/common/learner.cpp +++ b/cpp/subprojects/common/src/mlrl/common/learner.cpp @@ -2,6 +2,7 @@ #include "mlrl/common/prediction/label_space_info_no.hpp" #include "mlrl/common/stopping/stopping_criterion_size.hpp" +#include "mlrl/common/thresholds/thresholds_exact.hpp" #include "mlrl/common/util/validation.hpp" /** @@ -88,7 +89,9 @@ AbstractRuleLearner::Config::Config(RuleCompareFunction ruleCompareFunction) ruleModelAssemblageConfigPtr_(std::make_unique(defaultRuleConfigPtr_)), ruleInductionConfigPtr_( std::make_unique(ruleCompareFunction_, parallelRuleRefinementConfigPtr_)), - featureBinningConfigPtr_(std::make_unique(parallelStatisticUpdateConfigPtr_)), + featureBinningConfigPtr_(std::make_unique()), + thresholdsConfigPtr_( + std::make_unique(featureBinningConfigPtr_, parallelStatisticUpdateConfigPtr_)), labelSamplingConfigPtr_(std::make_unique()), instanceSamplingConfigPtr_(std::make_unique()), featureSamplingConfigPtr_(std::make_unique()), @@ -122,6 +125,10 @@ std::unique_ptr& AbstractRuleLearner::Config::getFeatureB return featureBinningConfigPtr_; } +std::unique_ptr& AbstractRuleLearner::Config::getThresholdsConfigPtr() { + 
return thresholdsConfigPtr_; +} + std::unique_ptr& AbstractRuleLearner::Config::getLabelSamplingConfigPtr() { return labelSamplingConfigPtr_; } @@ -210,7 +217,7 @@ std::unique_ptr AbstractRuleLearner::createRuleMode std::unique_ptr AbstractRuleLearner::createThresholdsFactory( const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - return config_.getFeatureBinningConfigPtr()->createThresholdsFactory(featureMatrix, labelMatrix); + return config_.getThresholdsConfigPtr()->createThresholdsFactory(featureMatrix, labelMatrix); } std::unique_ptr AbstractRuleLearner::createRuleInductionFactory( diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 7dd3a05d88..83b425e2b3 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -366,3 +366,15 @@ std::unique_ptr ExactThresholdsFactory::create(const IColumnWiseFea return std::make_unique(featureMatrix, featureInfo, statisticsProvider, *featureBinningFactoryPtr_, numThreads_); } + +ExactThresholdsConfig::ExactThresholdsConfig(const std::unique_ptr& featureBinningConfigPtr, + const std::unique_ptr& multiThreadingConfigPtr) + : featureBinningConfigPtr_(featureBinningConfigPtr), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} + +std::unique_ptr ExactThresholdsConfig::createThresholdsFactory( + const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { + std::unique_ptr featureBinningFactoryPtr = + featureBinningConfigPtr_->createFeatureBinningFactory(featureMatrix, labelMatrix); + uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumLabels()); + return std::make_unique(std::move(featureBinningFactoryPtr), numThreads); +} From 1f6195d69bc40f8a9aacafdcff5afc8da1b4bcb7 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 31 Jan 2024 
23:51:28 +0100 Subject: [PATCH 05/53] The function "createFeatureType" of the class IFeatureInfo does now expect an object of type IFeatureBinningFactory to be provided as an argument. --- .../mlrl/common/input/feature_info.hpp | 10 +++- .../mlrl/common/input/feature_info_equal.cpp | 51 +++++++++++++++---- .../mlrl/common/input/feature_info_mixed.cpp | 9 +++- .../thresholds/thresholds_approximate.cpp | 3 +- .../common/thresholds/thresholds_exact.cpp | 4 +- 5 files changed, 61 insertions(+), 16 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_info.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_info.hpp index c2f20a5fa3..5b60b0128f 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_info.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_info.hpp @@ -3,6 +3,7 @@ */ #pragma once +#include "mlrl/common/binning/feature_binning.hpp" #include "mlrl/common/data/types.hpp" #include "mlrl/common/input/feature_type.hpp" #include "mlrl/common/util/dll_exports.hpp" @@ -21,7 +22,12 @@ class MLRLCOMMON_API IFeatureInfo { * Creates and returns a new object of type `IFeatureType` that corresponds to the type of the feature at a * specific index. 
* - * @return An unique pointer to an object of the type `IFeatureType` that has been created + * @param featureIndex The index of the feature + * @param featureBinningFactory A reference to an object of type `IFeatureBinningFactory` that allows to create + * implementations of the binning method to be used for assigning numerical feature + * values to bins + * @return An unique pointer to an object of the type `IFeatureType` that has been created */ - virtual std::unique_ptr createFeatureType(uint32 featureIndex) const = 0; + virtual std::unique_ptr createFeatureType( + uint32 featureIndex, const IFeatureBinningFactory& featureBinningFactory) const = 0; }; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_info_equal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_info_equal.cpp index f5bf329218..a62ab69057 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_info_equal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_info_equal.cpp @@ -5,27 +5,58 @@ #include "mlrl/common/input/feature_type_ordinal.hpp" /** - * An implementation of the type `IEqualFeatureInfo` that stores the type of all features. - * - * @tparam FeatureType The type of all features + * An implementation of the type `IEqualFeatureInfo` that creates an object of type `OrdinalFeatureType` for each + * feature. */ -template -class EqualFeatureInfo final : public IEqualFeatureInfo { +class OrdinalFeatureInfo final : public IEqualFeatureInfo { public: - std::unique_ptr createFeatureType(uint32 featureIndex) const override { - return std::make_unique(); + std::unique_ptr createFeatureType( + uint32 featureIndex, const IFeatureBinningFactory& featureBinningFactory) const override { + return std::make_unique(); + } +}; + +/** + * An implementation of the type `IEqualFeatureInfo` that creates an object of type `NominalFeatureType` for each + * feature. 
+ */ +class NominalFeatureInfo final : public IEqualFeatureInfo { + public: + + std::unique_ptr createFeatureType( + uint32 featureIndex, const IFeatureBinningFactory& featureBinningFactory) const override { + return std::make_unique(); + } +}; + +/** + * An implementation of the type `IEqualFeatureInfo` that creates an object of type `IFeatureBinning` or + * `NumericalFeatureType` for each feature, depending on whether feature binning should be used or not. + */ +class NumericalFeatureInfo final : public IEqualFeatureInfo { + public: + + std::unique_ptr createFeatureType( + uint32 featureIndex, const IFeatureBinningFactory& featureBinningFactory) const override { + std::unique_ptr featureBinningPtr = featureBinningFactory.create(); + + if (featureBinningPtr) { + return featureBinningPtr; + } + + return std::make_unique(); } }; std::unique_ptr createOrdinalFeatureInfo() { - return std::make_unique>(); + return std::make_unique(); } std::unique_ptr createNominalFeatureInfo() { - return std::make_unique>(); + return std::make_unique(); } std::unique_ptr createNumericalFeatureInfo() { - return std::make_unique>(); + return std::make_unique(); } diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_info_mixed.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_info_mixed.cpp index cd9eaf3b6d..d8246326ac 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_info_mixed.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_info_mixed.cpp @@ -24,12 +24,19 @@ class BitFeatureInfo final : public IMixedFeatureInfo { BitFeatureInfo(uint32 numFeatures) : ordinalBitVector_(numFeatures, true), nominalBitVector_(numFeatures, true) {} - std::unique_ptr createFeatureType(uint32 featureIndex) const override { + std::unique_ptr createFeatureType( + uint32 featureIndex, const IFeatureBinningFactory& featureBinningFactory) const override { if (ordinalBitVector_[featureIndex]) { return std::make_unique(); } else if (nominalBitVector_[featureIndex]) 
{ return std::make_unique(); } else { + std::unique_ptr featureBinningPtr = featureBinningFactory.create(); + + if (featureBinningPtr) { + return featureBinningPtr; + } + return std::make_unique(); } } diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_approximate.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_approximate.cpp index 2b34a9cace..77c97e62d3 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_approximate.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_approximate.cpp @@ -200,8 +200,9 @@ class ApproximateThresholds final : public AbstractThresholds { thresholds_.cache_.emplace(featureIndex, IFeatureBinning::Result()); } + std::unique_ptr featureBinningFactoryPtr; std::unique_ptr featureTypePtr = - thresholds_.featureInfo_.createFeatureType(featureIndex); + thresholds_.featureInfo_.createFeatureType(featureIndex, *featureBinningFactoryPtr); bool ordinal = featureTypePtr->isOrdinal(); bool nominal = featureTypePtr->isNominal(); std::unique_ptr callbackPtr = std::make_unique(*this, featureIndex, nominal); diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 83b425e2b3..6f36a441c3 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -81,8 +81,8 @@ class ExactThresholds final : public AbstractThresholds { featureVector = cacheIterator->second.get(); if (!featureVector) { - std::unique_ptr featureTypePtr = - featureInfo_.createFeatureType(featureIndex_); + std::unique_ptr featureTypePtr = featureInfo_.createFeatureType( + featureIndex_, thresholdsSubset_.thresholds_.featureBinningFactory_); cacheIterator->second = thresholdsSubset_.thresholds_.featureMatrix_.createFeatureVector(featureIndex_, *featureTypePtr); From 4f0b3af68f49884169b45dd23beba4c4e1ec7ea1 
Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 6 Feb 2024 17:18:55 +0100 Subject: [PATCH 06/53] Add classes BinnedFeatureVector and AllocatedBinnedFeatureVector. --- .../common/input/feature_vector_binned.hpp | 193 ++++++++++++++++++ cpp/subprojects/common/meson.build | 1 + .../common/input/feature_vector_binned.cpp | 63 ++++++ .../input/feature_vector_binned_allocated.hpp | 65 ++++++ 4 files changed, 322 insertions(+) create mode 100644 cpp/subprojects/common/include/mlrl/common/input/feature_vector_binned.hpp create mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_vector_binned.cpp create mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_vector_binned_allocated.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binned.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binned.hpp new file mode 100644 index 0000000000..39e5c769fa --- /dev/null +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binned.hpp @@ -0,0 +1,193 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/data/view.hpp" + +/** + * A feature vector that stores the indices of the examples that are associated with each bin, except for the most + * frequent one, created by a method that assigns numerical feature values to bins. + */ +class MLRLCOMMON_API BinnedFeatureVector { + public: + + /** + * A pointer to an array that stores thresholds separating adjacent bins. + */ + float32* thresholds; + + /** + * A pointer to an array that stores the indices of all examples not associated with the most frequent bin. + */ + uint32* indices; + + /** + * A pointer to an array that stores the indices of the first element in `indices` that corresponds to a certain + * bin. + */ + uint32* indptr; + + /** + * The number of bins, excluding the most frequent one. + */ + uint32 numBins; + + /** + * The index of the most frequent bin. 
+ */ + uint32 sparseBinIndex; + + public: + + /** + * @param thresholds A pointer to an array of type `float32`, shape `(numBins - 1)` that stores + * thresholds separating bins + * @param indices A pointer to an array of type `uint32`, shape `(numIndices)` that stores the indices + * of all examples not associated with the most frequent bin + * @param indptr A pointer to an array that stores the indices of the first element in `indices` that + * corresponds to a certain bin + * @param numBins The number of bins, including the most frequent one + * @param numIndices The number of elements in the array `indices` + * @param sparseBinIndex The index of the most frequent bin + */ + BinnedFeatureVector(float32* thresholds, uint32* indices, uint32* indptr, uint32 numBins, uint32 numIndices, + uint32 sparseBinIndex); + + /** + * @param other A reference to an object of type `BinnedFeatureVector` that should be copied + */ + BinnedFeatureVector(const BinnedFeatureVector& other); + + /** + * @param other A reference to an object of type `BinnedFeatureVector` that should be moved + */ + BinnedFeatureVector(BinnedFeatureVector&& other); + + virtual ~BinnedFeatureVector() {}; + + /** + * The type of the indices, the view provides access to. + */ + typedef uint32 index_type; + + /** + * The type of the thresholds, the view provides access to. + */ + typedef float32 threshold_type; + + /** + * An iterator that provides read-only access to all thresholds. + */ + typedef const float32* threshold_const_iterator; + + /** + * An iterator that provides access to all thresholds and allows to modify them. + */ + typedef float32* threshold_iterator; + + /** + * An iterator that provides read-only access to the indices of the examples that are associated with each bin, + * except for the most frequent bin. 
+ */ + typedef const uint32* index_const_iterator; + + /** + * An iterator that provides access to the indices of the examples that are associated with each bin, except for + * the most frequent bin, and allows to modify them. + */ + typedef uint32* index_iterator; + + /** + * Returns a `threshold_const_iterator` to the beginning of the thresholds. + * + * @return A `threshold_const_iterator` to the beginning + */ + threshold_const_iterator thresholds_cbegin() const; + + /** + * Returns a `threshold_const_iterator` to the end of the thresholds. + * + * @return A `threshold_const_iterator` to the end + */ + threshold_const_iterator thresholds_cend() const; + + /** + * Returns a `threshold_iterator` to the beginning of the thresholds. + * + * @return A `threshold_iterator` to the beginning + */ + threshold_iterator thresholds_begin(); + + /** + * Returns a `threshold_iterator` to the end of the thresholds. + * + * @return A `threshold_iterator` to the end + */ + threshold_iterator thresholds_end(); + + /** + * Returns an `index_const_iterator` to the beginning of the indices of the examples that are associated with a + * specific bin. + * + * @param index The index of the bin + * @return An `index_const_iterator` to the beginning + */ + index_const_iterator indices_cbegin(uint32 index) const; + + /** + * Returns an `index_const_iterator` to the end of the indices of the examples that are associated with a + * specific bin. + * + * @param index The index of the bin + * @return An `index_const_iterator` to the end + */ + index_const_iterator indices_cend(uint32 index) const; + + /** + * Returns an `index_iterator` to the beginning of the indices of the examples that are associated with a + * specific bin. + * + * @param index The index of the bin + * @return An `index_iterator` to the beginning + */ + index_iterator indices_begin(uint32 index); + + /** + * Returns an `index_iterator` to the end of the indices of the examples that are associated with a specific + * bin.
+ * + * @param index The index of the bin + * @return An `index_iterator` to the end + */ + index_iterator indices_end(uint32 index); + + /** + * Releases the ownership of the array that stores the thresholds. As a result, the behavior of this view + * becomes undefined and it should not be used anymore. The caller is responsible for freeing the memory that is + * occupied by the array. + * + * @return A pointer to an array that stores all thresholds + */ + threshold_type* releaseThresholds(); + + /** + * Releases the ownership of the array that stores the indices of all examples not associated with the most + * frequent bin. As a result, the behavior of this view becomes undefined and it should not be used anymore. The + * caller is responsible for freeing the memory that is occupied by the array. + * + * @return A pointer to the array that stores the indices of all examples not associated with the most frequent + * bin + */ + index_type* releaseIndices(); + + /** + * Releases the ownership of the array that stores the indices of the first element in `indices` that + * corresponds to a certain bin. As a result, the behavior of this view becomes undefined and it should not be + * used anymore. The caller is responsible for freeing the memory that is occupied by the array. 
+ * + * @return A pointer to an array that stores the indices of the first element in `indices` that corresponds to + * a certain bin + */ + index_type* releaseIndptr(); +}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index cf4df9071d..e11332c8b1 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -23,6 +23,7 @@ source_files = [ 'src/mlrl/common/input/feature_type_numerical.cpp', 'src/mlrl/common/input/feature_type_ordinal.cpp', 'src/mlrl/common/input/feature_vector.cpp', + 'src/mlrl/common/input/feature_vector_binned.cpp', 'src/mlrl/common/input/feature_vector_equal.cpp', 'src/mlrl/common/input/feature_vector_nominal.cpp', 'src/mlrl/common/input/feature_vector_numerical.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binned.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binned.cpp new file mode 100644 index 0000000000..1ea6dddebd --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binned.cpp @@ -0,0 +1,63 @@ +#include "mlrl/common/input/feature_vector_binned.hpp" + +BinnedFeatureVector::BinnedFeatureVector(float32* thresholds, uint32* indices, uint32* indptr, uint32 numBins, + uint32 numIndices, uint32 sparseBinIndex) + : thresholds(thresholds), indices(indices), indptr(indptr), numBins(numBins), sparseBinIndex(sparseBinIndex) {} + +BinnedFeatureVector::BinnedFeatureVector(const BinnedFeatureVector& other) + : thresholds(other.thresholds), indices(other.indices), indptr(other.indptr), numBins(other.numBins), + sparseBinIndex(other.sparseBinIndex) {} + +BinnedFeatureVector::BinnedFeatureVector(BinnedFeatureVector&& other) + : thresholds(other.thresholds), indices(other.indices), indptr(other.indptr), numBins(other.numBins), + sparseBinIndex(other.sparseBinIndex) {} + +BinnedFeatureVector::threshold_const_iterator BinnedFeatureVector::thresholds_cbegin() const { + return thresholds; +} + 
+BinnedFeatureVector::threshold_const_iterator BinnedFeatureVector::thresholds_cend() const { + return &thresholds[numBins - 1]; +} + +BinnedFeatureVector::threshold_iterator BinnedFeatureVector::thresholds_begin() { + return thresholds; +} + +BinnedFeatureVector::threshold_iterator BinnedFeatureVector::thresholds_end() { + return &thresholds[numBins - 1]; +} + +BinnedFeatureVector::index_const_iterator BinnedFeatureVector::indices_cbegin(uint32 index) const { + return &indices[indptr[index]]; +} + +BinnedFeatureVector::index_const_iterator BinnedFeatureVector::indices_cend(uint32 index) const { + return &indices[indptr[index + 1]]; +} + +BinnedFeatureVector::index_iterator BinnedFeatureVector::indices_begin(uint32 index) { + return &indices[indptr[index]]; +} + +BinnedFeatureVector::index_iterator BinnedFeatureVector::indices_end(uint32 index) { + return &indices[indptr[index + 1]]; +} + +BinnedFeatureVector::threshold_type* BinnedFeatureVector::releaseThresholds() { + threshold_type* ptr = thresholds; + thresholds = nullptr; + return ptr; +} + +BinnedFeatureVector::index_type* BinnedFeatureVector::releaseIndices() { + index_type* ptr = indices; + indices = nullptr; + return ptr; +} + +BinnedFeatureVector::index_type* BinnedFeatureVector::releaseIndptr() { + index_type* ptr = indptr; + indptr = nullptr; + return ptr; +} diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binned_allocated.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binned_allocated.hpp new file mode 100644 index 0000000000..2523308cfc --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binned_allocated.hpp @@ -0,0 +1,65 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/input/feature_vector_binned.hpp" + +/** + * Allocates the memory, a `BinnedFeatureVector` provides access to. 
+ */ +class MLRLCOMMON_API AllocatedBinnedFeatureVector : public BinnedFeatureVector { + public: + + /** + * @param numBins The number of bins, including the most frequent one + * @param numIndices The number of examples not associated with the most frequent bin + * @param sparseBinIndex The index of the most frequent bin + */ + AllocatedBinnedFeatureVector(uint32 numBins, uint32 numIndices, uint32 sparseBinIndex = 0) + : BinnedFeatureVector(allocateMemory(numBins - 1), allocateMemory(numIndices), + allocateMemory(numBins + 1), numBins, numIndices, sparseBinIndex) { + BinnedFeatureVector::indptr[0] = 0; + BinnedFeatureVector::indptr[numBins] = numIndices; + } + + /** + * @param other A reference to an object of type `AllocatedBinnedFeatureVector` that should be copied + */ + AllocatedBinnedFeatureVector(const AllocatedBinnedFeatureVector& other) : BinnedFeatureVector(other) { + throw std::runtime_error("Objects of type AllocatedBinnedFeatureVector cannot be copied"); + } + + /** + * @param other A reference to an object of type `AllocatedBinnedFeatureVector` that should be moved + */ + AllocatedBinnedFeatureVector(AllocatedBinnedFeatureVector&& other) : BinnedFeatureVector(std::move(other)) { + other.releaseThresholds(); + other.releaseIndices(); + other.releaseIndptr(); + } + + virtual ~AllocatedBinnedFeatureVector() override { + freeMemory(BinnedFeatureVector::thresholds); + freeMemory(BinnedFeatureVector::indices); + freeMemory(BinnedFeatureVector::indptr); + } + + /** + * Resizes the view by re-allocating the memory it provides access to. 
+ * + * @param numBins The number of bins, including the most frequent one + * @param numIndices The number of examples not associated with the most frequent bin + */ + void resize(uint32 numBins, uint32 numIndices) { + BinnedFeatureVector::thresholds = reallocateMemory(BinnedFeatureVector::thresholds, numBins - 1); + BinnedFeatureVector::indices = reallocateMemory(BinnedFeatureVector::indices, numIndices); + BinnedFeatureVector::indptr = reallocateMemory(BinnedFeatureVector::indptr, numBins + 1); + BinnedFeatureVector::numBins = numBins; + BinnedFeatureVector::indptr[numBins] = numIndices; + + if (BinnedFeatureVector::sparseBinIndex >= numBins) { + BinnedFeatureVector::sparseBinIndex = numBins - 1; + } + } +}; From 517bef566f694549468361607a7da16cd4095b22 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 6 Feb 2024 17:50:14 +0100 Subject: [PATCH 07/53] Add "searchForBinnedRefinement" functions to the class RuleRefinementSearch. --- .../rule_refinement_search.hpp | 85 +++++++++++++++---- .../rule_refinement_search.cpp | 20 +++++ 2 files changed, 89 insertions(+), 16 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_search.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_search.hpp index 2d8b407410..dc5e19dfd6 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_search.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_search.hpp @@ -4,6 +4,7 @@ #pragma once #include "mlrl/common/input/feature_vector_binary.hpp" +#include "mlrl/common/input/feature_vector_binned.hpp" #include "mlrl/common/input/feature_vector_missing.hpp" #include "mlrl/common/input/feature_vector_nominal.hpp" #include "mlrl/common/input/feature_vector_numerical.hpp" @@ -20,8 +21,8 @@ class RuleRefinementSearch final { public: /** - * Conducts a search for the best refinement of an existing rule that can be created from a given numerical - *
feature vector. + * Conducts a search for the best refinement of an existing rule that can be created from a + * `NumericalFeatureVector`. * * @param featureVector A reference to an object of type `NumericalFeatureVector`, the * refinements should be created from @@ -46,8 +47,8 @@ class RuleRefinementSearch final { uint32 minCoverage, Refinement& refinement) const; /** - * Conducts a search for the best refinement of an existing rule that can be created from a given numerical - * feature vector. + * Conducts a search for the best refinement of an existing rule that can be created from a + * `NumericalFeatureVector`. * * @param featureVector A reference to an object of type `NumericalFeatureVector`, the * refinements should be created from @@ -72,8 +73,8 @@ class RuleRefinementSearch final { uint32 minCoverage, Refinement& refinement) const; /** - * Conducts a search for the best refinement of an existing rule that can be created from a given nominal - * feature vector. + * Conducts a search for the best refinement of an existing rule that can be created from a + * `NominalFeatureVector`. * * @param featureVector A reference to an object of type `NominalFeatureVector`, the refinements * should be created from @@ -98,8 +99,8 @@ class RuleRefinementSearch final { uint32 minCoverage, Refinement& refinement) const; /** - * Conducts a search for the best refinement of an existing rule that can be created from a given nominal - * feature vector. + * Conducts a search for the best refinement of an existing rule that can be created from a + * `NominalFeatureVector`. * * @param featureVector A reference to an object of type `NominalFeatureVector`, the refinements * should be created from @@ -124,8 +125,8 @@ class RuleRefinementSearch final { uint32 minCoverage, Refinement& refinement) const; /** - * Conducts a search for the best refinement of an existing rule that can be created from a given binary feature - * vector. 
+ * Conducts a search for the best refinement of an existing rule that can be created from a + * `BinaryFeatureVector`. * * @param featureVector A reference to an object of type `BinaryFeatureVector`, the refinements * should be created from @@ -150,8 +151,8 @@ class RuleRefinementSearch final { uint32 minCoverage, Refinement& refinement) const; /** - * Conducts a search for the best refinement of an existing rule that can be created from a given binary feature - * vector. + * Conducts a search for the best refinement of an existing rule that can be created from a + * `BinaryFeatureVector`. * * @param featureVector A reference to an object of type `BinaryFeatureVector`, the refinements * should be created from @@ -176,8 +177,8 @@ class RuleRefinementSearch final { uint32 minCoverage, Refinement& refinement) const; /** - * Conducts a search for the best refinement of an existing rule that can be created from a given ordinal - * feature vector. + * Conducts a search for the best refinement of an existing rule that can be created from an + * `OrdinalFeatureVector`. * * @param featureVector A reference to an object of type `OrdinalFeatureVector`, the refinements * should be created from @@ -202,8 +203,8 @@ class RuleRefinementSearch final { uint32 minCoverage, Refinement& refinement) const; /** - * Conducts a search for the best refinement of an existing rule that can be created from a given ordinal - * feature vector. + * Conducts a search for the best refinement of an existing rule that can be created from an + * `OrdinalFeatureVector`. 
* * @param featureVector A reference to an object of type `OrdinalFeatureVector`, the refinements * should be created from @@ -226,4 +227,56 @@ class RuleRefinementSearch final { IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, Refinement& refinement) const; + + /** + * Conducts a search for the best refinement of an existing rule that can be created from a + * `BinnedFeatureVector`. + * + * @param featureVector A reference to an object of type `BinnedFeatureVector`, the refinements + * should be created from + * @param missingFeatureVector A reference to an object of type `MissingFeatureVector` that provides + * access to the indices of training examples with missing feature values + * @param statisticsSubset A reference to an object of type `IWeightedStatisticsSubset` that + * provides access to weighted statistics about the labels of the training + * examples, which should serve as the basis for evaluating the quality of + * potential refinements + * @param comparator A reference to an object of type `SingleRefinementComparator` that + * should be used for comparing potential refinements + * @param numExamplesWithNonZeroWeights The total number of examples with non-zero weights that may be covered + * by a refinement + * @param minCoverage The minimum number of examples that must be covered by the refinement + * @param refinement A reference to an object of type `Refinement` that should be used for + * storing the properties of the best refinement that is found + */ + void searchForBinnedRefinement(const BinnedFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const; + + /** + * Conducts a search for the best refinement of an existing rule that can be created from a + * 
`BinnedFeatureVector`. + * + * @param featureVector A reference to an object of type `BinnedFeatureVector`, the refinements + * should be created from + * @param missingFeatureVector A reference to an object of type `MissingFeatureVector` that provides + * access to the indices of training examples with missing feature values + * @param statisticsSubset A reference to an object of type `IWeightedStatisticsSubset` that + * provides access to weighted statistics about the labels of the training + * examples, which should serve as the basis for evaluating the quality of + * potential refinements + * @param comparator A reference to an object of type `MultiRefinementComparator` that should + * be used for comparing potential refinements + * @param numExamplesWithNonZeroWeights The total number of examples with non-zero weights that may be covered + * by a refinement + * @param minCoverage The minimum number of examples that must be covered by the refinements + * @param refinement A reference to an object of type `Refinement` that should be used for + * storing the properties of the best refinement that is found + */ + void searchForBinnedRefinement(const BinnedFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const; }; diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp index 5addb24d7a..200c83c4f5 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp @@ -100,3 +100,23 @@ void RuleRefinementSearch::searchForOrdinalRefinement(const OrdinalFeatureVector searchForOrdinalRefinementInternally(featureVector, 
/**
 * Conducts a search for the best refinement of an existing rule that can be created from a `BinnedFeatureVector`,
 * using a `SingleRefinementComparator` for comparing potential refinements.
 *
 * At this point, the function only passes the given `statisticsSubset` and `missingFeatureVector` on to
 * `addMissingStatistics`. The search itself is not implemented yet, i.e., none of the remaining arguments are used
 * and `refinement` is left untouched.
 */
void RuleRefinementSearch::searchForBinnedRefinement(const BinnedFeatureVector& featureVector,
                                                     const MissingFeatureVector& missingFeatureVector,
                                                     IWeightedStatisticsSubset& statisticsSubset,
                                                     SingleRefinementComparator& comparator,
                                                     uint32 numExamplesWithNonZeroWeights, uint32 minCoverage,
                                                     Refinement& refinement) const {
    // Pass the examples with missing feature values to `addMissingStatistics` before any bins are evaluated
    addMissingStatistics(statisticsSubset, missingFeatureVector);
    // TODO Implement
}

/**
 * Conducts a search for the best refinement of an existing rule that can be created from a `BinnedFeatureVector`,
 * using a `FixedRefinementComparator` for comparing potential refinements.
 *
 * At this point, the function only passes the given `statisticsSubset` and `missingFeatureVector` on to
 * `addMissingStatistics`. The search itself is not implemented yet, i.e., none of the remaining arguments are used
 * and `refinement` is left untouched.
 */
void RuleRefinementSearch::searchForBinnedRefinement(const BinnedFeatureVector& featureVector,
                                                     const MissingFeatureVector& missingFeatureVector,
                                                     IWeightedStatisticsSubset& statisticsSubset,
                                                     FixedRefinementComparator& comparator,
                                                     uint32 numExamplesWithNonZeroWeights, uint32 minCoverage,
                                                     Refinement& refinement) const {
    // Pass the examples with missing feature values to `addMissingStatistics` before any bins are evaluated
    addMissingStatistics(statisticsSubset, missingFeatureVector);
    // TODO Implement
}
/**
 * Creates a filtered copy of the binned feature vector that is accessible via a given view, such that it only
 * contains the indices of examples for which the given `CoverageMask` evaluates to true.
 *
 * The decorator that stores the result is obtained from `createFilteredFeatureVectorDecorator`. Afterwards, the bins
 * of the original feature vector are traversed in ascending order. The covered indices of each bin are written to
 * the filtered feature vector and bins without any remaining index are dropped, i.e., the remaining bins are stored
 * contiguously.
 *
 * NOTE(review): The template parameter list, as well as the template arguments of `std::unique_ptr`,
 * `createFilteredFeatureVectorDecorator` and `std::make_unique`, are missing from this excerpt (presumably lost
 * during extraction). They must be restored from the original file before this code compiles.
 *
 * @param view          A reference to the view that provides access to the feature vector to be filtered
 * @param existing      A reference to a unique pointer that is passed on to `createFilteredFeatureVectorDecorator`
 *                      (presumably an existing feature vector that may be reused - TODO confirm)
 * @param coverageMask  A reference to an object of type `CoverageMask` that is queried for each example index
 * @return              A unique pointer to the filtered decorator, if at least one index remains, or to the object
 *                      created via `std::make_unique` otherwise
 */
template
static inline std::unique_ptr createFilteredBinnedFeatureVectorDecorator(
  const View& view, std::unique_ptr& existing, const CoverageMask& coverageMask) {
    std::unique_ptr filteredDecoratorPtr =
      createFilteredFeatureVectorDecorator(view, existing, coverageMask);

    // Filter the indices of examples not associated with the majority value...
    const BinnedFeatureVector& featureVector = view.getView().firstView;
    AllocatedBinnedFeatureVector& filteredFeatureVector = filteredDecoratorPtr->getView().firstView;
    AllocatedBinnedFeatureVector::index_iterator filteredIndexIterator = filteredFeatureVector.indices;
    AllocatedBinnedFeatureVector::index_iterator filteredIndptrIterator = filteredFeatureVector.indptr;
    AllocatedBinnedFeatureVector::threshold_iterator filteredThresholdIterator = filteredFeatureVector.thresholds;
    // Number of non-empty bins and number of indices that have been written to the filtered feature vector so far
    uint32 numFilteredBins = 0;
    uint32 numFilteredIndices = 0;

    for (uint32 i = 0; i < featureVector.numBins; i++) {
        BinnedFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(i);
        BinnedFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(i);
        uint32 numIndices = indicesEnd - indexIterator;
        // Position in the filtered index array, where the indices of the current bin start
        uint32 indptr = numFilteredIndices;

        // Only keep the indices of examples that are marked as covered...
        for (uint32 j = 0; j < numIndices; j++) {
            uint32 index = indexIterator[j];

            if (coverageMask[index]) {
                filteredIndexIterator[numFilteredIndices] = index;
                numFilteredIndices++;
            }
        }

        // Only bins that still contain at least one index are retained...
        if (numFilteredIndices > indptr) {
            // NOTE(review): This raises `sparseBinIndex` to the position of every retained bin that is located at or
            // beyond the current `sparseBinIndex`, rather than shifting it by the number of dropped bins - confirm
            // that this is the intended way of updating the index of the sparse bin.
            if (numFilteredBins >= filteredFeatureVector.sparseBinIndex) {
                filteredFeatureVector.sparseBinIndex = numFilteredBins;
            }

            filteredIndptrIterator[numFilteredBins] = indptr;

            // No threshold is copied for the last bin of the original feature vector
            if (i < featureVector.numBins - 1) {
                filteredThresholdIterator[numFilteredBins] = featureVector.thresholds[i];
            }

            numFilteredBins++;
        }
    }

    // Shrink the filtered feature vector to the number of bins and indices that have actually been retained...
    if (numFilteredIndices > 0) {
        filteredFeatureVector.resize(numFilteredBins, numFilteredIndices);
        return filteredDecoratorPtr;
    }

    // None of the indices is covered (the test cases expect an `EqualFeatureVector` in this case)
    return std::make_unique();
}

// Forward declarations
class BinnedFeatureVectorDecorator;
/**
 * A feature vector decorator that wraps a `BinnedFeatureVector`, which is moved into the decorator, together with an
 * empty `AllocatedMissingFeatureVector`.
 *
 * NOTE(review): The template arguments of `AbstractFeatureVectorDecorator`, `std::unique_ptr` and
 * `createFilteredBinnedFeatureVectorDecorator` are missing from this excerpt (presumably lost during extraction).
 */
class BinnedFeatureVectorView final : public AbstractFeatureVectorDecorator {
    public:

        /**
         * @param firstView A reference to an object of type `BinnedFeatureVector`
         */
        BinnedFeatureVectorView(BinnedFeatureVector&& firstView)
            : AbstractFeatureVectorDecorator(std::move(firstView), AllocatedMissingFeatureVector()) {}

        // Delegates to `searchForBinnedRefinement`, using a `SingleRefinementComparator`.
        // NOTE(review): This overload accepts a `FeatureBasedSearch`, whereas `BinnedFeatureVectorDecorator` accepts a
        // `RuleRefinementSearch` - confirm which type is declared by the overridden function.
        void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset,
                                 SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights,
                                 uint32 minCoverage, Refinement& refinement) const override {
            featureBasedSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView, statisticsSubset,
                                                         comparator, numExamplesWithNonZeroWeights, minCoverage,
                                                         refinement);
        }

        // Delegates to `searchForBinnedRefinement`, using a `FixedRefinementComparator`.
        void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset,
                                 FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights,
                                 uint32 minCoverage, Refinement& refinement) const override {
            featureBasedSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView, statisticsSubset,
                                                         comparator, numExamplesWithNonZeroWeights, minCoverage,
                                                         refinement);
        }

        // Not implemented yet, i.e., neither the coverage mask nor the statistics are modified.
        void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask,
                                             uint32 indicatorValue,
                                             IWeightedStatistics& statistics) const override final {
            // TODO Implement
        }

        // Not implemented yet, i.e., a null pointer is returned regardless of the given interval.
        std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing,
                                                    const Interval& interval) const override {
            // TODO Implement
            return nullptr;
        }

        // Filters the feature vector according to a coverage mask via `createFilteredBinnedFeatureVectorDecorator`.
        std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing,
                                                    const CoverageMask& coverageMask) const override {
            return createFilteredBinnedFeatureVectorDecorator(
              *this, existing, coverageMask);
        }
};
/**
 * A feature vector decorator that wraps a `BinnedFeatureVector`, together with an empty
 * `AllocatedMissingFeatureVector`, and additionally takes ownership of an `AllocatedBinnedFeatureVector`.
 *
 * NOTE(review): The template arguments of `AbstractFeatureVectorDecorator`, `std::unique_ptr` and
 * `createFilteredBinnedFeatureVectorDecorator` are missing from this excerpt (presumably lost during extraction).
 */
class AllocatedBinnedFeatureVectorView final : public AbstractFeatureVectorDecorator {
    public:

        /**
         * The `AllocatedBinnedFeatureVector`, the view provides access to.
         */
        AllocatedBinnedFeatureVector allocatedView;

        /**
         * @param allocatedView A reference to an object of type `AllocatedBinnedFeatureVector`
         * @param firstView     A reference to an object of type `BinnedFeatureVector`
         */
        AllocatedBinnedFeatureVectorView(AllocatedBinnedFeatureVector&& allocatedView, BinnedFeatureVector&& firstView)
            : AbstractFeatureVectorDecorator(std::move(firstView),
                                             AllocatedMissingFeatureVector()),
              allocatedView(std::move(allocatedView)) {}

        // Delegates to `searchForBinnedRefinement`, using a `SingleRefinementComparator`.
        // NOTE(review): This overload accepts a `FeatureBasedSearch`, whereas `BinnedFeatureVectorDecorator` accepts a
        // `RuleRefinementSearch` - confirm which type is declared by the overridden function.
        void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset,
                                 SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights,
                                 uint32 minCoverage, Refinement& refinement) const override {
            featureBasedSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView, statisticsSubset,
                                                         comparator, numExamplesWithNonZeroWeights, minCoverage,
                                                         refinement);
        }

        // Delegates to `searchForBinnedRefinement`, using a `FixedRefinementComparator`.
        void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset,
                                 FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights,
                                 uint32 minCoverage, Refinement& refinement) const override {
            featureBasedSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView, statisticsSubset,
                                                         comparator, numExamplesWithNonZeroWeights, minCoverage,
                                                         refinement);
        }

        // Not implemented yet, i.e., neither the coverage mask nor the statistics are modified.
        void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask,
                                             uint32 indicatorValue,
                                             IWeightedStatistics& statistics) const override final {
            // TODO Implement
        }

        // Not implemented yet, i.e., a null pointer is returned regardless of the given interval.
        std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing,
                                                    const Interval& interval) const override {
            // TODO Implement
            return nullptr;
        }

        // Filters the feature vector according to a coverage mask via `createFilteredBinnedFeatureVectorDecorator`.
        std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing,
                                                    const CoverageMask& coverageMask) const override {
            return createFilteredBinnedFeatureVectorDecorator(*this, existing,
                                                              coverageMask);
        }
};

/**
 * Provides random read and write access, as well as read and write access via iterators, to the values and thresholds
 * stored in an `AllocatedBinnedFeatureVector`.
 *
 * NOTE(review): The template arguments of `AbstractFeatureVectorDecorator`, `std::unique_ptr` and
 * `createFilteredBinnedFeatureVectorDecorator` are missing from this excerpt (presumably lost during extraction).
 */
class BinnedFeatureVectorDecorator final : public AbstractFeatureVectorDecorator {
    public:

        /**
         * @param firstView   A reference to an object of type `AllocatedBinnedFeatureVector`
         * @param secondView  A reference to an object of type `AllocatedMissingFeatureVector`
         */
        BinnedFeatureVectorDecorator(AllocatedBinnedFeatureVector&& firstView,
                                     AllocatedMissingFeatureVector&& secondView)
            : AbstractFeatureVectorDecorator(std::move(firstView),
                                             std::move(secondView)) {}

        /**
         * Allocates a new feature vector with the same number of bins, the same number of indices and the same index
         * of the sparse bin as the feature vector of a given decorator. The new feature vector is combined with an
         * empty `AllocatedMissingFeatureVector`; no elements are copied by this constructor itself.
         *
         * @param other A reference to an object of type `BinnedFeatureVectorDecorator` that should be copied
         */
        BinnedFeatureVectorDecorator(const BinnedFeatureVectorDecorator& other)
            : BinnedFeatureVectorDecorator(
                AllocatedBinnedFeatureVector(other.view.firstView.numBins,
                                             other.view.firstView.indptr[other.view.firstView.numBins],
                                             other.view.firstView.sparseBinIndex),
                AllocatedMissingFeatureVector()) {}

        /**
         * Allocates a new feature vector with the same number of bins, the same number of indices and the same index
         * of the sparse bin as the feature vector of a given view. The new feature vector is combined with an empty
         * `AllocatedMissingFeatureVector`; no elements are copied by this constructor itself.
         *
         * @param other A reference to an object of type `BinnedFeatureVectorView` that should be copied
         */
        BinnedFeatureVectorDecorator(const BinnedFeatureVectorView& other)
            : BinnedFeatureVectorDecorator(
                AllocatedBinnedFeatureVector(other.getView().firstView.numBins,
                                             other.getView().firstView.indptr[other.getView().firstView.numBins],
                                             other.getView().firstView.sparseBinIndex),
                AllocatedMissingFeatureVector()) {}

        /**
         * Allocates a new feature vector with the same number of bins, the same number of indices and the same index
         * of the sparse bin as the feature vector of a given view. The new feature vector is combined with an empty
         * `AllocatedMissingFeatureVector`; no elements are copied by this constructor itself.
         *
         * @param other A reference to an object of type `AllocatedBinnedFeatureVectorView` that should be copied
         */
        BinnedFeatureVectorDecorator(const AllocatedBinnedFeatureVectorView& other)
            : BinnedFeatureVectorDecorator(
                AllocatedBinnedFeatureVector(other.getView().firstView.numBins,
                                             other.getView().firstView.indptr[other.getView().firstView.numBins],
                                             other.getView().firstView.sparseBinIndex),
                AllocatedMissingFeatureVector()) {}

        // Delegates to `searchForBinnedRefinement`, using a `SingleRefinementComparator`.
        void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch,
                                 IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator,
                                 uint32 numExamplesWithNonZeroWeights, uint32 minCoverage,
                                 Refinement& refinement) const override {
            ruleRefinementSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView,
                                                           statisticsSubset, comparator, numExamplesWithNonZeroWeights,
                                                           minCoverage, refinement);
        }

        // Delegates to `searchForBinnedRefinement`, using a `FixedRefinementComparator`.
        void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch,
                                 IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator,
                                 uint32 numExamplesWithNonZeroWeights, uint32 minCoverage,
                                 Refinement& refinement) const override {
            ruleRefinementSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView,
                                                           statisticsSubset, comparator, numExamplesWithNonZeroWeights,
                                                           minCoverage, refinement);
        }

        // Not implemented yet, i.e., neither the coverage mask nor the statistics are modified.
        void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask,
                                             uint32 indicatorValue,
                                             IWeightedStatistics& statistics) const override final {
            // TODO Implement
        }

        // Not implemented yet, i.e., a null pointer is returned regardless of the given interval.
        std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing,
                                                    const Interval& interval) const override {
            // TODO Implement
            return nullptr;
        }

        // Filters the feature vector according to a coverage mask via `createFilteredBinnedFeatureVectorDecorator`.
        std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing,
                                                    const CoverageMask& coverageMask) const override {
            return createFilteredBinnedFeatureVectorDecorator(*this, existing,
                                                              coverageMask);
        }
};
// NOTE(review): The template arguments of `std::unique_ptr`, `std::unordered_set`, `std::make_unique` and
// `dynamic_cast` are missing from the test cases below (presumably lost during extraction) and must be restored from
// the original file before this code compiles.

// Filters a decorator with 3 bins of 10 examples each, plus 10 examples with missing feature values, by a coverage
// mask that marks every example with an even index, and checks the filtered indices and missing indices.
TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageMask) {
    uint32 numBins = 3;
    uint32 numExamplesPerBin = 10;
    uint32 numMinorityExamples = numBins * numExamplesPerBin;
    AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0);
    AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds;
    AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr;
    AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0);

    // Assign the examples with indices [i * 10, (i + 1) * 10) to the i-th bin...
    for (uint32 i = 0; i < numBins; i++) {
        // A threshold is set for all bins, except for the last one
        if (i < numBins - 1) {
            thresholdIterator[i] = i;
        }

        indptrIterator[i] = i * numExamplesPerBin;

        for (uint32 j = 0; j < numExamplesPerBin; j++) {
            uint32 index = (i * numExamplesPerBin) + j;
            indexIterator[index] = index;
        }
    }

    // The examples with indices [30, 40) have missing feature values...
    AllocatedMissingFeatureVector missingFeatureVector;
    uint32 numMissingIndices = 10;
    uint32 numExamples = numMinorityExamples + numMissingIndices;

    for (uint32 i = numMinorityExamples; i < numExamples; i++) {
        missingFeatureVector.set(i, true);
    }

    // Write the indicator value to the entries of all examples with even indices...
    CoverageMask coverageMask(numExamples);
    uint32 indicatorValue = 1;
    coverageMask.indicatorValue = indicatorValue;
    CoverageMask::iterator coverageMaskIterator = coverageMask.begin();

    for (uint32 i = 0; i < numExamples; i++) {
        if (i % 2 == 0) {
            coverageMaskIterator[i] = indicatorValue;
        }
    }

    // No existing feature vector is provided, i.e., `existing` is a null pointer
    BinnedFeatureVectorDecorator decorator(std::move(featureVector), std::move(missingFeatureVector));
    std::unique_ptr existing;
    std::unique_ptr filtered = decorator.createFilteredFeatureVector(existing, coverageMask);
    const BinnedFeatureVectorDecorator* filteredDecorator =
      dynamic_cast(filtered.get());
    EXPECT_TRUE(filteredDecorator != nullptr);

    if (filteredDecorator) {
        // Check filtered indices...
        const BinnedFeatureVector& filteredFeatureVector = filteredDecorator->getView().firstView;

        for (uint32 i = 0; i < numBins; i++) {
            BinnedFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector.indices_cbegin(i);
            BinnedFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector.indices_cend(i);
            uint32 numIndices = indicesEnd - indicesBegin;
            // Half of the examples in each bin have even indices
            EXPECT_EQ(numIndices, numExamplesPerBin / 2);

            std::unordered_set indices;

            for (auto it = indicesBegin; it != indicesEnd; it++) {
                indices.emplace(*it);
            }

            // Exactly the even indices that originally belonged to the bin must be retained
            for (uint32 j = 0; j < numExamplesPerBin; j++) {
                uint32 index = (i * numExamplesPerBin) + j;

                if (index % 2 == 0) {
                    EXPECT_TRUE(indices.find(index) != indices.end());
                } else {
                    EXPECT_TRUE(indices.find(index) == indices.end());
                }
            }
        }

        // Check missing indices...
        const MissingFeatureVector& filteredMissingFeatureVector = filteredDecorator->getView().secondView;

        for (uint32 i = numMinorityExamples; i < numExamples; i++) {
            if (i % 2 == 0) {
                EXPECT_TRUE(filteredMissingFeatureVector[i]);
            } else {
                EXPECT_FALSE(filteredMissingFeatureVector[i]);
            }
        }
    }
}

// Same scenario as above, but the decorator to be filtered is also passed as the `existing` feature vector. In
// addition to the filtered indices, the test checks that `existing` is a null pointer afterwards.
TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageMaskUsingExisting) {
    uint32 numBins = 3;
    uint32 numExamplesPerBin = 10;
    uint32 numMinorityExamples = numBins * numExamplesPerBin;
    AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0);
    AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds;
    AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr;
    AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0);

    // Assign the examples with indices [i * 10, (i + 1) * 10) to the i-th bin...
    for (uint32 i = 0; i < numBins; i++) {
        // A threshold is set for all bins, except for the last one
        if (i < numBins - 1) {
            thresholdIterator[i] = i;
        }

        indptrIterator[i] = i * numExamplesPerBin;

        for (uint32 j = 0; j < numExamplesPerBin; j++) {
            uint32 index = (i * numExamplesPerBin) + j;
            indexIterator[index] = index;
        }
    }

    // The examples with indices [30, 40) have missing feature values...
    AllocatedMissingFeatureVector missingFeatureVector;
    uint32 numMissingIndices = 10;
    uint32 numExamples = numMinorityExamples + numMissingIndices;

    for (uint32 i = numMinorityExamples; i < numExamples; i++) {
        missingFeatureVector.set(i, true);
    }

    // Write the indicator value to the entries of all examples with even indices...
    CoverageMask coverageMask(numExamples);
    uint32 indicatorValue = 1;
    coverageMask.indicatorValue = indicatorValue;
    CoverageMask::iterator coverageMaskIterator = coverageMask.begin();

    for (uint32 i = 0; i < numExamples; i++) {
        if (i % 2 == 0) {
            coverageMaskIterator[i] = indicatorValue;
        }
    }

    // The feature vector to be filtered is provided as the existing one
    std::unique_ptr existing =
      std::make_unique(std::move(featureVector), std::move(missingFeatureVector));
    std::unique_ptr filtered = existing->createFilteredFeatureVector(existing, coverageMask);
    const BinnedFeatureVectorDecorator* filteredDecorator =
      dynamic_cast(filtered.get());
    EXPECT_TRUE(filteredDecorator != nullptr);
    EXPECT_TRUE(existing.get() == nullptr);

    if (filteredDecorator) {
        // Check filtered indices...
        const BinnedFeatureVector& filteredFeatureVector = filteredDecorator->getView().firstView;

        for (uint32 i = 0; i < numBins; i++) {
            BinnedFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector.indices_cbegin(i);
            BinnedFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector.indices_cend(i);
            uint32 numIndices = indicesEnd - indicesBegin;
            // Half of the examples in each bin have even indices
            EXPECT_EQ(numIndices, numExamplesPerBin / 2);

            std::unordered_set indices;

            for (auto it = indicesBegin; it != indicesEnd; it++) {
                indices.emplace(*it);
            }

            // Exactly the even indices that originally belonged to the bin must be retained
            for (uint32 j = 0; j < numExamplesPerBin; j++) {
                uint32 index = (i * numExamplesPerBin) + j;

                if (index % 2 == 0) {
                    EXPECT_TRUE(indices.find(index) != indices.end());
                } else {
                    EXPECT_TRUE(indices.find(index) == indices.end());
                }
            }
        }

        // Check missing indices...
        const MissingFeatureVector& filteredMissingFeatureVector = filteredDecorator->getView().secondView;

        for (uint32 i = numMinorityExamples; i < numExamples; i++) {
            if (i % 2 == 0) {
                EXPECT_TRUE(filteredMissingFeatureVector[i]);
            } else {
                EXPECT_FALSE(filteredMissingFeatureVector[i]);
            }
        }
    }
}

// Filters a decorator with a single bin by a coverage mask in which no entry has been set to the indicator value and
// checks that the result can be cast to an `EqualFeatureVector`.
TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageMaskReturnsEqualFeatureVector) {
    uint32 numBins = 1;
    uint32 numExamplesPerBin = 10;
    uint32 numMinorityExamples = numBins * numExamplesPerBin;
    AllocatedBinnedFeatureVector featureVector(1, numMinorityExamples, 0);
    AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0);

    for (uint32 i = 0; i < numBins; i++) {
        for (uint32 j = 0; j < numExamplesPerBin; j++) {
            uint32 index = (i * numExamplesPerBin) + j;
            indexIterator[index] = index;
        }
    }

    // Only the indicator value is changed, the entries of the mask are not written
    CoverageMask coverageMask(numMinorityExamples);
    coverageMask.indicatorValue = 1;

    BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector());
    std::unique_ptr existing;
    std::unique_ptr filtered = decorator.createFilteredFeatureVector(existing, coverageMask);
    const EqualFeatureVector* filteredFeatureVector = dynamic_cast(filtered.get());
    EXPECT_TRUE(filteredFeatureVector != nullptr);
}
rule_refinement_search_binned_common.hpp} | 17 +- .../rule_refinement_search_nominal.hpp | 3 +- .../rule_refinement_search_ordinal.hpp | 2 +- 6 files changed, 224 insertions(+), 13 deletions(-) create mode 100644 cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned.hpp rename cpp/subprojects/common/src/mlrl/common/rule_refinement/{rule_refinement_search_nominal_common.hpp => rule_refinement_search_binned_common.hpp} (67%) diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp index 200c83c4f5..ba0fb57d11 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp @@ -1,6 +1,7 @@ #include "mlrl/common/rule_refinement/rule_refinement_search.hpp" #include "rule_refinement_search_binary.hpp" +#include "rule_refinement_search_binned.hpp" #include "rule_refinement_search_nominal.hpp" #include "rule_refinement_search_numerical.hpp" #include "rule_refinement_search_ordinal.hpp" @@ -108,7 +109,8 @@ void RuleRefinementSearch::searchForBinnedRefinement(const BinnedFeatureVector& uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, Refinement& refinement) const { addMissingStatistics(statisticsSubset, missingFeatureVector); - // TODO Implement + searchForBinnedRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); } void RuleRefinementSearch::searchForBinnedRefinement(const BinnedFeatureVector& featureVector, @@ -118,5 +120,6 @@ void RuleRefinementSearch::searchForBinnedRefinement(const BinnedFeatureVector& uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, Refinement& refinement) const { addMissingStatistics(statisticsSubset, missingFeatureVector); - // TODO Implement + searchForBinnedRefinementInternally(featureVector, 
statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); } diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binary.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binary.hpp index 922a4f9540..abd0da416b 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binary.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binary.hpp @@ -5,7 +5,7 @@ #include "mlrl/common/input/feature_vector_binary.hpp" #include "mlrl/common/rule_refinement/refinement.hpp" -#include "rule_refinement_search_nominal_common.hpp" +#include "rule_refinement_search_binned_common.hpp" template static inline void searchForBinaryRefinementInternally(const BinaryFeatureVector& featureVector, diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned.hpp new file mode 100644 index 0000000000..9051b4dc2d --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned.hpp @@ -0,0 +1,206 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/input/feature_vector_binned.hpp" +#include "mlrl/common/rule_refinement/refinement.hpp" +#include "mlrl/common/statistics/statistics_subset_weighted.hpp" +#include "rule_refinement_search_binned_common.hpp" + +template +static inline void searchForBinnedRefinementInternally(const BinnedFeatureVector& featureVector, + IWeightedStatisticsSubset& statisticsSubset, + Comparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) { + // Mark all examples corresponding to the first bin with index `i < sparseBinIndex` as covered... 
+ BinnedFeatureVector::threshold_const_iterator thresholdIterator = featureVector.thresholds_cbegin(); + uint32 numBins = featureVector.numBins; + int32 sparseBinIndex = featureVector.sparseBinIndex; + uint32 numCovered = 0; + int64 i = 0; + + if (i < sparseBinIndex) { + numCovered += addAllToSubset(statisticsSubset, featureVector, i); + } + + // Traverse bins with indices `i < sparseBinIndex` in ascending order... + if (numCovered > 0) { + for (i = i + 1; i < sparseBinIndex; i++) { + // Check if a condition using the <= operator covers at least `minCoverage` examples... + if (numCovered >= minCoverage) { + // Determine the best prediction for the examples covered by a condition using the <= operator... + const IScoreVector& scoreVector = statisticsSubset.calculateScores(); + + // Check if the quality of the prediction is better than the quality of the current rule... + if (comparator.isImprovement(scoreVector)) { + refinement.start = 0; + refinement.end = i; + refinement.inverse = false; + refinement.numCovered = numCovered; + refinement.comparator = NUMERICAL_LEQ; + refinement.threshold = thresholdIterator[i - 1]; + comparator.pushRefinement(refinement, scoreVector); + } + } + + // Check if a condition using the > operator covers at least `minCoverage` examples... + uint32 numUncovered = numExamplesWithNonZeroWeights - numCovered; + + if (numUncovered >= minCoverage) { + // Determine the best prediction for examples covered by a condition using the > operator... + const IScoreVector& scoreVector = statisticsSubset.calculateScoresUncovered(); + + // Check if the quality of the prediction is better than the quality of the current rule... 
+ if (comparator.isImprovement(scoreVector)) { + refinement.start = 0; + refinement.end = i; + refinement.inverse = true; + refinement.numCovered = numUncovered; + refinement.comparator = NUMERICAL_GR; + refinement.threshold = thresholdIterator[i - 1]; + comparator.pushRefinement(refinement, scoreVector); + } + } + + // Mark all examples corresponding to the current bin as covered... + numCovered += addAllToSubset(statisticsSubset, featureVector, i); + } + + // Reset the subset, if any bins with indices `i < sparseBinIndex` have been processed... + statisticsSubset.resetSubset(); + } + + // Mark all examples corresponding to the last bin with index `i > sparseBinIndex` as covered... + uint32 numCoveredLessThanSparseBinIndex = numCovered; + numCovered = 0; + i = numBins - 1; + + if (i > sparseBinIndex) { + numCovered += addAllToSubset(statisticsSubset, featureVector, i); + } + + // Traverse bin with indices `i > sparseBinIndex` in descending order... + if (numCovered > 0) { + for (i = i - 1; i > sparseBinIndex; i--) { + // Check if a condition using the > operator covers at least `minCoverage` examples... + if (numCovered >= minCoverage) { + // Determine the best prediction for the covered examples... + const IScoreVector& scoreVector = statisticsSubset.calculateScores(); + + // Check if the quality of the prediction is better than the quality of the current rule... + if (comparator.isImprovement(scoreVector)) { + refinement.start = i + 1; + refinement.end = numBins; + refinement.inverse = false; + refinement.numCovered = numCovered; + refinement.comparator = NUMERICAL_GR; + refinement.threshold = thresholdIterator[i]; + comparator.pushRefinement(refinement, scoreVector); + } + } + + // Check if a condition using the <= operator covers at least `minCoverage` examples... + uint32 numUncovered = numExamplesWithNonZeroWeights - numCovered; + + if (numUncovered >= minCoverage) { + // Determine the best prediction for the covered examples... 
+ const IScoreVector& scoreVector = statisticsSubset.calculateScoresUncovered(); + + // Check if the quality of the prediction is better than the quality of the current rule... + if (comparator.isImprovement(scoreVector)) { + refinement.start = i + 1; + refinement.end = numBins; + refinement.inverse = true; + refinement.numCovered = numUncovered; + refinement.comparator = NUMERICAL_LEQ; + refinement.threshold = thresholdIterator[i]; + comparator.pushRefinement(refinement, scoreVector); + } + } + + // Mark all examples corresponding to the current bin as covered... + numCovered += addAllToSubset(statisticsSubset, featureVector, i); + } + } + + // Check if a condition that covers all bins with indices `i > sparseBinIndex` covers at least `minCoverage` + // examples... + if (numCovered >= minCoverage) { + // Determine the best prediction for examples covered by the condition... + const IScoreVector& scoreVector = statisticsSubset.calculateScores(); + + // Check if the quality of the prediction is better than the quality of the current rule... + if (comparator.isImprovement(scoreVector)) { + refinement.start = sparseBinIndex + 1; + refinement.end = numBins; + refinement.numCovered = numCovered; + refinement.inverse = false; + refinement.comparator = NUMERICAL_GR; + refinement.threshold = thresholdIterator[sparseBinIndex]; + comparator.pushRefinement(refinement, scoreVector); + } + } + + // Check if a condition that covers all bins with indices `i <= sparseBinIndex` covers at least `minCoverage` + // examples... + uint32 numUncovered = numExamplesWithNonZeroWeights - numCovered; + + if (numUncovered >= minCoverage) { + // Determine the best prediction for examples covered by the condition... + const IScoreVector& scoreVector = statisticsSubset.calculateScores(); + + // Check if the quality of the prediction is better than the quality of the current rule... 
+ if (comparator.isImprovement(scoreVector)) { + refinement.start = sparseBinIndex + 1; + refinement.end = numBins; + refinement.numCovered = numUncovered; + refinement.inverse = true; + refinement.comparator = NUMERICAL_LEQ; + refinement.threshold = thresholdIterator[sparseBinIndex]; + comparator.pushRefinement(refinement, scoreVector); + } + } + + // If there have been bin with indices `i < sparseBinIndex`, we must evaluate conditions that separate the examples + // corresponding to these bins from the remaining ones... + if (numCoveredLessThanSparseBinIndex > 0 && numCoveredLessThanSparseBinIndex < numExamplesWithNonZeroWeights) { + // Check if a condition that covers all bins with indices `i < sparseBinIndex` covers at least `minCoverage` + // examples... + if (numCoveredLessThanSparseBinIndex >= minCoverage) { + // Determine the best prediction for the examples covered by the condition... + const IScoreVector& scoreVector = statisticsSubset.calculateScoresAccumulated(); + + // Check if the quality of the prediction is better than the quality of the current rule... + if (comparator.isImprovement(scoreVector)) { + refinement.start = 0; + refinement.end = sparseBinIndex; + refinement.numCovered = numCoveredLessThanSparseBinIndex; + refinement.inverse = false; + refinement.comparator = NUMERICAL_LEQ; + refinement.threshold = thresholdIterator[sparseBinIndex - 1]; + comparator.pushRefinement(refinement, scoreVector); + } + } + + // Check if a condition that covers all bins with indices `i >= sparseBinIndex` covers at least `minCoverage` + // examples... + numUncovered = numExamplesWithNonZeroWeights - numCoveredLessThanSparseBinIndex; + + if (numUncovered >= minCoverage) { + // Determine the best prediction for the examples covered by the condition... + const IScoreVector& scoreVector = statisticsSubset.calculateScoresUncoveredAccumulated(); + + // Check if the quality of the prediction is better than the quality of the current rule... 
+ if (comparator.isImprovement(scoreVector)) { + refinement.start = 0; + refinement.end = sparseBinIndex; + refinement.numCovered = numUncovered; + refinement.inverse = true; + refinement.comparator = NUMERICAL_GR; + refinement.threshold = thresholdIterator[sparseBinIndex - 1]; + comparator.pushRefinement(refinement, scoreVector); + } + } + } +} diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned_common.hpp similarity index 67% rename from cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal_common.hpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned_common.hpp index 3d000840f6..8ee34ee3ad 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned_common.hpp @@ -3,21 +3,22 @@ */ #pragma once -#include "mlrl/common/input/feature_vector_nominal.hpp" #include "mlrl/common/statistics/statistics_subset_weighted.hpp" /** - * Adds all examples corresonding to a nominal feature value to a given `IWeightedStatisticsSubset`, if they have - * non-zero weights. + * Adds all examples corresonding to a single bin in a given feature vector to a given `IWeightedStatisticsSubset`, if + * they have non-zero weights. 
* + * @tparam FeatureVector The type of the feature vector * @param statisticsSubset A reference to an object of type `IWeightedStatisticsSubset` - * @param featureVector A reference to an object of type `NominalFeatureVector`´that stores the indices of the - * examples that corresond to individual feature values - * @param index The index of the nominal feature value + * @param featureVector A reference to an object of template type `FeatureVector`´that stores the indices of the + * examples that corresond to individual bins + * @param index The index of the bin * @return The number of examples with non-zero weights */ -static inline uint32 addAllToSubset(IWeightedStatisticsSubset& statisticsSubset, - const NominalFeatureVector& featureVector, uint32 index) { +template +static inline uint32 addAllToSubset(IWeightedStatisticsSubset& statisticsSubset, const FeatureVector& featureVector, + uint32 index) { NominalFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(index); NominalFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(index); uint32 numIndices = indicesEnd - indexIterator; diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal.hpp index 1d690741d7..9289498c30 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal.hpp @@ -3,8 +3,9 @@ */ #pragma once +#include "mlrl/common/input/feature_vector_nominal.hpp" #include "mlrl/common/rule_refinement/refinement.hpp" -#include "rule_refinement_search_nominal_common.hpp" +#include "rule_refinement_search_binned_common.hpp" template static inline void searchForNominalRefinementInternally(const NominalFeatureVector& featureVector, diff --git 
a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_ordinal.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_ordinal.hpp index 12483268f6..718e0a4b67 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_ordinal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_ordinal.hpp @@ -5,7 +5,7 @@ #include "mlrl/common/input/feature_vector_ordinal.hpp" #include "mlrl/common/rule_refinement/refinement.hpp" -#include "rule_refinement_search_nominal_common.hpp" +#include "rule_refinement_search_binned_common.hpp" template static inline void searchForOrdinalRefinementInternally(const OrdinalFeatureVector& featureVector, From cc45a21a58d643fa388c2a0b1ec7fc4aeff7838e Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 6 Feb 2024 22:56:36 +0100 Subject: [PATCH 10/53] Move source files from directory "binning" into directory "input". --- .../boosting/{binning => input}/feature_binning_auto.hpp | 2 +- cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp | 2 +- cpp/subprojects/boosting/meson.build | 2 +- .../boosting/{binning => input}/feature_binning_auto.cpp | 6 +++--- .../mlrl/common/{binning => input}/feature_binning.hpp | 0 .../{binning => input}/feature_binning_equal_frequency.hpp | 2 +- .../{binning => input}/feature_binning_equal_width.hpp | 2 +- .../mlrl/common/{binning => input}/feature_binning_no.hpp | 2 +- .../common/include/mlrl/common/input/feature_info.hpp | 2 +- cpp/subprojects/common/include/mlrl/common/learner.hpp | 6 +++--- .../mlrl/common/thresholds/thresholds_approximate.hpp | 2 +- .../include/mlrl/common/thresholds/thresholds_exact.hpp | 2 +- cpp/subprojects/common/meson.build | 6 +++--- .../{binning => input}/feature_binning_equal_frequency.cpp | 2 +- .../{binning => input}/feature_binning_equal_width.cpp | 2 +- .../mlrl/common/{binning => input}/feature_binning_no.cpp | 2 +- .../common/{binning => 
input}/feature_binning_nominal.hpp | 2 +- .../common/mlrl/common/cython/feature_binning.pxd | 4 ++-- 18 files changed, 24 insertions(+), 24 deletions(-) rename cpp/subprojects/boosting/include/mlrl/boosting/{binning => input}/feature_binning_auto.hpp (92%) rename cpp/subprojects/boosting/src/mlrl/boosting/{binning => input}/feature_binning_auto.cpp (75%) rename cpp/subprojects/common/include/mlrl/common/{binning => input}/feature_binning.hpp (100%) rename cpp/subprojects/common/include/mlrl/common/{binning => input}/feature_binning_equal_frequency.hpp (98%) rename cpp/subprojects/common/include/mlrl/common/{binning => input}/feature_binning_equal_width.hpp (98%) rename cpp/subprojects/common/include/mlrl/common/{binning => input}/feature_binning_no.hpp (89%) rename cpp/subprojects/common/src/mlrl/common/{binning => input}/feature_binning_equal_frequency.cpp (99%) rename cpp/subprojects/common/src/mlrl/common/{binning => input}/feature_binning_equal_width.cpp (99%) rename cpp/subprojects/common/src/mlrl/common/{binning => input}/feature_binning_no.cpp (90%) rename cpp/subprojects/common/src/mlrl/common/{binning => input}/feature_binning_nominal.hpp (98%) diff --git a/cpp/subprojects/boosting/include/mlrl/boosting/binning/feature_binning_auto.hpp b/cpp/subprojects/boosting/include/mlrl/boosting/input/feature_binning_auto.hpp similarity index 92% rename from cpp/subprojects/boosting/include/mlrl/boosting/binning/feature_binning_auto.hpp rename to cpp/subprojects/boosting/include/mlrl/boosting/input/feature_binning_auto.hpp index 3beecaa631..679ddb88f5 100644 --- a/cpp/subprojects/boosting/include/mlrl/boosting/binning/feature_binning_auto.hpp +++ b/cpp/subprojects/boosting/include/mlrl/boosting/input/feature_binning_auto.hpp @@ -3,7 +3,7 @@ */ #pragma once -#include "mlrl/common/binning/feature_binning.hpp" +#include "mlrl/common/input/feature_binning.hpp" namespace boosting { diff --git a/cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp 
b/cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp index 2a6cc53080..eb369e70ad 100644 --- a/cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp +++ b/cpp/subprojects/boosting/include/mlrl/boosting/learner.hpp @@ -8,10 +8,10 @@ #pragma warning(disable : 4250) #endif -#include "mlrl/boosting/binning/feature_binning_auto.hpp" #include "mlrl/boosting/binning/label_binning_auto.hpp" #include "mlrl/boosting/binning/label_binning_equal_width.hpp" #include "mlrl/boosting/binning/label_binning_no.hpp" +#include "mlrl/boosting/input/feature_binning_auto.hpp" #include "mlrl/boosting/losses/loss_example_wise_logistic.hpp" #include "mlrl/boosting/losses/loss_example_wise_squared_error.hpp" #include "mlrl/boosting/losses/loss_example_wise_squared_hinge.hpp" diff --git a/cpp/subprojects/boosting/meson.build b/cpp/subprojects/boosting/meson.build index e97c1bc0b3..a6cc02889d 100644 --- a/cpp/subprojects/boosting/meson.build +++ b/cpp/subprojects/boosting/meson.build @@ -2,7 +2,6 @@ project('boosting', 'cpp') # Source files source_files = [ - 'src/mlrl/boosting/binning/feature_binning_auto.cpp', 'src/mlrl/boosting/binning/label_binning_auto.cpp', 'src/mlrl/boosting/binning/label_binning_equal_width.cpp', 'src/mlrl/boosting/binning/label_binning_no.cpp', @@ -13,6 +12,7 @@ source_files = [ 'src/mlrl/boosting/data/vector_statistic_label_wise_sparse.cpp', 'src/mlrl/boosting/data/view_histogram_label_wise_sparse.cpp', 'src/mlrl/boosting/data/view_statistic_example_wise_dense.cpp', + 'src/mlrl/boosting/input/feature_binning_auto.cpp', 'src/mlrl/boosting/losses/loss_example_wise_logistic.cpp', 'src/mlrl/boosting/losses/loss_example_wise_squared_error.cpp', 'src/mlrl/boosting/losses/loss_example_wise_squared_hinge.cpp', diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/binning/feature_binning_auto.cpp b/cpp/subprojects/boosting/src/mlrl/boosting/input/feature_binning_auto.cpp similarity index 75% rename from 
cpp/subprojects/boosting/src/mlrl/boosting/binning/feature_binning_auto.cpp rename to cpp/subprojects/boosting/src/mlrl/boosting/input/feature_binning_auto.cpp index 6131cc618c..f3e1b4e0e3 100644 --- a/cpp/subprojects/boosting/src/mlrl/boosting/binning/feature_binning_auto.cpp +++ b/cpp/subprojects/boosting/src/mlrl/boosting/input/feature_binning_auto.cpp @@ -1,7 +1,7 @@ -#include "mlrl/boosting/binning/feature_binning_auto.hpp" +#include "mlrl/boosting/input/feature_binning_auto.hpp" -#include "mlrl/common/binning/feature_binning_equal_width.hpp" -#include "mlrl/common/binning/feature_binning_no.hpp" +#include "mlrl/common/input/feature_binning_equal_width.hpp" +#include "mlrl/common/input/feature_binning_no.hpp" namespace boosting { diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_binning.hpp similarity index 100% rename from cpp/subprojects/common/include/mlrl/common/binning/feature_binning.hpp rename to cpp/subprojects/common/include/mlrl/common/input/feature_binning.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_frequency.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_binning_equal_frequency.hpp similarity index 98% rename from cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_frequency.hpp rename to cpp/subprojects/common/include/mlrl/common/input/feature_binning_equal_frequency.hpp index 1bd0026a3a..33ee09eb9f 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_frequency.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_binning_equal_frequency.hpp @@ -4,7 +4,7 @@ */ #pragma once -#include "mlrl/common/binning/feature_binning.hpp" +#include "mlrl/common/input/feature_binning.hpp" /** * Defines an interface for all classes that allow to configure a method that assigns numerical feature values to bins, diff --git 
a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_width.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_binning_equal_width.hpp similarity index 98% rename from cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_width.hpp rename to cpp/subprojects/common/include/mlrl/common/input/feature_binning_equal_width.hpp index 7e5275838c..8179018082 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_equal_width.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_binning_equal_width.hpp @@ -4,7 +4,7 @@ */ #pragma once -#include "mlrl/common/binning/feature_binning.hpp" +#include "mlrl/common/input/feature_binning.hpp" /** * Defines an interface for all classes that allow to configure a method that assigns numerical feature values to bins, diff --git a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_no.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_binning_no.hpp similarity index 89% rename from cpp/subprojects/common/include/mlrl/common/binning/feature_binning_no.hpp rename to cpp/subprojects/common/include/mlrl/common/input/feature_binning_no.hpp index 8cb6740ae1..69ddbc9e58 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/feature_binning_no.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_binning_no.hpp @@ -3,7 +3,7 @@ */ #pragma once -#include "mlrl/common/binning/feature_binning.hpp" +#include "mlrl/common/input/feature_binning.hpp" /** * Allows to configure a method that does not actually perform any feature binning. 
diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_info.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_info.hpp index 5b60b0128f..2a84b3bcd7 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_info.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_info.hpp @@ -3,8 +3,8 @@ */ #pragma once -#include "mlrl/common/binning/feature_binning.hpp" #include "mlrl/common/data/types.hpp" +#include "mlrl/common/input/feature_binning.hpp" #include "mlrl/common/input/feature_type.hpp" #include "mlrl/common/util/dll_exports.hpp" diff --git a/cpp/subprojects/common/include/mlrl/common/learner.hpp b/cpp/subprojects/common/include/mlrl/common/learner.hpp index a28fd88450..7878bf2a6f 100644 --- a/cpp/subprojects/common/include/mlrl/common/learner.hpp +++ b/cpp/subprojects/common/include/mlrl/common/learner.hpp @@ -3,9 +3,9 @@ */ #pragma once -#include "mlrl/common/binning/feature_binning_equal_frequency.hpp" -#include "mlrl/common/binning/feature_binning_equal_width.hpp" -#include "mlrl/common/binning/feature_binning_no.hpp" +#include "mlrl/common/input/feature_binning_equal_frequency.hpp" +#include "mlrl/common/input/feature_binning_equal_width.hpp" +#include "mlrl/common/input/feature_binning_no.hpp" #include "mlrl/common/input/feature_info.hpp" #include "mlrl/common/input/feature_matrix_column_wise.hpp" #include "mlrl/common/input/feature_matrix_row_wise.hpp" diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_approximate.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_approximate.hpp index 14b6082141..7656750d85 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_approximate.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_approximate.hpp @@ -4,7 +4,7 @@ */ #pragma once -#include "mlrl/common/binning/feature_binning.hpp" +#include "mlrl/common/input/feature_binning.hpp" #include 
"mlrl/common/thresholds/thresholds.hpp" /** diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp index 9a827df02c..3fb7af2d3d 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp @@ -3,7 +3,7 @@ */ #pragma once -#include "mlrl/common/binning/feature_binning.hpp" +#include "mlrl/common/input/feature_binning.hpp" #include "mlrl/common/multi_threading/multi_threading.hpp" #include "mlrl/common/thresholds/thresholds.hpp" diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 4ac7df3604..e728634ad7 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -5,14 +5,14 @@ source_files = [ 'src/mlrl/common/binning/bin_index_vector_dense.cpp', 'src/mlrl/common/binning/bin_index_vector_dok.cpp', 'src/mlrl/common/binning/bin_weight_vector.cpp', - 'src/mlrl/common/binning/feature_binning_equal_frequency.cpp', - 'src/mlrl/common/binning/feature_binning_equal_width.cpp', - 'src/mlrl/common/binning/feature_binning_no.cpp', 'src/mlrl/common/binning/threshold_vector.cpp', 'src/mlrl/common/data/vector_bit.cpp', 'src/mlrl/common/indices/index_iterator.cpp', 'src/mlrl/common/indices/index_vector_complete.cpp', 'src/mlrl/common/indices/index_vector_partial.cpp', + 'src/mlrl/common/input/feature_binning_equal_frequency.cpp', + 'src/mlrl/common/input/feature_binning_equal_width.cpp', + 'src/mlrl/common/input/feature_binning_no.cpp', 'src/mlrl/common/input/feature_info_equal.cpp', 'src/mlrl/common/input/feature_info_mixed.cpp', 'src/mlrl/common/input/feature_matrix_c_contiguous.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp similarity index 99% rename from 
cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp rename to cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp index 2304861f3c..c92b873b4d 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_frequency.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp @@ -1,4 +1,4 @@ -#include "mlrl/common/binning/feature_binning_equal_frequency.hpp" +#include "mlrl/common/input/feature_binning_equal_frequency.hpp" #include "feature_binning_nominal.hpp" #include "mlrl/common/binning/bin_index_vector_dense.hpp" diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp similarity index 99% rename from cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp rename to cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp index a0a9924bd5..2b81a26e20 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_equal_width.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp @@ -1,4 +1,4 @@ -#include "mlrl/common/binning/feature_binning_equal_width.hpp" +#include "mlrl/common/input/feature_binning_equal_width.hpp" #include "feature_binning_nominal.hpp" #include "mlrl/common/binning/bin_index_vector_dense.hpp" diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_no.cpp similarity index 90% rename from cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp rename to cpp/subprojects/common/src/mlrl/common/input/feature_binning_no.cpp index 7b330b2f92..580bf60a0f 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_no.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_no.cpp @@ -1,4 +1,4 @@ -#include 
"mlrl/common/binning/feature_binning_no.hpp" +#include "mlrl/common/input/feature_binning_no.hpp" /** * Allows to create instances of the type `IFeatureBinning` that do not actualy perform any feature binning. diff --git a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_nominal.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_nominal.hpp similarity index 98% rename from cpp/subprojects/common/src/mlrl/common/binning/feature_binning_nominal.hpp rename to cpp/subprojects/common/src/mlrl/common/input/feature_binning_nominal.hpp index 2de5cb6a48..f8bd073677 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/feature_binning_nominal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_nominal.hpp @@ -5,7 +5,7 @@ #include "mlrl/common/binning/bin_index_vector_dense.hpp" #include "mlrl/common/binning/bin_index_vector_dok.hpp" -#include "mlrl/common/binning/feature_binning.hpp" +#include "mlrl/common/input/feature_binning.hpp" #include diff --git a/python/subprojects/common/mlrl/common/cython/feature_binning.pxd b/python/subprojects/common/mlrl/common/cython/feature_binning.pxd index 07c28256a2..00914f4778 100644 --- a/python/subprojects/common/mlrl/common/cython/feature_binning.pxd +++ b/python/subprojects/common/mlrl/common/cython/feature_binning.pxd @@ -1,7 +1,7 @@ from mlrl.common.cython._types cimport float32, uint32 -cdef extern from "mlrl/common/binning/feature_binning_equal_width.hpp" nogil: +cdef extern from "mlrl/common/input/feature_binning_equal_width.hpp" nogil: cdef cppclass IEqualWidthFeatureBinningConfig: @@ -20,7 +20,7 @@ cdef extern from "mlrl/common/binning/feature_binning_equal_width.hpp" nogil: uint32 getMaxBins() const -cdef extern from "mlrl/common/binning/feature_binning_equal_frequency.hpp" nogil: +cdef extern from "mlrl/common/input/feature_binning_equal_frequency.hpp" nogil: cdef cppclass IEqualFrequencyFeatureBinningConfig: From f9a5ff85fad5d91729a438d8ca597a00379331e4 Mon Sep 17 
00:00:00 2001 From: Michael Rapp Date: Tue, 6 Feb 2024 22:57:17 +0100 Subject: [PATCH 11/53] Implement "createFeatureVector" functions of the class EqualFrequencyFeatureBinning. --- cpp/subprojects/common/meson.build | 1 + .../input/feature_binning_equal_frequency.cpp | 153 ++++++++++++++- .../common/input/feature_type_numerical.cpp | 47 +---- .../input/feature_type_numerical_common.hpp | 56 ++++++ .../input/feature_binning_equal_frequency.cpp | 183 ++++++++++++++++++ 5 files changed, 390 insertions(+), 50 deletions(-) create mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical_common.hpp create mode 100644 cpp/subprojects/common/test/mlrl/common/input/feature_binning_equal_frequency.cpp diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index e728634ad7..d2a47b25be 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -107,6 +107,7 @@ test_files = [ 'test/mlrl/common/data/array.cpp', 'test/mlrl/common/data/vector_bit.cpp', 'test/mlrl/common/data/vector_dense.cpp', + 'test/mlrl/common/input/feature_binning_equal_frequency.cpp', 'test/mlrl/common/input/feature_type_nominal.cpp', 'test/mlrl/common/input/feature_type_numerical.cpp', 'test/mlrl/common/input/feature_type_ordinal.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp index c92b873b4d..1f905e2744 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp @@ -1,11 +1,125 @@ #include "mlrl/common/input/feature_binning_equal_frequency.hpp" #include "feature_binning_nominal.hpp" +#include "feature_type_numerical_common.hpp" +#include "feature_vector_decorator_binned.hpp" #include "mlrl/common/binning/bin_index_vector_dense.hpp" #include 
"mlrl/common/binning/bin_index_vector_dok.hpp" #include "mlrl/common/util/math.hpp" #include "mlrl/common/util/validation.hpp" +static inline std::unique_ptr createFeatureVectorInternally( + AllocatedMissingFeatureVector&& missingFeatureVector, const NumericalFeatureVector& numericalFeatureVector, + uint32 numExamples, float32 binRatio, uint32 minBins, uint32 maxBins) { + uint32 numBins = calculateBoundedFraction(numExamples, binRatio, minBins, maxBins); + + if (numBins > 1) { + uint32 numElements = numericalFeatureVector.numElements; + AllocatedBinnedFeatureVector binnedFeatureVector(numBins, numElements); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = binnedFeatureVector.thresholds_begin(); + AllocatedBinnedFeatureVector::index_iterator indexIterator = binnedFeatureVector.indices; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = binnedFeatureVector.indptr; + uint32 numElementsPerBin = (uint32) std::ceil((float64) numElements / (float64) numBins); + bool sparse = numericalFeatureVector.sparse; + float32 sparseValue = numericalFeatureVector.sparseValue; + float32 previousValue = sparseValue; + uint32 numElementsInCurrentBin = 0; + uint32 binIndex = 0; + uint32 numIndices = 0; + uint32 i = 0; + + // Iterate feature values `f < sparseValue`... + for (; i < numElements; i++) { + const IndexedValue& entry = numericalFeatureVector[i]; + float32 currentValue = entry.value; + + if (currentValue >= sparseValue) { + break; + } + + // Feature values that are equal to the previous one must not be assigned to a new bin... + if (!isEqual(currentValue, previousValue)) { + // Check, if the bin is fully occupied... 
+ if (numElementsInCurrentBin >= numElementsPerBin) { + thresholdIterator[binIndex] = arithmeticMean(previousValue, currentValue); + indptrIterator[binIndex + 1] = numIndices; + numElementsInCurrentBin = 0; + binIndex++; + } + + previousValue = currentValue; + } + + indexIterator[numIndices] = entry.index; + numElementsInCurrentBin++; + numIndices++; + } + + // If there are any sparse values, check if they belong to the current one or the next one... + if (sparse) { + uint32 numSparseValues = numExamples - numElements; + + if (numElementsInCurrentBin >= numElementsPerBin) { + // The sparse values belong to the next bin... + thresholdIterator[binIndex] = arithmeticMean(previousValue, sparseValue); + indptrIterator[binIndex + 1] = numIndices; + numElementsInCurrentBin = numSparseValues; + binIndex++; + } else { + // The sparse values belong to the current bin... + numIndices -= numElementsInCurrentBin; + numElementsInCurrentBin += numSparseValues; + } + + // If the current bin is not fully occupied yet, the subsequent values do also belong to it... + previousValue = sparseValue; + + // Skip feature values that are equal to the previous one... + for (; i < numElements; i++) { + if (numericalFeatureVector[i].value != previousValue) { + break; + } + + numElementsInCurrentBin++; + } + } + + // Set the index of the sparse bin... + binnedFeatureVector.sparseBinIndex = binIndex; + + // Iterate feature values `f >= sparseValue`... + for (; i < numElements; i++) { + const IndexedValue& entry = numericalFeatureVector[i]; + float32 currentValue = entry.value; + + // Feature values that are equal to the previous one must not be assigned to a new bin... + if (!isEqual(currentValue, previousValue)) { + // Check, if the bin is fully occupied... 
+ if (numElementsInCurrentBin >= numElementsPerBin) { + thresholdIterator[binIndex] = arithmeticMean(previousValue, currentValue); + indptrIterator[binIndex + 1] = numIndices; + numElementsInCurrentBin = 0; + binIndex++; + } + + previousValue = currentValue; + } + + indexIterator[numIndices] = entry.index; + numElementsInCurrentBin++; + numIndices++; + } + + if (binIndex > 0) { + binnedFeatureVector.resize(binIndex + 1, numIndices); + return std::make_unique(std::move(binnedFeatureVector), + std::move(missingFeatureVector)); + } + } + + return std::make_unique(); +} + static inline uint32 getNumBins(FeatureVector& featureVector, bool sparse, float32 binRatio, uint32 minBins, uint32 maxBins) { uint32 numElements = featureVector.getNumElements(); @@ -162,14 +276,45 @@ class EqualFrequencyFeatureBinning final : public IFeatureBinning { std::unique_ptr createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const override { - // TODO Implement - return nullptr; + // Create a numerical feature vector from the given feature matrix... + const std::unique_ptr featureVectorDecoratorPtr = + createNumericalFeatureVector(featureIndex, featureMatrix); + + // Check if all feature values are equal... + const NumericalFeatureVector& numericalFeatureVector = featureVectorDecoratorPtr->getView().firstView; + uint32 numElements = numericalFeatureVector.numElements; + + if (numElements > 0 + && !isEqual(numericalFeatureVector[0].value, numericalFeatureVector[numElements - 1].value)) { + return createFeatureVectorInternally(std::move(featureVectorDecoratorPtr->getView().secondView), + numericalFeatureVector, featureMatrix.numRows, binRatio_, minBins_, + maxBins_); + } + + return std::make_unique(); } std::unique_ptr createFeatureVector( uint32 featureIndex, const CscView& featureMatrix) const override { - // TODO Implement - return nullptr; + // Create a numerical feature vector from the given feature matrix... 
+ const std::unique_ptr featureVectorDecoratorPtr = + createNumericalFeatureVector(featureIndex, featureMatrix); + + // Check if all feature values are equal... + NumericalFeatureVector& numericalFeatureVector = featureVectorDecoratorPtr->getView().firstView; + uint32 numElements = numericalFeatureVector.numElements; + uint32 numExamples = featureMatrix.numRows; + + if (numElements > 0 + && (numElements < numExamples + || !isEqual(numericalFeatureVector[0].value, numericalFeatureVector[numElements - 1].value))) { + numericalFeatureVector.sparse = numElements < numExamples; + return createFeatureVectorInternally(std::move(featureVectorDecoratorPtr->getView().secondView), + numericalFeatureVector, numExamples, binRatio_, minBins_, + maxBins_); + } + + return std::make_unique(); } }; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp index c8d796dc4f..f99ed75a62 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp @@ -1,56 +1,11 @@ #include "mlrl/common/input/feature_type_numerical.hpp" -#include "feature_vector_decorator_numerical.hpp" +#include "feature_type_numerical_common.hpp" #include "mlrl/common/input/feature_vector_equal.hpp" #include "mlrl/common/iterator/index_iterator.hpp" #include -template -static inline std::unique_ptr createNumericalFeatureVector(IndexIterator indexIterator, - ValueIterator valueIterator, - uint32 numElements) { - AllocatedNumericalFeatureVector numericalFeatureVector(numElements); - AllocatedMissingFeatureVector missingFeatureVector; - uint32 n = 0; - - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indexIterator[i]; - float32 value = valueIterator[i]; - - if (std::isnan(value)) { - missingFeatureVector.set(index, true); - } else { - IndexedValue& entry = numericalFeatureVector[n]; - entry.index = index; - 
entry.value = value; - n++; - } - } - - numericalFeatureVector.resize(n, true); - std::sort(numericalFeatureVector.begin(), numericalFeatureVector.end(), IndexedValue::CompareValue()); - return std::make_unique(std::move(numericalFeatureVector), - std::move(missingFeatureVector)); -} - -static inline std::unique_ptr createNumericalFeatureVector( - uint32 featureIndex, const FortranContiguousView& featureMatrix) { - FortranContiguousView::value_const_iterator valueIterator = - featureMatrix.values_cbegin(featureIndex); - uint32 numRows = featureMatrix.numRows; - return createNumericalFeatureVector(IndexIterator(), valueIterator, numRows); -} - -static inline std::unique_ptr createNumericalFeatureVector( - uint32 featureIndex, const CscView& featureMatrix) { - CscView::index_const_iterator indexIterator = featureMatrix.indices_cbegin(featureIndex); - CscView::index_const_iterator indicesEnd = featureMatrix.indices_cend(featureIndex); - CscView::value_const_iterator valueIterator = featureMatrix.values_cbegin(featureIndex); - uint32 numIndices = indicesEnd - indexIterator; - return createNumericalFeatureVector(indexIterator, valueIterator, numIndices); -} - static inline std::unique_ptr createFeatureVectorInternally( uint32 featureIndex, const FortranContiguousView& featureMatrix) { std::unique_ptr featureVectorDecoratorPtr = diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical_common.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical_common.hpp new file mode 100644 index 0000000000..ef70ab5bca --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical_common.hpp @@ -0,0 +1,56 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "feature_vector_decorator_numerical.hpp" +#include "mlrl/common/data/view_matrix_csc.hpp" +#include "mlrl/common/data/view_matrix_fortran_contiguous.hpp" +#include "mlrl/common/iterator/index_iterator.hpp" + +#include + 
+template +static inline std::unique_ptr createNumericalFeatureVector(IndexIterator indexIterator, + ValueIterator valueIterator, + uint32 numElements) { + AllocatedNumericalFeatureVector numericalFeatureVector(numElements); + AllocatedMissingFeatureVector missingFeatureVector; + uint32 n = 0; + + for (uint32 i = 0; i < numElements; i++) { + uint32 index = indexIterator[i]; + float32 value = valueIterator[i]; + + if (std::isnan(value)) { + missingFeatureVector.set(index, true); + } else { + IndexedValue& entry = numericalFeatureVector[n]; + entry.index = index; + entry.value = value; + n++; + } + } + + numericalFeatureVector.resize(n, true); + std::sort(numericalFeatureVector.begin(), numericalFeatureVector.end(), IndexedValue::CompareValue()); + return std::make_unique(std::move(numericalFeatureVector), + std::move(missingFeatureVector)); +} + +static inline std::unique_ptr createNumericalFeatureVector( + uint32 featureIndex, const FortranContiguousView& featureMatrix) { + FortranContiguousView::value_const_iterator valueIterator = + featureMatrix.values_cbegin(featureIndex); + uint32 numRows = featureMatrix.numRows; + return createNumericalFeatureVector(IndexIterator(), valueIterator, numRows); +} + +static inline std::unique_ptr createNumericalFeatureVector( + uint32 featureIndex, const CscView& featureMatrix) { + CscView::index_const_iterator indexIterator = featureMatrix.indices_cbegin(featureIndex); + CscView::index_const_iterator indicesEnd = featureMatrix.indices_cend(featureIndex); + CscView::value_const_iterator valueIterator = featureMatrix.values_cbegin(featureIndex); + uint32 numIndices = indicesEnd - indexIterator; + return createNumericalFeatureVector(indexIterator, valueIterator, numIndices); +} diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_binning_equal_frequency.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_binning_equal_frequency.cpp new file mode 100644 index 0000000000..79e865bd46 --- /dev/null +++ 
b/cpp/subprojects/common/test/mlrl/common/input/feature_binning_equal_frequency.cpp @@ -0,0 +1,183 @@ +#include "mlrl/common/input/feature_binning_equal_frequency.hpp" + +#include "mlrl/common/input/feature_binning_equal_frequency.cpp" +#include "mlrl/common/input/feature_vector_equal.hpp" + +#include + +TEST(EqualFrequencyFeatureBinningTest, createBinnedFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... + uint32 numExamples = 7; + AllocatedFortranContiguousView featureView(numExamples, 1); + AllocatedFortranContiguousView::value_iterator features = featureView.values_begin(0); + features[0] = 0.2; + features[1] = -0.1; + features[2] = NAN; + features[3] = -0.2; + features[4] = 0.0; + features[5] = NAN; + features[6] = 0.1; + FortranContiguousView view(features, numExamples, 1); + + // Create feature vector... + std::unique_ptr featureVectorPtr = + EqualFrequencyFeatureBinning(0.5, 1, 0).createFeatureVector(0, view); + + // Check type of feature vector... + const AbstractFeatureVectorDecorator* featureVectorDecorator = + dynamic_cast*>(featureVectorPtr.get()); + EXPECT_TRUE(featureVectorDecorator != nullptr); + + if (featureVectorDecorator) { + // Check for missing feature values... + const MissingFeatureVector& missingFeatureVector = featureVectorDecorator->getView().secondView; + EXPECT_TRUE(missingFeatureVector[2]); + EXPECT_TRUE(missingFeatureVector[5]); + + // Check dimensionality of feature vector... + const BinnedFeatureVector& featureVector = featureVectorDecorator->getView().firstView; + EXPECT_EQ(featureVector.numBins, (uint32) 3); + EXPECT_EQ(featureVector.sparseBinIndex, (uint32) 0); + + // Check thresholds and indices associated with each bin... 
+ BinnedFeatureVector::threshold_const_iterator thresholdIterator = featureVector.thresholds_cbegin(); + BinnedFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(0); + BinnedFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(0); + uint32 numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 2); + EXPECT_EQ(indexIterator[0], (uint32) 3); + EXPECT_EQ(indexIterator[1], (uint32) 1); + EXPECT_FLOAT_EQ(thresholdIterator[0], arithmeticMean(features[1], features[4])); + + indexIterator = featureVector.indices_cbegin(1); + indicesEnd = featureVector.indices_cend(1); + numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 2); + EXPECT_EQ(indexIterator[0], (uint32) 4); + EXPECT_EQ(indexIterator[1], (uint32) 6); + EXPECT_FLOAT_EQ(thresholdIterator[1], arithmeticMean(features[6], features[0])); + + indexIterator = featureVector.indices_cbegin(2); + indicesEnd = featureVector.indices_cend(2); + numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 1); + EXPECT_EQ(indexIterator[0], (uint32) 0); + } +} + +TEST(EqualFrequencyFeatureBinningTest, createEqualFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... + uint32 numExamples = 1; + AllocatedFortranContiguousView featureView(numExamples, 1); + AllocatedFortranContiguousView::value_iterator features = featureView.values_begin(0); + features[0] = 0.0; + FortranContiguousView view(features, numExamples, 1); + + // Create feature vector... + std::unique_ptr featureVectorPtr = + EqualFrequencyFeatureBinning(0.5, 1, 0).createFeatureVector(0, view); + + // Check type of feature vector... + const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); +} + +TEST(EqualFrequencyFeatureBinningTest, createBinnedFeatureVectorFromCscView) { + // Initialize feature matrix... 
+ uint32 numDense = 7; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 0.2; + rowIndices[0] = 0; + data[1] = -0.1; + rowIndices[1] = 2; + data[2] = NAN; + rowIndices[2] = 3; + data[3] = -0.2; + rowIndices[3] = 5; + data[4] = 0.0; + rowIndices[4] = 6; + data[5] = NAN; + rowIndices[5] = 7; + data[6] = 0.1; + rowIndices[6] = 9; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscView view(data, rowIndices, indptr, numDense + 3, 1); + + // Create feature vector... + std::unique_ptr featureVectorPtr = + EqualFrequencyFeatureBinning(0.3, 1, 0).createFeatureVector(0, view); + + // Check type of feature vector... + const AbstractFeatureVectorDecorator* featureVectorDecorator = + dynamic_cast*>(featureVectorPtr.get()); + EXPECT_TRUE(featureVectorDecorator != nullptr); + + if (featureVectorDecorator) { + // Check for missing feature values... + const MissingFeatureVector& missingFeatureVector = featureVectorDecorator->getView().secondView; + EXPECT_TRUE(missingFeatureVector[3]); + EXPECT_TRUE(missingFeatureVector[7]); + + // Check dimensionality of feature vector... + const BinnedFeatureVector& featureVector = featureVectorDecorator->getView().firstView; + EXPECT_EQ(featureVector.numBins, (uint32) 3); + EXPECT_EQ(featureVector.sparseBinIndex, (uint32) 1); + + // Check thresholds and indices associated with each bin... 
+ BinnedFeatureVector::threshold_const_iterator thresholdIterator = featureVector.thresholds_cbegin(); + BinnedFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(0); + BinnedFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(0); + uint32 numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 2); + EXPECT_EQ(indexIterator[0], (uint32) 5); + EXPECT_EQ(indexIterator[1], (uint32) 2); + EXPECT_FLOAT_EQ(thresholdIterator[0], arithmeticMean(data[1], 0.0f)); + + indexIterator = featureVector.indices_cbegin(1); + indicesEnd = featureVector.indices_cend(1); + numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 0); + EXPECT_FLOAT_EQ(thresholdIterator[1], arithmeticMean(0.0f, data[6])); + + indexIterator = featureVector.indices_cbegin(2); + indicesEnd = featureVector.indices_cend(2); + numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 2); + EXPECT_EQ(indexIterator[0], (uint32) 9); + EXPECT_EQ(indexIterator[1], (uint32) 0); + } + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(EqualFrequencyFeatureBinningTest, createEqualFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 2; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 0.1; + rowIndices[0] = 0; + data[1] = 0.1; + rowIndices[1] = 1; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscView view(data, rowIndices, indptr, numDense, 1); + + // Create feature vector... + std::unique_ptr featureVectorPtr = + EqualFrequencyFeatureBinning(0.3, 1, 0).createFeatureVector(0, view); + + // Check type of feature vector... 
+ const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} From 57b4c698c37ab38227e74a4b68af7ffe4d9a6695 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Sun, 18 Feb 2024 21:49:27 +0100 Subject: [PATCH 12/53] Implement "createFeatureVector" functions of the class EqualWidthFeatureBinning. --- cpp/subprojects/common/meson.build | 1 + .../input/feature_binning_equal_width.cpp | 163 +++++++++++++++- .../input/feature_binning_equal_width.cpp | 178 ++++++++++++++++++ 3 files changed, 333 insertions(+), 9 deletions(-) create mode 100644 cpp/subprojects/common/test/mlrl/common/input/feature_binning_equal_width.cpp diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index d2a47b25be..6a8c879160 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -108,6 +108,7 @@ test_files = [ 'test/mlrl/common/data/vector_bit.cpp', 'test/mlrl/common/data/vector_dense.cpp', 'test/mlrl/common/input/feature_binning_equal_frequency.cpp', + 'test/mlrl/common/input/feature_binning_equal_width.cpp', 'test/mlrl/common/input/feature_type_nominal.cpp', 'test/mlrl/common/input/feature_type_numerical.cpp', 'test/mlrl/common/input/feature_type_ordinal.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp index 2b81a26e20..4c35779792 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp @@ -1,15 +1,134 @@ #include "mlrl/common/input/feature_binning_equal_width.hpp" #include "feature_binning_nominal.hpp" +#include "feature_type_numerical_common.hpp" +#include "feature_vector_decorator_binned.hpp" #include "mlrl/common/binning/bin_index_vector_dense.hpp" #include 
"mlrl/common/binning/bin_index_vector_dok.hpp" #include "mlrl/common/data/array.hpp" +#include "mlrl/common/data/tuple.hpp" #include "mlrl/common/util/math.hpp" #include "mlrl/common/util/validation.hpp" #include #include +static inline Tuple getMinAndMaxFeatureValue(const NumericalFeatureVector& numericalFeatureVector) { + uint32 numElements = numericalFeatureVector.numElements; + float32 min; + uint32 i; + + if (numericalFeatureVector.sparse) { + min = numericalFeatureVector.sparseValue; + i = 0; + } else { + min = numericalFeatureVector[0].value; + i = 1; + } + + float32 max = min; + + for (; i < numElements; i++) { + float32 currentValue = numericalFeatureVector[i].value; + + if (currentValue < min) { + min = currentValue; + } else if (currentValue > max) { + max = currentValue; + } + } + + return Tuple(min, max); +} + +static inline constexpr uint32 getBinIndex(float32 value, float32 min, float32 width, uint32 numBins) { + uint32 binIndex = (uint32) std::floor((value - min) / width); + return binIndex >= numBins ? 
numBins - 1 : binIndex; +} + +static inline std::unique_ptr createFeatureVectorInternally( + AllocatedMissingFeatureVector&& missingFeatureVector, const NumericalFeatureVector& numericalFeatureVector, + uint32 numExamples, float32 binRatio, uint32 minBins, uint32 maxBins) { + uint32 numWidths = calculateBoundedFraction(numExamples, binRatio, minBins, maxBins); + + if (numWidths > 0) { + const Tuple tuple = getMinAndMaxFeatureValue(numericalFeatureVector); + float32 min = tuple.first; + float32 max = tuple.second; + float32 width = (max - min) / numWidths; + uint32 numElements = numericalFeatureVector.numElements; + float32 sparseValue = numericalFeatureVector.sparseValue; + uint32 sparseBinIndex = getBinIndex(sparseValue, min, width, numWidths); + AllocatedBinnedFeatureVector binnedFeatureVector(numWidths, numElements, sparseBinIndex); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = binnedFeatureVector.thresholds_begin(); + AllocatedBinnedFeatureVector::index_iterator indptrIterator = binnedFeatureVector.indptr; + + // Iterate all non-sparse feature values and determine the bins they should be assigned to... + Array numExamplesPerBin(numWidths, true); + + for (uint32 i = 0; i < numElements; i++) { + float32 currentValue = numericalFeatureVector[i].value; + uint32 binIndex = getBinIndex(currentValue, min, width, numWidths); + + if (binIndex != sparseBinIndex) { + numExamplesPerBin[binIndex]++; + } + } + + // Remove empty bins and calculate thresholds... 
+ Array mapping(numWidths); + uint32 numIndices = 0; + uint32 numBins = 0; + + for (uint32 i = 0; i < numWidths; i++) { + uint32 numExamplesInCurrentBin = numExamplesPerBin[i]; + + if (i == sparseBinIndex || numExamplesInCurrentBin > 0) { + thresholdIterator[numBins] = min + ((numBins + 1) * width); + indptrIterator[numBins] = numIndices; + + if (i != sparseBinIndex) { + numIndices += numExamplesInCurrentBin; + } else { + binnedFeatureVector.sparseBinIndex = numBins; + } + + mapping[i] = numBins; + numBins++; + } else { + mapping[i] = sparseBinIndex; + } + } + + // Set the indices of the examples that have been assigned to each bin... + for (uint32 i = 0; i < numElements; i++) { + const IndexedValue& entry = numericalFeatureVector[i]; + float32 currentValue = entry.value; + uint32 originalBinIndex = getBinIndex(currentValue, min, width, numWidths); + + if (originalBinIndex != sparseBinIndex) { + uint32 binIndex = mapping[originalBinIndex]; + + if (binIndex != binnedFeatureVector.sparseBinIndex) { + uint32 numExamplesInCurrentBin = numExamplesPerBin[originalBinIndex]; + uint32 numRemaining = numExamplesInCurrentBin - 1; + numExamplesPerBin[originalBinIndex] = numRemaining; + BinnedFeatureVector::index_iterator indexIterator = binnedFeatureVector.indices_begin(binIndex); + indexIterator[numRemaining] = entry.index; + } + } + } + + if (numBins > 1) { + binnedFeatureVector.resize(numBins, numIndices); + return std::make_unique(std::move(binnedFeatureVector), + std::move(missingFeatureVector)); + } + } + + return std::make_unique(); +} + static inline std::tuple preprocess(const FeatureVector& featureVector, bool sparse, float32 binRatio, uint32 minBins, uint32 maxBins) { std::tuple result; @@ -59,11 +178,6 @@ static inline std::tuple preprocess(const FeatureVecto return result; } -static inline uint32 getBinIndex(float32 value, float32 min, float32 width, uint32 numBins) { - uint32 binIndex = (uint32) std::floor((value - min) / width); - return binIndex >= numBins ? 
numBins - 1 : binIndex; -} - /** * An implementation of the type `IFeatureBinning` that assigns numerical feature values to bins, such that each bin * contains values from equally sized value ranges. @@ -181,14 +295,45 @@ class EqualWidthFeatureBinning final : public IFeatureBinning { std::unique_ptr createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const override { - // TODO Implement - return nullptr; + // Create a numerical feature vector from the given feature matrix... + const std::unique_ptr featureVectorDecoratorPtr = + createNumericalFeatureVector(featureIndex, featureMatrix); + + // Check if all feature values are equal... + const NumericalFeatureVector& numericalFeatureVector = featureVectorDecoratorPtr->getView().firstView; + uint32 numElements = numericalFeatureVector.numElements; + + if (numElements > 0 + && !isEqual(numericalFeatureVector[0].value, numericalFeatureVector[numElements - 1].value)) { + return createFeatureVectorInternally(std::move(featureVectorDecoratorPtr->getView().secondView), + numericalFeatureVector, featureMatrix.numRows, binRatio_, minBins_, + maxBins_); + } + + return std::make_unique(); } std::unique_ptr createFeatureVector( uint32 featureIndex, const CscView& featureMatrix) const override { - // TODO Implement - return nullptr; + // Create a numerical feature vector from the given feature matrix... + const std::unique_ptr featureVectorDecoratorPtr = + createNumericalFeatureVector(featureIndex, featureMatrix); + + // Check if all feature values are equal... 
+ NumericalFeatureVector& numericalFeatureVector = featureVectorDecoratorPtr->getView().firstView; + uint32 numElements = numericalFeatureVector.numElements; + uint32 numExamples = featureMatrix.numRows; + + if (numElements > 0 + && (numElements < numExamples + || !isEqual(numericalFeatureVector[0].value, numericalFeatureVector[numElements - 1].value))) { + numericalFeatureVector.sparse = numElements < numExamples; + return createFeatureVectorInternally(std::move(featureVectorDecoratorPtr->getView().secondView), + numericalFeatureVector, numExamples, binRatio_, minBins_, + maxBins_); + } + + return std::make_unique(); } }; diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_binning_equal_width.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_binning_equal_width.cpp new file mode 100644 index 0000000000..39ecbe5f48 --- /dev/null +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_binning_equal_width.cpp @@ -0,0 +1,178 @@ +#include "mlrl/common/input/feature_binning_equal_width.hpp" + +#include "mlrl/common/input/feature_binning_equal_width.cpp" +#include "mlrl/common/input/feature_vector_equal.hpp" + +#include + +TEST(EqualWidthFeatureBinningTest, createBinnedFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... + uint32 numExamples = 7; + AllocatedFortranContiguousView featureView(numExamples, 1); + AllocatedFortranContiguousView::value_iterator features = featureView.values_begin(0); + features[0] = 0.2; + features[1] = -0.1; + features[2] = NAN; + features[3] = -0.2; + features[4] = 0.0; + features[5] = NAN; + features[6] = 0.1; + FortranContiguousView view(features, numExamples, 1); + + // Create feature vector... + std::unique_ptr featureVectorPtr = EqualWidthFeatureBinning(0.4, 1, 0).createFeatureVector(0, view); + + // Check type of feature vector... 
+ const AbstractFeatureVectorDecorator* featureVectorDecorator = + dynamic_cast*>(featureVectorPtr.get()); + EXPECT_TRUE(featureVectorDecorator != nullptr); + + if (featureVectorDecorator) { + // Check for missing feature values... + const MissingFeatureVector& missingFeatureVector = featureVectorDecorator->getView().secondView; + EXPECT_TRUE(missingFeatureVector[2]); + EXPECT_TRUE(missingFeatureVector[5]); + + // Check dimensionality of feature vector... + const BinnedFeatureVector& featureVector = featureVectorDecorator->getView().firstView; + EXPECT_EQ(featureVector.numBins, (uint32) 3); + EXPECT_EQ(featureVector.sparseBinIndex, (uint32) 1); + + // Check thresholds and indices associated with each bin... + BinnedFeatureVector::threshold_const_iterator thresholdIterator = featureVector.thresholds_cbegin(); + BinnedFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(0); + BinnedFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(0); + uint32 numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 2); + EXPECT_EQ(indexIterator[0], (uint32) 1); + EXPECT_EQ(indexIterator[1], (uint32) 3); + EXPECT_FLOAT_EQ(thresholdIterator[0], -0.066666663f); + + indexIterator = featureVector.indices_cbegin(1); + indicesEnd = featureVector.indices_cend(1); + numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 0); + EXPECT_FLOAT_EQ(thresholdIterator[1], 0.066666678f); + + indexIterator = featureVector.indices_cbegin(2); + indicesEnd = featureVector.indices_cend(2); + numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 2); + EXPECT_EQ(indexIterator[0], (uint32) 0); + EXPECT_EQ(indexIterator[1], (uint32) 6); + } +} + +TEST(EqualWidthFeatureBinningTest, createEqualFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... 
+ uint32 numExamples = 1; + AllocatedFortranContiguousView featureView(numExamples, 1); + AllocatedFortranContiguousView::value_iterator features = featureView.values_begin(0); + features[0] = 0.0; + FortranContiguousView view(features, numExamples, 1); + + // Create feature vector... + std::unique_ptr featureVectorPtr = EqualWidthFeatureBinning(0.4, 1, 0).createFeatureVector(0, view); + + // Check type of feature vector... + const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); +} + +TEST(EqualWidthFeatureBinningTest, createBinnedFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 7; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 0.2; + rowIndices[0] = 0; + data[1] = -0.1; + rowIndices[1] = 2; + data[2] = NAN; + rowIndices[2] = 3; + data[3] = -0.2; + rowIndices[3] = 5; + data[4] = 0.0; + rowIndices[4] = 6; + data[5] = NAN; + rowIndices[5] = 7; + data[6] = 0.1; + rowIndices[6] = 9; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscView view(data, rowIndices, indptr, numDense + 3, 1); + + // Create feature vector... + std::unique_ptr featureVectorPtr = EqualWidthFeatureBinning(0.3, 1, 0).createFeatureVector(0, view); + + // Check type of feature vector... + const AbstractFeatureVectorDecorator* featureVectorDecorator = + dynamic_cast*>(featureVectorPtr.get()); + EXPECT_TRUE(featureVectorDecorator != nullptr); + + if (featureVectorDecorator) { + // Check for missing feature values... + const MissingFeatureVector& missingFeatureVector = featureVectorDecorator->getView().secondView; + EXPECT_TRUE(missingFeatureVector[3]); + EXPECT_TRUE(missingFeatureVector[7]); + + // Check dimensionality of feature vector... 
+ const BinnedFeatureVector& featureVector = featureVectorDecorator->getView().firstView; + EXPECT_EQ(featureVector.numBins, (uint32) 3); + EXPECT_EQ(featureVector.sparseBinIndex, (uint32) 1); + + // Check thresholds and indices associated with each bin... + BinnedFeatureVector::threshold_const_iterator thresholdIterator = featureVector.thresholds_cbegin(); + BinnedFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(0); + BinnedFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(0); + uint32 numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 2); + EXPECT_EQ(indexIterator[0], (uint32) 2); + EXPECT_EQ(indexIterator[1], (uint32) 5); + EXPECT_FLOAT_EQ(thresholdIterator[0], -0.066666663f); + + indexIterator = featureVector.indices_cbegin(1); + indicesEnd = featureVector.indices_cend(1); + numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 0); + EXPECT_FLOAT_EQ(thresholdIterator[1], 0.066666678f); + + indexIterator = featureVector.indices_cbegin(2); + indicesEnd = featureVector.indices_cend(2); + numIndices = indicesEnd - indexIterator; + EXPECT_EQ(numIndices, (uint32) 2); + EXPECT_EQ(indexIterator[0], (uint32) 0); + EXPECT_EQ(indexIterator[1], (uint32) 9); + } + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(EqualWidthFeatureBinningTest, createEqualFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 2; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 0.1; + rowIndices[0] = 0; + data[1] = 0.1; + rowIndices[1] = 1; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscView view(data, rowIndices, indptr, numDense, 1); + + // Create feature vector... + std::unique_ptr featureVectorPtr = EqualWidthFeatureBinning(0.4, 1, 0).createFeatureVector(0, view); + + // Check type of feature vector... 
+ const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} From 676c9e547066ca3046be515ea9ffaded36610fec Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Sun, 18 Feb 2024 16:19:48 +0100 Subject: [PATCH 13/53] Update documentation. --- doc/user_guide/boosting/parameters.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/user_guide/boosting/parameters.md b/doc/user_guide/boosting/parameters.md index 0409745e7d..66d4804d39 100644 --- a/doc/user_guide/boosting/parameters.md +++ b/doc/user_guide/boosting/parameters.md @@ -241,13 +241,13 @@ The following parameters may be used to control the behavior of the algorithm. T - `'equal-width'` Examples are assigned to bins, based on their feature values, according to the equal-width binning method. The following options may be provided using the {ref}`bracket-notation`: - - `bin_ratio` (Default value = `0.33`) A percentage that specifies how many bins should be used. For example, a value of 0.3 means that the number of bins should be set to 30% of the number of distinct values for a feature. + - `bin_ratio` (Default value = `0.33`) A percentage that specifies how many bins should be used. For example, a value of 0.3 means that the number of bins should be set to 30% of the total number of available training examples. - `min_bins` (Default value = `2`) The minimum number of bins. Must be at least 2. - `max_bins` (Default value = `0`) The maximum number of bins. Must be at least min_bins or 0, if the number of bins should not be restricted. - `'equal-frequency'`. Examples are assigned to bins, based on their feature values, according to the equal-frequency binning method. The following options may be provided using the {ref}`bracket-notation`: - - `bin_ratio` (Default value = `0.33`) A percentage that specifies how many bins should be used. 
For example, a value of 0.3 means that the number of bins should be set to 30% of the number of distinct values for a feature. + - `bin_ratio` (Default value = `0.33`) A percentage that specifies how many bins should be used. For example, a value of 0.3 means that the number of bins should be set to 30% of the total number of available training examples. - `min_bins` (Default value = `2`) The minimum number of bins. Must be at least 2. - `max_bins` (Default value = `0`) The maximum number of bins. Must be at least min_bins or 0, if the number of bins should not be restricted. From 5726881a4c220f1f0d455fd32278a4a6c3df7dc5 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:09:13 +0100 Subject: [PATCH 14/53] Remove unused class ApproximateThresholdsFactory. --- .../thresholds/thresholds_approximate.hpp | 41 -- cpp/subprojects/common/meson.build | 1 - .../thresholds/thresholds_approximate.cpp | 422 ------------------ .../common/thresholds/thresholds_common.hpp | 183 -------- .../common/thresholds/thresholds_exact.cpp | 156 ++++++- 5 files changed, 153 insertions(+), 650 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_approximate.hpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_approximate.cpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_common.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_approximate.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_approximate.hpp deleted file mode 100644 index 7656750d85..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_approximate.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/input/feature_binning.hpp" -#include 
"mlrl/common/thresholds/thresholds.hpp" - -/** - * A factory that allows to create instances of the type `ApproximateThresholds`. - */ -class ApproximateThresholdsFactory final : public IThresholdsFactory { - private: - - const std::unique_ptr numericalFeatureBinningFactoryPtr_; - - const std::unique_ptr nominalFeatureBinningFactoryPtr_; - - const uint32 numThreads_; - - public: - - /** - * @param numericalFeatureBinningFactoryPtr An unique pointer to an object of type `IFeatureBinningFactory` that - * allows to create implementations of the binning method to be used - * for assigning numerical feature values to bins - * @param nominalFeatureBinningFactoryPtr An unique pointer to an object of type `IFeatureBinningFactory` that - * allows to create implementations of the binning method to be used - * for assigning nominal feature values to bins - * @param numThreads The number of CPU threads to be used to update statistics in - * parallel. Must be at least 1 - */ - ApproximateThresholdsFactory(std::unique_ptr numericalFeatureBinningFactoryPtr, - std::unique_ptr nominalFeatureBinningFactoryPtr, - uint32 numThreads); - - std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const override; -}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 6a8c879160..7b38f13d89 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -96,7 +96,6 @@ source_files = [ 'src/mlrl/common/stopping/stopping_criterion_time.cpp', 'src/mlrl/common/thresholds/coverage_mask.cpp', 'src/mlrl/common/thresholds/coverage_set.cpp', - 'src/mlrl/common/thresholds/thresholds_approximate.cpp', 'src/mlrl/common/thresholds/thresholds_exact.cpp', 'src/mlrl/common/info.cpp', 'src/mlrl/common/learner.cpp' diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_approximate.cpp 
b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_approximate.cpp deleted file mode 100644 index 77c97e62d3..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_approximate.cpp +++ /dev/null @@ -1,422 +0,0 @@ -#include "mlrl/common/thresholds/thresholds_approximate.hpp" - -#include "mlrl/common/rule_refinement/rule_refinement_approximate.hpp" -#include "mlrl/common/util/openmp.hpp" -#include "thresholds_common.hpp" - -#include - -/** - * Updates a given `CoverageSet` after a new condition has been added, such that only the examples that are covered by - * the new rule are marked es covered. - * - * @param thresholdVector A reference to an object of type `ThresholdVector` that stores the thresholds that result - * from the boundaries of the bins - * @param binIndices A reference to an object of type `IBinIndexVector` that stores the indices of the bins, - * individual examples belong to - * @param conditionStart The first bin (inclusive) that is covered by the new condition - * @param conditionEnd The last bin (exclusive) that is covered by the new condition - * @param covered True, if the bins in range [conditionStart, conditionEnd) are covered by the new condition - * and the remaining ones are not, false, if the elements in said range are not covered, but - * the remaining ones are - * @param coverageSet A reference to an object of type `CoverageSet` that is used to keep track of the examples - * that are covered by the previous rule. 
It will be updated by this function - * @param statistics A reference to an object of type `IWeightedStatistics` to be notified about the statistics - * that must be considered when searching for the next refinement, i.e., the statistics that - * are covered by the new rule - */ -static inline void updateCoveredExamples(const ThresholdVector& thresholdVector, const IBinIndexVector& binIndices, - int64 conditionStart, int64 conditionEnd, bool covered, - CoverageSet& coverageSet, IWeightedStatistics& statistics) { - int64 start, end; - - if (conditionEnd < conditionStart) { - start = conditionEnd + 1; - end = conditionStart + 1; - } else { - start = conditionStart; - end = conditionEnd; - } - - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::iterator coverageSetIterator = coverageSet.begin(); - statistics.resetCoveredStatistics(); - uint32 n = 0; - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - - if (!thresholdVector.isMissing(exampleIndex)) { - uint32 binIndex = binIndices.getBinIndex(exampleIndex); - - if (binIndex == IBinIndexVector::BIN_INDEX_SPARSE) { - binIndex = thresholdVector.getSparseBinIndex(); - } - - if ((binIndex >= start && binIndex < end) == covered) { - statistics.addCoveredStatistic(exampleIndex); - coverageSetIterator[n] = exampleIndex; - n++; - } - } - } - - coverageSet.setNumCovered(n); -} - -/** - * Rebuilds a given histogram. 
- * - * @param thresholdVector A reference to an object of type `ThresholdVector` that stores the thresholds that result - * from the boundaries of the bins - * @param histogram A reference to an object of type `IHistogram` that should be rebuild - * @param coverageSet A reference to an object of type `CoverageSet` that is used to keep track of the examples - * that are currently covered - */ -static inline void rebuildHistogram(const ThresholdVector& thresholdVector, IHistogram& histogram, - const CoverageSet& coverageSet) { - // Reset all statistics in the histogram to zero... - histogram.clear(); - - // Iterate the covered examples and add their statistics to the corresponding bin... - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - - if (!thresholdVector.isMissing(exampleIndex)) { - histogram.addToBin(exampleIndex); - } - } -} - -/** - * Provides access to the thresholds that result from applying a binning method to the feature values of the training - * examples. - */ -class ApproximateThresholds final : public AbstractThresholds { - private: - - /** - * Provides access to a subset of the thresholds that are stored by an instance of the class - * `ApproximateThresholds`. - * - * @tparam WeightVector The type of the vector that provides access to the weights of individual training - * examples - */ - template - class ThresholdsSubset final : public IThresholdsSubset { - private: - - /** - * A callback that allows to retrieve bins and corresponding statistics. If available, the bins and - * statistics are retrieved from the cache. Otherwise, they are computed by fetching the feature values - * from the feature matrix and applying a binning method. 
- */ - class Callback final : public IRuleRefinementCallback { - private: - - ThresholdsSubset& thresholdsSubset_; - - const uint32 featureIndex_; - - const bool nominal_; - - public: - - /** - * @param thresholdsSubset A reference to an object of type `ThresholdsSubset` that caches the - * bins - * @param featureIndex The index of the feature for which the bins should be retrieved - * @param nominal True, if the feature at index `featureIndex` is nominal, false - * otherwise - */ - Callback(ThresholdsSubset& thresholdsSubset, uint32 featureIndex, bool nominal) - : thresholdsSubset_(thresholdsSubset), featureIndex_(featureIndex), nominal_(nominal) {} - - Result get() override { - auto cacheIterator = thresholdsSubset_.thresholds_.cache_.find(featureIndex_); - IFeatureBinning::Result& cacheEntry = cacheIterator->second; - ThresholdVector* thresholdVector = cacheEntry.thresholdVectorPtr.get(); - IBinIndexVector* binIndices = cacheEntry.binIndicesPtr.get(); - - if (!thresholdVector) { - // Fetch feature vector... - std::unique_ptr featureVectorPtr; - const IColumnWiseFeatureMatrix& featureMatrix = - thresholdsSubset_.thresholds_.featureMatrix_; - uint32 numExamples = featureMatrix.getNumExamples(); - featureMatrix.fetchFeatureVector(featureIndex_, featureVectorPtr); - - // Apply binning method... - const IFeatureBinning& binning = - nominal_ ? *thresholdsSubset_.thresholds_.nominalFeatureBinningPtr_ - : *thresholdsSubset_.thresholds_.numericalFeatureBinningPtr_; - IFeatureBinning::Result result = binning.createBins(*featureVectorPtr, numExamples); - cacheEntry.thresholdVectorPtr = std::move(result.thresholdVectorPtr); - thresholdVector = cacheEntry.thresholdVectorPtr.get(); - cacheEntry.binIndicesPtr = std::move(result.binIndicesPtr); - binIndices = cacheEntry.binIndicesPtr.get(); - } - - auto cacheHistogramIterator = thresholdsSubset_.cacheHistogram_.find(featureIndex_); - - if (!cacheHistogramIterator->second) { - // Create histogram and weight vector... 
- uint32 numBins = thresholdVector->getNumElements(); - cacheHistogramIterator->second = - binIndices->createHistogram(*thresholdsSubset_.weightedStatisticsPtr_, numBins); - } - - // Rebuild histogram... - IHistogram& histogram = *cacheHistogramIterator->second; - rebuildHistogram(*thresholdVector, histogram, thresholdsSubset_.coverageSet_); - - return Result(histogram, *thresholdVector); - } - }; - - ApproximateThresholds& thresholds_; - - std::unique_ptr weightedStatisticsPtr_; - - const WeightVector& weights_; - - CoverageSet coverageSet_; - - std::unordered_map> cacheHistogram_; - - template - std::unique_ptr createApproximateRuleRefinement(const IndexVector& labelIndices, - uint32 featureIndex) { - // Retrieve `unique_ptr` from the cache, or insert an empty one if it does not already exist... - auto cacheHistogramIterator = - cacheHistogram_.emplace(featureIndex, std::unique_ptr()).first; - - // If the `unique_ptr` in the cache does not refer to an `IHistogram`, add an empty - // `IFeatureBinning::Result` to the cache... 
- if (!cacheHistogramIterator->second) { - thresholds_.cache_.emplace(featureIndex, IFeatureBinning::Result()); - } - - std::unique_ptr featureBinningFactoryPtr; - std::unique_ptr featureTypePtr = - thresholds_.featureInfo_.createFeatureType(featureIndex, *featureBinningFactoryPtr); - bool ordinal = featureTypePtr->isOrdinal(); - bool nominal = featureTypePtr->isNominal(); - std::unique_ptr callbackPtr = std::make_unique(*this, featureIndex, nominal); - return std::make_unique>( - labelIndices, coverageSet_.getNumCovered(), featureIndex, ordinal, nominal, - std::move(callbackPtr)); - } - - public: - - /** - * @param thresholds A reference to an object of type `ApproximateThresholds` that stores the - * thresholds - * @param weightedStatisticsPtr An unique pointer to an object of type `IWeightedStatistics` that - * provides access to the statistics - * @param weights A reference to an object of template type `WeightWeight` that provides - * access to the weights of individual training examples - */ - ThresholdsSubset(ApproximateThresholds& thresholds, - std::unique_ptr weightedStatisticsPtr, - const WeightVector& weights) - : thresholds_(thresholds), weightedStatisticsPtr_(std::move(weightedStatisticsPtr)), - weights_(weights), coverageSet_(thresholds.featureMatrix_.getNumExamples()) {} - - /** - * @param thresholdsSubset A reference to an object of type `ThresholdsSubset` to be copied - */ - ThresholdsSubset(const ThresholdsSubset& thresholdsSubset) - : thresholds_(thresholdsSubset.thresholds_), - weightedStatisticsPtr_(thresholdsSubset.weightedStatisticsPtr_->copy()), - weights_(thresholdsSubset.weights_), coverageSet_(thresholdsSubset.coverageSet_) {} - - std::unique_ptr copy() const override { - return std::make_unique>(*this); - } - - std::unique_ptr createRuleRefinement(const CompleteIndexVector& labelIndices, - uint32 featureIndex) override { - return createApproximateRuleRefinement(labelIndices, featureIndex); - } - - std::unique_ptr 
createRuleRefinement(const PartialIndexVector& labelIndices, - uint32 featureIndex) override { - return createApproximateRuleRefinement(labelIndices, featureIndex); - } - - void filterThresholds(const Condition& condition) override { - uint32 featureIndex = condition.featureIndex; - auto cacheIterator = thresholds_.cache_.find(featureIndex); - IFeatureBinning::Result& cacheEntry = cacheIterator->second; - const ThresholdVector& thresholdVector = *cacheEntry.thresholdVectorPtr; - const IBinIndexVector& binIndices = *cacheEntry.binIndicesPtr; - updateCoveredExamples(thresholdVector, binIndices, condition.start, condition.end, - !condition.inverse, coverageSet_, *weightedStatisticsPtr_); - } - - void resetThresholds() override { - coverageSet_.reset(); - } - - const ICoverageState& getCoverageState() const override { - return coverageSet_; - } - - Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageMask& coverageState, - const IPrediction& head) const override { - return evaluateOutOfSampleInternally( - partition.cbegin(), partition.getNumElements(), weights_, coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageState, - const IPrediction& head) const override { - return evaluateOutOfSampleInternally( - partition.first_cbegin(), partition.getNumFirst(), weights_, coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageSet& coverageState, - const IPrediction& head) const override { - return evaluateOutOfSampleInternally(weights_, coverageState, thresholds_.statisticsProvider_.get(), - head); - } - - Quality evaluateOutOfSample(BiPartition& partition, const CoverageSet& coverageState, - const IPrediction& head) const override { - return evaluateOutOfSampleInternally(weights_, coverageState, partition, - thresholds_.statisticsProvider_.get(), head); - } - 
- void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageState, - IPrediction& head) const override { - recalculatePredictionInternally( - partition.cbegin(), partition.getNumElements(), coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageState, - IPrediction& head) const override { - recalculatePredictionInternally( - partition.first_cbegin(), partition.getNumFirst(), coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(const SinglePartition& partition, const CoverageSet& coverageState, - IPrediction& head) const override { - recalculatePredictionInternally(coverageState, thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(BiPartition& partition, const CoverageSet& coverageState, - IPrediction& head) const override { - recalculatePredictionInternally(coverageState, partition, thresholds_.statisticsProvider_.get(), - head); - } - - void applyPrediction(const IPrediction& prediction) override { - uint32 numCovered = coverageSet_.getNumCovered(); - CoverageSet::const_iterator iterator = coverageSet_.cbegin(); - const IPrediction* predictionPtr = &prediction; - IStatistics* statisticsPtr = &thresholds_.statisticsProvider_.get(); - -#if MULTI_THREADING_SUPPORT_ENABLED - #pragma omp parallel for firstprivate(numCovered) firstprivate(iterator) firstprivate(predictionPtr) \ - firstprivate(statisticsPtr) schedule(dynamic) num_threads(thresholds_.numThreads_) -#endif - for (int64 i = 0; i < numCovered; i++) { - uint32 exampleIndex = iterator[i]; - predictionPtr->apply(*statisticsPtr, exampleIndex); - } - } - - void revertPrediction(const IPrediction& prediction) override { - uint32 numCovered = coverageSet_.getNumCovered(); - CoverageSet::const_iterator iterator = coverageSet_.cbegin(); - const IPrediction* predictionPtr = &prediction; - IStatistics* statisticsPtr = 
&thresholds_.statisticsProvider_.get(); - -#if MULTI_THREADING_SUPPORT_ENABLED - #pragma omp parallel for firstprivate(numCovered) firstprivate(iterator) firstprivate(predictionPtr) \ - firstprivate(statisticsPtr) schedule(dynamic) num_threads(thresholds_.numThreads_) -#endif - for (int64 i = 0; i < numCovered; i++) { - uint32 exampleIndex = iterator[i]; - predictionPtr->revert(*statisticsPtr, exampleIndex); - } - } - }; - - const std::unique_ptr numericalFeatureBinningPtr_; - - const std::unique_ptr nominalFeatureBinningPtr_; - - const uint32 numThreads_; - - std::unordered_map cache_; - - public: - - /** - * @param featureMatrix A reference to an object of type `IColumnWiseFeatureMatrix` that - * provides column-wise access to the feature values of individual training - * examples - * @param featureInfo A reference to an object of type `IFeatureInfo` that provides - * information about the types of individual features - * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides - * access to statistics about the labels of the training examples - * @param numericalFeatureBinningPtr An unique pointer to an object of type `IFeatureBinning` that should be - * used to assign numerical feature values to bins - * @param nominalFeatureBinningPtr An unique pointer to an object of type `IFeatureBinning` that should be - * used to assign nominal feature values to bins - * @param numThreads The number of CPU threads to be used to update statistics in parallel - */ - ApproximateThresholds(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider, - std::unique_ptr numericalFeatureBinningPtr, - std::unique_ptr nominalFeatureBinningPtr, uint32 numThreads) - : AbstractThresholds(featureMatrix, featureInfo, statisticsProvider), - numericalFeatureBinningPtr_(std::move(numericalFeatureBinningPtr)), - nominalFeatureBinningPtr_(std::move(nominalFeatureBinningPtr)), 
numThreads_(numThreads) {} - - std::unique_ptr createSubset(const EqualWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>( - *this, std::move(weightedStatisticsPtr), weights); - } - - std::unique_ptr createSubset(const BitWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>( - *this, std::move(weightedStatisticsPtr), weights); - } - - std::unique_ptr createSubset(const DenseWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>>( - *this, std::move(weightedStatisticsPtr), weights); - } -}; - -ApproximateThresholdsFactory::ApproximateThresholdsFactory( - std::unique_ptr numericalFeatureBinningFactoryPtr, - std::unique_ptr nominalFeatureBinningFactoryPtr, uint32 numThreads) - : numericalFeatureBinningFactoryPtr_(std::move(numericalFeatureBinningFactoryPtr)), - nominalFeatureBinningFactoryPtr_(std::move(nominalFeatureBinningFactoryPtr)), numThreads_(numThreads) {} - -std::unique_ptr ApproximateThresholdsFactory::create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const { - std::unique_ptr numericalFeatureBinningPtr = numericalFeatureBinningFactoryPtr_->create(); - std::unique_ptr nominalFeatureBinningPtr = nominalFeatureBinningFactoryPtr_->create(); - return std::make_unique(featureMatrix, featureInfo, statisticsProvider, - std::move(numericalFeatureBinningPtr), - std::move(nominalFeatureBinningPtr), numThreads_); -} diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_common.hpp 
b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_common.hpp deleted file mode 100644 index 745c71484b..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_common.hpp +++ /dev/null @@ -1,183 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/input/feature_info.hpp" -#include "mlrl/common/input/feature_matrix.hpp" -#include "mlrl/common/iterator/binary_forward_iterator.hpp" -#include "mlrl/common/thresholds/thresholds.hpp" - -template -static inline Quality evaluateOutOfSampleInternally(IndexIterator indexIterator, uint32 numExamples, - const WeightVector& weights, const CoverageMask& coverageMask, - const IStatistics& statistics, const IPrediction& prediction) { - OutOfSampleWeightVector outOfSampleWeights(weights); - std::unique_ptr statisticsSubsetPtr = - prediction.createStatisticsSubset(statistics, outOfSampleWeights); - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - - if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex) && coverageMask.isCovered(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - } - - return statisticsSubsetPtr->calculateScores(); -} - -template -static inline Quality evaluateOutOfSampleInternally(const WeightVector& weights, const CoverageSet& coverageSet, - const IStatistics& statistics, const IPrediction& prediction) { - OutOfSampleWeightVector outOfSampleWeights(weights); - std::unique_ptr statisticsSubsetPtr = - prediction.createStatisticsSubset(statistics, outOfSampleWeights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - - if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - } - - return statisticsSubsetPtr->calculateScores(); -} - -template 
-static inline Quality evaluateOutOfSampleInternally(const WeightVector& weights, const CoverageSet& coverageSet, - BiPartition& partition, const IStatistics& statistics, - const IPrediction& prediction) { - OutOfSampleWeightVector outOfSampleWeights(weights); - std::unique_ptr statisticsSubsetPtr = - prediction.createStatisticsSubset(statistics, outOfSampleWeights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - partition.sortSecond(); - auto holdoutSetIterator = make_binary_forward_iterator(partition.second_cbegin(), partition.second_cend()); - uint32 previousExampleIndex = 0; - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - std::advance(holdoutSetIterator, exampleIndex - previousExampleIndex); - - if (*holdoutSetIterator && statisticsSubsetPtr->hasNonZeroWeight(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - previousExampleIndex = exampleIndex; - } - - return statisticsSubsetPtr->calculateScores(); -} - -template -static inline void recalculatePredictionInternally(IndexIterator indexIterator, uint32 numExamples, - const CoverageMask& coverageMask, const IStatistics& statistics, - IPrediction& prediction) { - EqualWeightVector weights(numExamples); - std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - - if (coverageMask.isCovered(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - scoreVector.updatePrediction(prediction); -} - -static inline void recalculatePredictionInternally(const CoverageSet& coverageSet, const IStatistics& statistics, - IPrediction& prediction) { - uint32 numStatistics = statistics.getNumStatistics(); - EqualWeightVector weights(numStatistics); - 
std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - scoreVector.updatePrediction(prediction); -} - -static inline void recalculatePredictionInternally(const CoverageSet& coverageSet, BiPartition& partition, - const IStatistics& statistics, IPrediction& prediction) { - uint32 numStatistics = statistics.getNumStatistics(); - EqualWeightVector weights(numStatistics); - std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - partition.sortSecond(); - auto holdoutSetIterator = make_binary_forward_iterator(partition.second_cbegin(), partition.second_cend()); - uint32 previousExampleIndex = 0; - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - std::advance(holdoutSetIterator, exampleIndex - previousExampleIndex); - - if (*holdoutSetIterator) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - previousExampleIndex = exampleIndex; - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - scoreVector.updatePrediction(prediction); -} - -/** - * An abstract base class for all classes that provide access to thresholds that may be used by the first condition of a - * rule that currently has an empty body and therefore covers the entire instance space. 
- */ -class AbstractThresholds : public IThresholds { - protected: - - /** - * A reference to an object of type `IColumnWiseFeatureMatrix` that provides column-wise access to the feature - * values of the training examples. - */ - const IColumnWiseFeatureMatrix& featureMatrix_; - - /** - * A reference to an object of type `IFeatureInfo` that provides information about the types of individual - * features. - */ - const IFeatureInfo& featureInfo_; - - /** - * A reference to an object of type `IStatisticsProvider` that provides access to statistics about the labels of - * the training examples. - */ - IStatisticsProvider& statisticsProvider_; - - public: - - /** - * @param featureMatrix A reference to an object of type `IColumnWiseFeatureMatrix` that provides - * column-wise access to the feature values of individual training examples - * @param featureInfo A reference to an object of type `IFeatureInfo` that provides information about - * the types of individual features - * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides access to - * statistics about the labels of the training examples - */ - AbstractThresholds(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) - : featureMatrix_(featureMatrix), featureInfo_(featureInfo), statisticsProvider_(statisticsProvider) {} - - virtual ~AbstractThresholds() override {} - - IStatisticsProvider& getStatisticsProvider() const override final { - return statisticsProvider_; - } -}; diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 6f36a441c3..9862e23083 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -1,11 +1,139 @@ #include "mlrl/common/thresholds/thresholds_exact.hpp" +#include 
"mlrl/common/iterator/binary_forward_iterator.hpp" #include "mlrl/common/rule_refinement/rule_refinement_exact.hpp" #include "mlrl/common/util/openmp.hpp" -#include "thresholds_common.hpp" #include +template +static inline Quality evaluateOutOfSampleInternally(IndexIterator indexIterator, uint32 numExamples, + const WeightVector& weights, const CoverageMask& coverageMask, + const IStatistics& statistics, const IPrediction& prediction) { + OutOfSampleWeightVector outOfSampleWeights(weights); + std::unique_ptr statisticsSubsetPtr = + prediction.createStatisticsSubset(statistics, outOfSampleWeights); + + for (uint32 i = 0; i < numExamples; i++) { + uint32 exampleIndex = indexIterator[i]; + + if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex) && coverageMask.isCovered(exampleIndex)) { + statisticsSubsetPtr->addToSubset(exampleIndex); + } + } + + return statisticsSubsetPtr->calculateScores(); +} + +template +static inline Quality evaluateOutOfSampleInternally(const WeightVector& weights, const CoverageSet& coverageSet, + const IStatistics& statistics, const IPrediction& prediction) { + OutOfSampleWeightVector outOfSampleWeights(weights); + std::unique_ptr statisticsSubsetPtr = + prediction.createStatisticsSubset(statistics, outOfSampleWeights); + uint32 numCovered = coverageSet.getNumCovered(); + CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); + + for (uint32 i = 0; i < numCovered; i++) { + uint32 exampleIndex = coverageSetIterator[i]; + + if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex)) { + statisticsSubsetPtr->addToSubset(exampleIndex); + } + } + + return statisticsSubsetPtr->calculateScores(); +} + +template +static inline Quality evaluateOutOfSampleInternally(const WeightVector& weights, const CoverageSet& coverageSet, + BiPartition& partition, const IStatistics& statistics, + const IPrediction& prediction) { + OutOfSampleWeightVector outOfSampleWeights(weights); + std::unique_ptr statisticsSubsetPtr = + 
prediction.createStatisticsSubset(statistics, outOfSampleWeights); + uint32 numCovered = coverageSet.getNumCovered(); + CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); + partition.sortSecond(); + auto holdoutSetIterator = make_binary_forward_iterator(partition.second_cbegin(), partition.second_cend()); + uint32 previousExampleIndex = 0; + + for (uint32 i = 0; i < numCovered; i++) { + uint32 exampleIndex = coverageSetIterator[i]; + std::advance(holdoutSetIterator, exampleIndex - previousExampleIndex); + + if (*holdoutSetIterator && statisticsSubsetPtr->hasNonZeroWeight(exampleIndex)) { + statisticsSubsetPtr->addToSubset(exampleIndex); + } + + previousExampleIndex = exampleIndex; + } + + return statisticsSubsetPtr->calculateScores(); +} + +template +static inline void recalculatePredictionInternally(IndexIterator indexIterator, uint32 numExamples, + const CoverageMask& coverageMask, const IStatistics& statistics, + IPrediction& prediction) { + EqualWeightVector weights(numExamples); + std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); + + for (uint32 i = 0; i < numExamples; i++) { + uint32 exampleIndex = indexIterator[i]; + + if (coverageMask.isCovered(exampleIndex)) { + statisticsSubsetPtr->addToSubset(exampleIndex); + } + } + + const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); + scoreVector.updatePrediction(prediction); +} + +static inline void recalculatePredictionInternally(const CoverageSet& coverageSet, const IStatistics& statistics, + IPrediction& prediction) { + uint32 numStatistics = statistics.getNumStatistics(); + EqualWeightVector weights(numStatistics); + std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); + uint32 numCovered = coverageSet.getNumCovered(); + CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); + + for (uint32 i = 0; i < numCovered; i++) { + uint32 exampleIndex = coverageSetIterator[i]; + 
statisticsSubsetPtr->addToSubset(exampleIndex); + } + + const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); + scoreVector.updatePrediction(prediction); +} + +static inline void recalculatePredictionInternally(const CoverageSet& coverageSet, BiPartition& partition, + const IStatistics& statistics, IPrediction& prediction) { + uint32 numStatistics = statistics.getNumStatistics(); + EqualWeightVector weights(numStatistics); + std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); + uint32 numCovered = coverageSet.getNumCovered(); + CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); + partition.sortSecond(); + auto holdoutSetIterator = make_binary_forward_iterator(partition.second_cbegin(), partition.second_cend()); + uint32 previousExampleIndex = 0; + + for (uint32 i = 0; i < numCovered; i++) { + uint32 exampleIndex = coverageSetIterator[i]; + std::advance(holdoutSetIterator, exampleIndex - previousExampleIndex); + + if (*holdoutSetIterator) { + statisticsSubsetPtr->addToSubset(exampleIndex); + } + + previousExampleIndex = exampleIndex; + } + + const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); + scoreVector.updatePrediction(prediction); +} + /** * An entry that is stored in a cache and contains an unique pointer to a feature vector. The field `numConditions` * specifies how many conditions the rule contained when the vector was updated for the last time. It may be used to @@ -30,7 +158,7 @@ struct FilteredCacheEntry final { /** * Provides access to all thresholds that result from the feature values of the training examples. 
*/ -class ExactThresholds final : public AbstractThresholds { +class ExactThresholds final : public IThresholds { private: /** @@ -307,6 +435,24 @@ class ExactThresholds final : public AbstractThresholds { } }; + /** + * A reference to an object of type `IColumnWiseFeatureMatrix` that provides column-wise access to the feature + * values of the training examples. + */ + const IColumnWiseFeatureMatrix& featureMatrix_; + + /** + * A reference to an object of type `IFeatureInfo` that provides information about the types of individual + * features. + */ + const IFeatureInfo& featureInfo_; + + /** + * A reference to an object of type `IStatisticsProvider` that provides access to statistics about the labels of + * the training examples. + */ + IStatisticsProvider& statisticsProvider_; + const IFeatureBinningFactory& featureBinningFactory_; const uint32 numThreads_; @@ -331,9 +477,13 @@ class ExactThresholds final : public AbstractThresholds { ExactThresholds(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, IStatisticsProvider& statisticsProvider, const IFeatureBinningFactory& featureBinningFactory, uint32 numThreads) - : AbstractThresholds(featureMatrix, featureInfo, statisticsProvider), + : featureMatrix_(featureMatrix), featureInfo_(featureInfo), statisticsProvider_(statisticsProvider), featureBinningFactory_(featureBinningFactory), numThreads_(numThreads) {} + IStatisticsProvider& getStatisticsProvider() const override final { + return statisticsProvider_; + } + std::unique_ptr createSubset(const EqualWeightVector& weights) override { IStatistics& statistics = statisticsProvider_.get(); std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); From ba17ef0f3629af308abe51229f3a195131ea9459 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:10:30 +0100 Subject: [PATCH 15/53] Remove class ApproximateRuleRefinement. 
--- .../rule_refinement_approximate.hpp | 57 --- cpp/subprojects/common/meson.build | 1 - .../rule_refinement_approximate.cpp | 408 ------------------ 3 files changed, 466 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_approximate.hpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_approximate.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_approximate.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_approximate.hpp deleted file mode 100644 index 48d608c20d..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_approximate.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/binning/threshold_vector.hpp" -#include "mlrl/common/rule_refinement/rule_refinement.hpp" -#include "mlrl/common/rule_refinement/rule_refinement_callback.hpp" -#include "mlrl/common/statistics/histogram.hpp" - -/** - * Allows to find the best refinements of existing rules, which result from adding a new condition that correspond to a - * certain feature. The thresholds that may be used by the new condition result from the boundaries between the bins - * that have been created using a binning method. 
- * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels for which the refined - * rule is allowed to predict - */ -template -class ApproximateRuleRefinement final : public IRuleRefinement { - private: - - const IndexVector& labelIndices_; - - const uint32 numExamples_; - - const uint32 featureIndex_; - - const bool ordinal_; - - const bool nominal_; - - typedef IRuleRefinementCallback Callback; - - const std::unique_ptr callbackPtr_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides access to the - * indices of the labels for which the refined rule is allowed to predict - * @param numExamples The total number of training examples with non-zero weights that are covered by the - * existing rule - * @param featureIndex The index of the feature, the new condition corresponds to - * @param ordinal True, if the feature at index `featureIndex` is ordinal, false otherwise - * @param nominal True, if the feature at index `featureIndex` is nominal, false otherwise - * @param callbackPtr An unique pointer to an object of type `IRuleRefinementCallback` that allows to - * retrieve the information that is required to search for potential refinements - */ - ApproximateRuleRefinement(const IndexVector& labelIndices, uint32 numExamples, uint32 featureIndex, - bool ordinal, bool nominal, std::unique_ptr callbackPtr); - - void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) override; - - void findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) override; -}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 7b38f13d89..2e74166809 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -61,7 +61,6 @@ source_files = [ 'src/mlrl/common/rule_refinement/prediction_partial.cpp', 'src/mlrl/common/rule_refinement/refinement_comparator_fixed.cpp', 
'src/mlrl/common/rule_refinement/refinement_comparator_single.cpp', - 'src/mlrl/common/rule_refinement/rule_refinement_approximate.cpp', 'src/mlrl/common/rule_refinement/rule_refinement_exact.cpp', 'src/mlrl/common/rule_refinement/rule_refinement_search.cpp', 'src/mlrl/common/rule_refinement/score_processor.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_approximate.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_approximate.cpp deleted file mode 100644 index ccd39f14e7..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_approximate.cpp +++ /dev/null @@ -1,408 +0,0 @@ -#include "mlrl/common/rule_refinement/rule_refinement_approximate.hpp" - -template -static inline void findRefinementInternally(const IndexVector& labelIndices, uint32 numExamples, uint32 featureIndex, - bool ordinal, bool nominal, uint32 minCoverage, - IRuleRefinementCallback& callback, - RefinementComparator& comparator) { - Refinement refinement; - refinement.featureIndex = featureIndex; - - // Invoke the callback... - IRuleRefinementCallback::Result callbackResult = callback.get(); - const IHistogram& statistics = callbackResult.statistics; - const ThresholdVector& thresholdVector = callbackResult.vector; - ThresholdVector::const_iterator thresholdIterator = thresholdVector.cbegin(); - uint32 numBins = thresholdVector.getNumElements(); - uint32 sparseBinIndex = thresholdVector.getSparseBinIndex(); - bool sparse = sparseBinIndex < numBins; - - // Create a new, empty subset of the statistics... - std::unique_ptr statisticsSubsetPtr = statistics.createSubset(labelIndices); - - for (auto it = thresholdVector.missing_indices_cbegin(); it != thresholdVector.missing_indices_cend(); it++) { - uint32 i = *it; - statisticsSubsetPtr->addToMissing(i); - } - - // In the following, we start by processing the bins in range [0, sparseBinIndex)... 
- uint32 numCovered = 0; - int64 firstR = 0; - int64 r; - - // Traverse bins in ascending order until the first bin with non-zero weight is encountered... - for (r = 0; r < sparseBinIndex; r++) { - uint32 weight = statistics.getBinWeight(r); - - if (weight > 0) { - // Add the bin to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(r); - numCovered += weight; - break; - } - } - - uint32 numAccumulated = numCovered; - - // Traverse the remaining bins in ascending order... - if (numCovered > 0) { - for (r = r + 1; r < sparseBinIndex; r++) { - uint32 weight = statistics.getBinWeight(r); - - // Do only consider bins that are not empty... - if (weight > 0) { - // Check if a condition that uses the <= operator (or the == operator in case of a nominal feature) - // covers at least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.inverse = false; - refinement.numCovered = numCovered; - - if (nominal) { - refinement.threshold = (int32) thresholdIterator[r - 1]; - refinement.comparator = NOMINAL_EQ; - } else { - refinement.threshold = - ordinal ? (int32) thresholdIterator[r - 1] : thresholdIterator[r - 1]; - refinement.comparator = ordinal ? ORDINAL_LEQ : NUMERICAL_LEQ; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the > operator (or the != operator in case of a nominal feature) - // covers at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... 
- const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.inverse = true; - refinement.numCovered = coverage; - - if (nominal) { - refinement.threshold = (int32) thresholdIterator[r - 1]; - refinement.comparator = NOMINAL_NEQ; - } else { - refinement.threshold = - ordinal ? (int32) thresholdIterator[r - 1] : thresholdIterator[r - 1]; - refinement.comparator = ordinal ? ORDINAL_GR : NUMERICAL_GR; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Reset the subset in case of a nominal feature, as the previous bins will not be covered by the next - // condition... - if (nominal) { - statisticsSubsetPtr->resetSubset(); - numCovered = 0; - firstR = r; - } - - // Add the bin to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(r); - numCovered += weight; - numAccumulated += weight; - } - } - - // If any bins have been processed so far and if there is a sparse bin, we must evaluate additional conditions - // that separate the bins that have been iterated from the remaining ones (including the sparse bin)... - if (numCovered > 0 && sparse) { - // Check if a condition that uses the <= operator (or the == operator in case of a nominal feature) covers - // at least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = sparseBinIndex; - refinement.inverse = false; - refinement.numCovered = numCovered; - - if (nominal) { - refinement.threshold = (int32) thresholdIterator[sparseBinIndex - 1]; - refinement.comparator = NOMINAL_EQ; - } else { - refinement.threshold = ordinal ? (int32) thresholdIterator[sparseBinIndex - 1] - : thresholdIterator[sparseBinIndex - 1]; - refinement.comparator = ordinal ? ORDINAL_LEQ : NUMERICAL_LEQ; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the > operator (or the != operator in case of a nominal feature) covers at - // least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = sparseBinIndex; - refinement.inverse = true; - refinement.numCovered = coverage; - - if (nominal) { - refinement.threshold = (int32) thresholdIterator[sparseBinIndex - 1]; - refinement.comparator = NOMINAL_NEQ; - } else { - refinement.threshold = ordinal ? (int32) thresholdIterator[sparseBinIndex - 1] - : thresholdIterator[sparseBinIndex - 1]; - refinement.comparator = ordinal ? ORDINAL_GR : NUMERICAL_GR; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - } - - // Reset the subset, if any bins have been processed... - statisticsSubsetPtr->resetSubset(); - } - - uint32 numAccumulatedPrevious = numAccumulated; - - // We continue by processing the bins in range (sparseBinIndex, numBins)... 
- numCovered = 0; - firstR = ((int64) numBins) - 1; - - // Traverse bins in descending order until the first bin with non-zero weight is encountered... - for (r = firstR; r > sparseBinIndex; r--) { - uint32 weight = statistics.getBinWeight(r); - - if (weight > 0) { - // Add the bin to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(r); - numCovered += weight; - break; - } - } - - numAccumulated = numCovered; - - // Traverse the remaining bins in descending order... - if (numCovered > 0) { - for (r = r - 1; r > sparseBinIndex; r--) { - uint32 weight = statistics.getBinWeight(r); - - // Do only consider bins that are not empty... - if (weight > 0) { - // Check if a condition that uses the > operator (or the == operator in case of a nominal feature) - // covers at least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.inverse = false; - refinement.numCovered = numCovered; - - if (nominal) { - refinement.threshold = (int32) thresholdIterator[firstR]; - refinement.comparator = NOMINAL_EQ; - } else { - refinement.threshold = ordinal ? (int32) thresholdIterator[r] : thresholdIterator[r]; - refinement.comparator = ordinal ? ORDINAL_GR : NUMERICAL_GR; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the <= operator (or the != operator in case of a nominal feature) - // covers at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... 
- const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.inverse = true; - refinement.numCovered = coverage; - - if (nominal) { - refinement.threshold = (int32) thresholdIterator[firstR]; - refinement.comparator = NOMINAL_NEQ; - } else { - refinement.threshold = ordinal ? (int32) thresholdIterator[r] : thresholdIterator[r]; - refinement.comparator = ordinal ? ORDINAL_LEQ : NUMERICAL_LEQ; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Reset the subset in case of a nominal feature, as the previous bins will not be covered by the next - // condition... - if (nominal) { - statisticsSubsetPtr->resetSubset(); - numCovered = 0; - firstR = r; - } - - // Add the bin to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(r); - numCovered += weight; - numAccumulated += weight; - } - } - - // If there is a sparse bin, we must evaluate additional conditions that separate the bins in range - // (sparseBinIndex, numBins) from the remaining ones... - if (sparse) { - // Check if a condition that uses the > operator (or the == operator in case of a nominal feature) covers at - // least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = sparseBinIndex; - refinement.inverse = false; - refinement.numCovered = numCovered; - - if (nominal) { - refinement.threshold = (int32) thresholdIterator[firstR]; - refinement.comparator = NOMINAL_EQ; - } else { - refinement.threshold = - ordinal ? (int32) thresholdIterator[sparseBinIndex] : thresholdIterator[sparseBinIndex]; - refinement.comparator = ordinal ? ORDINAL_GR : NUMERICAL_GR; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the <= operator (or the != operator in case of a nominal feature) covers - // at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = sparseBinIndex; - refinement.inverse = true; - refinement.numCovered = coverage; - - if (nominal) { - refinement.threshold = (int32) thresholdIterator[firstR]; - refinement.comparator = NOMINAL_NEQ; - } else { - refinement.threshold = - ordinal ? (int32) thresholdIterator[sparseBinIndex] : thresholdIterator[sparseBinIndex]; - refinement.comparator = ordinal ? ORDINAL_LEQ : NUMERICAL_LEQ; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // If the feature is nominal and if any bins in the range [0, sparseBinIndex) have been processed earlier, - // we must test additional conditions that separate the sparse bin from the remaining bins... - if (nominal && numAccumulatedPrevious > 0) { - // Reset the subset once again to ensure that the accumulated state includes all bins that have been - // processed so far... 
- statisticsSubsetPtr->resetSubset(); - - // Check if the condition `f != thresholdIterator[sparseBinIndex]` covers at least `minCoverage` - // examples... - uint32 coverage = numExamples - numAccumulated - numAccumulatedPrevious; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresAccumulated(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = sparseBinIndex; - refinement.end = sparseBinIndex + 1; - refinement.inverse = true; - refinement.numCovered = coverage; - refinement.threshold = (int32) thresholdIterator[sparseBinIndex]; - refinement.comparator = NOMINAL_NEQ; - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if the condition `f == thresholdIterator[sparseBinIndex]` covers at least `minCoverage` - // examples... - coverage = numAccumulated + numAccumulatedPrevious; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncoveredAccumulated(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = sparseBinIndex; - refinement.end = sparseBinIndex + 1; - refinement.inverse = false; - refinement.numCovered = coverage; - refinement.threshold = (int32) thresholdIterator[sparseBinIndex]; - refinement.comparator = NOMINAL_EQ; - comparator.pushRefinement(refinement, scoreVector); - } - } - } - } - } -} - -template -ApproximateRuleRefinement::ApproximateRuleRefinement(const IndexVector& labelIndices, uint32 numExamples, - uint32 featureIndex, bool ordinal, bool nominal, - std::unique_ptr callbackPtr) - : labelIndices_(labelIndices), numExamples_(numExamples), featureIndex_(featureIndex), ordinal_(ordinal), - nominal_(nominal), callbackPtr_(std::move(callbackPtr)) {} - -template -void ApproximateRuleRefinement::findRefinement(SingleRefinementComparator& comparator, - uint32 minCoverage) { - findRefinementInternally(labelIndices_, numExamples_, featureIndex_, ordinal_, nominal_, minCoverage, *callbackPtr_, - comparator); -} - -template -void ApproximateRuleRefinement::findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) { - findRefinementInternally(labelIndices_, numExamples_, featureIndex_, ordinal_, nominal_, minCoverage, *callbackPtr_, - comparator); -} - -template class ApproximateRuleRefinement; -template class ApproximateRuleRefinement; From a889343b18ca069019deb159c87b7213d203a532 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:15:41 +0100 Subject: [PATCH 16/53] Remove function "createBins" from class IFeatureBinning. 
--- .../mlrl/common/input/feature_binning.hpp | 39 ----- .../input/feature_binning_equal_frequency.cpp | 124 ---------------- .../input/feature_binning_equal_width.cpp | 137 ------------------ .../common/input/feature_binning_nominal.hpp | 100 ------------- 4 files changed, 400 deletions(-) delete mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_binning_nominal.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_binning.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_binning.hpp index 8c5e65fa3b..b0c0d92557 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_binning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_binning.hpp @@ -4,56 +4,17 @@ */ #pragma once -#include "mlrl/common/binning/bin_index_vector.hpp" -#include "mlrl/common/binning/threshold_vector.hpp" #include "mlrl/common/input/feature_matrix.hpp" #include "mlrl/common/input/feature_type.hpp" -#include "mlrl/common/input/feature_vector.hpp" #include "mlrl/common/input/label_matrix.hpp" -#include - /** * Defines an interface for methods that assign feature values to bins. */ class IFeatureBinning : public IFeatureType { public: - /** - * The result that is returned by a binning method. It contains an unique pointer to a vector that stores the - * thresholds that result from the boundaries of the bins, as well as to a vector that stores the indices of the - * bins, individual values have been assigned to. - */ - // TODO Remove - struct Result final { - public: - - /** - * An unique pointer to an object of type `ThresholdVector` that provides access to the thresholds that - * result from the boundaries of the bins. - */ - std::unique_ptr thresholdVectorPtr; - - /** - * An unique pointer to an object of type `IBinIndexVector` that provides access to the indices of the - * bins, individual values have been assigned to. 
- */ - std::unique_ptr binIndicesPtr; - }; - virtual ~IFeatureBinning() override {} - - /** - * Assigns the values in a given `FeatureVector` to bins. - * - * @param featureVector A reference to an object of type `FeatureVector` whose values should be assigned to bins - * @param numExamples The total number of available training examples - * @return An object of type `Result` that contains a vector, which stores thresholds that result - * from the boundaries between the bins, as well as a vector that stores the indices of the - * bins, individual values have been assigned to - */ - // TODO Remove - virtual Result createBins(FeatureVector& featureVector, uint32 numExamples) const = 0; }; /** diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp index 1f905e2744..8a1edf06d2 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp @@ -1,10 +1,7 @@ #include "mlrl/common/input/feature_binning_equal_frequency.hpp" -#include "feature_binning_nominal.hpp" #include "feature_type_numerical_common.hpp" #include "feature_vector_decorator_binned.hpp" -#include "mlrl/common/binning/bin_index_vector_dense.hpp" -#include "mlrl/common/binning/bin_index_vector_dok.hpp" #include "mlrl/common/util/math.hpp" #include "mlrl/common/util/validation.hpp" @@ -120,40 +117,6 @@ static inline std::unique_ptr createFeatureVectorInternally( return std::make_unique(); } -static inline uint32 getNumBins(FeatureVector& featureVector, bool sparse, float32 binRatio, uint32 minBins, - uint32 maxBins) { - uint32 numElements = featureVector.getNumElements(); - - if (numElements > 0) { - featureVector.sortByValues(); - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - uint32 numDistinctValues = 1; - float32 previousValue; - uint32 i; - - 
if (sparse) { - previousValue = 0; - i = 0; - } else { - previousValue = featureIterator[0].value; - i = 1; - } - - for (; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if ((!sparse || currentValue != 0) && currentValue != previousValue) { - numDistinctValues++; - previousValue = currentValue; - } - } - - return numDistinctValues > 1 ? calculateBoundedFraction(numDistinctValues, binRatio, minBins, maxBins) : 0; - } - - return 0; -} - /** * An implementation of the type `IFeatureBinning` that assigns numerical feature values to bins, such that each bin * contains approximately the same number of values. @@ -179,93 +142,6 @@ class EqualFrequencyFeatureBinning final : public IFeatureBinning { EqualFrequencyFeatureBinning(float32 binRatio, uint32 minBins, uint32 maxBins) : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - Result createBins(FeatureVector& featureVector, uint32 numExamples) const override { - Result result; - uint32 numElements = featureVector.getNumElements(); - uint32 numSparse = numExamples - numElements; - bool sparse = numSparse > 0; - uint32 numBins = getNumBins(featureVector, sparse, binRatio_, minBins_, maxBins_); - result.thresholdVectorPtr = std::make_unique(featureVector, numBins); - - if (sparse) { - result.binIndicesPtr = std::make_unique(); - } else { - result.binIndicesPtr = std::make_unique(numElements); - } - - if (numBins > 0) { - IBinIndexVector& binIndices = *result.binIndicesPtr; - ThresholdVector& thresholdVector = *result.thresholdVectorPtr; - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - ThresholdVector::iterator thresholdIterator = thresholdVector.begin(); - uint32 numElementsPerBin = (uint32) std::ceil((float) numElements / (float) numBins); - uint32 numElementsInCurrentBin = 0; - uint32 binIndex = 0; - float32 previousValue = 0; - uint32 i = 0; - - // Iterate feature values < 0... 
- for (; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if (currentValue >= 0) { - break; - } - - if (currentValue != previousValue) { - if (numElementsInCurrentBin >= numElementsPerBin) { - thresholdIterator[binIndex] = arithmeticMean(previousValue, currentValue); - binIndex++; - numElementsInCurrentBin = 0; - } - - previousValue = currentValue; - } - - binIndices.setBinIndex(featureIterator[i].index, binIndex); - numElementsInCurrentBin++; - } - - // If there are any sparse values, check if they belong to the current one or the next one... - if (sparse) { - previousValue = 0; - - if (numElementsInCurrentBin >= numElementsPerBin) { - thresholdIterator[binIndex] = arithmeticMean(previousValue, 0); - binIndex++; - numElementsInCurrentBin = 0; - } - - thresholdVector.setSparseBinIndex(binIndex); - numElementsInCurrentBin += numSparse; - } - - // Iterate feature values >= 0... - for (; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if (!sparse || currentValue != 0) { - if (currentValue != previousValue) { - if (numElementsInCurrentBin >= numElementsPerBin) { - thresholdIterator[binIndex] = arithmeticMean(previousValue, currentValue); - binIndex++; - numElementsInCurrentBin = 0; - } - - previousValue = currentValue; - } - - binIndices.setBinIndex(featureIterator[i].index, binIndex); - numElementsInCurrentBin++; - } - } - - thresholdVector.setNumElements(binIndex + 1, true); - } - - return result; - } - bool isOrdinal() const override { return false; } diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp index 4c35779792..87c92cc3e5 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp @@ -1,18 +1,12 @@ #include "mlrl/common/input/feature_binning_equal_width.hpp" -#include 
"feature_binning_nominal.hpp" #include "feature_type_numerical_common.hpp" #include "feature_vector_decorator_binned.hpp" -#include "mlrl/common/binning/bin_index_vector_dense.hpp" -#include "mlrl/common/binning/bin_index_vector_dok.hpp" #include "mlrl/common/data/array.hpp" #include "mlrl/common/data/tuple.hpp" #include "mlrl/common/util/math.hpp" #include "mlrl/common/util/validation.hpp" -#include -#include - static inline Tuple getMinAndMaxFeatureValue(const NumericalFeatureVector& numericalFeatureVector) { uint32 numElements = numericalFeatureVector.numElements; float32 min; @@ -129,55 +123,6 @@ static inline std::unique_ptr createFeatureVectorInternally( return std::make_unique(); } -static inline std::tuple preprocess(const FeatureVector& featureVector, bool sparse, - float32 binRatio, uint32 minBins, uint32 maxBins) { - std::tuple result; - uint32 numElements = featureVector.getNumElements(); - - if (numElements > 0) { - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - float32 minValue; - uint32 i; - - if (sparse) { - minValue = 0; - i = 0; - } else { - minValue = featureIterator[0].value; - i = 1; - } - - float32 maxValue = minValue; - uint32 numDistinctValues = 1; - std::unordered_set distinctValues; - - for (; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if ((!sparse || currentValue != 0) && distinctValues.insert(currentValue).second) { - numDistinctValues++; - - if (currentValue < minValue) { - minValue = currentValue; - } - - if (currentValue > maxValue) { - maxValue = currentValue; - } - } - } - - std::get<0>(result) = - numDistinctValues > 1 ? 
calculateBoundedFraction(numDistinctValues, binRatio, minBins, maxBins) : 0; - std::get<1>(result) = minValue; - std::get<2>(result) = maxValue; - } else { - std::get<0>(result) = 0; - } - - return result; -} - /** * An implementation of the type `IFeatureBinning` that assigns numerical feature values to bins, such that each bin * contains values from equally sized value ranges. @@ -203,88 +148,6 @@ class EqualWidthFeatureBinning final : public IFeatureBinning { EqualWidthFeatureBinning(float32 binRatio, uint32 minBins, uint32 maxBins) : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - Result createBins(FeatureVector& featureVector, uint32 numExamples) const override { - Result result; - uint32 numElements = featureVector.getNumElements(); - bool sparse = numElements < numExamples; - std::tuple tuple = - preprocess(featureVector, sparse, binRatio_, minBins_, maxBins_); - uint32 numBins = std::get<0>(tuple); - result.thresholdVectorPtr = std::make_unique(featureVector, numBins, true); - - if (sparse) { - result.binIndicesPtr = std::make_unique(); - } else { - result.binIndicesPtr = std::make_unique(numElements); - } - - if (numBins > 0) { - IBinIndexVector& binIndices = *result.binIndicesPtr; - ThresholdVector& thresholdVector = *result.thresholdVectorPtr; - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - ThresholdVector::iterator thresholdIterator = thresholdVector.begin(); - float32 min = std::get<1>(tuple); - float32 max = std::get<2>(tuple); - float32 width = (max - min) / numBins; - uint32 sparseBinIndex; - - // If there are any sparse values, identify the bin they belong to... - if (sparse) { - sparseBinIndex = getBinIndex(0, min, width, numBins); - thresholdIterator[sparseBinIndex] = 1; - thresholdVector.setSparseBinIndex(sparseBinIndex); - } else { - sparseBinIndex = numBins; - } - - // Iterate all non-sparse feature values and identify the bins they belong to... 
- for (uint32 i = 0; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if (!sparse || currentValue != 0) { - uint32 binIndex = getBinIndex(currentValue, min, width, numBins); - - if (binIndex != sparseBinIndex) { - thresholdIterator[binIndex] = 1; - binIndices.setBinIndex(featureIterator[i].index, binIndex); - } - } - } - - // Remove empty bins and calculate thresholds... - Array mapping(numBins); - uint32 n = 0; - - for (uint32 i = 0; i < numBins; i++) { - mapping[i] = n; - - if (thresholdIterator[i] > 0) { - thresholdIterator[n] = min + ((i + 1) * width); - n++; - } - } - - thresholdVector.setNumElements(n, true); - - // Adjust bin indices... - DokBinIndexVector* dokBinIndices = dynamic_cast(&binIndices); - - if (dokBinIndices) { - for (auto it = dokBinIndices->begin(); it != dokBinIndices->end(); it++) { - uint32 binIndex = it->second; - it->second = mapping[binIndex]; - } - } else { - for (uint32 i = 0; i < numElements; i++) { - uint32 binIndex = binIndices.getBinIndex(i); - binIndices.setBinIndex(i, mapping[binIndex]); - } - } - } - - return result; - } - bool isOrdinal() const override { return false; } diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_nominal.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_nominal.hpp deleted file mode 100644 index f8bd073677..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_nominal.hpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/binning/bin_index_vector_dense.hpp" -#include "mlrl/common/binning/bin_index_vector_dok.hpp" -#include "mlrl/common/input/feature_binning.hpp" - -#include - -/** - * An implementation of the type `IFeatureBinning` that assigns nominal feature values to bins, such that each bin - * contains one of the available values. 
- */ -class NominalFeatureBinning final : public IFeatureBinning { - public: - - Result createBins(FeatureVector& featureVector, uint32 numExamples) const override { - Result result; - uint32 numElements = featureVector.getNumElements(); - bool sparse = numElements < numExamples; - uint32 maxBins = sparse ? numElements + 1 : numElements; - result.thresholdVectorPtr = std::make_unique(featureVector, maxBins); - - if (sparse) { - result.binIndicesPtr = std::make_unique(); - } else { - result.binIndicesPtr = std::make_unique(numElements); - } - - if (numElements > 0) { - IBinIndexVector& binIndices = *result.binIndicesPtr; - ThresholdVector& thresholdVector = *result.thresholdVectorPtr; - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - ThresholdVector::iterator thresholdIterator = thresholdVector.begin(); - std::unordered_map mapping; - uint32 nextBinIndex = 0; - - if (sparse) { - thresholdVector.setSparseBinIndex(0); - thresholdIterator[0] = 0; - nextBinIndex++; - } - - for (uint32 i = 0; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if (!sparse || currentValue != 0) { - uint32 index = featureIterator[i].index; - auto mapIterator = mapping.emplace(currentValue, nextBinIndex); - - if (mapIterator.second) { - thresholdIterator[nextBinIndex] = currentValue; - binIndices.setBinIndex(index, nextBinIndex); - nextBinIndex++; - } else { - binIndices.setBinIndex(index, mapIterator.first->second); - } - } - } - - thresholdVector.setNumElements(nextBinIndex, true); - } - - return result; - } - - bool isOrdinal() const override { - return false; - } - - bool isNominal() const override { - return false; - } - - std::unique_ptr createFeatureVector( - uint32 featureIndex, const FortranContiguousView& featureMatrix) const override { - // TODO Implement - return nullptr; - } - - std::unique_ptr createFeatureVector( - uint32 featureIndex, const CscView& featureMatrix) const override { - // TODO Implement - return nullptr; - } 
-}; - -/** - * Allows to create instances of the type `IFeatureBinning` that assign nominal feature values to bins, such that each - * bin contains one of the available values. - */ -class NominalFeatureBinningFactory final : public IFeatureBinningFactory { - public: - - std::unique_ptr create() const override { - return std::make_unique(); - } -}; From 77841c95d2f8c9649ed4bcf50b1fc727b07bf974 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:18:32 +0100 Subject: [PATCH 17/53] Remove function "fetchFeatureVector" from class IColumnWiseFeatureMatrix. --- .../input/feature_matrix_column_wise.hpp | 12 --------- .../mlrl/common/input/feature_matrix_csc.cpp | 25 ------------------- .../feature_matrix_fortran_contiguous.cpp | 22 ---------------- 3 files changed, 59 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_matrix_column_wise.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_matrix_column_wise.hpp index d65fb37c8e..ffddead4ee 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_matrix_column_wise.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_matrix_column_wise.hpp @@ -17,18 +17,6 @@ class MLRLCOMMON_API IColumnWiseFeatureMatrix : public IFeatureMatrix { virtual ~IColumnWiseFeatureMatrix() override {} - /** - * Fetches a feature vector that stores the indices of the training examples, as well as their feature values, - * for a specific feature and stores it in a given unique pointer. - * - * @param featureIndex The index of the feature - * @param featureVectorPtr An unique pointer to an object of type `FeatureVector` that should be used to store - * the feature vector - */ - // TODO Remove - virtual void fetchFeatureVector(uint32 featureIndex, - std::unique_ptr& featureVectorPtr) const = 0; - /** * Creates and returns a feature vector that stores the feature values of the available examples for a certain * feature. 
diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_matrix_csc.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_matrix_csc.cpp index e450eb6bd9..68470d876b 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_matrix_csc.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_matrix_csc.cpp @@ -42,31 +42,6 @@ class CscFeatureMatrix final : public IterableSparseMatrixDecoratorgetNumCols(); } - void fetchFeatureVector(uint32 featureIndex, std::unique_ptr& featureVectorPtr) const override { - index_const_iterator indexIterator = this->indices_cbegin(featureIndex); - index_const_iterator indicesEnd = this->indices_cend(featureIndex); - value_const_iterator valueIterator = this->values_cbegin(featureIndex); - uint32 numElements = indicesEnd - indexIterator; - featureVectorPtr = std::make_unique(numElements); - FeatureVector::iterator vectorIterator = featureVectorPtr->begin(); - uint32 i = 0; - - for (uint32 j = 0; j < numElements; j++) { - uint32 index = indexIterator[j]; - float32 value = valueIterator[j]; - - if (std::isnan(value)) { - featureVectorPtr->addMissingIndex(index); - } else { - vectorIterator[i].index = index; - vectorIterator[i].value = value; - i++; - } - } - - featureVectorPtr->setNumElements(i, true); - } - std::unique_ptr createFeatureVector(uint32 featureIndex, const IFeatureType& featureType) const override { return featureType.createFeatureVector(featureIndex, this->getView()); diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_matrix_fortran_contiguous.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_matrix_fortran_contiguous.cpp index 42d935cd79..b881182de7 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_matrix_fortran_contiguous.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_matrix_fortran_contiguous.cpp @@ -37,28 +37,6 @@ class FortranContiguousFeatureMatrix final : public DenseMatrixDecoratorgetNumCols(); } - void fetchFeatureVector(uint32 
featureIndex, std::unique_ptr& featureVectorPtr) const override { - value_const_iterator columnIterator = this->values_cbegin(featureIndex); - uint32 numElements = this->getNumRows(); - featureVectorPtr = std::make_unique(numElements); - FeatureVector::iterator vectorIterator = featureVectorPtr->begin(); - uint32 i = 0; - - for (uint32 j = 0; j < numElements; j++) { - float32 value = columnIterator[j]; - - if (std::isnan(value)) { - featureVectorPtr->addMissingIndex(j); - } else { - vectorIterator[i].index = j; - vectorIterator[i].value = value; - i++; - } - } - - featureVectorPtr->setNumElements(i, true); - } - std::unique_ptr createFeatureVector(uint32 featureIndex, const IFeatureType& featureType) const override { return featureType.createFeatureVector(featureIndex, this->getView()); From 34d3f2d91670b4372c6507d3929781aa9b2ac784 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:20:36 +0100 Subject: [PATCH 18/53] Remove class FeatureVector. --- .../mlrl/common/input/feature_vector.hpp | 24 ------------------- cpp/subprojects/common/meson.build | 1 - .../src/mlrl/common/input/feature_vector.cpp | 11 --------- 3 files changed, 36 deletions(-) delete mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_vector.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp index eb600f779f..df8c794006 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp @@ -3,10 +3,7 @@ */ #pragma once -#include "mlrl/common/data/indexed_value.hpp" -#include "mlrl/common/data/vector_dense.hpp" #include "mlrl/common/input/interval.hpp" -#include "mlrl/common/input/missing_feature_vector.hpp" #include "mlrl/common/rule_refinement/rule_refinement_search.hpp" #include "mlrl/common/statistics/statistics_weighted.hpp" #include "mlrl/common/thresholds/coverage_mask.hpp" 
@@ -103,24 +100,3 @@ class IFeatureVector { virtual std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, const CoverageMask& coverageMask) const = 0; }; - -/** - * An one-dimensional sparse vector that stores the values of training examples for a certain feature, as well as the - * indices of examples with missing feature values. - */ -// TODO Remove -class FeatureVector final - : public ResizableVectorDecorator>>>, - public OldMissingFeatureVector { - public: - - /** - * @param numElements The number of elements in the vector - */ - FeatureVector(uint32 numElements); - - /** - * Sorts the elements in the vector in ascending order based on their values. - */ - void sortByValues(); -}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 2e74166809..1faf981d39 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -22,7 +22,6 @@ source_files = [ 'src/mlrl/common/input/feature_type_nominal.cpp', 'src/mlrl/common/input/feature_type_numerical.cpp', 'src/mlrl/common/input/feature_type_ordinal.cpp', - 'src/mlrl/common/input/feature_vector.cpp', 'src/mlrl/common/input/feature_vector_binned.cpp', 'src/mlrl/common/input/feature_vector_equal.cpp', 'src/mlrl/common/input/feature_vector_nominal.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector.cpp deleted file mode 100644 index 6d28ccacff..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include "mlrl/common/input/feature_vector.hpp" - -#include - -FeatureVector::FeatureVector(uint32 numElements) - : ResizableVectorDecorator>>>( - ResizableVector>(numElements)) {} - -void FeatureVector::sortByValues() { - std::sort(this->begin(), this->end(), IndexedValue::CompareValue()); -} From 2bf2703485e154169cb36e5a6084dbdeb11fcd3c Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 
Feb 2024 00:24:48 +0100 Subject: [PATCH 19/53] Remove class ThresholdVector. --- .../mlrl/common/binning/threshold_vector.hpp | 44 ------------------- cpp/subprojects/common/meson.build | 1 - .../mlrl/common/binning/threshold_vector.cpp | 28 ------------ 3 files changed, 73 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/binning/threshold_vector.hpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/binning/threshold_vector.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/binning/threshold_vector.hpp b/cpp/subprojects/common/include/mlrl/common/binning/threshold_vector.hpp deleted file mode 100644 index 8d2f929569..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/binning/threshold_vector.hpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/data/vector_dense.hpp" -#include "mlrl/common/input/missing_feature_vector.hpp" - -/** - * An one-dimensional vector that stores thresholds that may be used by conditions. - */ -class ThresholdVector final : public ResizableVectorDecorator>>, - public OldMissingFeatureVector { - private: - - uint32 sparseBinIndex_; - - public: - - /** - * @param missingFeatureVector A reference to an object of type `OldMissingFeatureVector` the missing indices - * should be taken from - * @param numElements The number of elements in the vector - * @param init True, if all elements in the vector should be value-initialized, false otherwise - */ - ThresholdVector(OldMissingFeatureVector& missingFeatureVector, uint32 numElements, bool init = false); - - /** - * Returns the index of the bin, sparse values have been assigned to. - * - * @return The index of the bin, sparse values have been assigned to. If there is no such bin, the returned - * index is equal to `getNumElements()` - */ - uint32 getSparseBinIndex() const; - - /** - * Sets the index of the bin, sparse values have been assigned to. 
- * - * @param sparseBinIndex The index to be set - */ - void setSparseBinIndex(uint32 sparseBinIndex); - - void setNumElements(uint32 numElements, bool freeMemory) override; -}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 1faf981d39..c779adae6d 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -5,7 +5,6 @@ source_files = [ 'src/mlrl/common/binning/bin_index_vector_dense.cpp', 'src/mlrl/common/binning/bin_index_vector_dok.cpp', 'src/mlrl/common/binning/bin_weight_vector.cpp', - 'src/mlrl/common/binning/threshold_vector.cpp', 'src/mlrl/common/data/vector_bit.cpp', 'src/mlrl/common/indices/index_iterator.cpp', 'src/mlrl/common/indices/index_vector_complete.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/binning/threshold_vector.cpp b/cpp/subprojects/common/src/mlrl/common/binning/threshold_vector.cpp deleted file mode 100644 index ac9877547e..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/binning/threshold_vector.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "mlrl/common/binning/threshold_vector.hpp" - -ThresholdVector::ThresholdVector(OldMissingFeatureVector& missingFeatureVector, uint32 numElements, bool init) - : ResizableVectorDecorator>>( - ResizableVector(numElements, init)), - OldMissingFeatureVector(missingFeatureVector), sparseBinIndex_(numElements) {} - -uint32 ThresholdVector::getSparseBinIndex() const { - return sparseBinIndex_; -} - -void ThresholdVector::setSparseBinIndex(uint32 sparseBinIndex) { - uint32 numElements = this->getNumElements(); - - if (sparseBinIndex > numElements) { - sparseBinIndex_ = numElements; - } else { - sparseBinIndex_ = sparseBinIndex; - } -} - -void ThresholdVector::setNumElements(uint32 numElements, bool freeMemory) { - ResizableVectorDecorator>>::setNumElements(numElements, freeMemory); - - if (sparseBinIndex_ > numElements) { - sparseBinIndex_ = numElements; - } -} From 594997ac7945b0ed7c5cfc6f175243cdc7db8495 Mon Sep 
17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:29:31 +0100 Subject: [PATCH 20/53] Remove function "createHistogram" from class IBinIndexVector. --- .../mlrl/common/binning/bin_index_vector.hpp | 14 -------------- .../mlrl/common/binning/bin_index_vector_dense.hpp | 3 --- .../mlrl/common/binning/bin_index_vector_dok.hpp | 3 --- .../mlrl/common/binning/bin_index_vector_dense.cpp | 7 ------- .../mlrl/common/binning/bin_index_vector_dok.cpp | 7 ------- 5 files changed, 34 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector.hpp b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector.hpp index 688ea90382..d46e0ce232 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector.hpp @@ -7,10 +7,6 @@ #include -// Forward declarations -class IHistogram; -class IWeightedStatistics; - /** * Defines an interface for all classes that provide access to the indices of the bins, individual examples have been * assigned to. @@ -40,14 +36,4 @@ class IBinIndexVector { * @param binIndex The index of the bin, the example should be assigned to */ virtual void setBinIndex(uint32 exampleIndex, uint32 binIndex) = 0; - - /** - * Creates and returns a new histogram based on given statistics and the indices that are stored by this vector. 
- * - * @param statistics A reference to an object of type `IWeightedStatistics` that should be used - * @param numBins The number of bins in the histogram - * @return An unique pointer to an object of type `IHistogram` that has been created - */ - virtual std::unique_ptr createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dense.hpp b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dense.hpp index 50d7560089..0814074748 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dense.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dense.hpp @@ -21,7 +21,4 @@ class DenseBinIndexVector final : public DenseVectorDecorator createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dok.hpp b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dok.hpp index ff1c2c63c4..203e8fb703 100644 --- a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dok.hpp +++ b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dok.hpp @@ -19,7 +19,4 @@ class DokBinIndexVector final : public DokVectorDecorator createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const override; }; diff --git a/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dense.cpp b/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dense.cpp index b8832f94ca..56c4368e03 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dense.cpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dense.cpp @@ -1,7 +1,5 @@ #include "mlrl/common/binning/bin_index_vector_dense.hpp" -#include "mlrl/common/statistics/statistics_weighted.hpp" - DenseBinIndexVector::DenseBinIndexVector(uint32 numElements) : 
DenseVectorDecorator>(AllocatedVector(numElements)) {} @@ -12,8 +10,3 @@ uint32 DenseBinIndexVector::getBinIndex(uint32 exampleIndex) const { void DenseBinIndexVector::setBinIndex(uint32 exampleIndex, uint32 binIndex) { (*this)[exampleIndex] = binIndex; } - -std::unique_ptr DenseBinIndexVector::createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const { - return statistics.createHistogram(*this, numBins); -} diff --git a/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dok.cpp b/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dok.cpp index 4e1e0d6aae..ca3456d799 100644 --- a/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dok.cpp +++ b/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dok.cpp @@ -1,7 +1,5 @@ #include "mlrl/common/binning/bin_index_vector_dok.hpp" -#include "mlrl/common/statistics/statistics_weighted.hpp" - DokBinIndexVector::DokBinIndexVector() : DokVectorDecorator>(AllocatedDokVector(BIN_INDEX_SPARSE)) {} @@ -12,8 +10,3 @@ uint32 DokBinIndexVector::getBinIndex(uint32 exampleIndex) const { void DokBinIndexVector::setBinIndex(uint32 exampleIndex, uint32 binIndex) { this->view.set(exampleIndex, binIndex); } - -std::unique_ptr DokBinIndexVector::createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const { - return statistics.createHistogram(*this, numBins); -} From 7e6f4a8b013311a48f4e297bbfc355414a4fbc5b Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:38:33 +0100 Subject: [PATCH 21/53] Add TODO. 
--- .../mlrl/boosting/data/view_histogram_label_wise_sparse.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/subprojects/boosting/include/mlrl/boosting/data/view_histogram_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/mlrl/boosting/data/view_histogram_label_wise_sparse.hpp index 9f631706c4..c7e26a8d8b 100644 --- a/cpp/subprojects/boosting/include/mlrl/boosting/data/view_histogram_label_wise_sparse.hpp +++ b/cpp/subprojects/boosting/include/mlrl/boosting/data/view_histogram_label_wise_sparse.hpp @@ -16,6 +16,7 @@ namespace boosting { * label-wise decomposable loss function and are stored in a pre-allocated histogram in the list of lists (LIL) * format. */ + // TODO Remove class MLRLBOOSTING_API SparseLabelWiseHistogramView : public CompositeMatrix>, AllocatedVector> { public: From c3f9daab98d3d78b9a30369d72d10dd961c4886c Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:39:10 +0100 Subject: [PATCH 22/53] Remove function "createHistogram" from class IWeightedStatistics. 
--- .../statistics_example_wise_common.hpp | 234 +---------------- .../statistics_label_wise_common.hpp | 236 +----------------- .../statistics_label_wise_dense.hpp | 8 +- ...statistics_provider_example_wise_dense.cpp | 12 +- .../statistics_provider_label_wise_sparse.cpp | 47 +--- .../common/statistics/statistics_weighted.hpp | 25 -- .../mlrl/common/input/statistics_weighted.hpp | 10 - .../statistics_label_wise_common.hpp | 18 -- 8 files changed, 37 insertions(+), 553 deletions(-) diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_example_wise_common.hpp b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_example_wise_common.hpp index cb00f7ed39..5a050485d7 100644 --- a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_example_wise_common.hpp +++ b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_example_wise_common.hpp @@ -4,7 +4,6 @@ #pragma once #include "mlrl/boosting/statistics/statistics_example_wise.hpp" -#include "mlrl/common/binning/bin_weight_vector.hpp" namespace boosting { @@ -290,181 +289,6 @@ namespace boosting { } }; - /** - * Provides access to gradients and Hessians that are calculated according to a differentiable loss function that is - * applied example-wise and are organized as a histogram. 
- * - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the original gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam BinIndexVector The type of the vector that stores the indices of the bins, individual examples - * have been assigned to - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - */ - template - class ExampleWiseHistogram final - : virtual public IHistogram, - public AbstractExampleWiseImmutableWeightedStatistics { - private: - - /** - * Provides access to a subset of the gradients and Hessians that are stored by an instance of the class - * `ExampleWiseHistogram`. 
- * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that are - * included in the subset - */ - template - class WeightedStatisticsSubset final - : public AbstractExampleWiseImmutableWeightedStatistics< - StatisticVector, typename Histogram::view_type, RuleEvaluationFactory, - BinWeightVector>::template AbstractWeightedStatisticsSubset { - private: - - const ExampleWiseHistogram& histogram_; - - std::unique_ptr totalCoverableSumVectorPtr_; - - public: - - /** - * @param histogram A reference to an object of type `ExampleWiseHistogram` that stores the - * gradients and Hessians - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores - * the total sums of gradients and Hessians - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels that are included in the subset - */ - WeightedStatisticsSubset(const ExampleWiseHistogram& histogram, - const StatisticVector& totalSumVector, const IndexVector& labelIndices) - : AbstractExampleWiseImmutableWeightedStatistics< - StatisticVector, typename Histogram::view_type, RuleEvaluationFactory, - BinWeightVector>::template AbstractWeightedStatisticsSubset(histogram, - totalSumVector, - labelIndices), - histogram_(histogram) {} - - /** - * @see `IWeightedStatisticsSubset::addToMissing` - */ - void addToMissing(uint32 statisticIndex) override { - // Create a vector for storing the totals sums of gradients and Hessians, if necessary... - if (!totalCoverableSumVectorPtr_) { - totalCoverableSumVectorPtr_ = std::make_unique(*this->totalSumVector_); - this->totalSumVector_ = totalCoverableSumVectorPtr_.get(); - } - - // Subtract the gradients and Hessians of the example at the given index (weighted by the given - // weight) from the total sums of gradients and Hessians... 
- removeExampleWiseStatistic(histogram_.originalWeights_, histogram_.originalStatisticView_, - *totalCoverableSumVectorPtr_, statisticIndex); - } - }; - - const std::unique_ptr histogramPtr_; - - const std::unique_ptr binWeightVectorPtr_; - - const BinIndexVector& binIndexVector_; - - const StatisticView& originalStatisticView_; - - const WeightVector& originalWeights_; - - const StatisticVector& totalSumVector_; - - public: - - /** - * @param histogramPtr An unique pointer to an object of template type `Histogram` that stores the - * gradients and Hessians in the histogram - * @param binWeightVectorPtr An unique pointer to an object of type `BinWeightVector` that stores the - * weights of individual bins - * @param binIndexVector A reference to an object of template type `BinIndexVector` that stores the - * indices of the bins, individual examples have been assigned to - * @param originalStatisticView A reference to an object of template type `StatisticView` that provides - * access to the original gradients and Hessians, the histogram was created - * from - * @param originalWeights A reference to an object of template type `WeightVector` that provides - * access to the weights of the original statistics, the histogram was created - * from - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores the - * total sums of gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of type `RuleEvaluationFactory` that allows to - * create instances of the class that should be used for calculating the - * predictions of rules, as well as their overall quality - */ - ExampleWiseHistogram(std::unique_ptr histogramPtr, - std::unique_ptr binWeightVectorPtr, - const BinIndexVector& binIndexVector, const StatisticView& originalStatisticView, - const WeightVector& originalWeights, const StatisticVector& totalSumVector, - const RuleEvaluationFactory& ruleEvaluationFactory) - : 
AbstractExampleWiseImmutableWeightedStatistics( - histogramPtr->getView(), ruleEvaluationFactory, *binWeightVectorPtr), - histogramPtr_(std::move(histogramPtr)), binWeightVectorPtr_(std::move(binWeightVectorPtr)), - binIndexVector_(binIndexVector), originalStatisticView_(originalStatisticView), - originalWeights_(originalWeights), totalSumVector_(totalSumVector) {} - - /** - * @see `IHistogram::clear` - */ - void clear() override { - histogramPtr_->clear(); - binWeightVectorPtr_->clear(); - } - - /** - * @see `IHistogram::getBinWeight` - */ - uint32 getBinWeight(uint32 binIndex) const override { - return (*binWeightVectorPtr_)[binIndex]; - } - - /** - * @see `IHistogram::addToBin` - */ - void addToBin(uint32 statisticIndex) override { - float64 weight = originalWeights_[statisticIndex]; - - if (weight > 0) { - uint32 binIndex = binIndexVector_.getBinIndex(statisticIndex); - - if (binIndex != IBinIndexVector::BIN_INDEX_SPARSE) { - binWeightVectorPtr_->increaseWeight(binIndex); - histogramPtr_->addToRow(binIndex, originalStatisticView_.gradients_cbegin(statisticIndex), - originalStatisticView_.gradients_cend(statisticIndex), - originalStatisticView_.hessians_cbegin(statisticIndex), - originalStatisticView_.hessians_cend(statisticIndex), weight); - } - } - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices) const override { - return std::make_unique>(*this, totalSumVector_, - labelIndices); - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices) const override { - return std::make_unique>(*this, totalSumVector_, - labelIndices); - } - }; - template static inline void addExampleWiseStatistic(const WeightVector& weights, const StatisticView& statisticView, StatisticVector& statisticVector, uint32 statisticIndex) { @@ -499,20 +323,6 @@ namespace boosting { 
statisticView.hessians_cbegin(statisticIndex), statisticView.hessians_cend(statisticIndex)); } - template - static inline std::unique_ptr createExampleWiseHistogramInternally( - const BinIndexVector& binIndexVector, const StatisticView& originalStatisticView, - const WeightVector& originalWeights, const StatisticVector& totalSumVector, - const RuleEvaluationFactory& ruleEvaluationFactory, uint32 numBins) { - std::unique_ptr histogramPtr = std::make_unique(numBins, originalStatisticView.numCols); - std::unique_ptr binWeightVectorPtr = std::make_unique(numBins); - return std::make_unique>( - std::move(histogramPtr), std::move(binWeightVectorPtr), binIndexVector, originalStatisticView, - originalWeights, totalSumVector, ruleEvaluationFactory); - } - /** * Provides access to weighted gradients and Hessians that are calculated according to a differentiable loss * function that is applied example-wise and allows to update the gradients and Hessians after a new rule has been @@ -520,15 +330,13 @@ namespace boosting { * * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is * used for calculating the predictions of rules, as well as corresponding quality * scores * @tparam WeightVector The type of the vector that provides access to the weights of individual * statistics */ - template + template class ExampleWiseWeightedStatistics final : virtual public IWeightedStatistics, public AbstractExampleWiseImmutableWeightedStatistics copy() const override { - return std::make_unique>(*this); + return std::make_unique< + ExampleWiseWeightedStatistics>( + *this); } /** @@ -651,28 +460,6 @@ namespace boosting { 
removeExampleWiseStatistic(this->weights_, this->statisticView_, *totalSumVectorPtr_, statisticIndex); } - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DenseBinIndexVector& binIndexVector, - uint32 numBins) const override { - return createExampleWiseHistogramInternally( - binIndexVector, this->statisticView_, this->weights_, *totalSumVectorPtr_, - this->ruleEvaluationFactory_, numBins); - } - - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DokBinIndexVector& binIndexVector, - uint32 numBins) const override { - return createExampleWiseHistogramInternally( - binIndexVector, this->statisticView_, this->weights_, *totalSumVectorPtr_, - this->ruleEvaluationFactory_, numBins); - } - /** * @see `IImmutableWeightedStatistics::createSubset` */ @@ -707,7 +494,6 @@ namespace boosting { * training examples * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians * @tparam StatisticMatrix The type of the matrix that stores the gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians * @tparam ScoreMatrix The type of the matrices that are used to store predicted scores * @tparam LossFunction The type of the loss function that is used to calculate gradients and * Hessians @@ -720,9 +506,9 @@ namespace boosting { * that is used for calculating the label-wise predictions of rules, as * well as their overall quality */ - template + template class AbstractExampleWiseStatistics : virtual public IExampleWiseStatistics { private: @@ -996,7 +782,7 @@ namespace boosting { std::unique_ptr createWeightedStatistics( const EqualWeightVector& weights) const override final { return std::make_unique< - ExampleWiseWeightedStatistics>( statisticMatrixPtr_->getView(), *ruleEvaluationFactory_, weights); } @@ -1007,7 +793,7 @@ namespace boosting { std::unique_ptr createWeightedStatistics( const 
BitWeightVector& weights) const override final { return std::make_unique< - ExampleWiseWeightedStatistics>( statisticMatrixPtr_->getView(), *ruleEvaluationFactory_, weights); } @@ -1018,7 +804,7 @@ namespace boosting { std::unique_ptr createWeightedStatistics( const DenseWeightVector& weights) const override final { return std::make_unique< - ExampleWiseWeightedStatistics>>( statisticMatrixPtr_->getView(), *ruleEvaluationFactory_, weights); } diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_label_wise_common.hpp b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_label_wise_common.hpp index 1442066a0e..934f2bf8c6 100644 --- a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_label_wise_common.hpp +++ b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_label_wise_common.hpp @@ -4,7 +4,6 @@ #pragma once #include "mlrl/boosting/statistics/statistics_label_wise.hpp" -#include "mlrl/common/binning/bin_weight_vector.hpp" namespace boosting { @@ -276,179 +275,6 @@ namespace boosting { } }; - /** - * Provides access to gradients and Hessians that are calculated according to a differentiable loss function that is - * applied label-wise and are organized as a histogram. 
- * - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the original gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam BinIndexVector The type of the vector that stores the indices of the bins, individual examples - * have been assigned to - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - */ - template - class LabelWiseHistogram final - : virtual public IHistogram, - public AbstractLabelWiseImmutableWeightedStatistics { - private: - - /** - * Provides access to a subset of the gradients and Hessians that are stored by an instance of the class - * `LabelWiseHistogram`. 
- * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that are - * included in the subset - */ - template - class WeightedStatisticsSubset final - : public AbstractLabelWiseImmutableWeightedStatistics< - StatisticVector, typename Histogram::view_type, RuleEvaluationFactory, - BinWeightVector>::template AbstractWeightedStatisticsSubset { - private: - - const LabelWiseHistogram& histogram_; - - std::unique_ptr totalCoverableSumVectorPtr_; - - public: - - /** - * @param histogram A reference to an object of type `LabelWiseHistogram` that stores the - * gradients and Hessians - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores - * the total sums of gradients and Hessians - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels that are included in the subset - */ - WeightedStatisticsSubset(const LabelWiseHistogram& histogram, const StatisticVector& totalSumVector, - const IndexVector& labelIndices) - : AbstractLabelWiseImmutableWeightedStatistics< - StatisticVector, typename Histogram::view_type, RuleEvaluationFactory, - BinWeightVector>::template AbstractWeightedStatisticsSubset(histogram, - totalSumVector, - labelIndices), - histogram_(histogram) {} - - /** - * @see `IWeightedStatisticsSubset::addToMissing` - */ - void addToMissing(uint32 statisticIndex) override { - // Create a vector for storing the totals sums of gradients and Hessians, if necessary... - if (!totalCoverableSumVectorPtr_) { - totalCoverableSumVectorPtr_ = std::make_unique(*this->totalSumVector_); - this->totalSumVector_ = totalCoverableSumVectorPtr_.get(); - } - - // Subtract the gradients and Hessians of the example at the given index (weighted by the given - // weight) from the total sums of gradients and Hessians... 
- removeLabelWiseStatistic(histogram_.originalWeights_, histogram_.originalStatisticView_, - *totalCoverableSumVectorPtr_, statisticIndex); - } - }; - - const std::unique_ptr histogramPtr_; - - const std::unique_ptr binWeightVectorPtr_; - - const BinIndexVector& binIndexVector_; - - const StatisticView& originalStatisticView_; - - const WeightVector& originalWeights_; - - const StatisticVector& totalSumVector_; - - public: - - /** - * @param histogramPtr An unique pointer to an object of template type `Histogram` that stores the - * gradients and Hessians in the histogram - * @param binWeightVectorPtr An unique pointer to an object of type `BinWeightVector` that stores the - * weights of individual bins - * @param binIndexVector A reference to an object of template type `BinIndexVector` that stores the - * indices of the bins, individual examples have been assigned to - * @param originalStatisticView A reference to an object of template type `StatisticView` that provides - * access to the original gradients and Hessians, the histogram was created - * from - * @param originalWeights A reference to an object of template type `WeightVector` that provides - * access to the weights of the original statistics, the histogram was created - * from - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores the - * total sums of gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of type `RuleEvaluationFactory` that allows to - * create instances of the class that should be used for calculating the - * predictions of rules, as well as their overall quality - */ - LabelWiseHistogram(std::unique_ptr histogramPtr, - std::unique_ptr binWeightVectorPtr, - const BinIndexVector& binIndexVector, const StatisticView& originalStatisticView, - const WeightVector& originalWeights, const StatisticVector& totalSumVector, - const RuleEvaluationFactory& ruleEvaluationFactory) - : AbstractLabelWiseImmutableWeightedStatistics( 
- histogramPtr->getView(), ruleEvaluationFactory, *binWeightVectorPtr), - histogramPtr_(std::move(histogramPtr)), binWeightVectorPtr_(std::move(binWeightVectorPtr)), - binIndexVector_(binIndexVector), originalStatisticView_(originalStatisticView), - originalWeights_(originalWeights), totalSumVector_(totalSumVector) {} - - /** - * @see `IHistogram::clear` - */ - void clear() override { - histogramPtr_->clear(); - binWeightVectorPtr_->clear(); - } - - /** - * @see `IHistogram::getBinWeight` - */ - uint32 getBinWeight(uint32 binIndex) const override { - return (*binWeightVectorPtr_)[binIndex]; - } - - /** - * @see `IHistogram::addToBin` - */ - void addToBin(uint32 statisticIndex) override { - float64 weight = originalWeights_[statisticIndex]; - - if (weight > 0) { - uint32 binIndex = binIndexVector_.getBinIndex(statisticIndex); - - if (binIndex != IBinIndexVector::BIN_INDEX_SPARSE) { - binWeightVectorPtr_->increaseWeight(binIndex); - histogramPtr_->addToRow(binIndex, originalStatisticView_.values_cbegin(statisticIndex), - originalStatisticView_.values_cend(statisticIndex), weight); - } - } - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices) const override { - return std::make_unique>(*this, totalSumVector_, - labelIndices); - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices) const override { - return std::make_unique>(*this, totalSumVector_, - labelIndices); - } - }; - template static inline void addLabelWiseStatistic(const EqualWeightVector& weights, const StatisticView& statisticView, StatisticVector& statisticVector, uint32 statisticIndex) { @@ -475,20 +301,6 @@ namespace boosting { statisticVector.remove(statisticView, statisticIndex, weight); } - template - static inline std::unique_ptr createLabelWiseHistogramInternally( - const BinIndexVector& binIndexVector, const 
StatisticView& originalStatisticView, - const WeightVector& originalWeights, const StatisticVector& totalSumVector, - const RuleEvaluationFactory& ruleEvaluationFactory, uint32 numBins) { - std::unique_ptr histogramPtr = std::make_unique(numBins, originalStatisticView.numCols); - std::unique_ptr binWeightVectorPtr = std::make_unique(numBins); - return std::make_unique>( - std::move(histogramPtr), std::move(binWeightVectorPtr), binIndexVector, originalStatisticView, - originalWeights, totalSumVector, ruleEvaluationFactory); - } - /** * Provides access to weighted gradients and Hessians that are calculated according to a differentiable loss * function that is applied label-wise and allows to update the gradients and Hessians after a new rule has been @@ -496,15 +308,13 @@ namespace boosting { * * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is * used for calculating the predictions of rules, as well as corresponding quality * scores * @tparam WeightVector The type of the vector that provides access to the weights of individual * statistics */ - template + template class LabelWiseWeightedStatistics final : virtual public IWeightedStatistics, public AbstractLabelWiseImmutableWeightedStatistics copy() const override { - return std::make_unique>(*this); + return std::make_unique< + LabelWiseWeightedStatistics>( + *this); } /** @@ -626,28 +437,6 @@ namespace boosting { removeLabelWiseStatistic(this->weights_, this->statisticView_, *totalSumVectorPtr_, statisticIndex); } - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DenseBinIndexVector& binIndexVector, - uint32 numBins) const override 
{ - return createLabelWiseHistogramInternally( - binIndexVector, this->statisticView_, this->weights_, *totalSumVectorPtr_, - this->ruleEvaluationFactory_, numBins); - } - - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DokBinIndexVector& binIndexVector, - uint32 numBins) const override { - return createLabelWiseHistogramInternally( - binIndexVector, this->statisticView_, this->weights_, *totalSumVectorPtr_, - this->ruleEvaluationFactory_, numBins); - } - /** * @see `IImmutableWeightedStatistics::createSubset` */ @@ -698,7 +487,6 @@ namespace boosting { * examples * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians * @tparam StatisticMatrix The type of the matrix that provides access to the gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians * @tparam ScoreMatrix The type of the matrices that are used to store predicted scores * @tparam LossFunction The type of the loss function that is used to calculate gradients and Hessians * @tparam EvaluationMeasure The type of the evaluation measure that is used to assess the quality of @@ -707,8 +495,8 @@ namespace boosting { * used for calculating the predictions of rules, as well as corresponding quality * scores */ - template + template class AbstractLabelWiseStatistics : virtual public ILabelWiseStatistics { private: @@ -969,9 +757,8 @@ namespace boosting { */ std::unique_ptr createWeightedStatistics( const EqualWeightVector& weights) const override final { - return std::make_unique< - LabelWiseWeightedStatistics>( + return std::make_unique>( statisticMatrixPtr_->getView(), *ruleEvaluationFactory_, weights); } @@ -980,9 +767,8 @@ namespace boosting { */ std::unique_ptr createWeightedStatistics( const BitWeightVector& weights) const override final { - return std::make_unique< - LabelWiseWeightedStatistics>( + return std::make_unique>( statisticMatrixPtr_->getView(), 
*ruleEvaluationFactory_, weights); } @@ -992,7 +778,7 @@ namespace boosting { std::unique_ptr createWeightedStatistics( const DenseWeightVector& weights) const override final { return std::make_unique< - LabelWiseWeightedStatistics>>( statisticMatrixPtr_->getView(), *ruleEvaluationFactory_, weights); } diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_label_wise_dense.hpp b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_label_wise_dense.hpp index 24c8ca3202..9cff061d36 100644 --- a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_label_wise_dense.hpp +++ b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_label_wise_dense.hpp @@ -51,8 +51,8 @@ namespace boosting { template class DenseLabelWiseStatistics final : public AbstractLabelWiseStatistics, - ILabelWiseLoss, IEvaluationMeasure, ILabelWiseRuleEvaluationFactory> { + NumericCContiguousMatrix, ILabelWiseLoss, IEvaluationMeasure, + ILabelWiseRuleEvaluationFactory> { public: /** @@ -78,8 +78,8 @@ namespace boosting { std::unique_ptr statisticMatrixPtr, std::unique_ptr> scoreMatrixPtr) : AbstractLabelWiseStatistics, - ILabelWiseLoss, IEvaluationMeasure, ILabelWiseRuleEvaluationFactory>( + NumericCContiguousMatrix, ILabelWiseLoss, IEvaluationMeasure, + ILabelWiseRuleEvaluationFactory>( std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, std::move(statisticMatrixPtr), std::move(scoreMatrixPtr)) {} diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_provider_example_wise_dense.cpp b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_provider_example_wise_dense.cpp index e15b67a040..9be6024469 100644 --- a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_provider_example_wise_dense.cpp +++ b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_provider_example_wise_dense.cpp @@ -60,9 +60,9 @@ namespace boosting { template class 
DenseExampleWiseStatistics final : public AbstractExampleWiseStatistics, IExampleWiseLoss, IEvaluationMeasure, - IExampleWiseRuleEvaluationFactory, ILabelWiseRuleEvaluationFactory> { + DenseExampleWiseStatisticMatrix, NumericCContiguousMatrix, + IExampleWiseLoss, IEvaluationMeasure, IExampleWiseRuleEvaluationFactory, + ILabelWiseRuleEvaluationFactory> { public: /** @@ -88,9 +88,9 @@ namespace boosting { std::unique_ptr statisticMatrixPtr, std::unique_ptr> scoreMatrixPtr) : AbstractExampleWiseStatistics, IExampleWiseLoss, IEvaluationMeasure, - IExampleWiseRuleEvaluationFactory, ILabelWiseRuleEvaluationFactory>( + DenseExampleWiseStatisticMatrix, NumericCContiguousMatrix, + IExampleWiseLoss, IEvaluationMeasure, IExampleWiseRuleEvaluationFactory, + ILabelWiseRuleEvaluationFactory>( std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, std::move(statisticMatrixPtr), std::move(scoreMatrixPtr)) {} diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_provider_label_wise_sparse.cpp b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_provider_label_wise_sparse.cpp index 13be528a5b..89c4273455 100644 --- a/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_provider_label_wise_sparse.cpp +++ b/cpp/subprojects/boosting/src/mlrl/boosting/statistics/statistics_provider_label_wise_sparse.cpp @@ -7,7 +7,6 @@ #include "mlrl/boosting/data/matrix_sparse_set_numeric.hpp" #include "mlrl/boosting/data/vector_statistic_label_wise_sparse.hpp" -#include "mlrl/boosting/data/view_histogram_label_wise_sparse.hpp" #include "mlrl/common/util/openmp.hpp" #include "statistics_label_wise_common.hpp" #include "statistics_provider_label_wise.hpp" @@ -29,40 +28,6 @@ namespace boosting { : MatrixDecorator>>(SparseSetView>(numRows, numCols)) {} }; - /** - * A histogram that stores gradients and Hessians that have been calculated using a label-wise decomposable - * loss function in the list of lists (LIL) format. 
- */ - class SparseLabelWiseHistogram final - : public ClearableViewDecorator> { - public: - - /** - * @param numBins The number of bins in the histogram - * @param numCols The number of columns in the histogram - */ - SparseLabelWiseHistogram(uint32 numBins, uint32 numCols) - : ClearableViewDecorator>( - SparseLabelWiseHistogramView(numBins, numCols)) {} - - /** - * Adds all gradients and Hessians in a vector to a specific row of this histogram. The gradients and - * Hessians to be added are multiplied by a specific weight. - * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToRow(uint32 row, SparseSetView>::value_const_iterator begin, - SparseSetView>::value_const_iterator end, float64 weight) { - if (weight != 0) { - this->view.secondView[row] += weight; - addToSparseLabelWiseStatisticVector(this->view.firstView.values_begin(row), begin, end, weight); - } - } - }; - /** * Provides access to gradients and Hessians that have been calculated according to a differentiable loss * function that is applied label-wise and are stored using sparse data structures. 
@@ -72,9 +37,9 @@ namespace boosting { template class SparseLabelWiseStatistics final : public AbstractLabelWiseStatistics, ISparseLabelWiseLoss, - ISparseEvaluationMeasure, ISparseLabelWiseRuleEvaluationFactory> { + SparseLabelWiseStatisticMatrix, NumericSparseSetMatrix, + ISparseLabelWiseLoss, ISparseEvaluationMeasure, + ISparseLabelWiseRuleEvaluationFactory> { public: /** @@ -101,9 +66,9 @@ namespace boosting { std::unique_ptr statisticViewPtr, std::unique_ptr> scoreMatrixPtr) : AbstractLabelWiseStatistics, ISparseLabelWiseLoss, - ISparseEvaluationMeasure, ISparseLabelWiseRuleEvaluationFactory>( + SparseLabelWiseStatisticMatrix, NumericSparseSetMatrix, + ISparseLabelWiseLoss, ISparseEvaluationMeasure, + ISparseLabelWiseRuleEvaluationFactory>( std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, std::move(statisticViewPtr), std::move(scoreMatrixPtr)) {} diff --git a/cpp/subprojects/common/include/mlrl/common/statistics/statistics_weighted.hpp b/cpp/subprojects/common/include/mlrl/common/statistics/statistics_weighted.hpp index ceba9ded25..33385fb76c 100644 --- a/cpp/subprojects/common/include/mlrl/common/statistics/statistics_weighted.hpp +++ b/cpp/subprojects/common/include/mlrl/common/statistics/statistics_weighted.hpp @@ -3,9 +3,6 @@ */ #pragma once -#include "mlrl/common/binning/bin_index_vector_dense.hpp" -#include "mlrl/common/binning/bin_index_vector_dok.hpp" -#include "mlrl/common/statistics/histogram.hpp" #include "mlrl/common/statistics/statistics_weighted_immutable.hpp" /** @@ -66,26 +63,4 @@ class IWeightedStatistics : virtual public IImmutableWeightedStatistics { * @param statisticIndex The index of the statistic that should be removed */ virtual void removeCoveredStatistic(uint32 statisticIndex) = 0; - - /** - * Creates and returns a new histogram based on the statistics. 
- * - * @param binIndexVector A reference to an object of type `DenseBinIndexVector` that stores the indices of - * the bins, individual examples have been assigned to - * @param numBins The number of bins in the histogram - * @return An unique pointer to an object of type `IHistogram` that has been created - */ - virtual std::unique_ptr createHistogram(const DenseBinIndexVector& binIndexVector, - uint32 numBins) const = 0; - - /** - * Creates and returns a new histogram based on the statistics. - * - * @param binIndexVector A reference to an object of type `DokBinIndexVector` that stores the indices of the - * bins, individual examples have been assigned to - * @param numBins The number of bins in the histogram - * @return An unique pointer to an object of type `IHistogram` that has been created - */ - virtual std::unique_ptr createHistogram(const DokBinIndexVector& binIndexVector, - uint32 numBins) const = 0; }; diff --git a/cpp/subprojects/common/test/mlrl/common/input/statistics_weighted.hpp b/cpp/subprojects/common/test/mlrl/common/input/statistics_weighted.hpp index 593efe3aaa..8483e7400d 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/statistics_weighted.hpp +++ b/cpp/subprojects/common/test/mlrl/common/input/statistics_weighted.hpp @@ -46,14 +46,4 @@ class WeightedStatistics final : public IWeightedStatistics { void removeCoveredStatistic(uint32 statisticIndex) override { coveredStatistics.erase(statisticIndex); } - - std::unique_ptr createHistogram(const DenseBinIndexVector& binIndexVector, - uint32 numBins) const override { - throw std::runtime_error("not implemented"); - } - - std::unique_ptr createHistogram(const DokBinIndexVector& binIndexVector, - uint32 numBins) const override { - throw std::runtime_error("not implemented"); - } }; diff --git a/cpp/subprojects/seco/src/mlrl/seco/statistics/statistics_label_wise_common.hpp b/cpp/subprojects/seco/src/mlrl/seco/statistics/statistics_label_wise_common.hpp index ab800b7e77..d28ad43c28 100644 
--- a/cpp/subprojects/seco/src/mlrl/seco/statistics/statistics_label_wise_common.hpp +++ b/cpp/subprojects/seco/src/mlrl/seco/statistics/statistics_label_wise_common.hpp @@ -528,24 +528,6 @@ namespace seco { const PartialIndexVector& labelIndices) const override { return std::make_unique>(*this, labelIndices); } - - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DenseBinIndexVector& binIndexVector, - uint32 numBins) const override { - // TODO Support creation of histograms - return nullptr; - } - - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DokBinIndexVector& binIndexVector, - uint32 numBins) const override { - // TODO Support creation of histograms - return nullptr; - } }; template From a345fd5885c3697d2d7837b9ac34c92fabe7919c Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:41:41 +0100 Subject: [PATCH 23/53] Remove class IBinIndexVector. --- .../mlrl/common/binning/bin_index_vector.hpp | 39 ------------------- .../common/binning/bin_index_vector_dense.hpp | 24 ------------ .../common/binning/bin_index_vector_dok.hpp | 22 ----------- cpp/subprojects/common/meson.build | 2 - .../common/binning/bin_index_vector_dense.cpp | 12 ------ .../common/binning/bin_index_vector_dok.cpp | 12 ------ 6 files changed, 111 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector.hpp delete mode 100644 cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dense.hpp delete mode 100644 cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dok.hpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dense.cpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dok.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector.hpp b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector.hpp deleted 
file mode 100644 index d46e0ce232..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/data/types.hpp" - -#include - -/** - * Defines an interface for all classes that provide access to the indices of the bins, individual examples have been - * assigned to. - */ -class IBinIndexVector { - public: - - /** - * The index of the bin that contains sparse values. - */ - static const uint32 BIN_INDEX_SPARSE = std::numeric_limits::max(); - - virtual ~IBinIndexVector() {} - - /** - * Returns the index of the bin, the example at a specific index has been assigned to. - * - * @param exampleIndex The index of the example - * @return The index of the bin, the example has been assigned to - */ - virtual uint32 getBinIndex(uint32 exampleIndex) const = 0; - - /** - * Sets the index of the bin, the examples at a specific index should be assigned to. - * - * @param exampleIndex The index of the example - * @param binIndex The index of the bin, the example should be assigned to - */ - virtual void setBinIndex(uint32 exampleIndex, uint32 binIndex) = 0; -}; diff --git a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dense.hpp b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dense.hpp deleted file mode 100644 index 0814074748..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dense.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/binning/bin_index_vector.hpp" -#include "mlrl/common/data/vector_dense.hpp" - -/** - * Stores the indices of the bins, individual examples have been assigned to, using a C-contiguous array. 
- */ -class DenseBinIndexVector final : public DenseVectorDecorator>, - public IBinIndexVector { - public: - - /** - * @param numElements The number of elements in the vector - */ - DenseBinIndexVector(uint32 numElements); - - uint32 getBinIndex(uint32 exampleIndex) const override; - - void setBinIndex(uint32 exampleIndex, uint32 binIndex) override; -}; diff --git a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dok.hpp b/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dok.hpp deleted file mode 100644 index 203e8fb703..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/binning/bin_index_vector_dok.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/binning/bin_index_vector.hpp" -#include "mlrl/common/data/vector_dok.hpp" - -/** - * Stores the indices of the bins, individual examples have been assigned to, using the dictionaries of keys (DOK) - * format. - */ -class DokBinIndexVector final : public DokVectorDecorator>, - public IBinIndexVector { - public: - - DokBinIndexVector(); - - uint32 getBinIndex(uint32 exampleIndex) const override; - - void setBinIndex(uint32 exampleIndex, uint32 binIndex) override; -}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index c779adae6d..dc96b83644 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -2,8 +2,6 @@ project('common', 'cpp') # Source files source_files = [ - 'src/mlrl/common/binning/bin_index_vector_dense.cpp', - 'src/mlrl/common/binning/bin_index_vector_dok.cpp', 'src/mlrl/common/binning/bin_weight_vector.cpp', 'src/mlrl/common/data/vector_bit.cpp', 'src/mlrl/common/indices/index_iterator.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dense.cpp b/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dense.cpp deleted file mode 100644 index 56c4368e03..0000000000 
--- a/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dense.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "mlrl/common/binning/bin_index_vector_dense.hpp" - -DenseBinIndexVector::DenseBinIndexVector(uint32 numElements) - : DenseVectorDecorator>(AllocatedVector(numElements)) {} - -uint32 DenseBinIndexVector::getBinIndex(uint32 exampleIndex) const { - return (*this)[exampleIndex]; -} - -void DenseBinIndexVector::setBinIndex(uint32 exampleIndex, uint32 binIndex) { - (*this)[exampleIndex] = binIndex; -} diff --git a/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dok.cpp b/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dok.cpp deleted file mode 100644 index ca3456d799..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/binning/bin_index_vector_dok.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "mlrl/common/binning/bin_index_vector_dok.hpp" - -DokBinIndexVector::DokBinIndexVector() - : DokVectorDecorator>(AllocatedDokVector(BIN_INDEX_SPARSE)) {} - -uint32 DokBinIndexVector::getBinIndex(uint32 exampleIndex) const { - return this->view[exampleIndex]; -} - -void DokBinIndexVector::setBinIndex(uint32 exampleIndex, uint32 binIndex) { - this->view.set(exampleIndex, binIndex); -} From a32df53f129c337f122cf22d3eff4428392eb966 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:43:19 +0100 Subject: [PATCH 24/53] Remove class BinWeightVector. 
--- .../mlrl/common/binning/bin_weight_vector.hpp | 33 ------------------- cpp/subprojects/common/meson.build | 1 - .../mlrl/common/binning/bin_weight_vector.cpp | 12 ------- 3 files changed, 46 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/binning/bin_weight_vector.hpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/binning/bin_weight_vector.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/binning/bin_weight_vector.hpp b/cpp/subprojects/common/include/mlrl/common/binning/bin_weight_vector.hpp deleted file mode 100644 index ee71403bac..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/binning/bin_weight_vector.hpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/data/view_vector.hpp" - -/** - * A vector that stores the weights of individual bins, i.e., how many examples have been assigned to them. - */ -class BinWeightVector final : public ClearableViewDecorator>> { - public: - - /** - * @param numElements The number of elements in the vector - */ - BinWeightVector(uint32 numElements); - - /** - * Increases the weight at a specific position by one. - * - * @param pos The position - */ - void increaseWeight(uint32 pos); - - /** - * Returns whether the weight at a specific position is non-zero or not. 
- * - * @param pos The position - * @return True, if the weight is non-zero, false otherwise - */ - bool operator[](uint32 pos) const; -}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index dc96b83644..db9af784bb 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -2,7 +2,6 @@ project('common', 'cpp') # Source files source_files = [ - 'src/mlrl/common/binning/bin_weight_vector.cpp', 'src/mlrl/common/data/vector_bit.cpp', 'src/mlrl/common/indices/index_iterator.cpp', 'src/mlrl/common/indices/index_vector_complete.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/binning/bin_weight_vector.cpp b/cpp/subprojects/common/src/mlrl/common/binning/bin_weight_vector.cpp deleted file mode 100644 index fc69bbb851..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/binning/bin_weight_vector.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "mlrl/common/binning/bin_weight_vector.hpp" - -BinWeightVector::BinWeightVector(uint32 numElements) - : ClearableViewDecorator>>(AllocatedVector(numElements)) {} - -void BinWeightVector::increaseWeight(uint32 pos) { - this->view.array[pos] += 1; -} - -bool BinWeightVector::operator[](uint32 pos) const { - return this->view.array[pos] != 0; -} From b96390c4d3f5a521e0e8737b0e1d685b20a035f5 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:44:55 +0100 Subject: [PATCH 25/53] Remove functions "isOrdinal" and "isNominal" from class IFeatureVector. 
--- .../include/mlrl/common/input/feature_type.hpp | 16 ---------------- .../mlrl/common/input/feature_type_nominal.hpp | 4 ---- .../mlrl/common/input/feature_type_numerical.hpp | 4 ---- .../mlrl/common/input/feature_type_ordinal.hpp | 4 ---- .../input/feature_binning_equal_frequency.cpp | 8 -------- .../common/input/feature_binning_equal_width.cpp | 8 -------- .../mlrl/common/input/feature_type_nominal.cpp | 8 -------- .../mlrl/common/input/feature_type_numerical.cpp | 8 -------- .../mlrl/common/input/feature_type_ordinal.cpp | 8 -------- 9 files changed, 68 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_type.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_type.hpp index a34547034b..4c31aac94f 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_type.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_type.hpp @@ -15,22 +15,6 @@ class IFeatureType { virtual ~IFeatureType() {} - /** - * Returns whether the feature is ordinal or not. - * - * @return True, if the feature is ordinal, false otherwise - */ - // TODO Remove - virtual bool isOrdinal() const = 0; - - /** - * Returns whether the feature is nominal or not. - * - * @return True, if the feature is nominal, false otherwise - */ - // TODO Remove - virtual bool isNominal() const = 0; - /** * Creates and returns a feature vector that stores the feature values taken from a given Fortran-contiguous * matrix for a certain feature. 
diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_type_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_type_nominal.hpp index 378482f46d..a1e50c7076 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_type_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_type_nominal.hpp @@ -11,10 +11,6 @@ class NominalFeatureType final : public IFeatureType { public: - bool isOrdinal() const override; - - bool isNominal() const override; - std::unique_ptr createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const override; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_type_numerical.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_type_numerical.hpp index 49d314f646..d12e523543 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_type_numerical.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_type_numerical.hpp @@ -11,10 +11,6 @@ class NumericalFeatureType final : public IFeatureType { public: - bool isOrdinal() const override; - - bool isNominal() const override; - std::unique_ptr createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const override; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_type_ordinal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_type_ordinal.hpp index b94fae8ad9..8ed7dbfb1e 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_type_ordinal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_type_ordinal.hpp @@ -11,10 +11,6 @@ class OrdinalFeatureType final : public IFeatureType { public: - bool isOrdinal() const override; - - bool isNominal() const override; - std::unique_ptr createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const override; diff --git 
a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp index 8a1edf06d2..f74c9033fe 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_frequency.cpp @@ -142,14 +142,6 @@ class EqualFrequencyFeatureBinning final : public IFeatureBinning { EqualFrequencyFeatureBinning(float32 binRatio, uint32 minBins, uint32 maxBins) : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - bool isOrdinal() const override { - return false; - } - - bool isNominal() const override { - return false; - } - std::unique_ptr createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const override { // Create a numerical feature vector from the given feature matrix... diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp index 87c92cc3e5..c4e9175b9a 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp @@ -148,14 +148,6 @@ class EqualWidthFeatureBinning final : public IFeatureBinning { EqualWidthFeatureBinning(float32 binRatio, uint32 minBins, uint32 maxBins) : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - bool isOrdinal() const override { - return false; - } - - bool isNominal() const override { - return false; - } - std::unique_ptr createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const override { // Create a numerical feature vector from the given feature matrix... 
diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp index 8dc7974a12..1b39cdc7cc 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp @@ -117,14 +117,6 @@ static inline std::unique_ptr createFeatureVectorInternally( sparse); } -bool NominalFeatureType::isOrdinal() const { - return false; -} - -bool NominalFeatureType::isNominal() const { - return true; -} - std::unique_ptr NominalFeatureType::createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const { return createFeatureVectorInternally(featureIndex, featureMatrix); diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp index f99ed75a62..de6e1579dc 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp @@ -42,14 +42,6 @@ static inline std::unique_ptr createFeatureVectorInternally( return std::make_unique(); } -bool NumericalFeatureType::isOrdinal() const { - return false; -} - -bool NumericalFeatureType::isNominal() const { - return false; -} - std::unique_ptr NumericalFeatureType::createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const { return createFeatureVectorInternally(featureIndex, featureMatrix); diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp index 78853bec8c..7273405f87 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp @@ -130,14 +130,6 @@ static inline std::unique_ptr createFeatureVectorInternally( sparse); } 
-bool OrdinalFeatureType::isOrdinal() const { - return true; -} - -bool OrdinalFeatureType::isNominal() const { - return false; -} - std::unique_ptr OrdinalFeatureType::createFeatureVector( uint32 featureIndex, const FortranContiguousView& featureMatrix) const { return createFeatureVectorInternally(featureIndex, featureMatrix); From 1e76c174e8404dd0ebf1557f78f1fbf66d590432 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:46:49 +0100 Subject: [PATCH 26/53] Remove class IHistogram. --- .../mlrl/common/statistics/histogram.hpp | 37 ------------------- 1 file changed, 37 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/statistics/histogram.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/statistics/histogram.hpp b/cpp/subprojects/common/include/mlrl/common/statistics/histogram.hpp deleted file mode 100644 index 865ce0855b..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/statistics/histogram.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/statistics/statistics_weighted_immutable.hpp" - -/** - * Defines an interface for all classes that provide access to statistics that are organized as a histogram, i.e., where - * the statistics of multiple training examples are aggregated into the same bin. - */ -class IHistogram : virtual public IImmutableWeightedStatistics { - public: - - virtual ~IHistogram() override {} - - /** - * Sets all statistics in the histogram to zero. - */ - virtual void clear() = 0; - - /** - * Returns the weight of the bin at a specific index, i.e., the number of statistics that have been assigned to - * it. - * - * @param binIndex The index of the bin - * @return The weight of the bin - */ - virtual uint32 getBinWeight(uint32 binIndex) const = 0; - - /** - * Adds the statistic at a specific index to the corresponding bin. 
- * - * @param statisticIndex The index of the statistic - */ - virtual void addToBin(uint32 statisticIndex) = 0; -}; From 824e1131da170b4d31995e72ecd5b0b9f6c1d9e2 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:49:13 +0100 Subject: [PATCH 27/53] Remove class OldMissingFeatureVector. --- .../common/input/missing_feature_vector.hpp | 68 ------------------- cpp/subprojects/common/meson.build | 1 - .../common/input/missing_feature_vector.cpp | 28 -------- 3 files changed, 97 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/input/missing_feature_vector.hpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/input/missing_feature_vector.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/missing_feature_vector.hpp b/cpp/subprojects/common/include/mlrl/common/input/missing_feature_vector.hpp deleted file mode 100644 index 899d2d9cfa..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/input/missing_feature_vector.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/data/view_vector_dok_binary.hpp" - -#include - -/** - * An one-dimensional sparse vector that stores the indices of training examples with missing feature values using the - * dictionary of keys (DOK) format. - */ -// TODO Remove class -class OldMissingFeatureVector { - private: - - std::unique_ptr>> missingIndicesPtr_; - - public: - - OldMissingFeatureVector(); - - /** - * @param missingFeatureVector A reference to an object of type `OldMissingFeatureVector`, the missing indices - * should be taken from - */ - OldMissingFeatureVector(OldMissingFeatureVector& missingFeatureVector); - - /** - * An iterator that provides read-only access to the missing indices. - */ - typedef BinaryDokVector::index_const_iterator missing_index_const_iterator; - - /** - * Returns a `missing_index_const_iterator` to the beginning of the missing indices. 
- * - * @return A `missing_index_const_iterator` to the beginning - */ - missing_index_const_iterator missing_indices_cbegin() const; - - /** - * Returns a `missing_index_const_iterator` to the end of the missing indices. - * - * @return A `missing_index_const_iterator` to the end - */ - missing_index_const_iterator missing_indices_cend() const; - - /** - * Adds the index of an example with missing feature value. - * - * @param index The index to be added - */ - void addMissingIndex(uint32 index); - - /** - * Returns whether the example at a specific index has a missing feature value. - * - * @param index The index of the example to be checked - * @return True, if the example at the given index has a missing feature value, false otherwise - */ - bool isMissing(uint32 index) const; - - /** - * Removes all indices of examples with missing feature values. - */ - void clearMissingIndices(); -}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index db9af784bb..9c81a3e66c 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -24,7 +24,6 @@ source_files = [ 'src/mlrl/common/input/feature_vector_numerical.cpp', 'src/mlrl/common/input/label_matrix_c_contiguous.cpp', 'src/mlrl/common/input/label_matrix_csr.cpp', - 'src/mlrl/common/input/missing_feature_vector.cpp', 'src/mlrl/common/model/body_conjunctive.cpp', 'src/mlrl/common/model/body_empty.cpp', 'src/mlrl/common/model/condition_list.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/missing_feature_vector.cpp b/cpp/subprojects/common/src/mlrl/common/input/missing_feature_vector.cpp deleted file mode 100644 index 52b8f0ec80..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/input/missing_feature_vector.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "mlrl/common/input/missing_feature_vector.hpp" - -OldMissingFeatureVector::OldMissingFeatureVector() - : missingIndicesPtr_( - std::make_unique>>(AllocatedBinaryDokVector())) {} - 
-OldMissingFeatureVector::OldMissingFeatureVector(OldMissingFeatureVector& missingFeatureVector) - : missingIndicesPtr_(std::move(missingFeatureVector.missingIndicesPtr_)) {} - -OldMissingFeatureVector::missing_index_const_iterator OldMissingFeatureVector::missing_indices_cbegin() const { - return missingIndicesPtr_->getView().indices_cbegin(); -} - -OldMissingFeatureVector::missing_index_const_iterator OldMissingFeatureVector::missing_indices_cend() const { - return missingIndicesPtr_->getView().indices_cend(); -} - -void OldMissingFeatureVector::addMissingIndex(uint32 index) { - missingIndicesPtr_->getView().set(index, true); -} - -bool OldMissingFeatureVector::isMissing(uint32 index) const { - return (missingIndicesPtr_->getView())[index]; -} - -void OldMissingFeatureVector::clearMissingIndices() { - missingIndicesPtr_->clear(); -} From ceedf11a7366d320f7d06127bfd75b97ca8a1886 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:51:08 +0100 Subject: [PATCH 28/53] Remove unused class DokVectorDecorator. --- .../include/mlrl/common/data/vector_dok.hpp | 15 -- .../mlrl/common/data/view_vector_dok.hpp | 245 ------------------ 2 files changed, 260 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/data/vector_dok.hpp delete mode 100644 cpp/subprojects/common/include/mlrl/common/data/view_vector_dok.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/data/vector_dok.hpp b/cpp/subprojects/common/include/mlrl/common/data/vector_dok.hpp deleted file mode 100644 index b5831c3086..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/data/vector_dok.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/data/view_vector_dok.hpp" - -/** - * Provides read and write access via iterators to all non-zero values stored in vector in the dictionary of keys (DOK) - * format. 
- * - * @tparam Vector The type of the vector - */ -template -using DokVectorDecorator = IterableDokVectorDecorator>; diff --git a/cpp/subprojects/common/include/mlrl/common/data/view_vector_dok.hpp b/cpp/subprojects/common/include/mlrl/common/data/view_vector_dok.hpp deleted file mode 100644 index 4834395cb0..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/data/view_vector_dok.hpp +++ /dev/null @@ -1,245 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/data/view.hpp" - -#include - -/** - * A view that provides access to values stored in a sparse vector in the dictionary of keys (DOK) format. - * - * @tparam T The type of the valeus stored in the vector - */ -template -class MLRLCOMMON_API DokVector { - protected: - - /** - * A pointer to an object of type `std::unordered_map` that stores the indices and values of all non-zero - * elements in the view. - */ - std::unordered_map* values_; - - public: - - /** - * The value of sparse elements. - */ - const T sparseValue; - - /** - * @param values A pointer to an object of type `std::unordered_map` that stores the indices and values - * of all non-zero elements in the view - * @param sparseValue The value of sparse elements - */ - DokVector(std::unordered_map* values, T sparseValue) : values_(values), sparseValue(sparseValue) {} - - /** - * @param other A reference to an object of type `DokVector` that should be copied - */ - DokVector(const DokVector& other) : values_(other.values_), sparseValue(other.sparseValue) {} - - /** - * @param other A reference to an object of type `DokVector` that should be moved - */ - DokVector(DokVector&& other) : values_(other.values_), sparseValue(other.sparseValue) {} - - virtual ~DokVector() {} - - /** - * The type of the indices, the view provides access to. - */ - typedef uint32 index_type; - - /** - * The type of the values, the view provides access to. 
- */ - typedef T value_type; - - /** - * An iterator that provides read-only access to non-zero elements in the vector. - */ - typedef typename std::unordered_map::const_iterator const_iterator; - - /** - * An iterator that provides access to non-zero elements in the vector and allows to modify them. - */ - typedef typename std::unordered_map::iterator iterator; - - /** - * Returns a `const_iterator` to the beginning of the vector. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const { - return values_->cbegin(); - } - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const { - return values_->cend(); - } - - /** - * Returns an `iterator` to the beginning of the vector. - * - * @return An `iterator` to the beginning - */ - iterator begin() { - return values_->begin(); - } - - /** - * Returns an `iterator` to the end of the vector. - * - * @return An `iterator` to the end - */ - iterator end() { - return values_->end(); - } - - /** - * Returns the value of the element at a specific index. - * - * @param index The index of the element - * @return The value of the element at the given index - */ - const value_type& operator[](index_type index) const { - auto it = values_->find(index); - return it != values_->cend() ? it->second : sparseValue; - } - - /** - * Sets the value of the element at a specific position. - * - * @param index The index of the element - * @param value The value to be set - */ - void set(index_type index, value_type value) { - auto result = values_->emplace(index, value); - - if (!result.second) { - result.first->second = value; - } - } - - /** - * Sets all values stored in the view to zero. - */ - void clear() { - values_->clear(); - } -}; - -/** - * Allocates the memory for a view that provides access to values stored in a sparse vector in the dictionary of keys - * (DOK) format. 
- * - * @tparam Vector The type of the view - */ -template -class MLRLCOMMON_API DokVectorAllocator : public Vector { - public: - - /** - * @param sparseValue The value of sparse elements - */ - DokVectorAllocator(typename Vector::value_type sparseValue = 0) - : Vector(new std::unordered_map(), sparseValue) {} - - /** - * @param other A reference to an object of type `DokVectorAllocator` that should be copied - */ - DokVectorAllocator(const DokVectorAllocator& other) : Vector(other) { - throw std::runtime_error("Objects of type DokVectorAllocator cannot be copied"); - } - - /** - * @param other A reference to an object of type `DokVectorAllocator` that should be moved - */ - DokVectorAllocator(DokVectorAllocator&& other) : Vector(std::move(other)) { - other.values_ = nullptr; - } - - virtual ~DokVectorAllocator() override { - delete Vector::values_; - } -}; - -/** - * Allocates the memory, a `DokVector` provides access to. - * - * @tparam T The type of the values stored in the `DokVector` - */ -template -using AllocatedDokVector = DokVectorAllocator>; - -/** - * Provides read and write access via iterators to all non-zero values stored in a sparse vector in the dictionary of - * keys (DOK) format. - * - * @tparam Vector The type of the vector - */ -template -class MLRLCOMMON_API IterableDokVectorDecorator : public Vector { - public: - - /** - * An iterator that provides read-only access to non-zero elements in the vector. - */ - typedef typename Vector::view_type::const_iterator const_iterator; - - /** - * An iterator that provides access to non-zero elements in the vector and allows to modify them. - */ - typedef typename Vector::view_type::iterator iterator; - - /** - * @param view The view, the vector should be backed by - */ - IterableDokVectorDecorator(typename Vector::view_type&& view) : Vector(std::move(view)) {} - - virtual ~IterableDokVectorDecorator() override {} - - /** - * Returns a `const_iterator` to the beginning of the vector. 
- * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const { - return Vector::view.cbegin(); - } - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const { - return Vector::view.cend(); - } - - /** - * Returns an `iterator` to the beginning of the vector. - * - * @return An `iterator` to the beginning - */ - iterator begin() { - return Vector::view.begin(); - } - - /** - * Returns an `iterator` to the end of the vector. - * - * @return An `iterator` to the end - */ - iterator end() { - return Vector::view.end(); - } -}; From f34becb961841b5655a41ce5a9a96d30c3b52355 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:55:08 +0100 Subject: [PATCH 29/53] Remove "addToSubset" functions that take an argument of type SparseLabelWiseHistogramView from class SparseLabelWiseStatisticVector. --- .../vector_statistic_label_wise_sparse.hpp | 52 +------------------ .../vector_statistic_label_wise_sparse.cpp | 44 ---------------- 2 files changed, 1 insertion(+), 95 deletions(-) diff --git a/cpp/subprojects/boosting/include/mlrl/boosting/data/vector_statistic_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/mlrl/boosting/data/vector_statistic_label_wise_sparse.hpp index 06e983d3b7..a8bbe8b601 100644 --- a/cpp/subprojects/boosting/include/mlrl/boosting/data/vector_statistic_label_wise_sparse.hpp +++ b/cpp/subprojects/boosting/include/mlrl/boosting/data/vector_statistic_label_wise_sparse.hpp @@ -3,7 +3,7 @@ */ #pragma once -#include "mlrl/boosting/data/view_histogram_label_wise_sparse.hpp" +#include "mlrl/common/data/triple.hpp" #include "mlrl/common/data/tuple.hpp" #include "mlrl/common/data/view_matrix_sparse_set.hpp" #include "mlrl/common/indices/index_vector_complete.hpp" @@ -266,56 +266,6 @@ namespace boosting { void addToSubset(const SparseSetView>& view, uint32 row, const PartialIndexVector& indices, float64 weight); - /** - * Adds 
certain gradients and Hessians in a single row of a `SparseLabelWiseHistogramView`, whose positions - * are given as a `CompleteIndexVector`, to this vector. - * - * @param view A reference to an object of type `SparseLabelWiseHistogramView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - */ - void addToSubset(const SparseLabelWiseHistogramView& view, uint32 row, const CompleteIndexVector& indices); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseHistogramView`, whose positions - * are given as a `PartialIndexVector`, to this vector. - * - * @param view A reference to an object of type `SparseLabelWiseHistogramView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - */ - void addToSubset(const SparseLabelWiseHistogramView& view, uint32 row, const PartialIndexVector& indices); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseHistogramView`, whose positions - * are given as a `CompleteIndexVector`, to this vector. The gradients and Hessians to be added are - * multiplied by a specific weight. 
- * - * @param view A reference to an object of type `SparseLabelWiseHistogramView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(const SparseLabelWiseHistogramView& view, uint32 row, const CompleteIndexVector& indices, - float64 weight); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseHistogramView`, whose positions - * are given as a `PartialIndexVector`, to this vector. The gradients and Hessians to be added are - * multiplied by a specific weight. - * - * @param view A reference to an object of type `SparseLabelWiseHistogramView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(const SparseLabelWiseHistogramView& view, uint32 row, const PartialIndexVector& indices, - float64 weight); - /** * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/data/vector_statistic_label_wise_sparse.cpp b/cpp/subprojects/boosting/src/mlrl/boosting/data/vector_statistic_label_wise_sparse.cpp index cee5f6c3e9..22b83e0297 100644 --- a/cpp/subprojects/boosting/src/mlrl/boosting/data/vector_statistic_label_wise_sparse.cpp +++ b/cpp/subprojects/boosting/src/mlrl/boosting/data/vector_statistic_label_wise_sparse.cpp @@ -164,50 +164,6 @@ namespace boosting { } } - void 
SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseHistogramView& view, uint32 row, - const CompleteIndexVector& indices) { - SparseLabelWiseHistogramView::weight_const_iterator weightIterator = view.weights_cbegin(); - float64 binWeight = weightIterator[row]; - - if (binWeight != 0) { - sumOfWeights_ += binWeight; - addToView(this->view.begin(), view.values_cbegin(row), this->getNumElements()); - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseHistogramView& view, uint32 row, - const PartialIndexVector& indices) { - SparseLabelWiseHistogramView::weight_const_iterator weightIterator = view.weights_cbegin(); - float64 binWeight = weightIterator[row]; - - if (binWeight != 0) { - sumOfWeights_ += binWeight; - addToView(this->view.begin(), view.values_cbegin(row), indices.cbegin(), indices.getNumElements()); - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseHistogramView& view, uint32 row, - const CompleteIndexVector& indices, float64 weight) { - SparseLabelWiseHistogramView::weight_const_iterator weightIterator = view.weights_cbegin(); - float64 binWeight = weightIterator[row] * weight; - - if (binWeight != 0) { - sumOfWeights_ += binWeight; - addToView(this->view.begin(), view.values_cbegin(row), this->getNumElements(), weight); - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseHistogramView& view, uint32 row, - const PartialIndexVector& indices, float64 weight) { - SparseLabelWiseHistogramView::weight_const_iterator weightIterator = view.weights_cbegin(); - float64 binWeight = weightIterator[row] * weight; - - if (binWeight != 0) { - sumOfWeights_ += binWeight; - addToView(this->view.begin(), view.values_cbegin(row), indices.cbegin(), indices.getNumElements(), weight); - } - } - void SparseLabelWiseStatisticVector::difference(const SparseLabelWiseStatisticVector& first, const CompleteIndexVector& firstIndices, const SparseLabelWiseStatisticVector& second) { 
From b4b2d1cd42f4370258690b3b26eaac54cdf0042a Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 00:57:15 +0100 Subject: [PATCH 30/53] Remove class SparseLabelWiseHistogramView. --- .../data/view_histogram_label_wise_sparse.hpp | 78 ------------------- cpp/subprojects/boosting/meson.build | 1 - .../data/view_histogram_label_wise_sparse.cpp | 28 ------- 3 files changed, 107 deletions(-) delete mode 100644 cpp/subprojects/boosting/include/mlrl/boosting/data/view_histogram_label_wise_sparse.hpp delete mode 100644 cpp/subprojects/boosting/src/mlrl/boosting/data/view_histogram_label_wise_sparse.cpp diff --git a/cpp/subprojects/boosting/include/mlrl/boosting/data/view_histogram_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/mlrl/boosting/data/view_histogram_label_wise_sparse.hpp deleted file mode 100644 index c7e26a8d8b..0000000000 --- a/cpp/subprojects/boosting/include/mlrl/boosting/data/view_histogram_label_wise_sparse.hpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/boosting/util/dll_exports.hpp" -#include "mlrl/common/data/triple.hpp" -#include "mlrl/common/data/view_matrix_c_contiguous.hpp" -#include "mlrl/common/data/view_matrix_composite.hpp" -#include "mlrl/common/data/view_vector.hpp" - -namespace boosting { - - /** - * Implements row-wise read and write access to the gradients and Hessians that have been calculated using a - * label-wise decomposable loss function and are stored in a pre-allocated histogram in the list of lists (LIL) - * format. 
- */ - // TODO Remove - class MLRLBOOSTING_API SparseLabelWiseHistogramView - : public CompositeMatrix>, AllocatedVector> { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - */ - SparseLabelWiseHistogramView(uint32 numRows, uint32 numCols); - - /** - * @param other A reference to an object of type `SparseLabelWiseHistogramView` that should be copied - */ - SparseLabelWiseHistogramView(SparseLabelWiseHistogramView&& other); - - virtual ~SparseLabelWiseHistogramView() override {} - - /** - * An iterator that provides read-only access to the gradients and Hessians. - */ - typedef typename AllocatedCContiguousView>::value_const_iterator value_const_iterator; - - /** - * An iterator that provides read-only access to the weights that correspond to individual bins. - */ - typedef typename AllocatedVector::const_iterator weight_const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the gradients and Hessians at a specific row. - * - * @param row The index of the row - * @return A `const_iterator` to the beginning of the row - */ - value_const_iterator values_cbegin(uint32 row) const; - - /** - * Returns a `const_iterator` to the end of the gradients and Hessians at a specific row. - * - * @param row The index of the row - * @return A `const_iterator` to the end of the row - */ - value_const_iterator values_cend(uint32 row) const; - - /** - * Returns a `weight_const_iterator` to the beginning of the weights that correspond to individual bins. - * - * @return A `weight_const_iterator` to the beginning - */ - weight_const_iterator weights_cbegin() const; - - /** - * Returns a `weight_const_iterator` to the end of the weights that correspond to individual bins. 
- * - * @return A `weight_const_iterator` to the end - */ - weight_const_iterator weights_cend() const; - }; - -} diff --git a/cpp/subprojects/boosting/meson.build b/cpp/subprojects/boosting/meson.build index a6cc02889d..7dc83cd983 100644 --- a/cpp/subprojects/boosting/meson.build +++ b/cpp/subprojects/boosting/meson.build @@ -10,7 +10,6 @@ source_files = [ 'src/mlrl/boosting/data/vector_statistic_example_wise_dense.cpp', 'src/mlrl/boosting/data/vector_statistic_label_wise_dense.cpp', 'src/mlrl/boosting/data/vector_statistic_label_wise_sparse.cpp', - 'src/mlrl/boosting/data/view_histogram_label_wise_sparse.cpp', 'src/mlrl/boosting/data/view_statistic_example_wise_dense.cpp', 'src/mlrl/boosting/input/feature_binning_auto.cpp', 'src/mlrl/boosting/losses/loss_example_wise_logistic.cpp', diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/data/view_histogram_label_wise_sparse.cpp b/cpp/subprojects/boosting/src/mlrl/boosting/data/view_histogram_label_wise_sparse.cpp deleted file mode 100644 index 8f483cad1a..0000000000 --- a/cpp/subprojects/boosting/src/mlrl/boosting/data/view_histogram_label_wise_sparse.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "mlrl/boosting/data/view_histogram_label_wise_sparse.hpp" - -namespace boosting { - - SparseLabelWiseHistogramView::SparseLabelWiseHistogramView(uint32 numRows, uint32 numCols) - : CompositeMatrix>, AllocatedVector>( - AllocatedCContiguousView>(numRows, numCols), AllocatedVector(numRows, true), numRows, - numCols) {} - - SparseLabelWiseHistogramView::SparseLabelWiseHistogramView(SparseLabelWiseHistogramView&& other) - : CompositeMatrix>, AllocatedVector>(std::move(other)) {} - - SparseLabelWiseHistogramView::value_const_iterator SparseLabelWiseHistogramView::values_cbegin(uint32 row) const { - return CompositeView::firstView.values_cbegin(row); - } - - SparseLabelWiseHistogramView::value_const_iterator SparseLabelWiseHistogramView::values_cend(uint32 row) const { - return CompositeView::firstView.values_cend(row); - } - - 
SparseLabelWiseHistogramView::weight_const_iterator SparseLabelWiseHistogramView::weights_cbegin() const { - return CompositeView::secondView.cbegin(); - } - - SparseLabelWiseHistogramView::weight_const_iterator SparseLabelWiseHistogramView::weights_cend() const { - return CompositeView::secondView.cend(); - } -} From b290af8b6ed192d009d82c08b073fddf05b0f8a0 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 01:02:17 +0100 Subject: [PATCH 31/53] Remove template arguments from class IRuleRefinementCallback. --- .../rule_refinement_callback.hpp | 36 +++++++++---------- .../rule_refinement/rule_refinement_exact.hpp | 6 ++-- .../rule_refinement/rule_refinement_exact.cpp | 13 ++++--- .../common/thresholds/thresholds_exact.cpp | 2 +- 4 files changed, 27 insertions(+), 30 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_callback.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_callback.hpp index 3f8bb1e8d0..2ca3b40b71 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_callback.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_callback.hpp @@ -3,16 +3,15 @@ */ #pragma once +#include "mlrl/common/input/feature_vector.hpp" +#include "mlrl/common/statistics/statistics_weighted_immutable.hpp" + /** * Defines an interface for callbacks that may be invoked by subclasses of the the class `IRuleRefinement` in order to - * retrieve the information that is required to search for potential refinements. It consists of statistics, as well as - * a vector that allows to determine the thresholds that may be used by potential conditions. - * - * @tparam Statistics The type of the statistics, - * @tparam Vector The type of the vector that is returned by the callback + * retrieve the information that is required to search for potential refinements. 
It consists of + * `IImmutableWeightedStatistics`, as well as an `IFeatureVector` that allows to determine the thresholds that may be + * used by potential conditions. */ -// TODO Remove template argument Vector and use IFeatureVector instead -template class IRuleRefinementCallback { public: @@ -23,24 +22,25 @@ class IRuleRefinementCallback { public: /** - * @param statistics A reference to an object of template type `Statistics` that should be used to - * search for potential refinements - * @param vector A reference to an object of template type `Vector` that should be used to search + * @param statistics A reference to an object of type `IImmutableWeightedStatistics` that should be + * used to search for potential refinements + * @param featureVector A reference to an object of type `IFeatureVector` that should be used to search * for potential refinements */ - Result(const Statistics& statistics, const Vector& vector) : statistics(statistics), vector(vector) {} + Result(const IImmutableWeightedStatistics& statistics, const IFeatureVector& featureVector) + : statistics(statistics), featureVector(featureVector) {} /** - * A reference to an object of template type `Statistics` that should be used to search for potential - * refinements. + * A reference to an object of type `IImmutableWeightedStatistics` that should be used to search for + * potential refinements. */ - const Statistics& statistics; + const IImmutableWeightedStatistics& statistics; /** - * A reference to an object of template type `Vector` that should be used to search for potential + * A reference to an object of type `IFeatureVector` that should be used to search for potential * refinements. */ - const Vector& vector; + const IFeatureVector& featureVector; }; virtual ~IRuleRefinementCallback() {} @@ -48,8 +48,8 @@ class IRuleRefinementCallback { /** * Invokes the callback and returns its result. 
* - * @return An object of type `Result` that stores references to the statistics and the vector that may be used - * to search for potential refinements + * @return An object of type `Result` that stores references to the statistics and the feature vector that may + * be used to search for potential refinements */ virtual Result get() = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp index d95bb67a12..bd608e489d 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp @@ -26,9 +26,7 @@ class ExactRuleRefinement final : public IRuleRefinement { const uint32 numExamplesWithNonZeroWeights_; - typedef IRuleRefinementCallback Callback; - - const std::unique_ptr callbackPtr_; + const std::unique_ptr callbackPtr_; public: @@ -44,7 +42,7 @@ class ExactRuleRefinement final : public IRuleRefinement { * potential refinements */ ExactRuleRefinement(const IndexVector& labelIndices, uint32 featureIndex, uint32 numExamplesWithNonZeroWeights, - std::unique_ptr callbackPtr); + std::unique_ptr callbackPtr); void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) override; diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp index ae81e9a5a3..a3f35fd3ce 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp @@ -3,14 +3,13 @@ #include "mlrl/common/rule_refinement/rule_refinement_search.hpp" template -static inline void findRefinementInternally( - const IndexVector& labelIndices, uint32 featureIndex, uint32 numExamplesWithNonZeroWeights, - 
IRuleRefinementCallback& callback, Comparator& comparator, - uint32 minCoverage) { +static inline void findRefinementInternally(const IndexVector& labelIndices, uint32 featureIndex, + uint32 numExamplesWithNonZeroWeights, IRuleRefinementCallback& callback, + Comparator& comparator, uint32 minCoverage) { // Invoke the callback... - IRuleRefinementCallback::Result callbackResult = callback.get(); + IRuleRefinementCallback::Result callbackResult = callback.get(); const IImmutableWeightedStatistics& statistics = callbackResult.statistics; - const IFeatureVector& featureVector = callbackResult.vector; + const IFeatureVector& featureVector = callbackResult.featureVector; // Create a new, empty subset of the statistics... std::unique_ptr statisticsSubsetPtr = statistics.createSubset(labelIndices); @@ -25,7 +24,7 @@ static inline void findRefinementInternally( template ExactRuleRefinement::ExactRuleRefinement(const IndexVector& labelIndices, uint32 featureIndex, uint32 numExamplesWithNonZeroWeights, - std::unique_ptr callbackPtr) + std::unique_ptr callbackPtr) : labelIndices_(labelIndices), featureIndex_(featureIndex), numExamplesWithNonZeroWeights_(numExamplesWithNonZeroWeights), callbackPtr_(std::move(callbackPtr)) {} diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 9862e23083..4c1942ee17 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -175,7 +175,7 @@ class ExactThresholds final : public IThresholds { * A callback that allows to retrieve feature vectors. If available, the feature vectors are retrieved * from the cache. Otherwise, they are fetched from the feature matrix. 
*/ - class Callback final : public IRuleRefinementCallback { + class Callback final : public IRuleRefinementCallback { private: ThresholdsSubset& thresholdsSubset_; From 2a046eeb00987ad67eaafe1f9dccc888d676903b Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 19 Feb 2024 01:07:57 +0100 Subject: [PATCH 32/53] Remove class CoverageSet. --- .../mlrl/common/thresholds/coverage_set.hpp | 63 ---------- .../common/thresholds/thresholds_subset.hpp | 71 ----------- cpp/subprojects/common/meson.build | 1 - .../mlrl/common/thresholds/coverage_set.cpp | 52 -------- .../common/thresholds/thresholds_exact.cpp | 114 ------------------ 5 files changed, 301 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/thresholds/coverage_set.hpp delete mode 100644 cpp/subprojects/common/src/mlrl/common/thresholds/coverage_set.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_set.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_set.hpp deleted file mode 100644 index b642ef6fc2..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_set.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/data/vector_dense.hpp" -#include "mlrl/common/thresholds/coverage_state.hpp" - -/** - * Provides access to the indices of the examples that are covered by a rule. The indices of the covered examples are - * stored in a C-contiguous array that may be updated when the rule is refined. - */ -class CoverageSet final : public DenseVectorDecorator>, - public ICoverageState { - private: - - uint32 numCovered_; - - public: - - /** - * @param numElements The total number of examples - */ - CoverageSet(uint32 numElements); - - /** - * @param other A reference to an object of type `CoverageSet` to be copied - */ - CoverageSet(const CoverageSet& other); - - /** - * Returns the number of covered examples. 
- * - * @return The number of covered examples - */ - uint32 getNumCovered() const; - - /** - * Sets the number of covered examples. - * - * @param numCovered The number of covered examples to be set - */ - void setNumCovered(uint32 numCovered); - - /** - * Resets the number of covered examples and their indices such that all examples are marked as covered. - */ - void reset(); - - std::unique_ptr copy() const override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const IPrediction& head) const override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const IPrediction& head) const override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - IPrediction& head) const override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - IPrediction& head) const override; -}; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp index c1c4c41fb8..5083f74eb1 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp @@ -11,7 +11,6 @@ #include "mlrl/common/sampling/partition_bi.hpp" #include "mlrl/common/sampling/partition_single.hpp" #include "mlrl/common/thresholds/coverage_mask.hpp" -#include "mlrl/common/thresholds/coverage_set.hpp" #include @@ -117,44 +116,6 @@ class IThresholdsSubset { virtual Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageState, const IPrediction& head) const = 0; - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered according to a given 
object of type - * `CoverageSet`. - * - * For calculating the quality, only examples that belong to the training set and are not included in the - * current sub-sample, i.e., only examples with zero weights, are considered. - * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageSet` that keeps track of the examples that are - * covered by the rule - * @param head A reference to an object of type `IPrediction` that stores the scores that are predicted - * by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageSet& coverageState, - const IPrediction& head) const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered according to a given object of type - * `CoverageSet`. - * - * For calculating the quality, only examples that belong to the training set and are not included in the - * current sub-sample, i.e., only examples with zero weights, are considered. 
- * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageSet` that keeps track of the examples that are - * covered by the rule - * @param head A reference to an object of type `IPrediction` that stores the scores that are predicted - * by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(BiPartition& partition, const CoverageSet& coverageState, - const IPrediction& head) const = 0; - /** * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as * covered according to a given object of type `CoverageMask`. @@ -187,38 +148,6 @@ class IThresholdsSubset { virtual void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageState, IPrediction& head) const = 0; - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `CoverageSet`. - * - * When calculating the updated prediction, the weights of the individual training examples are ignored and - * equally distributed weights are assumed instead. 
- * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that - * are covered by the rule - * @param head A reference to an object of type `IPrediction` to be updated - */ - virtual void recalculatePrediction(const SinglePartition& partition, const CoverageSet& coverageState, - IPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `CoverageSet`. - * - * When calculating the updated prediction, the weights of the individual training examples are ignored and - * equally distributed weights are assumed instead. - * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageSet` that keeps track of the examples that are - * covered by the rule - * @param head A reference to an object of type `IPrediction` to be updated - */ - virtual void recalculatePrediction(BiPartition& partition, const CoverageSet& coverageState, - IPrediction& head) const = 0; - /** * Updates the statistics that correspond to the current subset based on the prediction of a rule. 
* diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 9c81a3e66c..d55aa85f18 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -88,7 +88,6 @@ source_files = [ 'src/mlrl/common/stopping/stopping_criterion_size.cpp', 'src/mlrl/common/stopping/stopping_criterion_time.cpp', 'src/mlrl/common/thresholds/coverage_mask.cpp', - 'src/mlrl/common/thresholds/coverage_set.cpp', 'src/mlrl/common/thresholds/thresholds_exact.cpp', 'src/mlrl/common/info.cpp', 'src/mlrl/common/learner.cpp' diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_set.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_set.cpp deleted file mode 100644 index e216609395..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_set.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "mlrl/common/thresholds/coverage_set.hpp" - -#include "mlrl/common/rule_refinement/prediction.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" - -CoverageSet::CoverageSet(uint32 numElements) - : DenseVectorDecorator>(AllocatedVector(numElements)), numCovered_(numElements) { - setViewToIncreasingValues(this->begin(), numElements, 0, 1); -} - -CoverageSet::CoverageSet(const CoverageSet& other) - : DenseVectorDecorator>(AllocatedVector(other.getNumElements())), - numCovered_(other.numCovered_) { - copyView(other.cbegin(), this->begin(), numCovered_); -} - -uint32 CoverageSet::getNumCovered() const { - return numCovered_; -} - -void CoverageSet::setNumCovered(uint32 numCovered) { - numCovered_ = numCovered; -} - -void CoverageSet::reset() { - numCovered_ = this->getNumElements(); - setViewToIncreasingValues(this->begin(), this->getNumElements(), 0, 1); -} - -std::unique_ptr CoverageSet::copy() const { - return std::make_unique(*this); -} - -Quality CoverageSet::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const IPrediction& head) const { - 
return thresholdsSubset.evaluateOutOfSample(partition, *this, head); -} - -Quality CoverageSet::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const IPrediction& head) const { - return thresholdsSubset.evaluateOutOfSample(partition, *this, head); -} - -void CoverageSet::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - IPrediction& head) const { - thresholdsSubset.recalculatePrediction(partition, *this, head); -} - -void CoverageSet::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - IPrediction& head) const { - thresholdsSubset.recalculatePrediction(partition, *this, head); -} diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 4c1942ee17..cbc00c2ac0 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -1,6 +1,5 @@ #include "mlrl/common/thresholds/thresholds_exact.hpp" -#include "mlrl/common/iterator/binary_forward_iterator.hpp" #include "mlrl/common/rule_refinement/rule_refinement_exact.hpp" #include "mlrl/common/util/openmp.hpp" @@ -25,53 +24,6 @@ static inline Quality evaluateOutOfSampleInternally(IndexIterator indexIterator, return statisticsSubsetPtr->calculateScores(); } -template -static inline Quality evaluateOutOfSampleInternally(const WeightVector& weights, const CoverageSet& coverageSet, - const IStatistics& statistics, const IPrediction& prediction) { - OutOfSampleWeightVector outOfSampleWeights(weights); - std::unique_ptr statisticsSubsetPtr = - prediction.createStatisticsSubset(statistics, outOfSampleWeights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = 
coverageSetIterator[i]; - - if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - } - - return statisticsSubsetPtr->calculateScores(); -} - -template -static inline Quality evaluateOutOfSampleInternally(const WeightVector& weights, const CoverageSet& coverageSet, - BiPartition& partition, const IStatistics& statistics, - const IPrediction& prediction) { - OutOfSampleWeightVector outOfSampleWeights(weights); - std::unique_ptr statisticsSubsetPtr = - prediction.createStatisticsSubset(statistics, outOfSampleWeights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - partition.sortSecond(); - auto holdoutSetIterator = make_binary_forward_iterator(partition.second_cbegin(), partition.second_cend()); - uint32 previousExampleIndex = 0; - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - std::advance(holdoutSetIterator, exampleIndex - previousExampleIndex); - - if (*holdoutSetIterator && statisticsSubsetPtr->hasNonZeroWeight(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - previousExampleIndex = exampleIndex; - } - - return statisticsSubsetPtr->calculateScores(); -} - template static inline void recalculatePredictionInternally(IndexIterator indexIterator, uint32 numExamples, const CoverageMask& coverageMask, const IStatistics& statistics, @@ -91,49 +43,6 @@ static inline void recalculatePredictionInternally(IndexIterator indexIterator, scoreVector.updatePrediction(prediction); } -static inline void recalculatePredictionInternally(const CoverageSet& coverageSet, const IStatistics& statistics, - IPrediction& prediction) { - uint32 numStatistics = statistics.getNumStatistics(); - EqualWeightVector weights(numStatistics); - std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); - uint32 numCovered = coverageSet.getNumCovered(); - 
CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - scoreVector.updatePrediction(prediction); -} - -static inline void recalculatePredictionInternally(const CoverageSet& coverageSet, BiPartition& partition, - const IStatistics& statistics, IPrediction& prediction) { - uint32 numStatistics = statistics.getNumStatistics(); - EqualWeightVector weights(numStatistics); - std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - partition.sortSecond(); - auto holdoutSetIterator = make_binary_forward_iterator(partition.second_cbegin(), partition.second_cend()); - uint32 previousExampleIndex = 0; - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - std::advance(holdoutSetIterator, exampleIndex - previousExampleIndex); - - if (*holdoutSetIterator) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - previousExampleIndex = exampleIndex; - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - scoreVector.updatePrediction(prediction); -} - /** * An entry that is stored in a cache and contains an unique pointer to a feature vector. The field `numConditions` * specifies how many conditions the rule contained when the vector was updated for the last time. 
It may be used to @@ -361,18 +270,6 @@ class ExactThresholds final : public IThresholds { thresholds_.statisticsProvider_.get(), head); } - Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageSet& coverageState, - const IPrediction& head) const override { - return evaluateOutOfSampleInternally(weights_, coverageState, thresholds_.statisticsProvider_.get(), - head); - } - - Quality evaluateOutOfSample(BiPartition& partition, const CoverageSet& coverageState, - const IPrediction& head) const override { - return evaluateOutOfSampleInternally(weights_, coverageState, partition, - thresholds_.statisticsProvider_.get(), head); - } - void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageState, IPrediction& head) const override { recalculatePredictionInternally( @@ -387,17 +284,6 @@ class ExactThresholds final : public IThresholds { thresholds_.statisticsProvider_.get(), head); } - void recalculatePrediction(const SinglePartition& partition, const CoverageSet& coverageState, - IPrediction& head) const override { - recalculatePredictionInternally(coverageState, thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(BiPartition& partition, const CoverageSet& coverageState, - IPrediction& head) const override { - recalculatePredictionInternally(coverageState, partition, thresholds_.statisticsProvider_.get(), - head); - } - void applyPrediction(const IPrediction& prediction) override { IStatistics& statistics = thresholds_.statisticsProvider_.get(); uint32 numStatistics = statistics.getNumStatistics(); From e54b16620d4726de4934821b20d5824c2eb9d812 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 20 Feb 2024 23:20:34 +0100 Subject: [PATCH 33/53] Remove class ICoverageState. 
--- .../mlrl/common/rule_pruning/rule_pruning.hpp | 6 +- .../mlrl/common/sampling/partition.hpp | 18 ++-- .../mlrl/common/sampling/partition_bi.hpp | 4 +- .../mlrl/common/sampling/partition_single.hpp | 4 +- .../mlrl/common/thresholds/coverage_mask.hpp | 28 +++--- .../mlrl/common/thresholds/coverage_state.hpp | 86 ------------------- .../common/thresholds/thresholds_subset.hpp | 26 +++--- .../rule_induction/rule_induction_common.hpp | 8 +- .../common/rule_pruning/rule_pruning_irep.cpp | 20 ++--- .../common/rule_pruning/rule_pruning_no.cpp | 4 +- .../src/mlrl/common/sampling/partition_bi.cpp | 8 +- .../mlrl/common/sampling/partition_single.cpp | 10 +-- .../mlrl/common/thresholds/coverage_mask.cpp | 24 ------ .../common/thresholds/thresholds_exact.cpp | 18 ++-- 14 files changed, 73 insertions(+), 191 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/thresholds/coverage_state.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp b/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp index 0a93e27646..c82ec0d9af 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp @@ -32,12 +32,12 @@ class IRulePruning { * existing rule * @param head A reference to an object of type `IPrediction` that stores the scores that are * predicted by the existing rule - * @return An unique pointer to an object of type `ICoverageState` that keeps track of the + * @return An unique pointer to an object of type `CoverageMask` that keeps track of the * examples that are covered by the pruned rule or a null pointer if the rule was not * pruned */ - virtual std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, - ConditionList& conditions, const IPrediction& head) const = 0; + virtual std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, + ConditionList& 
conditions, const IPrediction& head) const = 0; }; /** diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp index 2c0a9ae57f..cb2e283db1 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp @@ -3,6 +3,7 @@ */ #pragma once +#include "mlrl/common/thresholds/coverage_mask.hpp" #include "mlrl/common/util/quality.hpp" #include @@ -15,7 +16,6 @@ class IInstanceSamplingFactory; class IRowWiseLabelMatrix; class IStatistics; class IThresholdsSubset; -class ICoverageState; class IPrediction; class IMarginalProbabilityCalibrationModel; class IMarginalProbabilityCalibrator; @@ -59,31 +59,31 @@ class IPartition { /** * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples * that do not belong to the current sample and are marked as covered according to a given object of type - * `ICoverageState`. + * `CoverageMask`. 
* * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to * evaluate the prediction - * @param coverageState A reference to an object of type `ICoverageState` that keeps track of the examples + * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples * that are covered by the rule * @param head A reference to an object of type `IPrediction` that stores the scores that are * predicted by the rule * @return An object of type `Quality` that stores the calculated quality */ - virtual Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, - const ICoverageState& coverageState, const IPrediction& head) = 0; + virtual Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + const IPrediction& head) = 0; /** * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `ICoverageState`. + * covered according to a given object of type `CoverageMask`. * * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to * recalculate the prediction - * @param coverageState A reference to an object of type `ICoverageState` that keeps track of the examples + * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples * that are covered by the rule * @param head A reference to an object of type `IPrediction` to be updated */ - virtual void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, - const ICoverageState& coverageState, IPrediction& head) = 0; + virtual void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + IPrediction& head) = 0; /** * Fits and returns a model for the calibration of marginal probabilities, based on the type of this partition. 
diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/partition_bi.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/partition_bi.hpp index 2b11c52161..45edcaa216 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/partition_bi.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/partition_bi.hpp @@ -126,10 +126,10 @@ class BiPartition final : public VectorDecorator>, const IRowWiseLabelMatrix& labelMatrix, IStatistics& statistics) override; - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, + Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, const IPrediction& head) override; - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, + void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, IPrediction& head) override; std::unique_ptr fitMarginalProbabilityCalibrationModel( diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/partition_single.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/partition_single.hpp index 9647f152a0..95717d3db0 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/partition_single.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/partition_single.hpp @@ -54,10 +54,10 @@ class SinglePartition final : public IPartition { const IRowWiseLabelMatrix& labelMatrix, IStatistics& statistics) override; - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, + Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, const IPrediction& head) override; - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, + void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& 
coverageMask, IPrediction& head) override; std::unique_ptr fitMarginalProbabilityCalibrationModel( diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp index 534a0dff2c..00cf603bd6 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp @@ -4,16 +4,22 @@ #pragma once #include "mlrl/common/data/vector_dense.hpp" -#include "mlrl/common/thresholds/coverage_state.hpp" +#include "mlrl/common/util/quality.hpp" + +#include + +// Forward declarations +class IThresholdsSubset; +class SinglePartition; +class BiPartition; +class IPrediction; /** * Allows to check whether individual examples are covered by a rule or not. For each example, an integer is stored in a * C-contiguous array that may be updated when the rule is refined. If the value that corresponds to a certain example * is equal to the "indicator value", it is considered to be covered. 
*/ -// TODO: Delete base class and move into directory "data" -class CoverageMask final : public DenseVectorDecorator>, - public ICoverageState { +class CoverageMask final : public DenseVectorDecorator> { private: uint32 indicatorValue_; @@ -56,18 +62,4 @@ class CoverageMask final : public DenseVectorDecorator>, * @return True, if the example at the given index is covered, false otherwise */ bool isCovered(uint32 pos) const; - - std::unique_ptr copy() const override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const IPrediction& head) const override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const IPrediction& head) const override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - IPrediction& head) const override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - IPrediction& head) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_state.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_state.hpp deleted file mode 100644 index 5af4578749..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_state.hpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/util/quality.hpp" - -#include - -// Forward declarations -class IThresholdsSubset; -class SinglePartition; -class BiPartition; -class IPrediction; - -/** - * Defines an interface for all classes that allow to keep track of the examples that are covered by a rule. - */ -class ICoverageState { - public: - - virtual ~ICoverageState() {} - - /** - * Creates and returns a deep copy of the coverage state. 
- * - * @return An unique pointer to an object of type `ICoverageState` that has been created - */ - virtual std::unique_ptr copy() const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered. - * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * evaluate the prediction - * @param partition A reference to an object of type `SinglePartition` that provides access to the - * indices of the training examples that belong to the training set - * @param head A reference to an object of type `IPrediction` that stores the scores that are - * predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const IPrediction& head) const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered. - * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * evaluate the prediction - * @param partition A reference to an object of type `BiPartition` that provides access to the indices - * of the training examples that belong to the training set - * @param head A reference to an object of type `IPrediction` that stores the scores that are - * predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const IPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered. 
- * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * recalculate the prediction - * @param partition A reference to an object of type `SinglePartition` that provides access to the - * indices of the training examples that belong to the training set - * @param head A reference to an object of type `IPrediction` to be updated - */ - virtual void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - IPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered. - * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * recalculate the prediction - * @param partition A reference to an object of type `BiPartition` that provides access to the indices - * of the training examples that belong to the training set - * @param head A reference to an object of type `IPrediction` to be updated - */ - virtual void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - IPrediction& head) const = 0; -}; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp index 5083f74eb1..712dc8dc24 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp @@ -70,13 +70,13 @@ class IThresholdsSubset { virtual void resetThresholds() = 0; /** - * Returns an object of type `ICoverageState` that keeps track of the elements that are covered by the - * refinement that has been applied via the function `applyRefinement`. + * Returns an object of type `CoverageMask` that keeps track of the elements that are covered by the refinement + * that has been applied via the function `applyRefinement`. 
* - * @return A reference to an object of type `ICoverageState` that keeps track of the elements that are covered - * by the refinement + * @return A reference to an object of type `CoverageMask` that keeps track of the elements that are covered by + * the refinement */ - virtual const ICoverageState& getCoverageState() const = 0; + virtual const CoverageMask& getCoverageMask() const = 0; /** * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples @@ -88,13 +88,13 @@ class IThresholdsSubset { * * @param partition A reference to an object of type `SinglePartition` that provides access to the indices * of the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that + * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples that * are covered by the rule * @param head A reference to an object of type `IPrediction` that stores the scores that are predicted * by the rule * @return An object of type `Quality` that stores the calculated quality */ - virtual Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageMask& coverageState, + virtual Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageMask& coverageMask, const IPrediction& head) const = 0; /** @@ -107,13 +107,13 @@ class IThresholdsSubset { * * @param partition A reference to an object of type `BiPartition` that provides access to the indices of * the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that + * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples that * are covered by the rule * @param head A reference to an object of type `IPrediction` that stores the scores that are predicted * by the rule * @return An 
object of type `Quality` that stores the calculated quality */ - virtual Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageState, + virtual Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageMask, const IPrediction& head) const = 0; /** @@ -125,11 +125,11 @@ class IThresholdsSubset { * * @param partition A reference to an object of type `SinglePartition` that provides access to the indices * of the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that + * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples that * are covered by the rule * @param head A reference to an object of type `IPrediction` to be updated */ - virtual void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageState, + virtual void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageMask, IPrediction& head) const = 0; /** @@ -141,11 +141,11 @@ class IThresholdsSubset { * * @param partition A reference to an object of type `BiPartition` that provides access to the indices of * the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that + * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples that * are covered by the rule * @param head A reference to an object of type `IPrediction` to be updated */ - virtual void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageState, + virtual void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageMask, IPrediction& head) const = 0; /** diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp 
b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp index f81c5e6986..6347fef0c7 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp @@ -93,15 +93,15 @@ class AbstractRuleInduction : public IRuleInduction { // Prune rule... IStatisticsProvider& statisticsProvider = thresholds.getStatisticsProvider(); statisticsProvider.switchToPruningRuleEvaluation(); - std::unique_ptr coverageStatePtr = + std::unique_ptr coverageMaskPtr = rulePruning.prune(*thresholdsSubsetPtr, partition, *conditionListPtr, *headPtr); statisticsProvider.switchToRegularRuleEvaluation(); // Re-calculate the scores in the head based on the entire training data... if (recalculatePredictions_) { - const ICoverageState& coverageState = - coverageStatePtr ? *coverageStatePtr : thresholdsSubsetPtr->getCoverageState(); - partition.recalculatePrediction(*thresholdsSubsetPtr, coverageState, *headPtr); + const CoverageMask& coverageMask = + coverageMaskPtr ? 
*coverageMaskPtr : thresholdsSubsetPtr->getCoverageMask(); + partition.recalculatePrediction(*thresholdsSubsetPtr, coverageMask, *headPtr); } } diff --git a/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_irep.cpp b/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_irep.cpp index 902fe52586..a093f3b723 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_irep.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_irep.cpp @@ -17,19 +17,19 @@ class Irep final : public IRulePruning { */ Irep(RuleCompareFunction ruleCompareFunction) : ruleCompareFunction_(ruleCompareFunction) {} - std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, - ConditionList& conditions, const IPrediction& head) const override { + std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, + ConditionList& conditions, const IPrediction& head) const override { uint32 numConditions = conditions.getNumConditions(); - std::unique_ptr bestCoverageStatePtr; + std::unique_ptr bestCoverageMaskPtr; // Only rules with more than one condition can be pruned... if (numConditions > 1) { // Calculate the quality of the original rule on the prune set... - const ICoverageState& originalCoverageState = thresholdsSubset.getCoverageState(); - Quality bestQuality = partition.evaluateOutOfSample(thresholdsSubset, originalCoverageState, head); + const CoverageMask& originalCoverageMask = thresholdsSubset.getCoverageMask(); + Quality bestQuality = partition.evaluateOutOfSample(thresholdsSubset, originalCoverageMask, head); // Create a copy of the original coverage mask... - bestCoverageStatePtr = originalCoverageState.copy(); + bestCoverageMaskPtr = std::make_unique(originalCoverageMask); // Reset the given thresholds... 
thresholdsSubset.resetThresholds(); @@ -46,15 +46,15 @@ class Irep final : public IRulePruning { thresholdsSubset.filterThresholds(condition); // Calculate the quality of a rule that contains the conditions that have been processed so far... - const ICoverageState& coverageState = thresholdsSubset.getCoverageState(); - Quality quality = partition.evaluateOutOfSample(thresholdsSubset, coverageState, head); + const CoverageMask& coverageMask = thresholdsSubset.getCoverageMask(); + Quality quality = partition.evaluateOutOfSample(thresholdsSubset, coverageMask, head); // Check if the quality is better than the best quality seen so far (reaching the same quality with // fewer conditions is considered an improvement)... if (ruleCompareFunction_.compare(quality, bestQuality) || (numPrunedConditions == 0 && !ruleCompareFunction_.compare(bestQuality, quality))) { bestQuality = quality; - bestCoverageStatePtr = coverageState.copy(); + bestCoverageMaskPtr = std::make_unique(coverageMask); numPrunedConditions = (numConditions - n); } @@ -68,7 +68,7 @@ class Irep final : public IRulePruning { } } - return bestCoverageStatePtr; + return bestCoverageMaskPtr; } }; diff --git a/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_no.cpp b/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_no.cpp index 83df8c56d4..eb78950e33 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_no.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_no.cpp @@ -6,8 +6,8 @@ class NoRulePruning final : public IRulePruning { public: - std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, - ConditionList& conditions, const IPrediction& head) const override { + std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, + ConditionList& conditions, const IPrediction& head) const override { return nullptr; } }; diff --git 
a/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp index c0be442b71..1c44bd4a62 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp @@ -76,14 +76,14 @@ std::unique_ptr BiPartition::createInstanceSampling(const IIn return labelMatrix.createInstanceSampling(factory, *this, statistics); } -Quality BiPartition::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, +Quality BiPartition::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, const IPrediction& head) { - return coverageState.evaluateOutOfSample(thresholdsSubset, *this, head); + return thresholdsSubset.evaluateOutOfSample(*this, coverageMask, head); } -void BiPartition::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, +void BiPartition::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, IPrediction& head) { - coverageState.recalculatePrediction(thresholdsSubset, *this, head); + thresholdsSubset.recalculatePrediction(*this, coverageMask, head); } std::unique_ptr BiPartition::fitMarginalProbabilityCalibrationModel( diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp index 9c1c07035c..47f47794f0 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp @@ -31,13 +31,13 @@ std::unique_ptr SinglePartition::createInstanceSampling(const } Quality SinglePartition::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, - const ICoverageState& coverageState, const IPrediction& head) { - return coverageState.evaluateOutOfSample(thresholdsSubset, 
*this, head); + const CoverageMask& coverageMask, const IPrediction& head) { + return thresholdsSubset.evaluateOutOfSample(*this, coverageMask, head); } -void SinglePartition::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, - const ICoverageState& coverageState, IPrediction& head) { - coverageState.recalculatePrediction(thresholdsSubset, *this, head); +void SinglePartition::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + IPrediction& head) { + thresholdsSubset.recalculatePrediction(*this, coverageMask, head); } std::unique_ptr SinglePartition::fitMarginalProbabilityCalibrationModel( diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp index 388baa862e..dd187a85ae 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp @@ -28,27 +28,3 @@ void CoverageMask::reset() { bool CoverageMask::isCovered(uint32 pos) const { return this->view.array[pos] == indicatorValue_; } - -std::unique_ptr CoverageMask::copy() const { - return std::make_unique(*this); -} - -Quality CoverageMask::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const IPrediction& head) const { - return thresholdsSubset.evaluateOutOfSample(partition, *this, head); -} - -Quality CoverageMask::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const IPrediction& head) const { - return thresholdsSubset.evaluateOutOfSample(partition, *this, head); -} - -void CoverageMask::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - IPrediction& head) const { - thresholdsSubset.recalculatePrediction(partition, *this, head); -} - -void CoverageMask::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& 
partition, - IPrediction& head) const { - thresholdsSubset.recalculatePrediction(partition, *this, head); -} diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index cbc00c2ac0..7350ed4627 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -252,35 +252,35 @@ class ExactThresholds final : public IThresholds { coverageMask_.reset(); } - const ICoverageState& getCoverageState() const override { + const CoverageMask& getCoverageMask() const override { return coverageMask_; } - Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageMask& coverageState, + Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageMask& coverageMask, const IPrediction& head) const override { return evaluateOutOfSampleInternally( - partition.cbegin(), partition.getNumElements(), weights_, coverageState, + partition.cbegin(), partition.getNumElements(), weights_, coverageMask, thresholds_.statisticsProvider_.get(), head); } - Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageState, + Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageMask, const IPrediction& head) const override { return evaluateOutOfSampleInternally( - partition.first_cbegin(), partition.getNumFirst(), weights_, coverageState, + partition.first_cbegin(), partition.getNumFirst(), weights_, coverageMask, thresholds_.statisticsProvider_.get(), head); } - void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageState, + void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageMask, IPrediction& head) const override { recalculatePredictionInternally( - partition.cbegin(), partition.getNumElements(), coverageState, + partition.cbegin(), 
partition.getNumElements(), coverageMask, thresholds_.statisticsProvider_.get(), head); } - void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageState, + void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageMask, IPrediction& head) const override { recalculatePredictionInternally( - partition.first_cbegin(), partition.getNumFirst(), coverageState, + partition.first_cbegin(), partition.getNumFirst(), coverageMask, thresholds_.statisticsProvider_.get(), head); } From 1095b6bdffc0d2ef528e9740796d82075527df0e Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 20 Feb 2024 23:38:11 +0100 Subject: [PATCH 34/53] Rename struct RuleRefinement to RuleRefinementEntry. --- .../rule_induction_top_down_common.hpp | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp index a45e639985..8eefd0c4c4 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp @@ -14,7 +14,7 @@ * @tparam The type of the comparator that allows comparing different refinements and keeping track of the best one(s) */ template -struct RuleRefinement final { +struct RuleRefinementEntry final { public: /** @@ -55,31 +55,32 @@ static inline bool findRefinement(RefinementComparator& refinementComparator, IT // For each feature, create an object of type `RuleRefinement`... 
uint32 numFeatures = featureIndices.getNumElements(); - RuleRefinement* ruleRefinements = new RuleRefinement[numFeatures]; + RuleRefinementEntry* ruleRefinementEntries = + new RuleRefinementEntry[numFeatures]; for (uint32 i = 0; i < numFeatures; i++) { uint32 featureIndex = featureIndices.getIndex(i); - RuleRefinement& ruleRefinement = ruleRefinements[i]; - ruleRefinement.comparatorPtr = std::make_unique(refinementComparator); - ruleRefinement.ruleRefinementPtr = labelIndices.createRuleRefinement(thresholdsSubset, featureIndex); + RuleRefinementEntry& ruleRefinementEntry = ruleRefinementEntries[i]; + ruleRefinementEntry.comparatorPtr = std::make_unique(refinementComparator); + ruleRefinementEntry.ruleRefinementPtr = labelIndices.createRuleRefinement(thresholdsSubset, featureIndex); } // Search for the best condition among all available features to be added to the current rule... #if MULTI_THREADING_SUPPORT_ENABLED - #pragma omp parallel for firstprivate(numFeatures) firstprivate(ruleRefinements) firstprivate(minCoverage) \ + #pragma omp parallel for firstprivate(numFeatures) firstprivate(ruleRefinementEntries) firstprivate(minCoverage) \ schedule(dynamic) num_threads(numThreads) #endif for (int64 i = 0; i < numFeatures; i++) { - RuleRefinement& ruleRefinement = ruleRefinements[i]; - ruleRefinement.ruleRefinementPtr->findRefinement(*ruleRefinement.comparatorPtr, minCoverage); + RuleRefinementEntry& ruleRefinementEntry = ruleRefinementEntries[i]; + ruleRefinementEntry.ruleRefinementPtr->findRefinement(*ruleRefinementEntry.comparatorPtr, minCoverage); } // Pick the best refinement among the refinements that have been found for the different features... 
for (uint32 i = 0; i < numFeatures; i++) { - RuleRefinement& ruleRefinement = ruleRefinements[i]; - foundRefinement |= refinementComparator.merge(*ruleRefinement.comparatorPtr); + RuleRefinementEntry& ruleRefinementEntry = ruleRefinementEntries[i]; + foundRefinement |= refinementComparator.merge(*ruleRefinementEntry.comparatorPtr); } - delete[] ruleRefinements; + delete[] ruleRefinementEntries; return foundRefinement; } From 1849e3ce419f18e561270feaf0b501ce08a89d7e Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 20 Feb 2024 23:51:55 +0100 Subject: [PATCH 35/53] Add const modifiers to "findRefinement" functions of the class IRuleRefinement. --- .../include/mlrl/common/rule_refinement/rule_refinement.hpp | 4 ++-- .../mlrl/common/rule_refinement/rule_refinement_exact.hpp | 4 ++-- .../mlrl/common/rule_refinement/rule_refinement_exact.cpp | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement.hpp index 8b122c3295..01a2f83da0 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement.hpp @@ -21,7 +21,7 @@ class IRuleRefinement { * comparing potential refinements * @param minCoverage The minimum number of examples that must be covered by the refinement */ - virtual void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) = 0; + virtual void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) const = 0; /** * Finds the best refinements of an existing rule. 
@@ -30,5 +30,5 @@ class IRuleRefinement { * comparing potential refinements * @param minCoverage The minimum number of examples that must be covered by the refinements */ - virtual void findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) = 0; + virtual void findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp index bd608e489d..dff8c68603 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp @@ -44,7 +44,7 @@ class ExactRuleRefinement final : public IRuleRefinement { ExactRuleRefinement(const IndexVector& labelIndices, uint32 featureIndex, uint32 numExamplesWithNonZeroWeights, std::unique_ptr callbackPtr); - void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) override; + void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) const override; - void findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) override; + void findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) const override; }; diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp index a3f35fd3ce..43d0503669 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp @@ -29,13 +29,14 @@ ExactRuleRefinement::ExactRuleRefinement(const IndexVector& labelIn numExamplesWithNonZeroWeights_(numExamplesWithNonZeroWeights), callbackPtr_(std::move(callbackPtr)) {} template -void 
ExactRuleRefinement::findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) { +void ExactRuleRefinement::findRefinement(SingleRefinementComparator& comparator, + uint32 minCoverage) const { findRefinementInternally(labelIndices_, featureIndex_, numExamplesWithNonZeroWeights_, *callbackPtr_, comparator, minCoverage); } template -void ExactRuleRefinement::findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) { +void ExactRuleRefinement::findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) const { findRefinementInternally(labelIndices_, featureIndex_, numExamplesWithNonZeroWeights_, *callbackPtr_, comparator, minCoverage); } From 3b8ef5a444e35028f98062f03ce8ef21be3960fd Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 20 Feb 2024 23:57:21 +0100 Subject: [PATCH 36/53] Rename class ExactRuleRefinement to FeatureBasedRuleRefinement. --- ....hpp => rule_refinement_feature_based.hpp} | 7 ++++--- cpp/subprojects/common/meson.build | 2 +- ....cpp => rule_refinement_feature_based.cpp} | 19 ++++++++++--------- .../common/thresholds/thresholds_exact.cpp | 14 +++++++------- 4 files changed, 22 insertions(+), 20 deletions(-) rename cpp/subprojects/common/include/mlrl/common/rule_refinement/{rule_refinement_exact.hpp => rule_refinement_feature_based.hpp} (87%) rename cpp/subprojects/common/src/mlrl/common/rule_refinement/{rule_refinement_exact.cpp => rule_refinement_feature_based.cpp} (68%) diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_feature_based.hpp similarity index 87% rename from cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp rename to cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_feature_based.hpp index dff8c68603..cf2ca03578 100644 --- 
a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_feature_based.hpp @@ -17,7 +17,7 @@ * rule is allowed to predict */ template -class ExactRuleRefinement final : public IRuleRefinement { +class FeatureBasedRuleRefinement final : public IRuleRefinement { private: const IndexVector& labelIndices_; @@ -41,8 +41,9 @@ class ExactRuleRefinement final : public IRuleRefinement { * allows to retrieve the information that is required to search for * potential refinements */ - ExactRuleRefinement(const IndexVector& labelIndices, uint32 featureIndex, uint32 numExamplesWithNonZeroWeights, - std::unique_ptr callbackPtr); + FeatureBasedRuleRefinement(const IndexVector& labelIndices, uint32 featureIndex, + uint32 numExamplesWithNonZeroWeights, + std::unique_ptr callbackPtr); void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) const override; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index d55aa85f18..b97f06c82b 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -55,7 +55,7 @@ source_files = [ 'src/mlrl/common/rule_refinement/prediction_partial.cpp', 'src/mlrl/common/rule_refinement/refinement_comparator_fixed.cpp', 'src/mlrl/common/rule_refinement/refinement_comparator_single.cpp', - 'src/mlrl/common/rule_refinement/rule_refinement_exact.cpp', + 'src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp', 'src/mlrl/common/rule_refinement/rule_refinement_search.cpp', 'src/mlrl/common/rule_refinement/score_processor.cpp', 'src/mlrl/common/sampling/feature_sampling_no.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp similarity index 68% rename from 
cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp index 43d0503669..5abb1f0351 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp @@ -1,4 +1,4 @@ -#include "mlrl/common/rule_refinement/rule_refinement_exact.hpp" +#include "mlrl/common/rule_refinement/rule_refinement_feature_based.hpp" #include "mlrl/common/rule_refinement/rule_refinement_search.hpp" @@ -22,24 +22,25 @@ static inline void findRefinementInternally(const IndexVector& labelIndices, uin } template -ExactRuleRefinement::ExactRuleRefinement(const IndexVector& labelIndices, uint32 featureIndex, - uint32 numExamplesWithNonZeroWeights, - std::unique_ptr callbackPtr) +FeatureBasedRuleRefinement::FeatureBasedRuleRefinement( + const IndexVector& labelIndices, uint32 featureIndex, uint32 numExamplesWithNonZeroWeights, + std::unique_ptr callbackPtr) : labelIndices_(labelIndices), featureIndex_(featureIndex), numExamplesWithNonZeroWeights_(numExamplesWithNonZeroWeights), callbackPtr_(std::move(callbackPtr)) {} template -void ExactRuleRefinement::findRefinement(SingleRefinementComparator& comparator, - uint32 minCoverage) const { +void FeatureBasedRuleRefinement::findRefinement(SingleRefinementComparator& comparator, + uint32 minCoverage) const { findRefinementInternally(labelIndices_, featureIndex_, numExamplesWithNonZeroWeights_, *callbackPtr_, comparator, minCoverage); } template -void ExactRuleRefinement::findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) const { +void FeatureBasedRuleRefinement::findRefinement(FixedRefinementComparator& comparator, + uint32 minCoverage) const { findRefinementInternally(labelIndices_, featureIndex_, numExamplesWithNonZeroWeights_, *callbackPtr_, comparator, minCoverage); } -template class 
ExactRuleRefinement; -template class ExactRuleRefinement; +template class FeatureBasedRuleRefinement; +template class FeatureBasedRuleRefinement; diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 7350ed4627..136d297d3c 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/thresholds/thresholds_exact.hpp" -#include "mlrl/common/rule_refinement/rule_refinement_exact.hpp" +#include "mlrl/common/rule_refinement/rule_refinement_feature_based.hpp" #include "mlrl/common/util/openmp.hpp" #include @@ -157,8 +157,8 @@ class ExactThresholds final : public IThresholds { std::unordered_map cacheFiltered_; template - std::unique_ptr createExactRuleRefinement(const IndexVector& labelIndices, - uint32 featureIndex) { + std::unique_ptr createRuleRefinementInternally(const IndexVector& labelIndices, + uint32 featureIndex) { // Retrieve the `FilteredCacheEntry` from the cache, or insert a new one if it does not already // exist... 
auto cacheFilteredIterator = cacheFiltered_.emplace(featureIndex, FilteredCacheEntry()).first; @@ -172,8 +172,8 @@ class ExactThresholds final : public IThresholds { std::unique_ptr callbackPtr = std::make_unique(*this, thresholds_.featureInfo_, featureIndex); - return std::make_unique>(labelIndices, featureIndex, numCovered_, - std::move(callbackPtr)); + return std::make_unique>( + labelIndices, featureIndex, numCovered_, std::move(callbackPtr)); } public: @@ -209,12 +209,12 @@ class ExactThresholds final : public IThresholds { std::unique_ptr createRuleRefinement(const CompleteIndexVector& labelIndices, uint32 featureIndex) override { - return createExactRuleRefinement(labelIndices, featureIndex); + return createRuleRefinementInternally(labelIndices, featureIndex); } std::unique_ptr createRuleRefinement(const PartialIndexVector& labelIndices, uint32 featureIndex) override { - return createExactRuleRefinement(labelIndices, featureIndex); + return createRuleRefinementInternally(labelIndices, featureIndex); } void filterThresholds(const Condition& condition) override { From d5dcea9ec1a5c4a9fa3847a69a6351a49df218b5 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 21 Feb 2024 00:04:34 +0100 Subject: [PATCH 37/53] Remove class IThresholdsConfig. 
--- .../common/include/mlrl/common/learner.hpp | 17 ------------ .../mlrl/common/thresholds/thresholds.hpp | 23 ---------------- .../common/thresholds/thresholds_exact.hpp | 27 ------------------- .../common/src/mlrl/common/learner.cpp | 12 ++++----- .../common/thresholds/thresholds_exact.cpp | 12 --------- 5 files changed, 5 insertions(+), 86 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/learner.hpp b/cpp/subprojects/common/include/mlrl/common/learner.hpp index 7878bf2a6f..15f10d60ee 100644 --- a/cpp/subprojects/common/include/mlrl/common/learner.hpp +++ b/cpp/subprojects/common/include/mlrl/common/learner.hpp @@ -192,15 +192,6 @@ class MLRLCOMMON_API IRuleLearner { */ virtual std::unique_ptr& getFeatureBinningConfigPtr() = 0; - /** - * Returns an unique pointer to the configuration of the class that provides access to the thresholds - * that may be used by the conditions of rules. - * - * @return A reference to an unique pointer of type `IThresholdsConfig` that stores the configuration of - * the class that provides access to the thresholds that may be used by the conditions of rules - */ - virtual std::unique_ptr& getThresholdsConfigPtr() = 0; - /** * Returns an unique pointer to the configuration of the method for sampling labels. * @@ -1642,12 +1633,6 @@ class AbstractRuleLearner : virtual public IRuleLearner { */ std::unique_ptr featureBinningConfigPtr_; - /** - * An unique pointer that stores the configuration of the class that provides access to the thresholds - * that may be used by the conditions of rules. - */ - std::unique_ptr thresholdsConfigPtr_; - /** * An unique pointer that stores the configuration of the method for sampling labels. 
*/ @@ -1770,8 +1755,6 @@ class AbstractRuleLearner : virtual public IRuleLearner { std::unique_ptr& getFeatureBinningConfigPtr() override final; - std::unique_ptr& getThresholdsConfigPtr() override final; - std::unique_ptr& getLabelSamplingConfigPtr() override final; std::unique_ptr& getInstanceSamplingConfigPtr() override final; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp index 178dd9b0e9..809c61d063 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp @@ -5,7 +5,6 @@ #include "mlrl/common/input/feature_info.hpp" #include "mlrl/common/input/feature_matrix_column_wise.hpp" -#include "mlrl/common/input/label_matrix.hpp" #include "mlrl/common/sampling/weight_vector_bit.hpp" #include "mlrl/common/sampling/weight_vector_dense.hpp" #include "mlrl/common/sampling/weight_vector_equal.hpp" @@ -80,25 +79,3 @@ class IThresholdsFactory { const IFeatureInfo& featureInfo, IStatisticsProvider& statisticsProvider) const = 0; }; - -/** - * Defines an interface for all classes that allow to configure a class that provides access to thresholds that may be - * used by the conditions of rules. - */ -class IThresholdsConfig { - public: - - virtual ~IThresholdsConfig() {} - - /** - * Creates and returns a new object of type `IThresholdsFactory` according to the specified configuration. 
- * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the feature - * values of the training examples - * @param labelMatrix A reference to an object of type `ILabelMatrix` that provides access to the labels of - * the training examples - * @return An unique pointer to an object of type `IThresholdsFactory` that has been created - */ - virtual std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const = 0; -}; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp index 3fb7af2d3d..cab40ea710 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp @@ -4,7 +4,6 @@ #pragma once #include "mlrl/common/input/feature_binning.hpp" -#include "mlrl/common/multi_threading/multi_threading.hpp" #include "mlrl/common/thresholds/thresholds.hpp" /** @@ -32,29 +31,3 @@ class ExactThresholdsFactory final : public IThresholdsFactory { const IFeatureInfo& featureInfo, IStatisticsProvider& statisticsProvider) const override; }; - -/** - * Allows to configure a class that provides access to thresholds that may be used by the conditions of rules. 
- */ -class ExactThresholdsConfig final : public IThresholdsConfig { - private: - - const std::unique_ptr& featureBinningConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param featureBinningconfigPtr A reference to an unique pointer that stores the configuration of the method - * that should be used for assigning numerical feature values to bins - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - ExactThresholdsConfig(const std::unique_ptr& featureBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/src/mlrl/common/learner.cpp b/cpp/subprojects/common/src/mlrl/common/learner.cpp index f692ecce66..0dbb901b56 100644 --- a/cpp/subprojects/common/src/mlrl/common/learner.cpp +++ b/cpp/subprojects/common/src/mlrl/common/learner.cpp @@ -90,8 +90,6 @@ AbstractRuleLearner::Config::Config(RuleCompareFunction ruleCompareFunction) ruleInductionConfigPtr_( std::make_unique(ruleCompareFunction_, parallelRuleRefinementConfigPtr_)), featureBinningConfigPtr_(std::make_unique()), - thresholdsConfigPtr_( - std::make_unique(featureBinningConfigPtr_, parallelStatisticUpdateConfigPtr_)), labelSamplingConfigPtr_(std::make_unique()), instanceSamplingConfigPtr_(std::make_unique()), featureSamplingConfigPtr_(std::make_unique()), @@ -125,10 +123,6 @@ std::unique_ptr& AbstractRuleLearner::Config::getFeatureB return featureBinningConfigPtr_; } -std::unique_ptr& AbstractRuleLearner::Config::getThresholdsConfigPtr() { - return thresholdsConfigPtr_; -} - std::unique_ptr& AbstractRuleLearner::Config::getLabelSamplingConfigPtr() { return labelSamplingConfigPtr_; } @@ -217,7 +211,11 @@ std::unique_ptr 
AbstractRuleLearner::createRuleMode std::unique_ptr AbstractRuleLearner::createThresholdsFactory( const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - return config_.getThresholdsConfigPtr()->createThresholdsFactory(featureMatrix, labelMatrix); + std::unique_ptr featureBinningFactoryPtr = + config_.getFeatureBinningConfigPtr()->createFeatureBinningFactory(featureMatrix, labelMatrix); + uint32 numThreads = + config_.getParallelStatisticUpdateConfigPtr()->getNumThreads(featureMatrix, labelMatrix.getNumLabels()); + return std::make_unique(std::move(featureBinningFactoryPtr), numThreads); } std::unique_ptr AbstractRuleLearner::createRuleInductionFactory( diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 136d297d3c..379e4ccfde 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -402,15 +402,3 @@ std::unique_ptr ExactThresholdsFactory::create(const IColumnWiseFea return std::make_unique(featureMatrix, featureInfo, statisticsProvider, *featureBinningFactoryPtr_, numThreads_); } - -ExactThresholdsConfig::ExactThresholdsConfig(const std::unique_ptr& featureBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : featureBinningConfigPtr_(featureBinningConfigPtr), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - -std::unique_ptr ExactThresholdsConfig::createThresholdsFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - std::unique_ptr featureBinningFactoryPtr = - featureBinningConfigPtr_->createFeatureBinningFactory(featureMatrix, labelMatrix); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumLabels()); - return std::make_unique(std::move(featureBinningFactoryPtr), numThreads); -} From b965636adb2d5319fe421efdb00d37bb5d67730d 
Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 21 Feb 2024 00:12:19 +0100 Subject: [PATCH 38/53] The class IRuleRefinementCallback is now an inner class of IRuleRefinement. --- .../rule_refinement/rule_refinement.hpp | 50 +++++++++++++++++ .../rule_refinement_callback.hpp | 55 ------------------- .../rule_refinement_feature_based.hpp | 8 +-- .../rule_refinement_feature_based.cpp | 6 +- .../common/thresholds/thresholds_exact.cpp | 2 +- 5 files changed, 57 insertions(+), 64 deletions(-) delete mode 100644 cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_callback.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement.hpp index 01a2f83da0..a784229524 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement.hpp @@ -3,8 +3,10 @@ */ #pragma once +#include "mlrl/common/input/feature_vector.hpp" #include "mlrl/common/rule_refinement/refinement_comparator_fixed.hpp" #include "mlrl/common/rule_refinement/refinement_comparator_single.hpp" +#include "mlrl/common/statistics/statistics_weighted_immutable.hpp" /** * Defines an interface for all classes that allow to find the best refinement of existing rules. @@ -12,6 +14,54 @@ class IRuleRefinement { public: + /** + * Defines an interface for callbacks that may be invoked by subclasses of the class `IRuleRefinement` in + * order to retrieve the information that is required to search for potential refinements. It consists of + * `IImmutableWeightedStatistics`, as well as an `IFeatureVector` that allows to determine the thresholds that + * may be used by potential conditions. + */ + class ICallback { + public: + + /** + * The data that is provided via the callback's `get` function.
+ */ + struct Result final { + public: + + /** + * @param statistics A reference to an object of type `IImmutableWeightedStatistics` that + * should be used to search for potential refinements + * @param featureVector A reference to an object of type `IFeatureVector` that should be used to + * search for potential refinements + */ + Result(const IImmutableWeightedStatistics& statistics, const IFeatureVector& featureVector) + : statistics(statistics), featureVector(featureVector) {} + + /** + * A reference to an object of type `IImmutableWeightedStatistics` that should be used to search + * for potential refinements. + */ + const IImmutableWeightedStatistics& statistics; + + /** + * A reference to an object of type `IFeatureVector` that should be used to search for potential + * refinements. + */ + const IFeatureVector& featureVector; + }; + + virtual ~ICallback() {} + + /** + * Invokes the callback and returns its result. + * + * @return An object of type `Result` that stores references to the statistics and the feature vector + * that may be used to search for potential refinements + */ + virtual Result get() = 0; + }; + virtual ~IRuleRefinement() {} /** diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_callback.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_callback.hpp deleted file mode 100644 index 2ca3b40b71..0000000000 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_callback.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "mlrl/common/input/feature_vector.hpp" -#include "mlrl/common/statistics/statistics_weighted_immutable.hpp" - -/** - * Defines an interface for callbacks that may be invoked by subclasses of the the class `IRuleRefinement` in order to - * retrieve the information that is required to search for potential refinements. 
It consists of - * `IImmutableWeightedStatistics`, as well as an `IFeatureVector` that allows to determine the thresholds that may be - * used by potential conditions. - */ -class IRuleRefinementCallback { - public: - - /** - * The data that is provided via the callback's `get` function. - */ - struct Result final { - public: - - /** - * @param statistics A reference to an object of type `IImmutableWeightedStatistics` that should be - * used to search for potential refinements - * @param featureVector A reference to an object of type `IFeatureVector` that should be used to search - * for potential refinements - */ - Result(const IImmutableWeightedStatistics& statistics, const IFeatureVector& featureVector) - : statistics(statistics), featureVector(featureVector) {} - - /** - * A reference to an object of type `IImmutableWeightedStatistics` that should be used to search for - * potential refinements. - */ - const IImmutableWeightedStatistics& statistics; - - /** - * A reference to an object of type `IFeatureVector` that should be used to search for potential - * refinements. - */ - const IFeatureVector& featureVector; - }; - - virtual ~IRuleRefinementCallback() {} - - /** - * Invokes the callback and returns its result. 
- * - * @return An object of type `Result` that stores references to the statistics and the feature vector that may - * be used to search for potential refinements - */ - virtual Result get() = 0; -}; diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_feature_based.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_feature_based.hpp index cf2ca03578..c67f0bd3df 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_feature_based.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_feature_based.hpp @@ -3,9 +3,7 @@ */ #pragma once -#include "mlrl/common/input/feature_vector.hpp" #include "mlrl/common/rule_refinement/rule_refinement.hpp" -#include "mlrl/common/rule_refinement/rule_refinement_callback.hpp" #include "mlrl/common/statistics/statistics_weighted.hpp" /** @@ -26,7 +24,7 @@ class FeatureBasedRuleRefinement final : public IRuleRefinement { const uint32 numExamplesWithNonZeroWeights_; - const std::unique_ptr callbackPtr_; + const std::unique_ptr callbackPtr_; public: @@ -37,13 +35,13 @@ class FeatureBasedRuleRefinement final : public IRuleRefinement { * @param featureIndex The index of the feature, the new condition corresponds to * @param numExamplesWithNonZeroWeights The total number of examples with non-zero weights that may be covered * by a refinement - * @param callbackPtr An unique pointer to an object of type `IRuleRefinementCallback` that + * @param callbackPtr An unique pointer to an object of type `IRuleRefinement::ICallback` that * allows to retrieve the information that is required to search for * potential refinements */ FeatureBasedRuleRefinement(const IndexVector& labelIndices, uint32 featureIndex, uint32 numExamplesWithNonZeroWeights, - std::unique_ptr callbackPtr); + std::unique_ptr callbackPtr); void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) const override; diff --git 
a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp index 5abb1f0351..b172e2e073 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp @@ -4,10 +4,10 @@ template static inline void findRefinementInternally(const IndexVector& labelIndices, uint32 featureIndex, - uint32 numExamplesWithNonZeroWeights, IRuleRefinementCallback& callback, + uint32 numExamplesWithNonZeroWeights, IRuleRefinement::ICallback& callback, Comparator& comparator, uint32 minCoverage) { // Invoke the callback... - IRuleRefinementCallback::Result callbackResult = callback.get(); + IRuleRefinement::ICallback::Result callbackResult = callback.get(); const IImmutableWeightedStatistics& statistics = callbackResult.statistics; const IFeatureVector& featureVector = callbackResult.featureVector; @@ -24,7 +24,7 @@ static inline void findRefinementInternally(const IndexVector& labelIndices, uin template FeatureBasedRuleRefinement::FeatureBasedRuleRefinement( const IndexVector& labelIndices, uint32 featureIndex, uint32 numExamplesWithNonZeroWeights, - std::unique_ptr callbackPtr) + std::unique_ptr callbackPtr) : labelIndices_(labelIndices), featureIndex_(featureIndex), numExamplesWithNonZeroWeights_(numExamplesWithNonZeroWeights), callbackPtr_(std::move(callbackPtr)) {} diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 379e4ccfde..655d57af42 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -84,7 +84,7 @@ class ExactThresholds final : public IThresholds { * A callback that allows to retrieve feature vectors. 
If available, the feature vectors are retrieved * from the cache. Otherwise, they are fetched from the feature matrix. */ - class Callback final : public IRuleRefinementCallback { + class Callback final : public IRuleRefinement::ICallback { private: ThresholdsSubset& thresholdsSubset_; From c2a53fad23c69efb94bb8b8070e43e2e1c84921f Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 21 Feb 2024 00:38:11 +0100 Subject: [PATCH 39/53] Rename class RuleRefinementSearch to FeatureBasedSearch. --- .../mlrl/common/input/feature_vector.hpp | 10 +- .../common/input/feature_vector_equal.hpp | 14 +- ...nt_search.hpp => feature_based_search.hpp} | 2 +- cpp/subprojects/common/meson.build | 2 +- .../input/feature_vector_decorator_binary.hpp | 26 ++-- .../input/feature_vector_decorator_binned.hpp | 30 ++--- .../feature_vector_decorator_nominal.hpp | 26 ++-- .../feature_vector_decorator_numerical.hpp | 26 ++-- .../feature_vector_decorator_ordinal.hpp | 78 +++++------ .../common/input/feature_vector_equal.cpp | 4 +- .../rule_refinement/feature_based_search.cpp | 125 ++++++++++++++++++ ...ry.hpp => feature_based_search_binary.hpp} | 2 +- ...ed.hpp => feature_based_search_binned.hpp} | 2 +- ...=> feature_based_search_binned_common.hpp} | 0 ...l.hpp => feature_based_search_nominal.hpp} | 2 +- ...hpp => feature_based_search_numerical.hpp} | 0 ...l.hpp => feature_based_search_ordinal.hpp} | 2 +- .../rule_refinement_feature_based.cpp | 6 +- .../rule_refinement_search.cpp | 125 ------------------ 19 files changed, 233 insertions(+), 249 deletions(-) rename cpp/subprojects/common/include/mlrl/common/rule_refinement/{rule_refinement_search.hpp => feature_based_search.hpp} (99%) create mode 100644 cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search.cpp rename cpp/subprojects/common/src/mlrl/common/rule_refinement/{rule_refinement_search_binary.hpp => feature_based_search_binary.hpp} (98%) rename 
cpp/subprojects/common/src/mlrl/common/rule_refinement/{rule_refinement_search_binned.hpp => feature_based_search_binned.hpp} (99%) rename cpp/subprojects/common/src/mlrl/common/rule_refinement/{rule_refinement_search_binned_common.hpp => feature_based_search_binned_common.hpp} (100%) rename cpp/subprojects/common/src/mlrl/common/rule_refinement/{rule_refinement_search_nominal.hpp => feature_based_search_nominal.hpp} (98%) rename cpp/subprojects/common/src/mlrl/common/rule_refinement/{rule_refinement_search_numerical.hpp => feature_based_search_numerical.hpp} (100%) rename cpp/subprojects/common/src/mlrl/common/rule_refinement/{rule_refinement_search_ordinal.hpp => feature_based_search_ordinal.hpp} (99%) delete mode 100644 cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp index df8c794006..638e60e983 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp @@ -4,7 +4,7 @@ #pragma once #include "mlrl/common/input/interval.hpp" -#include "mlrl/common/rule_refinement/rule_refinement_search.hpp" +#include "mlrl/common/rule_refinement/feature_based_search.hpp" #include "mlrl/common/statistics/statistics_weighted.hpp" #include "mlrl/common/thresholds/coverage_mask.hpp" @@ -20,7 +20,7 @@ class IFeatureVector { /** * Conducts a search for the best refinement of an existing rule that can be created from a this feature vector. 
* - * @param ruleRefinementSearch A reference to an object of type `RuleRefinementSearch` that should be + * @param featureBasedSearch A reference to an object of type `FeatureBasedSearch` that should be * used for conducting the search * @param statisticsSubset A reference to an object of type `IWeightedStatisticsSubset` that * provides access to weighted statistics about the labels of the training @@ -34,7 +34,7 @@ class IFeatureVector { * @param refinement A reference to an object of type `Refinement` that should be used for * storing the properties of the best refinement that is found */ - virtual void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, + virtual void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, Refinement& refinement) const = 0; @@ -42,7 +42,7 @@ class IFeatureVector { /** * Conducts a search for the best refinement of an existing rule that can be created from a this feature vector. 
* - * @param ruleRefinementSearch A reference to an object of type `RuleRefinementSearch` that should be + * @param featureBasedSearch A reference to an object of type `FeatureBasedSearch` that should be * used for conducting the search * @param statisticsSubset A reference to an object of type `IWeightedStatisticsSubset` that * provides access to weighted statistics about the labels of the training @@ -56,7 +56,7 @@ class IFeatureVector { * @param refinement A reference to an object of type `Refinement` that should be used for * storing the properties of the best refinement that is found */ - virtual void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, + virtual void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, Refinement& refinement) const = 0; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_equal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_equal.hpp index 996747d1e7..0aa8681626 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_equal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_equal.hpp @@ -12,15 +12,13 @@ class EqualFeatureVector final : public IFeatureVector { public: - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, - uint32 numExamlesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override; + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamlesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override; - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& 
statisticsSubset, FixedRefinementComparator& comparator, - uint32 numExamlesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override; + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamlesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override; void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, uint32 indicatorValue, IWeightedStatistics& statistics) const override; diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_search.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_based_search.hpp similarity index 99% rename from cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_search.hpp rename to cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_based_search.hpp index dc5e19dfd6..d629a517e6 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/rule_refinement_search.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_based_search.hpp @@ -17,7 +17,7 @@ * Allows to conduct a search for finding the best refinement of an existing rule that can be created from a given * feature vector. 
*/ -class RuleRefinementSearch final { +class FeatureBasedSearch final { public: /** diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index b97f06c82b..6d685cbbfa 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -51,12 +51,12 @@ source_files = [ 'src/mlrl/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp', 'src/mlrl/common/rule_pruning/rule_pruning_irep.cpp', 'src/mlrl/common/rule_pruning/rule_pruning_no.cpp', + 'src/mlrl/common/rule_refinement/feature_based_search.cpp', 'src/mlrl/common/rule_refinement/prediction_complete.cpp', 'src/mlrl/common/rule_refinement/prediction_partial.cpp', 'src/mlrl/common/rule_refinement/refinement_comparator_fixed.cpp', 'src/mlrl/common/rule_refinement/refinement_comparator_single.cpp', 'src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp', - 'src/mlrl/common/rule_refinement/rule_refinement_search.cpp', 'src/mlrl/common/rule_refinement/score_processor.cpp', 'src/mlrl/common/sampling/feature_sampling_no.cpp', 'src/mlrl/common/sampling/feature_sampling_predefined.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binary.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binary.hpp index e2837b8637..c7544074f2 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binary.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binary.hpp @@ -26,22 +26,20 @@ class BinaryFeatureVectorDecorator final : public AbstractNominalFeatureVectorDe BinaryFeatureVectorDecorator(const BinaryFeatureVectorDecorator& other) : AbstractNominalFeatureVectorDecorator(other) {} - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - 
ruleRefinementSearch.searchForBinaryRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForBinaryRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForBinaryRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForBinaryRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp index cac0fc9ea2..a21f3d5bc8 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp @@ -214,22 +214,20 @@ class 
BinnedFeatureVectorDecorator final : public AbstractFeatureVectorDecorator other.getView().firstView.sparseBinIndex), AllocatedMissingFeatureVector()) {} - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); - } - - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); + } + + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForBinnedRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } void updateCoverageMaskAndStatistics(const Interval& interval, 
CoverageMask& coverageMask, diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal.hpp index f2136b4ec3..22454ebb85 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal.hpp @@ -88,22 +88,20 @@ class NominalFeatureVectorDecorator final : public AbstractNominalFeatureVectorD NominalFeatureVectorDecorator(const NominalFeatureVectorDecorator& other) : AbstractNominalFeatureVectorDecorator(other) {} - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForNominalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForNominalRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForNominalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void 
searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForNominalRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp index da2f246cef..9fe308c74d 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp @@ -90,22 +90,20 @@ class AbstractNumericalFeatureVectorDecorator : public AbstractFeatureVectorDeco virtual ~AbstractNumericalFeatureVectorDecorator() override {} - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForNumericalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, - numExamplesWithNonZeroWeights, minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForNumericalRefinement(this->view.firstView, this->view.secondView, + statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); } - void 
searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForNumericalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, - numExamplesWithNonZeroWeights, minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForNumericalRefinement(this->view.firstView, this->view.secondView, + statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); } void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp index ded0da3b1c..ba7eb562b6 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp @@ -59,22 +59,20 @@ class OrdinalFeatureVectorView final : public AbstractFeatureVectorDecoratorview.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, 
numExamplesWithNonZeroWeights, minCoverage, + refinement); } - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, @@ -125,22 +123,20 @@ class AllocatedOrdinalFeatureVectorView final : public AbstractFeatureVectorDeco AllocatedMissingFeatureVector()), allocatedView(std::move(allocatedView)) {} - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForOrdinalRefinement(this->view.firstView, 
this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, @@ -219,22 +215,20 @@ class OrdinalFeatureVectorDecorator final : public AbstractNominalFeatureVectorD other.getView().firstView.majorityValue), AllocatedMissingFeatureVector()) {} - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + 
featureBasedSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } - void searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, - IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const override { - ruleRefinementSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, - statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); + void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, + uint32 minCoverage, Refinement& refinement) const override { + featureBasedSearch.searchForOrdinalRefinement(this->view.firstView, this->view.secondView, statisticsSubset, + comparator, numExamplesWithNonZeroWeights, minCoverage, + refinement); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_equal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_equal.cpp index 61b2c32538..45338ee8d4 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_equal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_equal.cpp @@ -1,12 +1,12 @@ #include "mlrl/common/input/feature_vector_equal.hpp" -void EqualFeatureVector::searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, +void EqualFeatureVector::searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, Refinement& refinement) const {} -void EqualFeatureVector::searchForRefinement(RuleRefinementSearch& ruleRefinementSearch, 
+void EqualFeatureVector::searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, FixedRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search.cpp new file mode 100644 index 0000000000..273633cd52 --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search.cpp @@ -0,0 +1,125 @@ +#include "mlrl/common/rule_refinement/feature_based_search.hpp" + +#include "feature_based_search_binary.hpp" +#include "feature_based_search_binned.hpp" +#include "feature_based_search_nominal.hpp" +#include "feature_based_search_numerical.hpp" +#include "feature_based_search_ordinal.hpp" + +static inline void addMissingStatistics(IWeightedStatisticsSubset& statisticsSubset, + const MissingFeatureVector& missingFeatureVector) { + for (auto it = missingFeatureVector.indices_cbegin(); it != missingFeatureVector.indices_cend(); it++) { + uint32 index = *it; + statisticsSubset.addToMissing(index); + } +} + +void FeatureBasedSearch::searchForNumericalRefinement(const NumericalFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForNumericalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForNumericalRefinement(const NumericalFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, + uint32 
numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForNumericalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForNominalRefinement(const NominalFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForNominalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForNominalRefinement(const NominalFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForNominalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForBinaryRefinement(const BinaryFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForBinaryRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForBinaryRefinement(const 
BinaryFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForBinaryRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForOrdinalRefinement(const OrdinalFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForOrdinalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForOrdinalRefinement(const OrdinalFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForOrdinalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForBinnedRefinement(const BinnedFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + SingleRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + 
searchForBinnedRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} + +void FeatureBasedSearch::searchForBinnedRefinement(const BinnedFeatureVector& featureVector, + const MissingFeatureVector& missingFeatureVector, + IWeightedStatisticsSubset& statisticsSubset, + FixedRefinementComparator& comparator, + uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, + Refinement& refinement) const { + addMissingStatistics(statisticsSubset, missingFeatureVector); + searchForBinnedRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, + minCoverage, refinement); +} diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binary.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binary.hpp similarity index 98% rename from cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binary.hpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binary.hpp index abd0da416b..fd3ddeccd2 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binary.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binary.hpp @@ -3,9 +3,9 @@ */ #pragma once +#include "feature_based_search_binned_common.hpp" #include "mlrl/common/input/feature_vector_binary.hpp" #include "mlrl/common/rule_refinement/refinement.hpp" -#include "rule_refinement_search_binned_common.hpp" template static inline void searchForBinaryRefinementInternally(const BinaryFeatureVector& featureVector, diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned.hpp similarity index 99% rename from cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned.hpp rename to 
cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned.hpp index 9051b4dc2d..a39fcf74db 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned.hpp @@ -3,10 +3,10 @@ */ #pragma once +#include "feature_based_search_binned_common.hpp" #include "mlrl/common/input/feature_vector_binned.hpp" #include "mlrl/common/rule_refinement/refinement.hpp" #include "mlrl/common/statistics/statistics_subset_weighted.hpp" -#include "rule_refinement_search_binned_common.hpp" template static inline void searchForBinnedRefinementInternally(const BinnedFeatureVector& featureVector, diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp similarity index 100% rename from cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_binned_common.hpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_nominal.hpp similarity index 98% rename from cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal.hpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_nominal.hpp index 9289498c30..672725e77e 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_nominal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_nominal.hpp @@ -3,9 +3,9 @@ */ #pragma once +#include "feature_based_search_binned_common.hpp" #include "mlrl/common/input/feature_vector_nominal.hpp" #include 
"mlrl/common/rule_refinement/refinement.hpp" -#include "rule_refinement_search_binned_common.hpp" template static inline void searchForNominalRefinementInternally(const NominalFeatureVector& featureVector, diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_numerical.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_numerical.hpp similarity index 100% rename from cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_numerical.hpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_numerical.hpp diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_ordinal.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_ordinal.hpp similarity index 99% rename from cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_ordinal.hpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_ordinal.hpp index 718e0a4b67..c1ccc3f025 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search_ordinal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_ordinal.hpp @@ -3,9 +3,9 @@ */ #pragma once +#include "feature_based_search_binned_common.hpp" #include "mlrl/common/input/feature_vector_ordinal.hpp" #include "mlrl/common/rule_refinement/refinement.hpp" -#include "rule_refinement_search_binned_common.hpp" template static inline void searchForOrdinalRefinementInternally(const OrdinalFeatureVector& featureVector, diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp index b172e2e073..e327e83313 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp +++ 
b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_feature_based.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/rule_refinement/rule_refinement_feature_based.hpp" -#include "mlrl/common/rule_refinement/rule_refinement_search.hpp" +#include "mlrl/common/rule_refinement/feature_based_search.hpp" template static inline void findRefinementInternally(const IndexVector& labelIndices, uint32 featureIndex, @@ -14,10 +14,10 @@ static inline void findRefinementInternally(const IndexVector& labelIndices, uin // Create a new, empty subset of the statistics... std::unique_ptr statisticsSubsetPtr = statistics.createSubset(labelIndices); - RuleRefinementSearch ruleRefinementSearch; + FeatureBasedSearch featureBasedSearch; Refinement refinement; refinement.featureIndex = featureIndex; - featureVector.searchForRefinement(ruleRefinementSearch, *statisticsSubsetPtr, comparator, + featureVector.searchForRefinement(featureBasedSearch, *statisticsSubsetPtr, comparator, numExamplesWithNonZeroWeights, minCoverage, refinement); } diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp deleted file mode 100644 index ba0fb57d11..0000000000 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/rule_refinement_search.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include "mlrl/common/rule_refinement/rule_refinement_search.hpp" - -#include "rule_refinement_search_binary.hpp" -#include "rule_refinement_search_binned.hpp" -#include "rule_refinement_search_nominal.hpp" -#include "rule_refinement_search_numerical.hpp" -#include "rule_refinement_search_ordinal.hpp" - -static inline void addMissingStatistics(IWeightedStatisticsSubset& statisticsSubset, - const MissingFeatureVector& missingFeatureVector) { - for (auto it = missingFeatureVector.indices_cbegin(); it != missingFeatureVector.indices_cend(); it++) { - uint32 index = *it; - 
statisticsSubset.addToMissing(index); - } -} - -void RuleRefinementSearch::searchForNumericalRefinement(const NumericalFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForNumericalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForNumericalRefinement(const NumericalFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForNumericalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForNominalRefinement(const NominalFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForNominalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForNominalRefinement(const NominalFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - 
Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForNominalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForBinaryRefinement(const BinaryFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForBinaryRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForBinaryRefinement(const BinaryFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForBinaryRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForOrdinalRefinement(const OrdinalFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForOrdinalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForOrdinalRefinement(const OrdinalFeatureVector& featureVector, - const MissingFeatureVector& 
missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForOrdinalRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForBinnedRefinement(const BinnedFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - SingleRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForBinnedRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} - -void RuleRefinementSearch::searchForBinnedRefinement(const BinnedFeatureVector& featureVector, - const MissingFeatureVector& missingFeatureVector, - IWeightedStatisticsSubset& statisticsSubset, - FixedRefinementComparator& comparator, - uint32 numExamplesWithNonZeroWeights, uint32 minCoverage, - Refinement& refinement) const { - addMissingStatistics(statisticsSubset, missingFeatureVector); - searchForBinnedRefinementInternally(featureVector, statisticsSubset, comparator, numExamplesWithNonZeroWeights, - minCoverage, refinement); -} From 17ba009439c7170e502b0a760db3d15480cd37a1 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 21 Feb 2024 01:21:16 +0100 Subject: [PATCH 40/53] Rename class IThresholds to IFeatureSpace. 
--- .../common/include/mlrl/common/learner.hpp | 4 +-- .../post_optimization/post_optimization.hpp | 15 ++++----- .../common/rule_induction/rule_induction.hpp | 10 +++--- .../rule_model_assemblage.hpp | 8 ++--- .../mlrl/common/rule_pruning/rule_pruning.hpp | 4 +-- .../mlrl/common/sampling/weight_vector.hpp | 6 ++-- .../common/sampling/weight_vector_bit.hpp | 2 +- .../common/sampling/weight_vector_dense.hpp | 2 +- .../common/sampling/weight_vector_equal.hpp | 2 +- .../{thresholds.hpp => feature_space.hpp} | 31 +++++++++---------- .../common/thresholds/thresholds_exact.hpp | 10 +++--- .../common/thresholds/thresholds_subset.hpp | 7 ++--- .../common/src/mlrl/common/learner.cpp | 16 +++++----- .../post_optimization_phase_list.cpp | 6 ++-- .../post_optimization_sequential.cpp | 9 +++--- .../post_optimization_unused_rule_removal.cpp | 2 +- .../rule_induction/rule_induction_common.hpp | 13 ++++---- .../rule_induction_top_down_beam_search.cpp | 4 +-- .../rule_induction_top_down_greedy.cpp | 4 +-- .../rule_model_assemblage_sequential.cpp | 6 ++-- .../common/sampling/weight_vector_bit.cpp | 6 ++-- .../common/sampling/weight_vector_dense.cpp | 6 ++-- .../common/sampling/weight_vector_equal.cpp | 6 ++-- .../common/thresholds/thresholds_exact.cpp | 20 +++--------- 24 files changed, 94 insertions(+), 105 deletions(-) rename cpp/subprojects/common/include/mlrl/common/thresholds/{thresholds.hpp => feature_space.hpp} (72%) diff --git a/cpp/subprojects/common/include/mlrl/common/learner.hpp b/cpp/subprojects/common/include/mlrl/common/learner.hpp index 15f10d60ee..3af5124b1a 100644 --- a/cpp/subprojects/common/include/mlrl/common/learner.hpp +++ b/cpp/subprojects/common/include/mlrl/common/learner.hpp @@ -1812,8 +1812,8 @@ class AbstractRuleLearner : virtual public IRuleLearner { std::unique_ptr createRuleModelAssemblageFactory( const IRowWiseLabelMatrix& labelMatrix) const; - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& 
labelMatrix) const; + std::unique_ptr createFeatureSpaceFactory(const IFeatureMatrix& featureMatrix, + const ILabelMatrix& labelMatrix) const; std::unique_ptr createRuleInductionFactory(const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const; diff --git a/cpp/subprojects/common/include/mlrl/common/post_optimization/post_optimization.hpp b/cpp/subprojects/common/include/mlrl/common/post_optimization/post_optimization.hpp index 731f36f696..20a5e29bbb 100644 --- a/cpp/subprojects/common/include/mlrl/common/post_optimization/post_optimization.hpp +++ b/cpp/subprojects/common/include/mlrl/common/post_optimization/post_optimization.hpp @@ -9,7 +9,7 @@ #include "mlrl/common/rule_pruning/rule_pruning.hpp" #include "mlrl/common/sampling/feature_sampling.hpp" #include "mlrl/common/sampling/label_sampling.hpp" -#include "mlrl/common/thresholds/thresholds.hpp" +#include "mlrl/common/thresholds/feature_space.hpp" /** * Defines an interface for all classes that allow to optimize a rule-based model globally once it has been learned. @@ -22,8 +22,8 @@ class IPostOptimizationPhase { /** * Optimizes a rule-based model globally once it has been learned. 
* - * @param thresholds A reference to an object of type `IThresholds` that provides access to the - * thresholds that may be used by the conditions of the rule + * @param featureSpace A reference to an object of type `IFeatureSpace` that provides access to the feature + * space * @param ruleInduction A reference to an object of type `IRuleInduction` that should be used for inducing * new rules * @param partition A reference to an object of type `IPartition` that provides access to the indices of @@ -42,10 +42,11 @@ class IPostOptimizationPhase { * @param rng A reference to an object of type `RNG` that implements the random number generator * to be used */ - virtual void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, - ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, - IFeatureSampling& featureSampling, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, RNG& rng) const = 0; + virtual void optimizeModel(IFeatureSpace& featureSpace, const IRuleInduction& ruleInduction, + IPartition& partition, ILabelSampling& labelSampling, + IInstanceSampling& instanceSampling, IFeatureSampling& featureSampling, + const IRulePruning& rulePruning, const IPostProcessor& postProcessor, + RNG& rng) const = 0; }; /** diff --git a/cpp/subprojects/common/include/mlrl/common/rule_induction/rule_induction.hpp b/cpp/subprojects/common/include/mlrl/common/rule_induction/rule_induction.hpp index 11fb32fec0..3549b3d931 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_induction/rule_induction.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_induction/rule_induction.hpp @@ -12,7 +12,7 @@ #include "mlrl/common/sampling/partition.hpp" #include "mlrl/common/sampling/weight_vector.hpp" #include "mlrl/common/statistics/statistics.hpp" -#include "mlrl/common/thresholds/thresholds.hpp" +#include "mlrl/common/thresholds/feature_space.hpp" /** * Defines an interface for all classes that 
implement an algorithm for the induction of individual rules. @@ -34,8 +34,8 @@ class IRuleInduction { /** * Induces a new rule. * - * @param thresholds A reference to an object of type `IThresholds` that provides access to the - * thresholds that may be used by the conditions of the rule + * @param featureSpace A reference to an object of type `IFeatureSpace` that provides access to the feature + * space * @param labelIndices A reference to an object of type `IIndexVector` that provides access to the indices * of the labels for which the rule may predict * @param weights A reference to an object of type `IWeightVector` that provides access to the weights @@ -54,8 +54,8 @@ class IRuleInduction { * @param modelBuilder A reference to an object of type `IModelBuilder`, the rule should be added to * @return True, if a rule has been induced, false otherwise */ - virtual bool induceRule(IThresholds& thresholds, const IIndexVector& labelIndices, const IWeightVector& weights, - IPartition& partition, IFeatureSampling& featureSampling, + virtual bool induceRule(IFeatureSpace& featureSpace, const IIndexVector& labelIndices, + const IWeightVector& weights, IPartition& partition, IFeatureSampling& featureSampling, const IRulePruning& rulePruning, const IPostProcessor& postProcessor, RNG& rng, IModelBuilder& modelBuilder) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/rule_model_assemblage/rule_model_assemblage.hpp b/cpp/subprojects/common/include/mlrl/common/rule_model_assemblage/rule_model_assemblage.hpp index 67c2fbd8f9..03bd160560 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_model_assemblage/rule_model_assemblage.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_model_assemblage/rule_model_assemblage.hpp @@ -12,7 +12,7 @@ #include "mlrl/common/sampling/partition_sampling.hpp" #include "mlrl/common/statistics/statistics_provider.hpp" #include "mlrl/common/stopping/stopping_criterion.hpp" -#include 
"mlrl/common/thresholds/thresholds.hpp" +#include "mlrl/common/thresholds/feature_space.hpp" /** * Defines an interface for all classes that implement an algorithm for the induction of several rules that will be @@ -42,8 +42,8 @@ class IRuleModelAssemblage { * features that may be used by the conditions of a rule * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides access to * the statistics which serve as the basis for learning rules - * @param thresholds A reference to an object of type `IThresholds` that provides access to the - * thresholds that may be used by the conditions of rules + * @param featureSpace A reference to an object of type `IFeatureSpace` that provides access to the + * feature space * @param rng A reference to an object of type `RNG` that implements the random number * generator to be used * @param modelBuilder A reference to an object of type `IModelBuilder`, the rules should be added to @@ -52,7 +52,7 @@ class IRuleModelAssemblage { const IPostProcessor& postProcessor, IPartition& partition, ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, IFeatureSampling& featureSampling, IStatisticsProvider& statisticsProvider, - IThresholds& thresholds, IModelBuilder& modelBuilder, RNG& rng) const = 0; + IFeatureSpace& featureSpace, IModelBuilder& modelBuilder, RNG& rng) const = 0; }; /** diff --git a/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp b/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp index c82ec0d9af..9f093697d8 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp @@ -23,8 +23,8 @@ class IRulePruning { * prune set. 
* * @param thresholdsSubset A reference to an object of type `IThresholdsSubset`, which contains the thresholds - * that correspond to the subspace of the instance space that is covered by the - * existing rule + * that correspond to the subspace of the feature space that is covered by the existing + * rule * @param partition A reference to an object of type `IPartition` that provides access to the indices of * the training examples that belong to the training set and the holdout set, * respectively diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector.hpp index 5f58828aba..448602763d 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector.hpp @@ -8,7 +8,7 @@ #include // Forward declarations -class IThresholds; +class IFeatureSpace; class IThresholdsSubset; /** @@ -30,9 +30,9 @@ class IWeightVector { * Creates and returns a new instance of type `IThresholdsSubset` that provides access to the statistics that * correspond to individual training examples whose weights are stored in this vector. 
* - * @param thresholds A reference to an object of type `IThresholds` that should be used to create the + * @param featureSpace A reference to an object of type `IFeatureSpace` that should be used to create the * instance * @return An unique pointer to an object of type `IThresholdsSubset` that has been created */ - virtual std::unique_ptr createThresholdsSubset(IThresholds& thresholds) const = 0; + virtual std::unique_ptr createThresholdsSubset(IFeatureSpace& featureSpace) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_bit.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_bit.hpp index 8699c6e34b..cb4f4e2dea 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_bit.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_bit.hpp @@ -68,5 +68,5 @@ class BitWeightVector final : public IWeightVector { bool hasZeroWeights() const override; - std::unique_ptr createThresholdsSubset(IThresholds& thresholds) const override; + std::unique_ptr createThresholdsSubset(IFeatureSpace& featureSpace) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_dense.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_dense.hpp index a4a807f95c..bbc8118f5c 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_dense.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_dense.hpp @@ -42,5 +42,5 @@ class DenseWeightVector final : public DenseVectorDecorator>, bool hasZeroWeights() const override; - std::unique_ptr createThresholdsSubset(IThresholds& thresholds) const override; + std::unique_ptr createThresholdsSubset(IFeatureSpace& featureSpace) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_equal.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_equal.hpp index 8c929324ed..cf2bae5933 100644 --- 
a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_equal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_equal.hpp @@ -44,5 +44,5 @@ class EqualWeightVector final : public IWeightVector { bool hasZeroWeights() const override; - std::unique_ptr createThresholdsSubset(IThresholds& thresholds) const override; + std::unique_ptr createThresholdsSubset(IFeatureSpace& featureSpace) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp similarity index 72% rename from cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp rename to cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp index 809c61d063..98a2fe392b 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp @@ -12,16 +12,15 @@ #include "mlrl/common/thresholds/thresholds_subset.hpp" /** - * Defines an interface for all classes that provide access to thresholds that may be used by the first condition of a - * rule that currently has an empty body and therefore covers the entire instance space. + * Defines an interface for all classes that provide access to the feature space. */ -class IThresholds { +class IFeatureSpace { public: - virtual ~IThresholds() {} + virtual ~IFeatureSpace() {} /** - * Creates and returns a new subset of the thresholds, which initially contains all of the thresholds. + * Creates and returns a new subspace of the feature space, which initially includes the entire feature space. 
* * @param weights A reference to an object of type `EqualWeightVector` that provides access to the weights of * individual training examples @@ -30,7 +29,7 @@ class IThresholds { virtual std::unique_ptr createSubset(const EqualWeightVector& weights) = 0; /** - * Creates and returns a new subset of the thresholds, which initially contains all of the thresholds. + * Creates and returns a new subspace of the feature space, which initially includes the entire feature space. * * @param weights A reference to an object of type `BitWeightVector` that provides access to the weights of * individual training examples @@ -39,7 +38,7 @@ class IThresholds { virtual std::unique_ptr createSubset(const BitWeightVector& weights) = 0; /** - * Creates and returns a new subset of the thresholds, which initially contains all of the thresholds. + * Creates and returns a new subspace of the feature space, which initially includes the entire feature space. * * @param weights A reference to an object of type `DenseWeightVector` that provides access to the * weights of individual training examples @@ -49,7 +48,7 @@ class IThresholds { /** * Returns a reference to an object of type `IStatisticsProvider` that provides access to the statistics that - * correspond to individual training examples in the instance space. + * correspond to individual training examples in the feature space. * * @return A reference to an object of type `IStatisticsProvider` */ @@ -57,15 +56,15 @@ class IThresholds { }; /** - * Defines an interface for all classes that allow to create instances of the type `IThresholds`. + * Defines an interface for all classes that allow to create instances of the type `IFeatureSpace`. */ -class IThresholdsFactory { +class IFeatureSpaceFactory { public: - virtual ~IThresholdsFactory() {} + virtual ~IFeatureSpaceFactory() {} /** - * Creates and returns a new object of type `IThresholds`. + * Creates and returns a new object of type `IFeatureSpace`. 
* * @param featureMatrix A reference to an object of type `IColumnWiseFeatureMatrix` that provides * column-wise access to the feature values of individual training examples @@ -73,9 +72,9 @@ class IThresholdsFactory { * the types of individual features * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides access to * statistics about the labels of the training examples - * @return An unique pointer to an object of type `IThresholds` that has been created + * @return An unique pointer to an object of type `IFeatureSpace` that has been created */ - virtual std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const = 0; + virtual std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, + const IFeatureInfo& featureInfo, + IStatisticsProvider& statisticsProvider) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp index cab40ea710..07bab3ab61 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp @@ -4,12 +4,12 @@ #pragma once #include "mlrl/common/input/feature_binning.hpp" -#include "mlrl/common/thresholds/thresholds.hpp" +#include "mlrl/common/thresholds/feature_space.hpp" /** * A factory that allows to create instances of the type `ExactThresholds`. 
*/ -class ExactThresholdsFactory final : public IThresholdsFactory { +class ExactThresholdsFactory final : public IFeatureSpaceFactory { private: const std::unique_ptr featureBinningFactoryPtr_; @@ -27,7 +27,7 @@ class ExactThresholdsFactory final : public IThresholdsFactory { */ ExactThresholdsFactory(std::unique_ptr featureBinningFactoryPtr, uint32 numThreads); - std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const override; + std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, + const IFeatureInfo& featureInfo, + IStatisticsProvider& statisticsProvider) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp index 712dc8dc24..0384375f0c 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp @@ -56,16 +56,15 @@ class IThresholdsSubset { uint32 featureIndex) = 0; /** - * Filters the thresholds such that only those thresholds, which correspond to the instance space that is - * covered by specific condition of a rule, are included. + * Filters the subspace such that it only includes those examples that are covered by specific condition of a + * rule. * * @param condition A reference to an object of type `Condition` that stores the properties of the condition */ virtual void filterThresholds(const Condition& condition) = 0; /** - * Resets the filtered thresholds. This reverts the effects of all previous calls to the function - * `filterThresholds`. + * Resets the subspace. This reverts the effects of all previous calls to the function `filterThresholds`. 
*/ virtual void resetThresholds() = 0; diff --git a/cpp/subprojects/common/src/mlrl/common/learner.cpp b/cpp/subprojects/common/src/mlrl/common/learner.cpp index 0dbb901b56..89400642e8 100644 --- a/cpp/subprojects/common/src/mlrl/common/learner.cpp +++ b/cpp/subprojects/common/src/mlrl/common/learner.cpp @@ -209,7 +209,7 @@ std::unique_ptr AbstractRuleLearner::createRuleMode return config_.getRuleModelAssemblageConfigPtr()->createRuleModelAssemblageFactory(labelMatrix); } -std::unique_ptr AbstractRuleLearner::createThresholdsFactory( +std::unique_ptr AbstractRuleLearner::createFeatureSpaceFactory( const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { std::unique_ptr featureBinningFactoryPtr = config_.getFeatureBinningConfigPtr()->createFeatureBinningFactory(featureMatrix, labelMatrix); @@ -405,11 +405,11 @@ std::unique_ptr AbstractRuleLearner::fit(const IFeatureInfo& fe std::unique_ptr statisticsProviderPtr = labelMatrix.createStatisticsProvider(*statisticsProviderFactoryPtr); - // Create thresholds... - std::unique_ptr thresholdsFactoryPtr = - this->createThresholdsFactory(featureMatrix, labelMatrix); - std::unique_ptr thresholdsPtr = - thresholdsFactoryPtr->create(featureMatrix, featureInfo, *statisticsProviderPtr); + // Create feature space... + std::unique_ptr featureSpaceFactoryPtr = + this->createFeatureSpaceFactory(featureMatrix, labelMatrix); + std::unique_ptr featureSpacePtr = + featureSpaceFactoryPtr->create(featureMatrix, featureInfo, *statisticsProviderPtr); // Create rule induction... 
std::unique_ptr ruleInductionFactoryPtr = @@ -445,10 +445,10 @@ std::unique_ptr AbstractRuleLearner::fit(const IFeatureInfo& fe ruleModelAssemblageFactoryPtr->create(std::move(stoppingCriterionFactoryPtr)); ruleModelAssemblagePtr->induceRules(*ruleInductionPtr, *rulePruningPtr, *postProcessorPtr, partition, *labelSamplingPtr, *instanceSamplingPtr, *featureSamplingPtr, - *statisticsProviderPtr, *thresholdsPtr, modelBuilder, rng); + *statisticsProviderPtr, *featureSpacePtr, modelBuilder, rng); // Post-optimize the model... - postOptimizationPtr->optimizeModel(*thresholdsPtr, *ruleInductionPtr, partition, *labelSamplingPtr, + postOptimizationPtr->optimizeModel(*featureSpacePtr, *ruleInductionPtr, partition, *labelSamplingPtr, *instanceSamplingPtr, *featureSamplingPtr, *rulePruningPtr, *postProcessorPtr, rng); diff --git a/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_phase_list.cpp b/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_phase_list.cpp index a1bfe77514..2cb0ea4615 100644 --- a/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_phase_list.cpp +++ b/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_phase_list.cpp @@ -36,13 +36,13 @@ class PostOptimizationPhaseList final : public IPostOptimization { return *intermediateModelBuilderPtr_; } - void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, + void optimizeModel(IFeatureSpace& featureSpace, const IRuleInduction& ruleInduction, IPartition& partition, ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, IFeatureSampling& featureSampling, const IRulePruning& rulePruning, const IPostProcessor& postProcessor, RNG& rng) const override { for (auto it = postOptimizationPhases_.cbegin(); it != postOptimizationPhases_.cend(); it++) { const std::unique_ptr& postOptimizationPhasePtr = *it; - postOptimizationPhasePtr->optimizeModel(thresholds, 
ruleInduction, partition, labelSampling, + postOptimizationPhasePtr->optimizeModel(featureSpace, ruleInduction, partition, labelSampling, instanceSampling, featureSampling, rulePruning, postProcessor, rng); } @@ -71,7 +71,7 @@ class NoPostOptimization final : public IPostOptimization { return *modelBuilderPtr_; } - void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, + void optimizeModel(IFeatureSpace& featureSpace, const IRuleInduction& ruleInduction, IPartition& partition, ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, IFeatureSampling& featureSampling, const IRulePruning& rulePruning, const IPostProcessor& postProcessor, RNG& rng) const override { diff --git a/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_sequential.cpp b/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_sequential.cpp index 3be97ccf18..a8651c1fb1 100644 --- a/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_sequential.cpp +++ b/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_sequential.cpp @@ -67,7 +67,7 @@ class SequentialPostOptimization final : public IPostOptimizationPhase { : modelBuilder_(modelBuilder), numIterations_(numIterations), refineHeads_(refineHeads), resampleFeatures_(resampleFeatures) {} - void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, + void optimizeModel(IFeatureSpace& featureSpace, const IRuleInduction& ruleInduction, IPartition& partition, ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, IFeatureSampling& featureSampling, const IRulePruning& rulePruning, const IPostProcessor& postProcessor, RNG& rng) const override { @@ -79,7 +79,8 @@ class SequentialPostOptimization final : public IPostOptimizationPhase { // Create a new subset of the given thresholds... 
const IWeightVector& weights = instanceSampling.sample(rng); - std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(thresholds); + std::unique_ptr thresholdsSubsetPtr = + weights.createThresholdsSubset(featureSpace); // Filter the thresholds subset according to the conditions of the current rule... for (auto it2 = conditionList.cbegin(); it2 != conditionList.cend(); it2++) { @@ -95,7 +96,7 @@ class SequentialPostOptimization final : public IPostOptimizationPhase { RuleReplacementBuilder ruleReplacementBuilder(intermediateRule); if (resampleFeatures_) { - ruleInduction.induceRule(thresholds, labelIndices, weights, partition, featureSampling, + ruleInduction.induceRule(featureSpace, labelIndices, weights, partition, featureSampling, rulePruning, postProcessor, rng, ruleReplacementBuilder); } else { std::unordered_set uniqueFeatureIndices; @@ -114,7 +115,7 @@ class SequentialPostOptimization final : public IPostOptimizationPhase { } PredefinedFeatureSampling predefinedFeatureSampling(indexVector); - ruleInduction.induceRule(thresholds, labelIndices, weights, partition, + ruleInduction.induceRule(featureSpace, labelIndices, weights, partition, predefinedFeatureSampling, rulePruning, postProcessor, rng, ruleReplacementBuilder); } diff --git a/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_unused_rule_removal.cpp b/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_unused_rule_removal.cpp index b3a81cdf87..6ee4ae131c 100644 --- a/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_unused_rule_removal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_unused_rule_removal.cpp @@ -16,7 +16,7 @@ class UnusedRuleRemoval final : public IPostOptimizationPhase { */ UnusedRuleRemoval(IntermediateModelBuilder& modelBuilder) : modelBuilder_(modelBuilder) {} - void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& 
partition, + void optimizeModel(IFeatureSpace& featureSpace, const IRuleInduction& ruleInduction, IPartition& partition, ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, IFeatureSampling& featureSampling, const IRulePruning& rulePruning, const IPostProcessor& postProcessor, RNG& rng) const override { diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp index 6347fef0c7..6851ccd412 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp @@ -20,8 +20,8 @@ class AbstractRuleInduction : public IRuleInduction { /** * Must be implemented by subclasses in order to grow a rule. * - * @param thresholds A reference to an object of type `IThresholds` that provides access to the - * thresholds that may be used by the conditions of the rule + * @param featureSpace A reference to an object of type `IFeatureSpace` that provides access to the feature + * space * @param labelIndices A reference to an object of type `IIndexVector` that provides access to the indices * of the labels for which the rule may predict * @param weights A reference to an object of type `IWeightVector` that provides access to the weights @@ -40,7 +40,8 @@ class AbstractRuleInduction : public IRuleInduction { * @return An unique pointer to an object of type `IThresholdsSubset` that has been used to * grow the rule */ - virtual std::unique_ptr growRule(IThresholds& thresholds, const IIndexVector& labelIndices, + virtual std::unique_ptr growRule(IFeatureSpace& featureSpace, + const IIndexVector& labelIndices, const IWeightVector& weights, IPartition& partition, IFeatureSampling& featureSampling, RNG& rng, std::unique_ptr& conditionListPtr, @@ -79,19 +80,19 @@ class AbstractRuleInduction : public IRuleInduction { 
modelBuilder.setDefaultRule(defaultPredictionPtr); } - bool induceRule(IThresholds& thresholds, const IIndexVector& labelIndices, const IWeightVector& weights, + bool induceRule(IFeatureSpace& featureSpace, const IIndexVector& labelIndices, const IWeightVector& weights, IPartition& partition, IFeatureSampling& featureSampling, const IRulePruning& rulePruning, const IPostProcessor& postProcessor, RNG& rng, IModelBuilder& modelBuilder) const override final { std::unique_ptr conditionListPtr; std::unique_ptr headPtr; std::unique_ptr thresholdsSubsetPtr = this->growRule( - thresholds, labelIndices, weights, partition, featureSampling, rng, conditionListPtr, headPtr); + featureSpace, labelIndices, weights, partition, featureSampling, rng, conditionListPtr, headPtr); if (headPtr) { if (weights.hasZeroWeights()) { // Prune rule... - IStatisticsProvider& statisticsProvider = thresholds.getStatisticsProvider(); + IStatisticsProvider& statisticsProvider = featureSpace.getStatisticsProvider(); statisticsProvider.switchToPruningRuleEvaluation(); std::unique_ptr coverageMaskPtr = rulePruning.prune(*thresholdsSubsetPtr, partition, *conditionListPtr, *headPtr); diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_beam_search.cpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_beam_search.cpp index 27b20aafb1..b20c707da3 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_beam_search.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_beam_search.cpp @@ -312,13 +312,13 @@ class BeamSearchTopDownRuleInduction final : public AbstractRuleInduction { protected: - std::unique_ptr growRule(IThresholds& thresholds, const IIndexVector& labelIndices, + std::unique_ptr growRule(IFeatureSpace& featureSpace, const IIndexVector& labelIndices, const IWeightVector& weights, IPartition& partition, IFeatureSampling& featureSampling, RNG& rng, 
std::unique_ptr& conditionListPtr, std::unique_ptr& headPtr) const override { // Create a new subset of the given thresholds... - std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(thresholds); + std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(featureSpace); // Sample features... const IIndexVector& sampledFeatureIndices = featureSampling.sample(rng); diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_greedy.cpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_greedy.cpp index 56593dfcff..2e48ab9fa6 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_greedy.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_greedy.cpp @@ -47,7 +47,7 @@ class GreedyTopDownRuleInduction final : public AbstractRuleInduction { protected: - std::unique_ptr growRule(IThresholds& thresholds, const IIndexVector& labelIndices, + std::unique_ptr growRule(IFeatureSpace& featureSpace, const IIndexVector& labelIndices, const IWeightVector& weights, IPartition& partition, IFeatureSampling& featureSampling, RNG& rng, std::unique_ptr& conditionListPtr, @@ -62,7 +62,7 @@ class GreedyTopDownRuleInduction final : public AbstractRuleInduction { bool foundRefinement = true; // Create a new subset of the given thresholds... - std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(thresholds); + std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(featureSpace); // Search for the best refinement until no improvement in terms of the rule's quality is possible anymore or // until the maximum number of conditions has been reached... 
diff --git a/cpp/subprojects/common/src/mlrl/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp b/cpp/subprojects/common/src/mlrl/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp index 19601b62a8..eea6812345 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp @@ -26,8 +26,8 @@ class SequentialRuleModelAssemblage final : public IRuleModelAssemblage { void induceRules(const IRuleInduction& ruleInduction, const IRulePruning& rulePruning, const IPostProcessor& postProcessor, IPartition& partition, ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, IFeatureSampling& featureSampling, - IStatisticsProvider& statisticsProvider, IThresholds& thresholds, IModelBuilder& modelBuilder, - RNG& rng) const override { + IStatisticsProvider& statisticsProvider, IFeatureSpace& featureSpace, + IModelBuilder& modelBuilder, RNG& rng) const override { uint32 numRules = useDefaultRule_ ? 
1 : 0; uint32 numUsedRules = 0; @@ -56,7 +56,7 @@ class SequentialRuleModelAssemblage final : public IRuleModelAssemblage { const IWeightVector& weights = instanceSampling.sample(rng); const IIndexVector& labelIndices = labelSampling.sample(rng); - bool success = ruleInduction.induceRule(thresholds, labelIndices, weights, partition, featureSampling, + bool success = ruleInduction.induceRule(featureSpace, labelIndices, weights, partition, featureSampling, rulePruning, postProcessor, rng, modelBuilder); if (success) { diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp index 2e15008502..0e7f7a37f5 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/sampling/weight_vector_bit.hpp" -#include "mlrl/common/thresholds/thresholds.hpp" +#include "mlrl/common/thresholds/feature_space.hpp" #include "mlrl/common/thresholds/thresholds_subset.hpp" BitWeightVector::BitWeightVector(uint32 numElements, bool init) : vector_(numElements, init), numNonZeroWeights_(0) {} @@ -33,6 +33,6 @@ void BitWeightVector::clear() { vector_.clear(); } -std::unique_ptr BitWeightVector::createThresholdsSubset(IThresholds& thresholds) const { - return thresholds.createSubset(*this); +std::unique_ptr BitWeightVector::createThresholdsSubset(IFeatureSpace& featureSpace) const { + return featureSpace.createSubset(*this); } diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp index 6967d02b89..5e76fd1797 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/sampling/weight_vector_dense.hpp" -#include 
"mlrl/common/thresholds/thresholds.hpp" +#include "mlrl/common/thresholds/feature_space.hpp" #include "mlrl/common/thresholds/thresholds_subset.hpp" template @@ -23,8 +23,8 @@ bool DenseWeightVector::hasZeroWeights() const { } template -std::unique_ptr DenseWeightVector::createThresholdsSubset(IThresholds& thresholds) const { - return thresholds.createSubset(*this); +std::unique_ptr DenseWeightVector::createThresholdsSubset(IFeatureSpace& featureSpace) const { + return featureSpace.createSubset(*this); } template class DenseWeightVector; diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp index 3036cad419..78a193df7d 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/sampling/weight_vector_equal.hpp" -#include "mlrl/common/thresholds/thresholds.hpp" +#include "mlrl/common/thresholds/feature_space.hpp" #include "mlrl/common/thresholds/thresholds_subset.hpp" EqualWeightVector::EqualWeightVector(uint32 numElements) : numElements_(numElements) {} @@ -21,6 +21,6 @@ bool EqualWeightVector::hasZeroWeights() const { return false; } -std::unique_ptr EqualWeightVector::createThresholdsSubset(IThresholds& thresholds) const { - return thresholds.createSubset(*this); +std::unique_ptr EqualWeightVector::createThresholdsSubset(IFeatureSpace& featureSpace) const { + return featureSpace.createSubset(*this); } diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 655d57af42..81aefee7e4 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -67,7 +67,7 @@ struct FilteredCacheEntry final { /** * Provides access to all 
thresholds that result from the feature values of the training examples. */ -class ExactThresholds final : public IThresholds { +class ExactThresholds final : public IFeatureSpace { private: /** @@ -321,22 +321,10 @@ class ExactThresholds final : public IThresholds { } }; - /** - * A reference to an object of type `IColumnWiseFeatureMatrix` that provides column-wise access to the feature - * values of the training examples. - */ const IColumnWiseFeatureMatrix& featureMatrix_; - /** - * A reference to an object of type `IFeatureInfo` that provides information about the types of individual - * features. - */ const IFeatureInfo& featureInfo_; - /** - * A reference to an object of type `IStatisticsProvider` that provides access to statistics about the labels of - * the training examples. - */ IStatisticsProvider& statisticsProvider_; const IFeatureBinningFactory& featureBinningFactory_; @@ -396,9 +384,9 @@ ExactThresholdsFactory::ExactThresholdsFactory(std::unique_ptr ExactThresholdsFactory::create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const { +std::unique_ptr ExactThresholdsFactory::create(const IColumnWiseFeatureMatrix& featureMatrix, + const IFeatureInfo& featureInfo, + IStatisticsProvider& statisticsProvider) const { return std::make_unique(featureMatrix, featureInfo, statisticsProvider, *featureBinningFactoryPtr_, numThreads_); } From 47405604e91a926c1bfaf4f3a1c6bf295eab0577 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 21 Feb 2024 21:50:53 +0100 Subject: [PATCH 41/53] Rename class IThresholdsSubset to IFeatureSubspace. 
--- .../mlrl/common/indices/index_vector.hpp | 8 +-- .../common/indices/index_vector_complete.hpp | 2 +- .../common/indices/index_vector_partial.hpp | 2 +- .../mlrl/common/rule_pruning/rule_pruning.hpp | 9 ++- .../rule_refinement/prediction_complete.hpp | 2 +- .../rule_refinement/prediction_partial.hpp | 2 +- .../mlrl/common/sampling/partition.hpp | 30 ++++----- .../mlrl/common/sampling/partition_bi.hpp | 4 +- .../mlrl/common/sampling/partition_single.hpp | 4 +- .../mlrl/common/sampling/weight_vector.hpp | 10 +-- .../common/sampling/weight_vector_bit.hpp | 2 +- .../common/sampling/weight_vector_dense.hpp | 2 +- .../common/sampling/weight_vector_equal.hpp | 2 +- .../mlrl/common/thresholds/coverage_mask.hpp | 2 +- .../mlrl/common/thresholds/feature_space.hpp | 24 +++---- ...sholds_subset.hpp => feature_subspace.hpp} | 60 +++++++++--------- .../common/indices/index_vector_complete.cpp | 6 +- .../common/indices/index_vector_partial.cpp | 6 +- .../post_optimization_sequential.cpp | 7 +-- .../rule_induction/rule_induction_common.hpp | 24 +++---- .../rule_induction_top_down_beam_search.cpp | 62 +++++++++---------- .../rule_induction_top_down_common.hpp | 8 +-- .../rule_induction_top_down_greedy.cpp | 18 +++--- .../common/rule_pruning/rule_pruning_irep.cpp | 14 ++--- .../common/rule_pruning/rule_pruning_no.cpp | 2 +- .../rule_refinement/prediction_complete.cpp | 4 +- .../rule_refinement/prediction_partial.cpp | 4 +- .../src/mlrl/common/sampling/partition_bi.cpp | 10 +-- .../mlrl/common/sampling/partition_single.cpp | 12 ++-- .../common/sampling/weight_vector_bit.cpp | 6 +- .../common/sampling/weight_vector_dense.cpp | 6 +- .../common/sampling/weight_vector_equal.cpp | 6 +- .../mlrl/common/thresholds/coverage_mask.cpp | 2 +- .../common/thresholds/thresholds_exact.cpp | 14 ++--- 34 files changed, 188 insertions(+), 188 deletions(-) rename cpp/subprojects/common/include/mlrl/common/thresholds/{thresholds_subset.hpp => feature_subspace.hpp} (78%) diff --git 
a/cpp/subprojects/common/include/mlrl/common/indices/index_vector.hpp b/cpp/subprojects/common/include/mlrl/common/indices/index_vector.hpp index 1d7732754a..8ebcd6d621 100644 --- a/cpp/subprojects/common/include/mlrl/common/indices/index_vector.hpp +++ b/cpp/subprojects/common/include/mlrl/common/indices/index_vector.hpp @@ -9,7 +9,7 @@ // Forward declarations class IRuleRefinement; -class IThresholdsSubset; +class IFeatureSubspace; /** * Defines an interface for all classes that provide random access to indices. @@ -46,11 +46,11 @@ class IIndexVector { * Creates and return a new instance of type `IRuleRefinement` that allows to search for the best refinement of * an existing rule that predicts only for the labels whose indices are stored in this vector. * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to create - * the instance + * @param featureSubspace A reference to an object of type `IFeatureSubspace` that should be used to search for the + * refinement * @param featureIndex The index of the feature that should be considered when searching for the refinement * @return An unique pointer to an object of type `IRuleRefinement` that has been created */ - virtual std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, + virtual std::unique_ptr createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/indices/index_vector_complete.hpp b/cpp/subprojects/common/include/mlrl/common/indices/index_vector_complete.hpp index 270e514fc3..ee43590860 100644 --- a/cpp/subprojects/common/include/mlrl/common/indices/index_vector_complete.hpp +++ b/cpp/subprojects/common/include/mlrl/common/indices/index_vector_complete.hpp @@ -54,6 +54,6 @@ class CompleteIndexVector final : public IIndexVector { uint32 getIndex(uint32 pos) const override; - std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, +
std::unique_ptr createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/indices/index_vector_partial.hpp b/cpp/subprojects/common/include/mlrl/common/indices/index_vector_partial.hpp index c50b07b597..3433da667c 100644 --- a/cpp/subprojects/common/include/mlrl/common/indices/index_vector_partial.hpp +++ b/cpp/subprojects/common/include/mlrl/common/indices/index_vector_partial.hpp @@ -25,6 +25,6 @@ class PartialIndexVector final : public ResizableVectorDecorator createRuleRefinement(IThresholdsSubset& thresholdsSubset, + std::unique_ptr createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp b/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp index 9f093697d8..9e11451f06 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp @@ -5,7 +5,7 @@ #include "mlrl/common/model/condition_list.hpp" #include "mlrl/common/sampling/partition.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" /** * Defines an interface for all classes that implement a strategy for pruning individual rules based on a "prune set", @@ -22,9 +22,8 @@ class IRulePruning { * pruned by removing individual conditions in a way that improves over its original quality, measured on the * prune set. 
* - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset`, which contains the thresholds - * that correspond to the subspace of the feature space that is covered by the existing - * rule + * @param featureSubspace A reference to an object of type `IFeatureSubspace` that includes the training + * examples covered by the existing rule * @param partition A reference to an object of type `IPartition` that provides access to the indices of * the training examples that belong to the training set and the holdout set, * respectively @@ -36,7 +35,7 @@ class IRulePruning { * examples that are covered by the pruned rule or a null pointer if the rule was not * pruned */ - virtual std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, + virtual std::unique_ptr prune(IFeatureSubspace& featureSubspace, IPartition& partition, ConditionList& conditions, const IPrediction& head) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/prediction_complete.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/prediction_complete.hpp index 70830ff71a..9b14c90280 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_refinement/prediction_complete.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/prediction_complete.hpp @@ -113,7 +113,7 @@ class CompletePrediction final : public VectorDecorator const IStatistics& statistics, const OutOfSampleWeightVector>& weights) const override; - std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, + std::unique_ptr createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const override; void apply(IStatistics& statistics, uint32 statisticIndex) const override; diff --git a/cpp/subprojects/common/include/mlrl/common/rule_refinement/prediction_partial.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/prediction_partial.hpp index 986b6448da..a83b9a84a2 100644 --- 
a/cpp/subprojects/common/include/mlrl/common/rule_refinement/prediction_partial.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/prediction_partial.hpp @@ -147,7 +147,7 @@ class PartialPrediction final : public ResizableVectorDecorator>& weights) const override; - std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, + std::unique_ptr createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const override; void apply(IStatistics& statistics, uint32 statisticIndex) const override; diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp index cb2e283db1..d51b90f711 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp @@ -15,7 +15,7 @@ class IInstanceSampling; class IInstanceSamplingFactory; class IRowWiseLabelMatrix; class IStatistics; -class IThresholdsSubset; +class IFeatureSubspace; class IPrediction; class IMarginalProbabilityCalibrationModel; class IMarginalProbabilityCalibrator; @@ -61,28 +61,28 @@ class IPartition { * that do not belong to the current sample and are marked as covered according to a given object of type * `CoverageMask`. 
* - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * evaluate the prediction - * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples - * that are covered by the rule - * @param head A reference to an object of type `IPrediction` that stores the scores that are - * predicted by the rule - * @return An object of type `Quality` that stores the calculated quality + * @param featureSubspace A reference to an object of type `IFeatureSubspace` that should be used to evaluate + * the prediction + * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples that + * are covered by the rule + * @param head A reference to an object of type `IPrediction` that stores the scores that are + * predicted by the rule + * @return An object of type `Quality` that stores the calculated quality */ - virtual Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + virtual Quality evaluateOutOfSample(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, const IPrediction& head) = 0; /** * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as * covered according to a given object of type `CoverageMask`. 
* - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * recalculate the prediction - * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples - * that are covered by the rule - * @param head A reference to an object of type `IPrediction` to be updated + * @param featureSubspace A reference to an object of type `IFeatureSubspace` that should be used to recalculate + * the prediction + * @param coverageMask A reference to an object of type `CoverageMask` that keeps track of the examples that + * are covered by the rule + * @param head A reference to an object of type `IPrediction` to be updated */ - virtual void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + virtual void recalculatePrediction(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, IPrediction& head) = 0; /** diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/partition_bi.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/partition_bi.hpp index 45edcaa216..0690f726ff 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/partition_bi.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/partition_bi.hpp @@ -126,10 +126,10 @@ class BiPartition final : public VectorDecorator>, const IRowWiseLabelMatrix& labelMatrix, IStatistics& statistics) override; - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + Quality evaluateOutOfSample(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, const IPrediction& head) override; - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + void recalculatePrediction(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, IPrediction& head) override; std::unique_ptr fitMarginalProbabilityCalibrationModel( diff --git 
a/cpp/subprojects/common/include/mlrl/common/sampling/partition_single.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/partition_single.hpp index 95717d3db0..78ff3a7cde 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/partition_single.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/partition_single.hpp @@ -54,10 +54,10 @@ class SinglePartition final : public IPartition { const IRowWiseLabelMatrix& labelMatrix, IStatistics& statistics) override; - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + Quality evaluateOutOfSample(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, const IPrediction& head) override; - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, + void recalculatePrediction(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, IPrediction& head) override; std::unique_ptr fitMarginalProbabilityCalibrationModel( diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector.hpp index 448602763d..6a1084bfb1 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector.hpp @@ -9,7 +9,7 @@ // Forward declarations class IFeatureSpace; -class IThresholdsSubset; +class IFeatureSubspace; /** * Defines an interface for one-dimensional vectors that provide access to weights. @@ -27,12 +27,12 @@ class IWeightVector { virtual bool hasZeroWeights() const = 0; /** - * Creates and returns a new instance of type `IThresholdsSubset` that provides access to the statistics that - * correspond to individual training examples whose weights are stored in this vector. 
+ * Creates and returns a new instance of type `IFeatureSubspace` that uses the weights in this vector for the + * training examples it includes. * * @param featureSpace A reference to an object of type `IFeatureSpace` that should be used to create the * instance - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created + * @return An unique pointer to an object of type `IFeatureSubspace` that has been created */ - virtual std::unique_ptr createThresholdsSubset(IFeatureSpace& featureSpace) const = 0; + virtual std::unique_ptr createFeatureSubspace(IFeatureSpace& featureSpace) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_bit.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_bit.hpp index cb4f4e2dea..d134b85c1f 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_bit.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_bit.hpp @@ -68,5 +68,5 @@ class BitWeightVector final : public IWeightVector { bool hasZeroWeights() const override; - std::unique_ptr createThresholdsSubset(IFeatureSpace& featureSpace) const override; + std::unique_ptr createFeatureSubspace(IFeatureSpace& featureSpace) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_dense.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_dense.hpp index bbc8118f5c..ba96277de2 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_dense.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_dense.hpp @@ -42,5 +42,5 @@ class DenseWeightVector final : public DenseVectorDecorator>, bool hasZeroWeights() const override; - std::unique_ptr createThresholdsSubset(IFeatureSpace& featureSpace) const override; + std::unique_ptr createFeatureSubspace(IFeatureSpace& featureSpace) const override; }; diff --git 
a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_equal.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_equal.hpp index cf2bae5933..a1e28c473b 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_equal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/weight_vector_equal.hpp @@ -44,5 +44,5 @@ class EqualWeightVector final : public IWeightVector { bool hasZeroWeights() const override; - std::unique_ptr createThresholdsSubset(IFeatureSpace& featureSpace) const override; + std::unique_ptr createFeatureSubspace(IFeatureSpace& featureSpace) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp index 00cf603bd6..3eef72a213 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp @@ -9,7 +9,7 @@ #include // Forward declarations -class IThresholdsSubset; +class IFeatureSubspace; class SinglePartition; class BiPartition; class IPrediction; diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp index 98a2fe392b..5daf3abc2f 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp @@ -9,7 +9,7 @@ #include "mlrl/common/sampling/weight_vector_dense.hpp" #include "mlrl/common/sampling/weight_vector_equal.hpp" #include "mlrl/common/statistics/statistics_provider.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" /** * Defines an interface for all classes that provide access to the feature space. 
@@ -20,31 +20,31 @@ class IFeatureSpace { virtual ~IFeatureSpace() {} /** - * Creates and returns a new subspace of the feature space, which initially includes the entire feature space. + * Creates and returns a new subspace of this feature space. * * @param weights A reference to an object of type `EqualWeightVector` that provides access to the weights of * individual training examples - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created + * @return An unique pointer to an object of type `IFeatureSubspace` that has been created */ - virtual std::unique_ptr createSubset(const EqualWeightVector& weights) = 0; + virtual std::unique_ptr createSubspace(const EqualWeightVector& weights) = 0; /** - * Creates and returns a new subspace of the feature space, which initially includes the entire feature space. + * Creates and returns a new subspace of this feature space. * * @param weights A reference to an object of type `BitWeightVector` that provides access to the weights of * individual training examples - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created + * @return An unique pointer to an object of type `IFeatureSubspace` that has been created */ - virtual std::unique_ptr createSubset(const BitWeightVector& weights) = 0; + virtual std::unique_ptr createSubspace(const BitWeightVector& weights) = 0; /** - * Creates and returns a new subspace of the feature space, which initially includes the entire feature space. + * Creates and returns a new subspace of this feature space. 
* - * @param weights A reference to an object of type `DenseWeightVector` that provides access to the - * weights of individual training examples - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created + * @param weights A reference to an object of type `DenseWeightVector` that provides access to the weights of + * individual training examples + * @return An unique pointer to an object of type `IFeatureSubspace` that has been created */ - virtual std::unique_ptr createSubset(const DenseWeightVector& weights) = 0; + virtual std::unique_ptr createSubspace(const DenseWeightVector& weights) = 0; /** * Returns a reference to an object of type `IStatisticsProvider` that provides access to the statistics that diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/feature_subspace.hpp similarity index 78% rename from cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp rename to cpp/subprojects/common/include/mlrl/common/thresholds/feature_subspace.hpp index 0384375f0c..4759cf23c4 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_subset.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/feature_subspace.hpp @@ -15,25 +15,24 @@ #include /** - * Defines an interface for all classes that provide access a subset of thresholds that may be used by the conditions of - * a rule with arbitrary body. The thresholds may include only those that correspond to the subspace of the instance - * space that is covered by the rule. + * Defines an interface for all classes that provide access to a subspace of the feature space that includes the training + * examples covered by a rule. */ -class IThresholdsSubset { +class IFeatureSubspace { public: - virtual ~IThresholdsSubset() {} + virtual ~IFeatureSubspace() {} /** * Creates and returns a copy of this object.
* - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created + * @return An unique pointer to an object of type `IFeatureSubspace` that has been created */ - virtual std::unique_ptr copy() const = 0; + virtual std::unique_ptr copy() const = 0; /** * Creates and returns a new instance of the type `IRuleRefinement` that allows to find the best refinement of - * an existing rule that predicts for all available labels. + * a rule that covers all examples included in this subspace and predicts for all available labels. * * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the * indices of the labels for which the existing rule predicts @@ -45,7 +44,7 @@ class IThresholdsSubset { /** * Creates and returns a new instance of the type `IRuleRefinement` that allows to find the best refinement of - * an existing rule that predicts for a subset of the available labels. + * a rule that covers all examples included in this subspace and predicts for a subset of the available labels. * * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the * indices of the labels for which the existing rule predicts @@ -56,34 +55,34 @@ class IThresholdsSubset { uint32 featureIndex) = 0; /** - * Filters the subspace such that it only includes those examples that are covered by specific condition of a - * rule. + * Filters the subspace such that it only includes those training examples that satisfy a specific condition. * - * @param condition A reference to an object of type `Condition` that stores the properties of the condition + * @param condition A reference to an object of type `Condition` */ - virtual void filterThresholds(const Condition& condition) = 0; + virtual void filterSubspace(const Condition& condition) = 0; /** - * Resets the subspace.
This reverts the effects of all previous calls to the function `filterSubspace`. */ - virtual void resetThresholds() = 0; + virtual void resetSubspace() = 0; /** - * Returns an object of type `CoverageMask` that keeps track of the elements that are covered by the refinement - * that has been applied via the function `applyRefinement`. + * Returns an object of type `CoverageMask` that keeps track of the training examples that are included in this + * subspace. * - * @return A reference to an object of type `CoverageMask` that keeps track of the elements that are covered by - * the refinement + * @return A reference to an object of type `CoverageMask` that keeps track of the training examples that are + * included in this subspace */ virtual const CoverageMask& getCoverageMask() const = 0; /** * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered according to a given object of type + * that do not belong to the current instance sub-sample and are marked as covered according to a given * `CoverageMask`. * * For calculating the quality, only examples that belong to the training set and are not included in the - * current sub-sample, i.e., only examples with zero weights, are considered. + * current instance sub-sample, i.e., only examples with zero weights, are considered and assigned equally + * distributed weights. 
* * @param partition A reference to an object of type `SinglePartition` that provides access to the indices * of the training examples that belong to the training set @@ -98,11 +97,12 @@ class IThresholdsSubset { /** * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered according to a given object of type + * that do not belong to the current instance sub-sample and are marked as covered according to a given * `CoverageMask`. * * For calculating the quality, only examples that belong to the training set and are not included in the - * current sub-sample, i.e., only examples with zero weights, are considered. + * current instance sub-sample, i.e., only examples with zero weights, are considered and assigned equally + * distributed weights. * * @param partition A reference to an object of type `BiPartition` that provides access to the indices of * the training examples that belong to the training set @@ -117,10 +117,10 @@ class IThresholdsSubset { /** * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `CoverageMask`. + * covered according to a given `CoverageMask`. * * When calculating the updated prediction, the weights of the individual training examples are ignored and - * equally distributed weights are assumed instead. + * equally distributed weights are used instead. * * @param partition A reference to an object of type `SinglePartition` that provides access to the indices * of the training examples that belong to the training set @@ -133,10 +133,10 @@ class IThresholdsSubset { /** * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `CoverageMask`. + * covered according to a given `CoverageMask`. 
* * When calculating the updated prediction, the weights of the individual training examples are ignored and - * equally distributed weights are assumed instead. + * equally distributed weights are used instead. * * @param partition A reference to an object of type `BiPartition` that provides access to the indices of * the training examples that belong to the training set @@ -148,14 +148,16 @@ class IThresholdsSubset { IPrediction& head) const = 0; /** - * Updates the statistics that correspond to the current subset based on the prediction of a rule. + * Updates the statistics that correspond to the training examples included in this subspace based on the + * prediction of a rule. * * @param prediction A reference to an object of type `IPrediction` that stores the prediction of the rule */ virtual void applyPrediction(const IPrediction& prediction) = 0; /** - * Reverts the statistics that correspond to the current subset based on the predictions of a rule. + * Reverts the statistics that correspond to the training examples included in this subspace based on the + * predictions of a rule. 
* * @param prediction A reference to an object of type `IPrediction` that stores the prediction of the rule */ diff --git a/cpp/subprojects/common/src/mlrl/common/indices/index_vector_complete.cpp b/cpp/subprojects/common/src/mlrl/common/indices/index_vector_complete.cpp index f951627fb4..85b3ae8351 100644 --- a/cpp/subprojects/common/src/mlrl/common/indices/index_vector_complete.cpp +++ b/cpp/subprojects/common/src/mlrl/common/indices/index_vector_complete.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/indices/index_vector_complete.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" CompleteIndexVector::CompleteIndexVector(uint32 numElements) { numElements_ = numElements; @@ -30,7 +30,7 @@ CompleteIndexVector::const_iterator CompleteIndexVector::cend() const { return IndexIterator(numElements_); } -std::unique_ptr CompleteIndexVector::createRuleRefinement(IThresholdsSubset& thresholdsSubset, +std::unique_ptr CompleteIndexVector::createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const { - return thresholdsSubset.createRuleRefinement(*this, featureIndex); + return featureSubspace.createRuleRefinement(*this, featureIndex); } diff --git a/cpp/subprojects/common/src/mlrl/common/indices/index_vector_partial.cpp b/cpp/subprojects/common/src/mlrl/common/indices/index_vector_partial.cpp index e6594c90e8..47f76c78d8 100644 --- a/cpp/subprojects/common/src/mlrl/common/indices/index_vector_partial.cpp +++ b/cpp/subprojects/common/src/mlrl/common/indices/index_vector_partial.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/indices/index_vector_partial.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" PartialIndexVector::PartialIndexVector(uint32 numElements, bool init) : ResizableVectorDecorator>>( @@ -18,7 +18,7 @@ uint32 PartialIndexVector::getIndex(uint32 pos) const { return (*this)[pos]; } -std::unique_ptr 
PartialIndexVector::createRuleRefinement(IThresholdsSubset& thresholdsSubset, +std::unique_ptr PartialIndexVector::createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const { - return thresholdsSubset.createRuleRefinement(*this, featureIndex); + return featureSubspace.createRuleRefinement(*this, featureIndex); } diff --git a/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_sequential.cpp b/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_sequential.cpp index a8651c1fb1..99bd450a50 100644 --- a/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_sequential.cpp +++ b/cpp/subprojects/common/src/mlrl/common/post_optimization/post_optimization_sequential.cpp @@ -79,17 +79,16 @@ class SequentialPostOptimization final : public IPostOptimizationPhase { // Create a new subset of the given thresholds... const IWeightVector& weights = instanceSampling.sample(rng); - std::unique_ptr thresholdsSubsetPtr = - weights.createThresholdsSubset(featureSpace); + std::unique_ptr featureSubspacePtr = weights.createFeatureSubspace(featureSpace); // Filter the thresholds subset according to the conditions of the current rule... for (auto it2 = conditionList.cbegin(); it2 != conditionList.cend(); it2++) { const Condition& condition = *it2; - thresholdsSubsetPtr->filterThresholds(condition); + featureSubspacePtr->filterSubspace(condition); } // Revert the statistics based on the predictions of the current rule... - thresholdsSubsetPtr->revertPrediction(prediction); + featureSubspacePtr->revertPrediction(prediction); // Learn a new rule... const IIndexVector& labelIndices = refineHeads_ ? 
labelSampling.sample(rng) : prediction; diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp index 6851ccd412..29a81e4102 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_common.hpp @@ -37,15 +37,15 @@ class AbstractRuleInduction : public IRuleInduction { * store the conditions of the rule * @param headPtr A reference to an unique pointer of type `IEvaluatedPrediction` that should be used * to store the head of the rule - * @return An unique pointer to an object of type `IThresholdsSubset` that has been used to + * @return An unique pointer to an object of type `IFeatureSubspace` that has been used to * grow the rule */ - virtual std::unique_ptr growRule(IFeatureSpace& featureSpace, - const IIndexVector& labelIndices, - const IWeightVector& weights, IPartition& partition, - IFeatureSampling& featureSampling, RNG& rng, - std::unique_ptr& conditionListPtr, - std::unique_ptr& headPtr) const = 0; + virtual std::unique_ptr growRule(IFeatureSpace& featureSpace, + const IIndexVector& labelIndices, + const IWeightVector& weights, IPartition& partition, + IFeatureSampling& featureSampling, RNG& rng, + std::unique_ptr& conditionListPtr, + std::unique_ptr& headPtr) const = 0; public: @@ -86,7 +86,7 @@ class AbstractRuleInduction : public IRuleInduction { IModelBuilder& modelBuilder) const override final { std::unique_ptr conditionListPtr; std::unique_ptr headPtr; - std::unique_ptr thresholdsSubsetPtr = this->growRule( + std::unique_ptr featureSubspacePtr = this->growRule( featureSpace, labelIndices, weights, partition, featureSampling, rng, conditionListPtr, headPtr); if (headPtr) { @@ -95,14 +95,14 @@ class AbstractRuleInduction : public IRuleInduction { IStatisticsProvider& statisticsProvider = featureSpace.getStatisticsProvider(); 
statisticsProvider.switchToPruningRuleEvaluation(); std::unique_ptr coverageMaskPtr = - rulePruning.prune(*thresholdsSubsetPtr, partition, *conditionListPtr, *headPtr); + rulePruning.prune(*featureSubspacePtr, partition, *conditionListPtr, *headPtr); statisticsProvider.switchToRegularRuleEvaluation(); // Re-calculate the scores in the head based on the entire training data... if (recalculatePredictions_) { const CoverageMask& coverageMask = - coverageMaskPtr ? *coverageMaskPtr : thresholdsSubsetPtr->getCoverageMask(); - partition.recalculatePrediction(*thresholdsSubsetPtr, coverageMask, *headPtr); + coverageMaskPtr ? *coverageMaskPtr : featureSubspacePtr->getCoverageMask(); + partition.recalculatePrediction(*featureSubspacePtr, coverageMask, *headPtr); } } @@ -110,7 +110,7 @@ class AbstractRuleInduction : public IRuleInduction { headPtr->postProcess(postProcessor); // Update the statistics by applying the predictions of the new rule... - thresholdsSubsetPtr->applyPrediction(*headPtr); + featureSubspacePtr->applyPrediction(*headPtr); // Add the induced rule to the model... modelBuilder.addRule(conditionListPtr, headPtr); diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_beam_search.cpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_beam_search.cpp index b20c707da3..a44c63fa4f 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_beam_search.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_beam_search.cpp @@ -9,7 +9,7 @@ /** * A single entry of a beam, corresponding to a rule that may be further refined. 
It stores the conditions and the head - * of the current rule, as well as an object of type `IThresholdsSubset` that is required to search for potential + * of the current rule, as well as an object of type `IFeatureSubspace` that is required to search for potential * refinements of the rule and an `IIndexVector` that provides access to the indices of the labels for which these * refinements may predict. */ @@ -28,10 +28,10 @@ struct BeamEntry final { std::unique_ptr headPtr; /** - * An unique pointer to an object of type `IThresholdsSubset` that may be used to search for potential + * An unique pointer to an object of type `IFeatureSubspace` that may be used to search for potential * refinements of the rule. */ - std::unique_ptr thresholdsSubsetPtr; + std::unique_ptr featureSubspacePtr; /** * A pointer to an object of type `IIndexVector` that provides access to the indices of the labels for which @@ -41,10 +41,10 @@ struct BeamEntry final { }; static inline void initializeEntry(BeamEntry& entry, Refinement& refinement, - std::unique_ptr thresholdsSubsetPtr, + std::unique_ptr featureSubspacePtr, const IIndexVector& labelIndices, bool keepHead) { - thresholdsSubsetPtr->filterThresholds(refinement); - entry.thresholdsSubsetPtr = std::move(thresholdsSubsetPtr); + featureSubspacePtr->filterSubspace(refinement); + entry.featureSubspacePtr = std::move(featureSubspacePtr); entry.conditionListPtr = std::make_unique(); entry.conditionListPtr->addCondition(refinement); entry.headPtr = std::move(refinement.headPtr); @@ -52,10 +52,10 @@ static inline void initializeEntry(BeamEntry& entry, Refinement& refinement, } static inline void copyEntry(BeamEntry& newEntry, BeamEntry& oldEntry, Refinement& refinement, - std::unique_ptr thresholdsSubsetPtr, + std::unique_ptr featureSubspacePtr, std::unique_ptr conditionListPtr, bool keepHead, uint32 minCoverage) { - thresholdsSubsetPtr->filterThresholds(refinement); - newEntry.thresholdsSubsetPtr = std::move(thresholdsSubsetPtr); + 
featureSubspacePtr->filterSubspace(refinement); + newEntry.featureSubspacePtr = std::move(featureSubspacePtr); newEntry.conditionListPtr = std::move(conditionListPtr); newEntry.conditionListPtr->addCondition(refinement); newEntry.headPtr = std::move(refinement.headPtr); @@ -68,7 +68,7 @@ static inline void copyEntry(BeamEntry& newEntry, BeamEntry& oldEntry, Refinemen } static inline void copyEntry(BeamEntry& newEntry, BeamEntry& oldEntry) { - newEntry.thresholdsSubsetPtr = std::move(oldEntry.thresholdsSubsetPtr); + newEntry.featureSubspacePtr = std::move(oldEntry.featureSubspacePtr); newEntry.conditionListPtr = std::move(oldEntry.conditionListPtr); newEntry.headPtr = std::move(oldEntry.headPtr); newEntry.labelIndices = nullptr; @@ -107,14 +107,14 @@ class Beam final { /** * @param refinementComparator A reference to an object of type `FixedRefinementComparator` that keeps track of * existing refinements of rules - * @param thresholdsSubsetPtr An unique pointer to an object of type `IThresholdsSubset` that has been used to + * @param featureSubspacePtr An unique pointer to an object of type `IFeatureSubspace` that has been used to * find the existing refinements of rules * @param labelIndices A reference to an object of type `IIndexVector` that provides access to the * indices of the labels for which further refinement may predict * @param keepHeads True, if further refinements should predict for the same labels as before, false * otherwise */ - Beam(FixedRefinementComparator& refinementComparator, std::unique_ptr thresholdsSubsetPtr, + Beam(FixedRefinementComparator& refinementComparator, std::unique_ptr featureSubspacePtr, const IIndexVector& labelIndices, bool keepHeads) : Beam(refinementComparator.getNumElements()) { FixedRefinementComparator::iterator iterator = refinementComparator.begin(); @@ -123,13 +123,13 @@ class Beam final { for (; i < numEntries_ - 1; i++) { Refinement& refinement = iterator[i]; BeamEntry& entry = entries_[i]; - initializeEntry(entry, 
refinement, thresholdsSubsetPtr->copy(), labelIndices, keepHeads); + initializeEntry(entry, refinement, featureSubspacePtr->copy(), labelIndices, keepHeads); order_.push_back(entry); } Refinement& refinement = iterator[i]; BeamEntry& entry = entries_[i]; - initializeEntry(entry, refinement, std::move(thresholdsSubsetPtr), labelIndices, keepHeads); + initializeEntry(entry, refinement, std::move(featureSubspacePtr), labelIndices, keepHeads); order_.push_back(entry); } @@ -181,7 +181,7 @@ class Beam final { // Search for refinements of the existing beam entry... FixedRefinementComparator refinementComparator(ruleCompareFunction, beamWidth, minQuality); - foundRefinement = findRefinement(refinementComparator, *entry.thresholdsSubsetPtr, featureIndices, + foundRefinement = findRefinement(refinementComparator, *entry.featureSubspacePtr, featureIndices, *entry.labelIndices, minCoverage, numThreads); if (foundRefinement) { @@ -191,39 +191,39 @@ class Beam final { uint32 i = 0; // Include all refinements, except for the last one, in the new beam. The corresponding - // `IThresholdsSubset` and `ConditionList` are copied... + // `IFeatureSubspace` and `ConditionList` are copied... for (; i < numRefinements - 1; i++) { Refinement& refinement = iterator[i]; if (n < beamWidth) { BeamEntry& newEntry = newEntries[n]; - copyEntry(newEntry, entry, refinement, entry.thresholdsSubsetPtr->copy(), + copyEntry(newEntry, entry, refinement, entry.featureSubspacePtr->copy(), std::make_unique(*entry.conditionListPtr), keepHeads, minCoverage); newOrder.push_back(newEntry); n++; } else { BeamEntry& newEntry = newOrder.back(); - copyEntry(newEntry, entry, refinement, entry.thresholdsSubsetPtr->copy(), + copyEntry(newEntry, entry, refinement, entry.featureSubspacePtr->copy(), std::make_unique(*entry.conditionListPtr), keepHeads, minCoverage); minQuality = updateOrder(ruleCompareFunction, newOrder); } } - // Include the last refinement in the beam. 
The corresponding `IThresholdsSubset` and + // Include the last refinement in the beam. The corresponding `IFeatureSubspace` and // `ConditionList` are reused... Refinement& refinement = iterator[i]; if (n < beamWidth) { BeamEntry& newEntry = newEntries[n]; - copyEntry(newEntry, entry, refinement, std::move(entry.thresholdsSubsetPtr), + copyEntry(newEntry, entry, refinement, std::move(entry.featureSubspacePtr), std::move(entry.conditionListPtr), keepHeads, minCoverage); newOrder.push_back(newEntry); n++; } else { BeamEntry& newEntry = newOrder.back(); - copyEntry(newEntry, entry, refinement, std::move(entry.thresholdsSubsetPtr), + copyEntry(newEntry, entry, refinement, std::move(entry.featureSubspacePtr), std::move(entry.conditionListPtr), keepHeads, minCoverage); minQuality = updateOrder(ruleCompareFunction, newOrder); } @@ -312,26 +312,26 @@ class BeamSearchTopDownRuleInduction final : public AbstractRuleInduction { protected: - std::unique_ptr growRule(IFeatureSpace& featureSpace, const IIndexVector& labelIndices, - const IWeightVector& weights, IPartition& partition, - IFeatureSampling& featureSampling, RNG& rng, - std::unique_ptr& conditionListPtr, - std::unique_ptr& headPtr) const override { + std::unique_ptr growRule(IFeatureSpace& featureSpace, const IIndexVector& labelIndices, + const IWeightVector& weights, IPartition& partition, + IFeatureSampling& featureSampling, RNG& rng, + std::unique_ptr& conditionListPtr, + std::unique_ptr& headPtr) const override { // Create a new subset of the given thresholds... - std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(featureSpace); + std::unique_ptr featureSubspacePtr = weights.createFeatureSubspace(featureSpace); // Sample features... const IIndexVector& sampledFeatureIndices = featureSampling.sample(rng); // Search for the best refinements using a single condition... 
FixedRefinementComparator refinementComparator(ruleCompareFunction_, beamWidth_); - bool foundRefinement = findRefinement(refinementComparator, *thresholdsSubsetPtr, sampledFeatureIndices, + bool foundRefinement = findRefinement(refinementComparator, *featureSubspacePtr, sampledFeatureIndices, labelIndices, minCoverage_, numThreads_); if (foundRefinement) { bool keepHeads = maxHeadRefinements_ == 1; std::unique_ptr beamPtr = - std::make_unique(refinementComparator, std::move(thresholdsSubsetPtr), labelIndices, keepHeads); + std::make_unique(refinementComparator, std::move(featureSubspacePtr), labelIndices, keepHeads); uint32 searchDepth = 1; while (foundRefinement && (maxConditions_ == 0 || searchDepth < maxConditions_)) { @@ -351,10 +351,10 @@ class BeamSearchTopDownRuleInduction final : public AbstractRuleInduction { BeamEntry& entry = beamPtr->getBestEntry(); conditionListPtr = std::move(entry.conditionListPtr); headPtr = std::move(entry.headPtr); - return std::move(entry.thresholdsSubsetPtr); + return std::move(entry.featureSubspacePtr); } - return thresholdsSubsetPtr; + return featureSubspacePtr; } }; diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp index 8eefd0c4c4..4dac998b4c 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp @@ -3,7 +3,7 @@ */ #pragma once -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" #include "mlrl/common/util/openmp.hpp" /** @@ -36,7 +36,7 @@ struct RuleRefinementEntry final { * @tparam RefinementComparator The type of the comparator that is used to compare the potential refinements * @param refinementComparator A reference to an object of template type `RefinementComparator` that should 
be used to * compare the potential refinements - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to search for + * @param featureSubspace A reference to an object of type `IFeatureSubspace` that should be used to search for * the potential refinements * @param featureIndices A reference to an object of type `IIndexVector` that provides access to the indices of * the features that should be considered @@ -48,7 +48,7 @@ struct RuleRefinementEntry final { * @return True, if at least one refinement has been found, false otherwise */ template -static inline bool findRefinement(RefinementComparator& refinementComparator, IThresholdsSubset& thresholdsSubset, +static inline bool findRefinement(RefinementComparator& refinementComparator, IFeatureSubspace& featureSubspace, const IIndexVector& featureIndices, const IIndexVector& labelIndices, uint32 minCoverage, uint32 numThreads) { bool foundRefinement = false; @@ -62,7 +62,7 @@ static inline bool findRefinement(RefinementComparator& refinementComparator, IT uint32 featureIndex = featureIndices.getIndex(i); RuleRefinementEntry& ruleRefinementEntry = ruleRefinementEntries[i]; ruleRefinementEntry.comparatorPtr = std::make_unique(refinementComparator); - ruleRefinementEntry.ruleRefinementPtr = labelIndices.createRuleRefinement(thresholdsSubset, featureIndex); + ruleRefinementEntry.ruleRefinementPtr = labelIndices.createRuleRefinement(featureSubspace, featureIndex); } // Search for the best condition among all available features to be added to the current rule... 
diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_greedy.cpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_greedy.cpp index 2e48ab9fa6..bb5abf42f1 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_greedy.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_greedy.cpp @@ -47,11 +47,11 @@ class GreedyTopDownRuleInduction final : public AbstractRuleInduction { protected: - std::unique_ptr growRule(IFeatureSpace& featureSpace, const IIndexVector& labelIndices, - const IWeightVector& weights, IPartition& partition, - IFeatureSampling& featureSampling, RNG& rng, - std::unique_ptr& conditionListPtr, - std::unique_ptr& headPtr) const override { + std::unique_ptr growRule(IFeatureSpace& featureSpace, const IIndexVector& labelIndices, + const IWeightVector& weights, IPartition& partition, + IFeatureSampling& featureSampling, RNG& rng, + std::unique_ptr& conditionListPtr, + std::unique_ptr& headPtr) const override { // The label indices for which the next refinement of the rule may predict const IIndexVector* currentLabelIndices = &labelIndices; // A list that contains the conditions in the rule's body (in the order they have been learned) @@ -62,7 +62,7 @@ class GreedyTopDownRuleInduction final : public AbstractRuleInduction { bool foundRefinement = true; // Create a new subset of the given thresholds... - std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(featureSpace); + std::unique_ptr featureSubspacePtr = weights.createFeatureSubspace(featureSpace); // Search for the best refinement until no improvement in terms of the rule's quality is possible anymore or // until the maximum number of conditions has been reached... 
@@ -71,7 +71,7 @@ class GreedyTopDownRuleInduction final : public AbstractRuleInduction { const IIndexVector& sampledFeatureIndices = featureSampling.sample(rng); // Search for the best refinement... - foundRefinement = findRefinement(refinementComparator, *thresholdsSubsetPtr, sampledFeatureIndices, + foundRefinement = findRefinement(refinementComparator, *featureSubspacePtr, sampledFeatureIndices, *currentLabelIndices, minCoverage_, numThreads_); if (foundRefinement) { @@ -81,7 +81,7 @@ class GreedyTopDownRuleInduction final : public AbstractRuleInduction { bestRefinement.headPtr->sort(); // Filter the current subset of thresholds by applying the best refinement that has been found... - thresholdsSubsetPtr->filterThresholds(bestRefinement); + featureSubspacePtr->filterSubspace(bestRefinement); // Add the new condition... conditionListPtr->addCondition(bestRefinement); @@ -100,7 +100,7 @@ class GreedyTopDownRuleInduction final : public AbstractRuleInduction { Refinement& bestRefinement = *refinementComparator.begin(); headPtr = std::move(bestRefinement.headPtr); - return thresholdsSubsetPtr; + return featureSubspacePtr; } }; diff --git a/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_irep.cpp b/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_irep.cpp index a093f3b723..f2bff6cb71 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_irep.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_irep.cpp @@ -17,7 +17,7 @@ class Irep final : public IRulePruning { */ Irep(RuleCompareFunction ruleCompareFunction) : ruleCompareFunction_(ruleCompareFunction) {} - std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, + std::unique_ptr prune(IFeatureSubspace& featureSubspace, IPartition& partition, ConditionList& conditions, const IPrediction& head) const override { uint32 numConditions = conditions.getNumConditions(); std::unique_ptr bestCoverageMaskPtr; @@ -25,14 
+25,14 @@ class Irep final : public IRulePruning { // Only rules with more than one condition can be pruned... if (numConditions > 1) { // Calculate the quality of the original rule on the prune set... - const CoverageMask& originalCoverageMask = thresholdsSubset.getCoverageMask(); - Quality bestQuality = partition.evaluateOutOfSample(thresholdsSubset, originalCoverageMask, head); + const CoverageMask& originalCoverageMask = featureSubspace.getCoverageMask(); + Quality bestQuality = partition.evaluateOutOfSample(featureSubspace, originalCoverageMask, head); // Create a copy of the original coverage mask... bestCoverageMaskPtr = std::make_unique(originalCoverageMask); // Reset the given thresholds... - thresholdsSubset.resetThresholds(); + featureSubspace.resetSubspace(); // We process the existing rule's conditions (except for the last one) in the order they have been // learned. At each iteration, we calculate the quality of a rule that only contains the conditions @@ -43,11 +43,11 @@ class Irep final : public IRulePruning { for (uint32 n = 1; n < numConditions; n++) { // Filter the thresholds by applying the current condition... const Condition& condition = *conditionIterator; - thresholdsSubset.filterThresholds(condition); + featureSubspace.filterSubspace(condition); // Calculate the quality of a rule that contains the conditions that have been processed so far... - const CoverageMask& coverageMask = thresholdsSubset.getCoverageMask(); - Quality quality = partition.evaluateOutOfSample(thresholdsSubset, coverageMask, head); + const CoverageMask& coverageMask = featureSubspace.getCoverageMask(); + Quality quality = partition.evaluateOutOfSample(featureSubspace, coverageMask, head); // Check if the quality is better than the best quality seen so far (reaching the same quality with // fewer conditions is considered an improvement)... 
diff --git a/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_no.cpp b/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_no.cpp index eb78950e33..3f895bf532 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_no.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_pruning/rule_pruning_no.cpp @@ -6,7 +6,7 @@ class NoRulePruning final : public IRulePruning { public: - std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, + std::unique_ptr prune(IFeatureSubspace& featureSubspace, IPartition& partition, ConditionList& conditions, const IPrediction& head) const override { return nullptr; } diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/prediction_complete.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/prediction_complete.cpp index 1b3687461a..ba8efb993f 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/prediction_complete.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/prediction_complete.cpp @@ -88,9 +88,9 @@ std::unique_ptr CompletePrediction::createStatisticsSubset( return statistics.createSubset(indexVector_, weights); } -std::unique_ptr CompletePrediction::createRuleRefinement(IThresholdsSubset& thresholdsSubset, +std::unique_ptr CompletePrediction::createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const { - return indexVector_.createRuleRefinement(thresholdsSubset, featureIndex); + return indexVector_.createRuleRefinement(featureSubspace, featureIndex); } void CompletePrediction::apply(IStatistics& statistics, uint32 statisticIndex) const { diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/prediction_partial.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/prediction_partial.cpp index 69ed32b8d9..91002968a6 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/prediction_partial.cpp +++ 
b/cpp/subprojects/common/src/mlrl/common/rule_refinement/prediction_partial.cpp @@ -134,9 +134,9 @@ std::unique_ptr PartialPrediction::createStatisticsSubset( return statistics.createSubset(indexVector_, weights); } -std::unique_ptr PartialPrediction::createRuleRefinement(IThresholdsSubset& thresholdsSubset, +std::unique_ptr PartialPrediction::createRuleRefinement(IFeatureSubspace& featureSubspace, uint32 featureIndex) const { - return indexVector_.createRuleRefinement(thresholdsSubset, featureIndex); + return indexVector_.createRuleRefinement(featureSubspace, featureIndex); } void PartialPrediction::apply(IStatistics& statistics, uint32 statisticIndex) const { diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp index 1c44bd4a62..0222319f77 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp @@ -4,7 +4,7 @@ #include "mlrl/common/rule_refinement/prediction.hpp" #include "mlrl/common/sampling/instance_sampling.hpp" #include "mlrl/common/stopping/stopping_criterion.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" #include @@ -76,14 +76,14 @@ std::unique_ptr BiPartition::createInstanceSampling(const IIn return labelMatrix.createInstanceSampling(factory, *this, statistics); } -Quality BiPartition::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, +Quality BiPartition::evaluateOutOfSample(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, const IPrediction& head) { - return thresholdsSubset.evaluateOutOfSample(*this, coverageMask, head); + return featureSubspace.evaluateOutOfSample(*this, coverageMask, head); } -void BiPartition::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, +void 
BiPartition::recalculatePrediction(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, IPrediction& head) { - thresholdsSubset.recalculatePrediction(*this, coverageMask, head); + featureSubspace.recalculatePrediction(*this, coverageMask, head); } std::unique_ptr BiPartition::fitMarginalProbabilityCalibrationModel( diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp index 47f47794f0..02de98a49e 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp @@ -4,7 +4,7 @@ #include "mlrl/common/rule_refinement/prediction.hpp" #include "mlrl/common/sampling/instance_sampling.hpp" #include "mlrl/common/stopping/stopping_criterion.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" SinglePartition::SinglePartition(uint32 numElements) : numElements_(numElements) {} @@ -30,14 +30,14 @@ std::unique_ptr SinglePartition::createInstanceSampling(const return labelMatrix.createInstanceSampling(factory, *this, statistics); } -Quality SinglePartition::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, - const CoverageMask& coverageMask, const IPrediction& head) { - return thresholdsSubset.evaluateOutOfSample(*this, coverageMask, head); +Quality SinglePartition::evaluateOutOfSample(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, + const IPrediction& head) { + return featureSubspace.evaluateOutOfSample(*this, coverageMask, head); } -void SinglePartition::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const CoverageMask& coverageMask, +void SinglePartition::recalculatePrediction(const IFeatureSubspace& featureSubspace, const CoverageMask& coverageMask, IPrediction& head) { - thresholdsSubset.recalculatePrediction(*this, coverageMask, head); + 
featureSubspace.recalculatePrediction(*this, coverageMask, head); } std::unique_ptr SinglePartition::fitMarginalProbabilityCalibrationModel( diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp index 0e7f7a37f5..c7a4fb1f49 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/sampling/weight_vector_bit.hpp" #include "mlrl/common/thresholds/feature_space.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" BitWeightVector::BitWeightVector(uint32 numElements, bool init) : vector_(numElements, init), numNonZeroWeights_(0) {} @@ -33,6 +33,6 @@ void BitWeightVector::clear() { vector_.clear(); } -std::unique_ptr BitWeightVector::createThresholdsSubset(IFeatureSpace& featureSpace) const { - return featureSpace.createSubset(*this); +std::unique_ptr BitWeightVector::createFeatureSubspace(IFeatureSpace& featureSpace) const { + return featureSpace.createSubspace(*this); } diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp index 5e76fd1797..ad9f677c02 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/sampling/weight_vector_dense.hpp" #include "mlrl/common/thresholds/feature_space.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" template DenseWeightVector::DenseWeightVector(uint32 numElements, bool init) @@ -23,8 +23,8 @@ bool DenseWeightVector::hasZeroWeights() const { } template -std::unique_ptr 
DenseWeightVector::createThresholdsSubset(IFeatureSpace& featureSpace) const { - return featureSpace.createSubset(*this); +std::unique_ptr DenseWeightVector::createFeatureSubspace(IFeatureSpace& featureSpace) const { + return featureSpace.createSubspace(*this); } template class DenseWeightVector; diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp index 78a193df7d..bdfa996936 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/sampling/weight_vector_equal.hpp" #include "mlrl/common/thresholds/feature_space.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" EqualWeightVector::EqualWeightVector(uint32 numElements) : numElements_(numElements) {} @@ -21,6 +21,6 @@ bool EqualWeightVector::hasZeroWeights() const { return false; } -std::unique_ptr EqualWeightVector::createThresholdsSubset(IFeatureSpace& featureSpace) const { - return featureSpace.createSubset(*this); +std::unique_ptr EqualWeightVector::createFeatureSubspace(IFeatureSpace& featureSpace) const { + return featureSpace.createSubspace(*this); } diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp index dd187a85ae..53bed9958e 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/thresholds/coverage_mask.hpp" #include "mlrl/common/rule_refinement/prediction.hpp" -#include "mlrl/common/thresholds/thresholds_subset.hpp" +#include "mlrl/common/thresholds/feature_subspace.hpp" CoverageMask::CoverageMask(uint32 numElements) : 
DenseVectorDecorator>(AllocatedVector(numElements, true)), indicatorValue_(0) {} diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index 81aefee7e4..a6229df8c5 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -77,7 +77,7 @@ class ExactThresholds final : public IFeatureSpace { * examples */ template - class ThresholdsSubset final : public IThresholdsSubset { + class ThresholdsSubset final : public IFeatureSubspace { private: /** @@ -203,7 +203,7 @@ class ExactThresholds final : public IFeatureSpace { coverageMask_(thresholdsSubset.coverageMask_), numModifications_(thresholdsSubset.numModifications_) {} - std::unique_ptr copy() const override { + std::unique_ptr copy() const override { return std::make_unique>(*this); } @@ -217,7 +217,7 @@ class ExactThresholds final : public IFeatureSpace { return createRuleRefinementInternally(labelIndices, featureIndex); } - void filterThresholds(const Condition& condition) override { + void filterSubspace(const Condition& condition) override { uint32 featureIndex = condition.featureIndex; auto cacheFilteredIterator = cacheFiltered_.emplace(featureIndex, FilteredCacheEntry()).first; FilteredCacheEntry& cacheEntry = cacheFilteredIterator->second; @@ -245,7 +245,7 @@ class ExactThresholds final : public IFeatureSpace { cacheEntry.numConditions = numModifications_; } - void resetThresholds() override { + void resetSubspace() override { numModifications_ = 0; numCovered_ = weights_.getNumNonZeroWeights(); cacheFiltered_.clear(); @@ -358,21 +358,21 @@ class ExactThresholds final : public IFeatureSpace { return statisticsProvider_; } - std::unique_ptr createSubset(const EqualWeightVector& weights) override { + std::unique_ptr createSubspace(const EqualWeightVector& weights) override { IStatistics& statistics = 
statisticsProvider_.get(); std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); return std::make_unique>( *this, std::move(weightedStatisticsPtr), weights); } - std::unique_ptr createSubset(const BitWeightVector& weights) override { + std::unique_ptr createSubspace(const BitWeightVector& weights) override { IStatistics& statistics = statisticsProvider_.get(); std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); return std::make_unique>( *this, std::move(weightedStatisticsPtr), weights); } - std::unique_ptr createSubset(const DenseWeightVector& weights) override { + std::unique_ptr createSubspace(const DenseWeightVector& weights) override { IStatistics& statistics = statisticsProvider_.get(); std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); return std::make_unique>>( From 400d783f13800e2ad5feec2e26f9fa33a236b40f Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 21 Feb 2024 21:58:35 +0100 Subject: [PATCH 42/53] Replace "getIndicatorValue" and "setIndicatorValue" functions of the class Coverage with publicly accessible field. 
--- .../mlrl/common/thresholds/coverage_mask.hpp | 27 +++++-------------- ...eature_vector_decorator_nominal_common.hpp | 2 +- .../feature_vector_decorator_numerical.hpp | 2 +- .../mlrl/common/thresholds/coverage_mask.cpp | 16 +++-------- .../input/feature_vector_decorator_binary.cpp | 10 +++---- .../feature_vector_decorator_nominal.cpp | 10 +++---- .../feature_vector_decorator_numerical.cpp | 16 +++++------ .../feature_vector_decorator_ordinal.cpp | 16 +++++------ 8 files changed, 39 insertions(+), 60 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp index 3eef72a213..00a47a7e45 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp @@ -16,16 +16,17 @@ class IPrediction; /** * Allows to check whether individual examples are covered by a rule or not. For each example, an integer is stored in a - * C-contiguous array that may be updated when the rule is refined. If the value that corresponds to a certain example - * is equal to the "indicator value", it is considered to be covered. + * vector that may be updated when the rule is refined. If the value that corresponds to a certain example is equal to + * the "indicator value", it is considered to be covered, otherwise it is not. */ class CoverageMask final : public DenseVectorDecorator> { - private: - - uint32 indicatorValue_; - public: + /** + * The "indicator value". + */ + uint32 indicatorValue; + /** * @param numElements The total number of examples */ @@ -36,20 +37,6 @@ class CoverageMask final : public DenseVectorDecorator> */ CoverageMask(const CoverageMask& other); - /** - * Returns the "indicator value". - * - * @return The "indicator value" - */ - uint32 getIndicatorValue() const; - - /** - * Sets the "indicator value". 
- * - * @param indicatorValue The "indicator value" to be set - */ - void setIndicatorValue(uint32 indicatorValue); - /** * Resets the mask and the "indicator value" such that all examples are marked as covered. */ diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp index 506f0bedb3..32ea8b4830 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp @@ -34,7 +34,7 @@ static inline void updateCoverageMaskAndStatisticsBasedOnNominalFeatureVector(co updateCoverageMaskAndStatisticsBasedOnMissingFeatureVector(view, coverageMaskIterator, indicatorValue, statistics); } else { - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; statistics.resetCoveredStatistics(); // Retain the indices in the range [interval.start, interval.end) and set the corresponding values in the given diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp index 9fe308c74d..f049531f04 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp @@ -124,7 +124,7 @@ class AbstractNumericalFeatureVectorDecorator : public AbstractFeatureVectorDeco updateCoverageMaskAndStatisticsBasedOnMissingFeatureVector(*this, coverageMaskIterator, indicatorValue, statistics); } else { - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; statistics.resetCoveredStatistics(); // Retain the indices in the range [interval.start, interval.end) and set the corresponding values in diff --git 
a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp index 53bed9958e..3cb8331ea1 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp @@ -4,27 +4,19 @@ #include "mlrl/common/thresholds/feature_subspace.hpp" CoverageMask::CoverageMask(uint32 numElements) - : DenseVectorDecorator>(AllocatedVector(numElements, true)), indicatorValue_(0) {} + : DenseVectorDecorator>(AllocatedVector(numElements, true)), indicatorValue(0) {} CoverageMask::CoverageMask(const CoverageMask& other) : DenseVectorDecorator>(AllocatedVector(other.getNumElements())), - indicatorValue_(other.indicatorValue_) { + indicatorValue(other.indicatorValue) { copyView(other.cbegin(), this->begin(), this->getNumElements()); } -uint32 CoverageMask::getIndicatorValue() const { - return indicatorValue_; -} - -void CoverageMask::setIndicatorValue(uint32 indicatorValue) { - indicatorValue_ = indicatorValue; -} - void CoverageMask::reset() { - indicatorValue_ = 0; + indicatorValue = 0; setViewToZeros(this->begin(), this->getNumElements()); } bool CoverageMask::isCovered(uint32 pos) const { - return this->view.array[pos] == indicatorValue_; + return this->view.array[pos] == indicatorValue; } diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binary.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binary.cpp index 160cecf219..fdef997fce 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binary.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binary.cpp @@ -26,7 +26,7 @@ TEST(BinaryFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - 
EXPECT_EQ(coverageMask.getIndicatorValue(), indicatorValue); + EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); const BinaryFeatureVector& binaryFeatureVector = decorator.getView().firstView; for (auto it = binaryFeatureVector.indices_cbegin(0); it != binaryFeatureVector.indices_cend(0); it++) { @@ -69,7 +69,7 @@ TEST(BinaryFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) { CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), (uint32) 0); + EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); const BinaryFeatureVector& binaryFeatureVector = decorator.getView().firstView; for (auto it = binaryFeatureVector.indices_cbegin(0); it != binaryFeatureVector.indices_cend(0); it++) { @@ -116,7 +116,7 @@ TEST(BinaryFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageMa CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numExamples; i++) { @@ -185,7 +185,7 @@ TEST(BinaryFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageMa CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numExamples; i++) { @@ -246,7 +246,7 @@ TEST(BinaryFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageMa } CoverageMask coverageMask(numMinorityExamples); - coverageMask.setIndicatorValue(1); + coverageMask.indicatorValue = 1; BinaryFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); std::unique_ptr existing; diff --git 
a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_nominal.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_nominal.cpp index 41f52a2e5d..2d432fa6e5 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_nominal.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_nominal.cpp @@ -35,7 +35,7 @@ TEST(NominalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), indicatorValue); + EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); const NominalFeatureVector& nominalFeatureVector = decorator.getView().firstView; for (uint32 i = 0; i < interval.start; i++) { @@ -106,7 +106,7 @@ TEST(NominalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), (uint32) 0); + EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); const NominalFeatureVector& nominalFeatureVector = decorator.getView().firstView; for (uint32 i = 0; i < interval.start; i++) { @@ -327,7 +327,7 @@ TEST(NominalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageM CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numExamples; i++) { @@ -412,7 +412,7 @@ TEST(NominalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageM CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + 
coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numExamples; i++) { @@ -484,7 +484,7 @@ TEST(NominalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageM } CoverageMask coverageMask(numMinorityExamples); - coverageMask.setIndicatorValue(1); + coverageMask.indicatorValue = 1; NominalFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); std::unique_ptr existing; diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_numerical.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_numerical.cpp index bfaf9a6d0a..1c70c29537 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_numerical.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_numerical.cpp @@ -26,7 +26,7 @@ TEST(NumericalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { CoverageMask coverageMask(numDenseExamples); uint32 indicatorValue = 1; decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), indicatorValue); + EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); for (uint32 i = 0; i < interval.start; i++) { EXPECT_FALSE(coverageMask.isCovered(i)); @@ -74,7 +74,7 @@ TEST(NumericalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), (uint32) 0); + EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); for (uint32 i = 0; i < interval.start; i++) { EXPECT_TRUE(coverageMask.isCovered(i)); @@ -121,7 +121,7 @@ TEST(NumericalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromVie CoverageMask coverageMask(numDenseExamples); uint32 
indicatorValue = 1; filtered->updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), indicatorValue); + EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); for (uint32 i = 0; i < interval.start; i++) { EXPECT_FALSE(coverageMask.isCovered(i)); @@ -164,7 +164,7 @@ TEST(NumericalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromVie CoverageMask coverageMask(numDenseExamples); uint32 indicatorValue = 1; filtered->updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), (uint32) 0); + EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); for (uint32 i = 0; i < interval.start; i++) { EXPECT_TRUE(coverageMask.isCovered(i)); @@ -368,7 +368,7 @@ TEST(NumericalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromViewWit CoverageMask coverageMask(numDenseExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numDenseExamples; i++) { @@ -430,7 +430,7 @@ TEST(NumericalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverag CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numExamples; i++) { @@ -498,7 +498,7 @@ TEST(NumericalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverag CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numExamples; i++) { @@ -559,7 +559,7 @@ TEST(NumericalFeatureVectorDecoratorTest, 
createFilteredFeatureVectorFromCoverag } CoverageMask coverageMask(numDenseExamples); - coverageMask.setIndicatorValue(1); + coverageMask.indicatorValue = 1; NumericalFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); std::unique_ptr existing; diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_ordinal.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_ordinal.cpp index 8d30bb0463..661c968a77 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_ordinal.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_ordinal.cpp @@ -36,7 +36,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), indicatorValue); + EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); const OrdinalFeatureVector& ordinalFeatureVector = decorator.getView().firstView; for (uint32 i = 0; i < interval.start; i++) { @@ -107,7 +107,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), (uint32) 0); + EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); const OrdinalFeatureVector& ordinalFeatureVector = decorator.getView().firstView; for (uint32 i = 0; i < interval.start; i++) { @@ -178,7 +178,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromView) std::unique_ptr existing; decorator.createFilteredFeatureVector(existing, Interval(0, numValues)) ->updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - 
EXPECT_EQ(coverageMask.getIndicatorValue(), indicatorValue); + EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); const OrdinalFeatureVector& ordinalFeatureVector = decorator.getView().firstView; for (uint32 i = 0; i < interval.start; i++) { @@ -244,7 +244,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromViewI std::unique_ptr existing; decorator.createFilteredFeatureVector(existing, Interval(0, numValues)) ->updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); - EXPECT_EQ(coverageMask.getIndicatorValue(), (uint32) 0); + EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); const OrdinalFeatureVector& ordinalFeatureVector = decorator.getView().firstView; for (uint32 i = 0; i < interval.start; i++) { @@ -484,7 +484,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromViewWithC CoverageMask coverageMask(numMinorityExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numMinorityExamples; i++) { @@ -559,7 +559,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageM CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numExamples; i++) { @@ -644,7 +644,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageM CoverageMask coverageMask(numExamples); uint32 indicatorValue = 1; - coverageMask.setIndicatorValue(indicatorValue); + coverageMask.indicatorValue = indicatorValue; CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); for (uint32 i = 0; i < numExamples; i++) { @@ -716,7 +716,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, 
createFilteredFeatureVectorFromCoverageM } CoverageMask coverageMask(numMinorityExamples); - coverageMask.setIndicatorValue(1); + coverageMask.indicatorValue = 1; OrdinalFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); std::unique_ptr existing; From d4e925cbf3f1d9559557220f5ede6a975fefb954 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 21 Feb 2024 22:04:06 +0100 Subject: [PATCH 43/53] Replace function "isCovered" of the class CoverageMask with "operator[]". --- .../mlrl/common/thresholds/coverage_mask.hpp | 4 +-- .../common/input/feature_vector_decorator.hpp | 4 +-- ...eature_vector_decorator_nominal_common.hpp | 2 +- .../feature_vector_decorator_numerical.hpp | 2 +- .../mlrl/common/thresholds/coverage_mask.cpp | 4 +-- .../common/thresholds/thresholds_exact.cpp | 8 ++--- .../input/feature_vector_decorator_binary.cpp | 10 +++--- .../feature_vector_decorator_nominal.cpp | 18 +++++----- .../feature_vector_decorator_numerical.cpp | 24 ++++++------- .../feature_vector_decorator_ordinal.cpp | 34 +++++++++---------- 10 files changed, 55 insertions(+), 55 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp index 00a47a7e45..82ade434ce 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp @@ -45,8 +45,8 @@ class CoverageMask final : public DenseVectorDecorator> /** * Returns whether the example at a specific index is covered or not. 
* - * @param pos The index of the example + * @param index The index of the example * @return True, if the example at the given index is covered, false otherwise */ - bool isCovered(uint32 pos) const; + bool operator[](uint32 index) const; }; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator.hpp index bc465f4e33..ada1e3ee76 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator.hpp @@ -41,7 +41,7 @@ static inline std::unique_ptr createFilteredFeatureVectorDecorator(co uint32 index = *it; it++; // Iterator must be incremented before call to `MissingFeatureVector::set` invalidates it - if (!coverageMask.isCovered(index)) { + if (!coverageMask[index]) { missingFeatureVector.set(index, false); } } @@ -56,7 +56,7 @@ static inline std::unique_ptr createFilteredFeatureVectorDecorator(co it++) { uint32 index = *it; - if (coverageMask.isCovered(index)) { + if (coverageMask[index]) { missingFeatureVector.set(index, true); } } diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp index 32ea8b4830..b74bf80f9a 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp @@ -77,7 +77,7 @@ static inline std::unique_ptr createFilteredNominalFeatureVector for (uint32 j = 0; j < numIndices; j++) { uint32 index = indexIterator[j]; - if (coverageMask.isCovered(index)) { + if (coverageMask[index]) { filteredIndexIterator[numFilteredIndices] = index; numFilteredIndices++; } diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp 
b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp index f049531f04..4a43705797 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp @@ -61,7 +61,7 @@ static inline std::unique_ptr createFilteredNumericalFeatureVect for (uint32 i = 0; i < filteredFeatureVector.numElements; i++) { const IndexedValue& entry = iterator[i]; - if (coverageMask.isCovered(entry.index)) { + if (coverageMask[entry.index]) { filteredIterator[numFilteredElements] = entry; numFilteredElements++; } diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp index 3cb8331ea1..e022723a35 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp @@ -17,6 +17,6 @@ void CoverageMask::reset() { setViewToZeros(this->begin(), this->getNumElements()); } -bool CoverageMask::isCovered(uint32 pos) const { - return this->view.array[pos] == indicatorValue; +bool CoverageMask::operator[](uint32 index) const { + return this->view.array[index] == indicatorValue; } diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp index a6229df8c5..6183fb52c5 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp @@ -16,7 +16,7 @@ static inline Quality evaluateOutOfSampleInternally(IndexIterator indexIterator, for (uint32 i = 0; i < numExamples; i++) { uint32 exampleIndex = indexIterator[i]; - if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex) && coverageMask.isCovered(exampleIndex)) { + if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex) && 
coverageMask[exampleIndex]) { statisticsSubsetPtr->addToSubset(exampleIndex); } } @@ -34,7 +34,7 @@ static inline void recalculatePredictionInternally(IndexIterator indexIterator, for (uint32 i = 0; i < numExamples; i++) { uint32 exampleIndex = indexIterator[i]; - if (coverageMask.isCovered(exampleIndex)) { + if (coverageMask[exampleIndex]) { statisticsSubsetPtr->addToSubset(exampleIndex); } } @@ -296,7 +296,7 @@ class ExactThresholds final : public IFeatureSpace { firstprivate(statisticsPtr) schedule(dynamic) num_threads(thresholds_.numThreads_) #endif for (int64 i = 0; i < numStatistics; i++) { - if (coverageMaskPtr->isCovered(i)) { + if ((*coverageMaskPtr)[i]) { predictionPtr->apply(*statisticsPtr, i); } } @@ -314,7 +314,7 @@ class ExactThresholds final : public IFeatureSpace { firstprivate(statisticsPtr) schedule(dynamic) num_threads(thresholds_.numThreads_) #endif for (int64 i = 0; i < numStatistics; i++) { - if (coverageMaskPtr->isCovered(i)) { + if ((*coverageMaskPtr)[i]) { predictionPtr->revert(*statisticsPtr, i); } } diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binary.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binary.cpp index fdef997fce..04c7444e72 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binary.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binary.cpp @@ -31,12 +31,12 @@ TEST(BinaryFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { for (auto it = binaryFeatureVector.indices_cbegin(0); it != binaryFeatureVector.indices_cend(0); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } for (uint32 i = numMinorityExamples; i < numExamples; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); 
EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } @@ -74,17 +74,17 @@ TEST(BinaryFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) { for (auto it = binaryFeatureVector.indices_cbegin(0); it != binaryFeatureVector.indices_cend(0); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingExamples; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = numMinorityExamples + numMissingExamples; i < numExamples; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_nominal.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_nominal.cpp index 2d432fa6e5..512f40638f 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_nominal.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_nominal.cpp @@ -41,7 +41,7 @@ TEST(NominalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { for (uint32 i = 0; i < interval.start; i++) { for (auto it = nominalFeatureVector.indices_cbegin(i); it != nominalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -49,7 +49,7 @@ TEST(NominalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { for (uint32 i = interval.start; i < interval.end; i++) { 
for (auto it = nominalFeatureVector.indices_cbegin(i); it != nominalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -57,13 +57,13 @@ TEST(NominalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { for (uint32 i = interval.end; i < numValues; i++) { for (auto it = nominalFeatureVector.indices_cbegin(i); it != nominalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } for (uint32 i = numMinorityExamples; i < numExamples; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } @@ -112,7 +112,7 @@ TEST(NominalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) for (uint32 i = 0; i < interval.start; i++) { for (auto it = nominalFeatureVector.indices_cbegin(i); it != nominalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -120,7 +120,7 @@ TEST(NominalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) for (uint32 i = interval.start; i < interval.end; i++) { for (auto it = nominalFeatureVector.indices_cbegin(i); it != nominalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -128,18 +128,18 @@ TEST(NominalFeatureVectorDecoratorTest, 
updateCoverageMaskAndStatisticsInverse) for (uint32 i = interval.end; i < numValues; i++) { for (auto it = nominalFeatureVector.indices_cbegin(i); it != nominalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingExamples; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = numMinorityExamples + numMissingExamples; i < numExamples; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_numerical.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_numerical.cpp index 1c70c29537..2fd5cc3f64 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_numerical.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_numerical.cpp @@ -29,17 +29,17 @@ TEST(NumericalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); for (uint32 i = 0; i < interval.start; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = interval.start; i < interval.end; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = interval.end; i < numDenseExamples; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); 
+ EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } @@ -77,17 +77,17 @@ TEST(NumericalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); for (uint32 i = 0; i < interval.start; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = interval.start; i < interval.end; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = interval.end; i < numDenseExamples; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } @@ -124,17 +124,17 @@ TEST(NumericalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromVie EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); for (uint32 i = 0; i < interval.start; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = interval.start; i < interval.end; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = interval.end; i < numDenseExamples; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } @@ -167,17 +167,17 @@ TEST(NumericalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromVie EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); for (uint32 i = 0; i < interval.start; i++) { - 
EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = interval.start; i < interval.end; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = interval.end; i < numDenseExamples; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_ordinal.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_ordinal.cpp index 661c968a77..8312a43db1 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_ordinal.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_ordinal.cpp @@ -42,7 +42,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { for (uint32 i = 0; i < interval.start; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -50,7 +50,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { for (uint32 i = interval.start; i < interval.end; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -58,13 +58,13 @@ TEST(OrdinalFeatureVectorDecoratorTest, 
updateCoverageMaskAndStatistics) { for (uint32 i = interval.end; i < numValues; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } for (uint32 i = numMinorityExamples; i < numExamples; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } @@ -113,7 +113,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) for (uint32 i = 0; i < interval.start; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -121,7 +121,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) for (uint32 i = interval.start; i < interval.end; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -129,18 +129,18 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) for (uint32 i = interval.end; i < numValues; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != 
statistics.coveredStatistics.end()); } } for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingExamples; i++) { - EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } for (uint32 i = numMinorityExamples + numMissingExamples; i < numExamples; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } @@ -184,7 +184,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromView) for (uint32 i = 0; i < interval.start; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -192,7 +192,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromView) for (uint32 i = interval.start; i < interval.end; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -200,13 +200,13 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromView) for (uint32 i = interval.end; i < numValues; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } for (uint32 i = numMinorityExamples; i < numExamples; i++) { - 
EXPECT_FALSE(coverageMask.isCovered(i)); + EXPECT_FALSE(coverageMask[i]); EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } @@ -250,7 +250,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromViewI for (uint32 i = 0; i < interval.start; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -258,7 +258,7 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromViewI for (uint32 i = interval.start; i < interval.end; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_FALSE(coverageMask.isCovered(index)); + EXPECT_FALSE(coverageMask[index]); EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } @@ -266,13 +266,13 @@ TEST(OrdinalFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromViewI for (uint32 i = interval.end; i < numValues; i++) { for (auto it = ordinalFeatureVector.indices_cbegin(i); it != ordinalFeatureVector.indices_cend(i); it++) { uint32 index = *it; - EXPECT_TRUE(coverageMask.isCovered(index)); + EXPECT_TRUE(coverageMask[index]); EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); } } for (uint32 i = numMinorityExamples; i < numExamples; i++) { - EXPECT_TRUE(coverageMask.isCovered(i)); + EXPECT_TRUE(coverageMask[i]); EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); } } From ae21424987fcf640cc3f57984438e9697b146fbf Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Thu, 22 Feb 2024 00:07:07 +0100 Subject: [PATCH 44/53] Rename class ExactThresholds to TabularFeatureSpace. 
--- ...ds_exact.hpp => feature_space_tabular.hpp} | 6 +- cpp/subprojects/common/meson.build | 2 +- .../common/src/mlrl/common/learner.cpp | 4 +- ...ds_exact.cpp => feature_space_tabular.cpp} | 112 +++++++++--------- 4 files changed, 60 insertions(+), 64 deletions(-) rename cpp/subprojects/common/include/mlrl/common/thresholds/{thresholds_exact.hpp => feature_space_tabular.hpp} (79%) rename cpp/subprojects/common/src/mlrl/common/thresholds/{thresholds_exact.cpp => feature_space_tabular.cpp} (76%) diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp b/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space_tabular.hpp similarity index 79% rename from cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp rename to cpp/subprojects/common/include/mlrl/common/thresholds/feature_space_tabular.hpp index 07bab3ab61..3f0b3a2db9 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/thresholds_exact.hpp +++ b/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space_tabular.hpp @@ -7,9 +7,9 @@ #include "mlrl/common/thresholds/feature_space.hpp" /** - * A factory that allows to create instances of the type `ExactThresholds`. + * Allows to create objects of type `IFeatureSpace` that provide access to a tabular feature space. */ -class ExactThresholdsFactory final : public IFeatureSpaceFactory { +class TabularFeatureSpaceFactory final : public IFeatureSpaceFactory { private: const std::unique_ptr featureBinningFactoryPtr_; @@ -25,7 +25,7 @@ class ExactThresholdsFactory final : public IFeatureSpaceFactory { * @param numThreads The number of CPU threads to be used to update statistics in parallel. 
Must * be at least 1 */ - ExactThresholdsFactory(std::unique_ptr featureBinningFactoryPtr, uint32 numThreads); + TabularFeatureSpaceFactory(std::unique_ptr featureBinningFactoryPtr, uint32 numThreads); std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 6d685cbbfa..f19efb01b8 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -88,7 +88,7 @@ source_files = [ 'src/mlrl/common/stopping/stopping_criterion_size.cpp', 'src/mlrl/common/stopping/stopping_criterion_time.cpp', 'src/mlrl/common/thresholds/coverage_mask.cpp', - 'src/mlrl/common/thresholds/thresholds_exact.cpp', + 'src/mlrl/common/thresholds/feature_space_tabular.cpp', 'src/mlrl/common/info.cpp', 'src/mlrl/common/learner.cpp' ] diff --git a/cpp/subprojects/common/src/mlrl/common/learner.cpp b/cpp/subprojects/common/src/mlrl/common/learner.cpp index 89400642e8..8f2a584110 100644 --- a/cpp/subprojects/common/src/mlrl/common/learner.cpp +++ b/cpp/subprojects/common/src/mlrl/common/learner.cpp @@ -2,7 +2,7 @@ #include "mlrl/common/prediction/label_space_info_no.hpp" #include "mlrl/common/stopping/stopping_criterion_size.hpp" -#include "mlrl/common/thresholds/thresholds_exact.hpp" +#include "mlrl/common/thresholds/feature_space_tabular.hpp" #include "mlrl/common/util/validation.hpp" /** @@ -215,7 +215,7 @@ std::unique_ptr AbstractRuleLearner::createFeatureSpaceFac config_.getFeatureBinningConfigPtr()->createFeatureBinningFactory(featureMatrix, labelMatrix); uint32 numThreads = config_.getParallelStatisticUpdateConfigPtr()->getNumThreads(featureMatrix, labelMatrix.getNumLabels()); - return std::make_unique(std::move(featureBinningFactoryPtr), numThreads); + return std::make_unique(std::move(featureBinningFactoryPtr), numThreads); } std::unique_ptr AbstractRuleLearner::createRuleInductionFactory( diff --git 
a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/mlrl/common/thresholds/feature_space_tabular.cpp similarity index 76% rename from cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp rename to cpp/subprojects/common/src/mlrl/common/thresholds/feature_space_tabular.cpp index 6183fb52c5..518a8c7dca 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/thresholds_exact.cpp +++ b/cpp/subprojects/common/src/mlrl/common/thresholds/feature_space_tabular.cpp @@ -1,4 +1,4 @@ -#include "mlrl/common/thresholds/thresholds_exact.hpp" +#include "mlrl/common/thresholds/feature_space_tabular.hpp" #include "mlrl/common/rule_refinement/rule_refinement_feature_based.hpp" #include "mlrl/common/util/openmp.hpp" @@ -65,19 +65,19 @@ struct FilteredCacheEntry final { }; /** - * Provides access to all thresholds that result from the feature values of the training examples. + * Provides access to a tabular feature space. */ -class ExactThresholds final : public IFeatureSpace { +class TabularFeatureSpace final : public IFeatureSpace { private: /** - * Provides access to a subset of the thresholds that are stored by an instance of the class `ExactThresholds`. + * Provides access to a subset of a `TabularFeatureSpace`. 
* * @tparam WeightVector The type of the vector that provides access to the weights of individual training * examples */ template - class ThresholdsSubset final : public IFeatureSubspace { + class FeatureSubspace final : public IFeatureSubspace { private: /** @@ -87,7 +87,7 @@ class ExactThresholds final : public IFeatureSpace { class Callback final : public IRuleRefinement::ICallback { private: - ThresholdsSubset& thresholdsSubset_; + FeatureSubspace& featureSubspace_; const IFeatureInfo& featureInfo_; @@ -96,53 +96,52 @@ class ExactThresholds final : public IFeatureSpace { public: /** - * @param thresholdsSubset A reference to an object of type `ThresholdsSubset` that caches the + * @param featureSubspace A reference to an object of type `FeatureSubspace` that caches the * feature vectors * @param featureInfo A reference to an object of type `IFeatureInfo` that provides * information about the types of individual features * @param featureIndex The index of the feature for which the feature vector should be * retrieved */ - Callback(ThresholdsSubset& thresholdsSubset, const IFeatureInfo& featureInfo, - uint32 featureIndex) - : thresholdsSubset_(thresholdsSubset), featureInfo_(featureInfo), + Callback(FeatureSubspace& featureSubspace, const IFeatureInfo& featureInfo, uint32 featureIndex) + : featureSubspace_(featureSubspace), featureInfo_(featureInfo), featureIndex_(featureIndex) {} Result get() override { - auto cacheFilteredIterator = thresholdsSubset_.cacheFiltered_.find(featureIndex_); + auto cacheFilteredIterator = featureSubspace_.cacheFiltered_.find(featureIndex_); FilteredCacheEntry& cacheEntry = cacheFilteredIterator->second; IFeatureVector* featureVector = cacheEntry.vectorPtr.get(); if (!featureVector) { - auto cacheIterator = thresholdsSubset_.thresholds_.cache_.find(featureIndex_); + auto cacheIterator = featureSubspace_.featureSpace_.cache_.find(featureIndex_); featureVector = cacheIterator->second.get(); if (!featureVector) { std::unique_ptr 
featureTypePtr = featureInfo_.createFeatureType( - featureIndex_, thresholdsSubset_.thresholds_.featureBinningFactory_); + featureIndex_, featureSubspace_.featureSpace_.featureBinningFactory_); cacheIterator->second = - thresholdsSubset_.thresholds_.featureMatrix_.createFeatureVector(featureIndex_, - *featureTypePtr); + featureSubspace_.featureSpace_.featureMatrix_.createFeatureVector( + featureIndex_, *featureTypePtr); featureVector = cacheIterator->second.get(); } } // Filter feature vector, if only a subset of its elements are covered by the current // rule... - uint32 numConditions = thresholdsSubset_.numModifications_; + uint32 numConditions = featureSubspace_.numModifications_; if (numConditions > cacheEntry.numConditions) { cacheEntry.vectorPtr = featureVector->createFilteredFeatureVector( - cacheEntry.vectorPtr, thresholdsSubset_.coverageMask_); + cacheEntry.vectorPtr, featureSubspace_.coverageMask_); cacheEntry.numConditions = numConditions; featureVector = cacheEntry.vectorPtr.get(); } - return Result(*thresholdsSubset_.weightedStatisticsPtr_, *featureVector); + return Result(*featureSubspace_.weightedStatisticsPtr_, *featureVector); } }; - ExactThresholds& thresholds_; + TabularFeatureSpace& featureSpace_; std::unique_ptr weightedStatisticsPtr_; @@ -167,11 +166,11 @@ class ExactThresholds final : public IFeatureSpace { // If the `FilteredCacheEntry` in the cache does not refer to an `IFeatureVector`, add an empty // `unique_ptr` to the cache... 
if (!featureVector) { - thresholds_.cache_.emplace(featureIndex, std::unique_ptr()); + featureSpace_.cache_.emplace(featureIndex, std::unique_ptr()); } std::unique_ptr callbackPtr = - std::make_unique(*this, thresholds_.featureInfo_, featureIndex); + std::make_unique(*this, featureSpace_.featureInfo_, featureIndex); return std::make_unique>( labelIndices, featureIndex, numCovered_, std::move(callbackPtr)); } @@ -179,32 +178,29 @@ class ExactThresholds final : public IFeatureSpace { public: /** - * @param thresholds A reference to an object of type `ExactThresholds` that stores the - * thresholds + * @param featureSpace A reference to an object of type `TabularFeatureSpace`, the subspace has + * been created from * @param weightedStatisticsPtr An unique pointer to an object of type `IWeightedStatistics` that * provides access to the statistics * @param weights A reference to an object of template type `WeightVector` that provides * access to the weights of individual training examples */ - ThresholdsSubset(ExactThresholds& thresholds, - std::unique_ptr weightedStatisticsPtr, - const WeightVector& weights) - : thresholds_(thresholds), weightedStatisticsPtr_(std::move(weightedStatisticsPtr)), + FeatureSubspace(TabularFeatureSpace& featureSpace, + std::unique_ptr weightedStatisticsPtr, const WeightVector& weights) + : featureSpace_(featureSpace), weightedStatisticsPtr_(std::move(weightedStatisticsPtr)), weights_(weights), numCovered_(weights.getNumNonZeroWeights()), - coverageMask_(thresholds.featureMatrix_.getNumExamples()), numModifications_(0) {} + coverageMask_(featureSpace.featureMatrix_.getNumExamples()), numModifications_(0) {} /** - * @param thresholdsSubset A reference to an object of type `ThresholdsSubset` to be copied + * @param other A reference to an object of type `FeatureSubspace` to be copied */ - ThresholdsSubset(const ThresholdsSubset& thresholdsSubset) - : thresholds_(thresholdsSubset.thresholds_), - 
weightedStatisticsPtr_(thresholdsSubset.weightedStatisticsPtr_->copy()), - weights_(thresholdsSubset.weights_), numCovered_(thresholdsSubset.numCovered_), - coverageMask_(thresholdsSubset.coverageMask_), - numModifications_(thresholdsSubset.numModifications_) {} + FeatureSubspace(const FeatureSubspace& other) + : featureSpace_(other.featureSpace_), weightedStatisticsPtr_(other.weightedStatisticsPtr_->copy()), + weights_(other.weights_), numCovered_(other.numCovered_), coverageMask_(other.coverageMask_), + numModifications_(other.numModifications_) {} std::unique_ptr copy() const override { - return std::make_unique>(*this); + return std::make_unique>(*this); } std::unique_ptr createRuleRefinement(const CompleteIndexVector& labelIndices, @@ -225,7 +221,7 @@ class ExactThresholds final : public IFeatureSpace { if (!featureVector) { auto cacheIterator = - thresholds_.cache_.emplace(featureIndex, std::unique_ptr()).first; + featureSpace_.cache_.emplace(featureIndex, std::unique_ptr()).first; featureVector = cacheIterator->second.get(); } @@ -260,32 +256,32 @@ class ExactThresholds final : public IFeatureSpace { const IPrediction& head) const override { return evaluateOutOfSampleInternally( partition.cbegin(), partition.getNumElements(), weights_, coverageMask, - thresholds_.statisticsProvider_.get(), head); + featureSpace_.statisticsProvider_.get(), head); } Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageMask, const IPrediction& head) const override { return evaluateOutOfSampleInternally( partition.first_cbegin(), partition.getNumFirst(), weights_, coverageMask, - thresholds_.statisticsProvider_.get(), head); + featureSpace_.statisticsProvider_.get(), head); } void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageMask, IPrediction& head) const override { recalculatePredictionInternally( partition.cbegin(), partition.getNumElements(), coverageMask, - thresholds_.statisticsProvider_.get(), head); + 
featureSpace_.statisticsProvider_.get(), head); } void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageMask, IPrediction& head) const override { recalculatePredictionInternally( partition.first_cbegin(), partition.getNumFirst(), coverageMask, - thresholds_.statisticsProvider_.get(), head); + featureSpace_.statisticsProvider_.get(), head); } void applyPrediction(const IPrediction& prediction) override { - IStatistics& statistics = thresholds_.statisticsProvider_.get(); + IStatistics& statistics = featureSpace_.statisticsProvider_.get(); uint32 numStatistics = statistics.getNumStatistics(); const CoverageMask* coverageMaskPtr = &coverageMask_; const IPrediction* predictionPtr = &prediction; @@ -293,7 +289,7 @@ class ExactThresholds final : public IFeatureSpace { #if MULTI_THREADING_SUPPORT_ENABLED #pragma omp parallel for firstprivate(numStatistics) firstprivate(coverageMaskPtr) firstprivate(predictionPtr) \ - firstprivate(statisticsPtr) schedule(dynamic) num_threads(thresholds_.numThreads_) + firstprivate(statisticsPtr) schedule(dynamic) num_threads(featureSpace_.numThreads_) #endif for (int64 i = 0; i < numStatistics; i++) { if ((*coverageMaskPtr)[i]) { @@ -303,7 +299,7 @@ class ExactThresholds final : public IFeatureSpace { } void revertPrediction(const IPrediction& prediction) override { - IStatistics& statistics = thresholds_.statisticsProvider_.get(); + IStatistics& statistics = featureSpace_.statisticsProvider_.get(); uint32 numStatistics = statistics.getNumStatistics(); const CoverageMask* coverageMaskPtr = &coverageMask_; const IPrediction* predictionPtr = &prediction; @@ -311,7 +307,7 @@ class ExactThresholds final : public IFeatureSpace { #if MULTI_THREADING_SUPPORT_ENABLED #pragma omp parallel for firstprivate(numStatistics) firstprivate(coverageMaskPtr) firstprivate(predictionPtr) \ - firstprivate(statisticsPtr) schedule(dynamic) num_threads(thresholds_.numThreads_) + firstprivate(statisticsPtr) schedule(dynamic) 
num_threads(featureSpace_.numThreads_) #endif for (int64 i = 0; i < numStatistics; i++) { if ((*coverageMaskPtr)[i]) { @@ -348,9 +344,9 @@ class ExactThresholds final : public IFeatureSpace { * assign nominal feature values to bins * @param numThreads The number of CPU threads to be used to update statistics in parallel */ - ExactThresholds(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider, const IFeatureBinningFactory& featureBinningFactory, - uint32 numThreads) + TabularFeatureSpace(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, + IStatisticsProvider& statisticsProvider, + const IFeatureBinningFactory& featureBinningFactory, uint32 numThreads) : featureMatrix_(featureMatrix), featureInfo_(featureInfo), statisticsProvider_(statisticsProvider), featureBinningFactory_(featureBinningFactory), numThreads_(numThreads) {} @@ -361,32 +357,32 @@ class ExactThresholds final : public IFeatureSpace { std::unique_ptr createSubspace(const EqualWeightVector& weights) override { IStatistics& statistics = statisticsProvider_.get(); std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>( + return std::make_unique>( *this, std::move(weightedStatisticsPtr), weights); } std::unique_ptr createSubspace(const BitWeightVector& weights) override { IStatistics& statistics = statisticsProvider_.get(); std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>( + return std::make_unique>( *this, std::move(weightedStatisticsPtr), weights); } std::unique_ptr createSubspace(const DenseWeightVector& weights) override { IStatistics& statistics = statisticsProvider_.get(); std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>>( + return std::make_unique>>( *this, std::move(weightedStatisticsPtr), weights); } }; 
-ExactThresholdsFactory::ExactThresholdsFactory(std::unique_ptr featureBinningFactoryPtr, - uint32 numThreads) +TabularFeatureSpaceFactory::TabularFeatureSpaceFactory(std::unique_ptr featureBinningFactoryPtr, + uint32 numThreads) : featureBinningFactoryPtr_(std::move(featureBinningFactoryPtr)), numThreads_(numThreads) {} -std::unique_ptr ExactThresholdsFactory::create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const { - return std::make_unique(featureMatrix, featureInfo, statisticsProvider, *featureBinningFactoryPtr_, - numThreads_); +std::unique_ptr TabularFeatureSpaceFactory::create(const IColumnWiseFeatureMatrix& featureMatrix, + const IFeatureInfo& featureInfo, + IStatisticsProvider& statisticsProvider) const { + return std::make_unique(featureMatrix, featureInfo, statisticsProvider, + *featureBinningFactoryPtr_, numThreads_); } From 161b021fa67556d34db876552bdd78efa712fe2a Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Thu, 22 Feb 2024 00:20:38 +0100 Subject: [PATCH 45/53] Move files from directory "thresholds" into directory "rule_refinement". 
--- .../common/include/mlrl/common/input/feature_vector.hpp | 2 +- .../mlrl/common/post_optimization/post_optimization.hpp | 2 +- .../include/mlrl/common/rule_induction/rule_induction.hpp | 2 +- .../common/rule_model_assemblage/rule_model_assemblage.hpp | 2 +- .../common/include/mlrl/common/rule_pruning/rule_pruning.hpp | 2 +- .../common/{thresholds => rule_refinement}/coverage_mask.hpp | 0 .../common/{thresholds => rule_refinement}/feature_space.hpp | 2 +- .../{thresholds => rule_refinement}/feature_space_tabular.hpp | 2 +- .../{thresholds => rule_refinement}/feature_subspace.hpp | 2 +- .../common/include/mlrl/common/sampling/partition.hpp | 2 +- cpp/subprojects/common/meson.build | 4 ++-- .../common/src/mlrl/common/indices/index_vector_complete.cpp | 2 +- .../common/src/mlrl/common/indices/index_vector_partial.cpp | 2 +- cpp/subprojects/common/src/mlrl/common/learner.cpp | 2 +- .../common/rule_induction/rule_induction_top_down_common.hpp | 2 +- .../common/{thresholds => rule_refinement}/coverage_mask.cpp | 4 ++-- .../{thresholds => rule_refinement}/feature_space_tabular.cpp | 2 +- .../common/src/mlrl/common/sampling/partition_bi.cpp | 2 +- .../common/src/mlrl/common/sampling/partition_single.cpp | 2 +- .../common/src/mlrl/common/sampling/weight_vector_bit.cpp | 4 ++-- .../common/src/mlrl/common/sampling/weight_vector_dense.cpp | 4 ++-- .../common/src/mlrl/common/sampling/weight_vector_equal.cpp | 4 ++-- 22 files changed, 26 insertions(+), 26 deletions(-) rename cpp/subprojects/common/include/mlrl/common/{thresholds => rule_refinement}/coverage_mask.hpp (100%) rename cpp/subprojects/common/include/mlrl/common/{thresholds => rule_refinement}/feature_space.hpp (98%) rename cpp/subprojects/common/include/mlrl/common/{thresholds => rule_refinement}/feature_space_tabular.hpp (95%) rename cpp/subprojects/common/include/mlrl/common/{thresholds => rule_refinement}/feature_subspace.hpp (99%) rename cpp/subprojects/common/src/mlrl/common/{thresholds => 
rule_refinement}/coverage_mask.cpp (85%) rename cpp/subprojects/common/src/mlrl/common/{thresholds => rule_refinement}/feature_space_tabular.cpp (99%) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp index 638e60e983..0d3979d7f8 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp @@ -4,9 +4,9 @@ #pragma once #include "mlrl/common/input/interval.hpp" +#include "mlrl/common/rule_refinement/coverage_mask.hpp" #include "mlrl/common/rule_refinement/feature_based_search.hpp" #include "mlrl/common/statistics/statistics_weighted.hpp" -#include "mlrl/common/thresholds/coverage_mask.hpp" /** * Defines an interface for all one-dimensional vectors that store the values of training examples for a certain diff --git a/cpp/subprojects/common/include/mlrl/common/post_optimization/post_optimization.hpp b/cpp/subprojects/common/include/mlrl/common/post_optimization/post_optimization.hpp index 20a5e29bbb..5095499697 100644 --- a/cpp/subprojects/common/include/mlrl/common/post_optimization/post_optimization.hpp +++ b/cpp/subprojects/common/include/mlrl/common/post_optimization/post_optimization.hpp @@ -7,9 +7,9 @@ #include "mlrl/common/post_processing/post_processor.hpp" #include "mlrl/common/rule_induction/rule_induction.hpp" #include "mlrl/common/rule_pruning/rule_pruning.hpp" +#include "mlrl/common/rule_refinement/feature_space.hpp" #include "mlrl/common/sampling/feature_sampling.hpp" #include "mlrl/common/sampling/label_sampling.hpp" -#include "mlrl/common/thresholds/feature_space.hpp" /** * Defines an interface for all classes that allow to optimize a rule-based model globally once it has been learned. 
diff --git a/cpp/subprojects/common/include/mlrl/common/rule_induction/rule_induction.hpp b/cpp/subprojects/common/include/mlrl/common/rule_induction/rule_induction.hpp index 3549b3d931..24639285b1 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_induction/rule_induction.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_induction/rule_induction.hpp @@ -8,11 +8,11 @@ #include "mlrl/common/model/model_builder.hpp" #include "mlrl/common/post_processing/post_processor.hpp" #include "mlrl/common/rule_pruning/rule_pruning.hpp" +#include "mlrl/common/rule_refinement/feature_space.hpp" #include "mlrl/common/sampling/feature_sampling.hpp" #include "mlrl/common/sampling/partition.hpp" #include "mlrl/common/sampling/weight_vector.hpp" #include "mlrl/common/statistics/statistics.hpp" -#include "mlrl/common/thresholds/feature_space.hpp" /** * Defines an interface for all classes that implement an algorithm for the induction of individual rules. diff --git a/cpp/subprojects/common/include/mlrl/common/rule_model_assemblage/rule_model_assemblage.hpp b/cpp/subprojects/common/include/mlrl/common/rule_model_assemblage/rule_model_assemblage.hpp index 03bd160560..cf64f256b1 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_model_assemblage/rule_model_assemblage.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_model_assemblage/rule_model_assemblage.hpp @@ -6,13 +6,13 @@ #include "mlrl/common/input/label_matrix_row_wise.hpp" #include "mlrl/common/model/model_builder.hpp" #include "mlrl/common/rule_induction/rule_induction.hpp" +#include "mlrl/common/rule_refinement/feature_space.hpp" #include "mlrl/common/sampling/feature_sampling.hpp" #include "mlrl/common/sampling/instance_sampling.hpp" #include "mlrl/common/sampling/label_sampling.hpp" #include "mlrl/common/sampling/partition_sampling.hpp" #include "mlrl/common/statistics/statistics_provider.hpp" #include "mlrl/common/stopping/stopping_criterion.hpp" -#include 
"mlrl/common/thresholds/feature_space.hpp" /** * Defines an interface for all classes that implement an algorithm for the induction of several rules that will be diff --git a/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp b/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp index 9e11451f06..e51738d908 100644 --- a/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_pruning/rule_pruning.hpp @@ -4,8 +4,8 @@ #pragma once #include "mlrl/common/model/condition_list.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" #include "mlrl/common/sampling/partition.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" /** * Defines an interface for all classes that implement a strategy for pruning individual rules based on a "prune set", diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/coverage_mask.hpp similarity index 100% rename from cpp/subprojects/common/include/mlrl/common/thresholds/coverage_mask.hpp rename to cpp/subprojects/common/include/mlrl/common/rule_refinement/coverage_mask.hpp diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_space.hpp similarity index 98% rename from cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp rename to cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_space.hpp index 5daf3abc2f..8d76dfa38b 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_space.hpp @@ -5,11 +5,11 @@ #include "mlrl/common/input/feature_info.hpp" #include "mlrl/common/input/feature_matrix_column_wise.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" #include 
"mlrl/common/sampling/weight_vector_bit.hpp" #include "mlrl/common/sampling/weight_vector_dense.hpp" #include "mlrl/common/sampling/weight_vector_equal.hpp" #include "mlrl/common/statistics/statistics_provider.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" /** * Defines an interface for all classes that provide access to the feature space. diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space_tabular.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_space_tabular.hpp similarity index 95% rename from cpp/subprojects/common/include/mlrl/common/thresholds/feature_space_tabular.hpp rename to cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_space_tabular.hpp index 3f0b3a2db9..6ed98f50ce 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/feature_space_tabular.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_space_tabular.hpp @@ -4,7 +4,7 @@ #pragma once #include "mlrl/common/input/feature_binning.hpp" -#include "mlrl/common/thresholds/feature_space.hpp" +#include "mlrl/common/rule_refinement/feature_space.hpp" /** * Allows to create objects of type `IFeatureSpace` that provide access to a tabular feature space. 
diff --git a/cpp/subprojects/common/include/mlrl/common/thresholds/feature_subspace.hpp b/cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_subspace.hpp similarity index 99% rename from cpp/subprojects/common/include/mlrl/common/thresholds/feature_subspace.hpp rename to cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_subspace.hpp index 4759cf23c4..b46fa342d1 100644 --- a/cpp/subprojects/common/include/mlrl/common/thresholds/feature_subspace.hpp +++ b/cpp/subprojects/common/include/mlrl/common/rule_refinement/feature_subspace.hpp @@ -6,11 +6,11 @@ #include "mlrl/common/indices/index_vector_complete.hpp" #include "mlrl/common/indices/index_vector_partial.hpp" #include "mlrl/common/model/condition.hpp" +#include "mlrl/common/rule_refinement/coverage_mask.hpp" #include "mlrl/common/rule_refinement/prediction.hpp" #include "mlrl/common/rule_refinement/rule_refinement.hpp" #include "mlrl/common/sampling/partition_bi.hpp" #include "mlrl/common/sampling/partition_single.hpp" -#include "mlrl/common/thresholds/coverage_mask.hpp" #include diff --git a/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp b/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp index d51b90f711..a44bcd9fcf 100644 --- a/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp +++ b/cpp/subprojects/common/include/mlrl/common/sampling/partition.hpp @@ -3,7 +3,7 @@ */ #pragma once -#include "mlrl/common/thresholds/coverage_mask.hpp" +#include "mlrl/common/rule_refinement/coverage_mask.hpp" #include "mlrl/common/util/quality.hpp" #include diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index f19efb01b8..06bf01ab95 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -51,7 +51,9 @@ source_files = [ 'src/mlrl/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp', 'src/mlrl/common/rule_pruning/rule_pruning_irep.cpp', 
'src/mlrl/common/rule_pruning/rule_pruning_no.cpp', + 'src/mlrl/common/rule_refinement/coverage_mask.cpp', 'src/mlrl/common/rule_refinement/feature_based_search.cpp', + 'src/mlrl/common/rule_refinement/feature_space_tabular.cpp', 'src/mlrl/common/rule_refinement/prediction_complete.cpp', 'src/mlrl/common/rule_refinement/prediction_partial.cpp', 'src/mlrl/common/rule_refinement/refinement_comparator_fixed.cpp', @@ -87,8 +89,6 @@ source_files = [ 'src/mlrl/common/stopping/stopping_criterion_list.cpp', 'src/mlrl/common/stopping/stopping_criterion_size.cpp', 'src/mlrl/common/stopping/stopping_criterion_time.cpp', - 'src/mlrl/common/thresholds/coverage_mask.cpp', - 'src/mlrl/common/thresholds/feature_space_tabular.cpp', 'src/mlrl/common/info.cpp', 'src/mlrl/common/learner.cpp' ] diff --git a/cpp/subprojects/common/src/mlrl/common/indices/index_vector_complete.cpp b/cpp/subprojects/common/src/mlrl/common/indices/index_vector_complete.cpp index 85b3ae8351..4d4b488f5d 100644 --- a/cpp/subprojects/common/src/mlrl/common/indices/index_vector_complete.cpp +++ b/cpp/subprojects/common/src/mlrl/common/indices/index_vector_complete.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/indices/index_vector_complete.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" CompleteIndexVector::CompleteIndexVector(uint32 numElements) { numElements_ = numElements; diff --git a/cpp/subprojects/common/src/mlrl/common/indices/index_vector_partial.cpp b/cpp/subprojects/common/src/mlrl/common/indices/index_vector_partial.cpp index 47f76c78d8..374d16b71c 100644 --- a/cpp/subprojects/common/src/mlrl/common/indices/index_vector_partial.cpp +++ b/cpp/subprojects/common/src/mlrl/common/indices/index_vector_partial.cpp @@ -1,6 +1,6 @@ #include "mlrl/common/indices/index_vector_partial.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" 
PartialIndexVector::PartialIndexVector(uint32 numElements, bool init) : ResizableVectorDecorator>>( diff --git a/cpp/subprojects/common/src/mlrl/common/learner.cpp b/cpp/subprojects/common/src/mlrl/common/learner.cpp index 8f2a584110..3265ad2c6e 100644 --- a/cpp/subprojects/common/src/mlrl/common/learner.cpp +++ b/cpp/subprojects/common/src/mlrl/common/learner.cpp @@ -1,8 +1,8 @@ #include "mlrl/common/learner.hpp" #include "mlrl/common/prediction/label_space_info_no.hpp" +#include "mlrl/common/rule_refinement/feature_space_tabular.hpp" #include "mlrl/common/stopping/stopping_criterion_size.hpp" -#include "mlrl/common/thresholds/feature_space_tabular.hpp" #include "mlrl/common/util/validation.hpp" /** diff --git a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp index 4dac998b4c..d80504fc1e 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_induction/rule_induction_top_down_common.hpp @@ -3,7 +3,7 @@ */ #pragma once -#include "mlrl/common/thresholds/feature_subspace.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" #include "mlrl/common/util/openmp.hpp" /** diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/coverage_mask.cpp similarity index 85% rename from cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/coverage_mask.cpp index e022723a35..8f6d20b15a 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/coverage_mask.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/coverage_mask.cpp @@ -1,7 +1,7 @@ -#include "mlrl/common/thresholds/coverage_mask.hpp" +#include "mlrl/common/rule_refinement/coverage_mask.hpp" +#include 
"mlrl/common/rule_refinement/feature_subspace.hpp" #include "mlrl/common/rule_refinement/prediction.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" CoverageMask::CoverageMask(uint32 numElements) : DenseVectorDecorator>(AllocatedVector(numElements, true)), indicatorValue(0) {} diff --git a/cpp/subprojects/common/src/mlrl/common/thresholds/feature_space_tabular.cpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_space_tabular.cpp similarity index 99% rename from cpp/subprojects/common/src/mlrl/common/thresholds/feature_space_tabular.cpp rename to cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_space_tabular.cpp index 518a8c7dca..408e006861 100644 --- a/cpp/subprojects/common/src/mlrl/common/thresholds/feature_space_tabular.cpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_space_tabular.cpp @@ -1,4 +1,4 @@ -#include "mlrl/common/thresholds/feature_space_tabular.hpp" +#include "mlrl/common/rule_refinement/feature_space_tabular.hpp" #include "mlrl/common/rule_refinement/rule_refinement_feature_based.hpp" #include "mlrl/common/util/openmp.hpp" diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp index 0222319f77..706f0f2767 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/partition_bi.cpp @@ -1,10 +1,10 @@ #include "mlrl/common/sampling/partition_bi.hpp" #include "mlrl/common/prediction/probability_calibration_joint.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" #include "mlrl/common/rule_refinement/prediction.hpp" #include "mlrl/common/sampling/instance_sampling.hpp" #include "mlrl/common/stopping/stopping_criterion.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" #include diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp 
b/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp index 02de98a49e..392e81bc61 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/partition_single.cpp @@ -1,10 +1,10 @@ #include "mlrl/common/sampling/partition_single.hpp" #include "mlrl/common/prediction/probability_calibration_joint.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" #include "mlrl/common/rule_refinement/prediction.hpp" #include "mlrl/common/sampling/instance_sampling.hpp" #include "mlrl/common/stopping/stopping_criterion.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" SinglePartition::SinglePartition(uint32 numElements) : numElements_(numElements) {} diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp index c7a4fb1f49..72f737889f 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_bit.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/sampling/weight_vector_bit.hpp" -#include "mlrl/common/thresholds/feature_space.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" +#include "mlrl/common/rule_refinement/feature_space.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" BitWeightVector::BitWeightVector(uint32 numElements, bool init) : vector_(numElements, init), numNonZeroWeights_(0) {} diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp index ad9f677c02..937a55394a 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_dense.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/sampling/weight_vector_dense.hpp" -#include "mlrl/common/thresholds/feature_space.hpp" 
-#include "mlrl/common/thresholds/feature_subspace.hpp" +#include "mlrl/common/rule_refinement/feature_space.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" template DenseWeightVector::DenseWeightVector(uint32 numElements, bool init) diff --git a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp index bdfa996936..cd6d13e4da 100644 --- a/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/sampling/weight_vector_equal.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/sampling/weight_vector_equal.hpp" -#include "mlrl/common/thresholds/feature_space.hpp" -#include "mlrl/common/thresholds/feature_subspace.hpp" +#include "mlrl/common/rule_refinement/feature_space.hpp" +#include "mlrl/common/rule_refinement/feature_subspace.hpp" EqualWeightVector::EqualWeightVector(uint32 numElements) : numElements_(numElements) {} From 1ba76abc44bad6a2e8674c8de0f95ad364ff5169 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Thu, 22 Feb 2024 00:30:36 +0100 Subject: [PATCH 46/53] Fix MSVC compiler error. 
--- .../src/mlrl/common/input/feature_binning_equal_width.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp index c4e9175b9a..79f263ab0c 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_binning_equal_width.cpp @@ -35,7 +35,7 @@ static inline Tuple getMinAndMaxFeatureValue(const NumericalFeatureVect return Tuple(min, max); } -static inline constexpr uint32 getBinIndex(float32 value, float32 min, float32 width, uint32 numBins) { +static inline uint32 getBinIndex(float32 value, float32 min, float32 width, uint32 numBins) { uint32 binIndex = (uint32) std::floor((value - min) / width); return binIndex >= numBins ? numBins - 1 : binIndex; } From f3fe13234b22bb4eaa1d7e49815c7cb261aff0fe Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 28 Feb 2024 20:57:55 +0100 Subject: [PATCH 47/53] Add class AbstractBinnedFeatureVectorDecorator. 
--- .../input/feature_vector_decorator_binary.hpp | 8 +- .../input/feature_vector_decorator_binned.hpp | 22 ++--- ...feature_vector_decorator_binned_common.hpp | 91 +++++++++++++++++++ .../feature_vector_decorator_nominal.hpp | 8 +- ...eature_vector_decorator_nominal_common.hpp | 83 ----------------- .../feature_vector_decorator_ordinal.hpp | 17 ++-- 6 files changed, 120 insertions(+), 109 deletions(-) create mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned_common.hpp diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binary.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binary.hpp index c7544074f2..491033c9e4 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binary.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binary.hpp @@ -3,13 +3,14 @@ */ #pragma once +#include "feature_vector_decorator_binned_common.hpp" #include "feature_vector_decorator_nominal_common.hpp" /** * Provides random read and write access, as well as read and write access via iterators, to the values and indicies of * training examples stored in an `BinaryFeatureVector`. 
*/ -class BinaryFeatureVectorDecorator final : public AbstractNominalFeatureVectorDecorator { +class BinaryFeatureVectorDecorator final : public AbstractBinnedFeatureVectorDecorator { public: /** @@ -18,13 +19,14 @@ class BinaryFeatureVectorDecorator final : public AbstractNominalFeatureVectorDe */ BinaryFeatureVectorDecorator(AllocatedNominalFeatureVector&& firstView, AllocatedMissingFeatureVector&& secondView) - : AbstractNominalFeatureVectorDecorator(std::move(firstView), std::move(secondView)) {} + : AbstractBinnedFeatureVectorDecorator(std::move(firstView), + std::move(secondView)) {} /** * @param other A reference to an object of type `BinaryFeatureVectorDecorator` that should be copied */ BinaryFeatureVectorDecorator(const BinaryFeatureVectorDecorator& other) - : AbstractNominalFeatureVectorDecorator(other) {} + : AbstractBinnedFeatureVectorDecorator(other) {} void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp index a21f3d5bc8..08b7a3292b 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp @@ -4,8 +4,7 @@ #pragma once #include "feature_vector_binned_allocated.hpp" -#include "feature_vector_decorator.hpp" -#include "mlrl/common/input/feature_vector_equal.hpp" +#include "feature_vector_decorator_binned_common.hpp" template static inline std::unique_ptr createFilteredBinnedFeatureVectorDecorator( @@ -95,7 +94,8 @@ class BinnedFeatureVectorView final : public AbstractFeatureVectorDecorator( + *this, interval, coverageMask, indicatorValue, statistics); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, @@ 
-151,7 +151,9 @@ class AllocatedBinnedFeatureVectorView final : public AbstractFeatureVectorDecor void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, uint32 indicatorValue, IWeightedStatistics& statistics) const override final { - // TODO Implement + updateCoverageMaskAndStatisticsBasedOnBinnedFeatureVector( + *this, interval, coverageMask, indicatorValue, statistics); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, @@ -172,7 +174,7 @@ class AllocatedBinnedFeatureVectorView final : public AbstractFeatureVectorDecor * Provides random read and write access, as well as read and write access via iterators, to the values and thresholds * stored in an `AllocatedBinnedFeatureVector`. */ -class BinnedFeatureVectorDecorator final : public AbstractFeatureVectorDecorator { +class BinnedFeatureVectorDecorator final : public AbstractBinnedFeatureVectorDecorator { public: /** @@ -181,8 +183,8 @@ class BinnedFeatureVectorDecorator final : public AbstractFeatureVectorDecorator */ BinnedFeatureVectorDecorator(AllocatedBinnedFeatureVector&& firstView, AllocatedMissingFeatureVector&& secondView) - : AbstractFeatureVectorDecorator(std::move(firstView), - std::move(secondView)) {} + : AbstractBinnedFeatureVectorDecorator(std::move(firstView), + std::move(secondView)) {} /** * @param other A reference to an object of type `BinnedFeatureVectorDecorator` that should be copied @@ -230,12 +232,6 @@ class BinnedFeatureVectorDecorator final : public AbstractFeatureVectorDecorator refinement); } - void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, - uint32 indicatorValue, - IWeightedStatistics& statistics) const override final { - // TODO Implement - } - std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, const Interval& interval) const override { // TODO Implement diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned_common.hpp 
b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned_common.hpp new file mode 100644 index 0000000000..8061284fed --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned_common.hpp @@ -0,0 +1,91 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "feature_vector_decorator.hpp" +#include "feature_vector_nominal_allocated.hpp" +#include "mlrl/common/input/feature_vector_equal.hpp" + +template +static inline void updateCoverageMaskAndStatisticsBasedOnBinnedFeatureVector(const View& view, const Interval& interval, + CoverageMask& coverageMask, + uint32 indicatorValue, + IWeightedStatistics& statistics) { + const FeatureVector& featureVector = view.getView().firstView; + CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); + + if (interval.inverse) { + // Discard the indices that correspond to the values in the range [interval.start, interval.end) and set the + // corresponding values in `coverageMask` to `indicatorValue`, which marks them as uncovered... + for (uint32 i = interval.start; i < interval.end; i++) { + typename FeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(i); + typename FeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(i); + uint32 numIndices = indicesEnd - indexIterator; + + for (uint32 j = 0; j < numIndices; j++) { + uint32 index = indexIterator[j]; + coverageMaskIterator[index] = indicatorValue; + statistics.removeCoveredStatistic(index); + } + } + + updateCoverageMaskAndStatisticsBasedOnMissingFeatureVector(view, coverageMaskIterator, indicatorValue, + statistics); + } else { + coverageMask.indicatorValue = indicatorValue; + statistics.resetCoveredStatistics(); + + // Retain the indices in the range [interval.start, interval.end) and set the corresponding values in the given + // `coverageMask` to `indicatorValue` to mark them as covered... 
+ for (uint32 i = interval.start; i < interval.end; i++) { + typename FeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(i); + typename FeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(i); + uint32 numIndices = indicesEnd - indexIterator; + + for (uint32 j = 0; j < numIndices; j++) { + uint32 index = indexIterator[j]; + coverageMaskIterator[index] = indicatorValue; + statistics.addCoveredStatistic(index); + } + } + } +} + +/** + * An abstract base class for all decorators that provide access to the bins stored in a feature vector. + * + * @tparam AllocatedFeatureVector The type of the view that provides access to the bins in the feature vector + */ +template +class AbstractBinnedFeatureVectorDecorator : public AbstractFeatureVectorDecorator { + public: + + /** + * @param firstView A reference to an object of template type `AllocatedFeatureVector` + * @param secondView A reference to an object of type `AllocatedMissingFeatureVector` + */ + AbstractBinnedFeatureVectorDecorator(AllocatedFeatureVector&& firstView, + AllocatedMissingFeatureVector&& secondView) + : AbstractFeatureVectorDecorator(std::move(firstView), std::move(secondView)) {} + + /** + * @param other A reference to an object of type `AbstractBinnedFeatureVectorDecorator` that should be copied + */ + AbstractBinnedFeatureVectorDecorator(const AbstractBinnedFeatureVectorDecorator& other) + : AbstractBinnedFeatureVectorDecorator( + AllocatedFeatureVector(other.view.firstView.numValues, + other.view.firstView.indptr[other.view.firstView.numValues], + other.view.firstView.majorityValue), + AllocatedMissingFeatureVector()) {} + + virtual ~AbstractBinnedFeatureVectorDecorator() override {} + + void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, + uint32 indicatorValue, + IWeightedStatistics& statistics) const override final { + updateCoverageMaskAndStatisticsBasedOnBinnedFeatureVector( + *this, interval, 
coverageMask, indicatorValue, statistics); + } +}; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal.hpp index 22454ebb85..138aa7c038 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal.hpp @@ -3,6 +3,7 @@ */ #pragma once +#include "feature_vector_decorator_binned_common.hpp" #include "feature_vector_decorator_nominal_common.hpp" template @@ -71,7 +72,7 @@ static inline std::unique_ptr createFilteredNominalFeatureVector * Provides random read and write access, as well as read and write access via iterators, to the values and indices of * training examples stored in an `AllocatedNominalFeatureVector`. */ -class NominalFeatureVectorDecorator final : public AbstractNominalFeatureVectorDecorator { +class NominalFeatureVectorDecorator final : public AbstractBinnedFeatureVectorDecorator { public: /** @@ -80,13 +81,14 @@ class NominalFeatureVectorDecorator final : public AbstractNominalFeatureVectorD */ NominalFeatureVectorDecorator(AllocatedNominalFeatureVector&& firstView, AllocatedMissingFeatureVector&& secondView) - : AbstractNominalFeatureVectorDecorator(std::move(firstView), std::move(secondView)) {} + : AbstractBinnedFeatureVectorDecorator(std::move(firstView), + std::move(secondView)) {} /** * @param other A reference to an object of type `NominalFeatureVectorDecorator` that should be copied */ NominalFeatureVectorDecorator(const NominalFeatureVectorDecorator& other) - : AbstractNominalFeatureVectorDecorator(other) {} + : AbstractBinnedFeatureVectorDecorator(other) {} void searchForRefinement(FeatureBasedSearch& featureBasedSearch, IWeightedStatisticsSubset& statisticsSubset, SingleRefinementComparator& comparator, uint32 numExamplesWithNonZeroWeights, diff --git 
a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp index b74bf80f9a..c871065d50 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_nominal_common.hpp @@ -3,56 +3,9 @@ */ #pragma once -#include "feature_vector_decorator.hpp" #include "feature_vector_nominal_allocated.hpp" #include "mlrl/common/input/feature_vector_equal.hpp" -template -static inline void updateCoverageMaskAndStatisticsBasedOnNominalFeatureVector(const View& view, - const Interval& interval, - CoverageMask& coverageMask, - uint32 indicatorValue, - IWeightedStatistics& statistics) { - const NominalFeatureVector& featureVector = view.getView().firstView; - CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); - - if (interval.inverse) { - // Discard the indices that correspond to the values in the range [interval.start, interval.end) and set the - // corresponding values in `coverageMask` to `indicatorValue`, which marks them as uncovered... 
- for (uint32 i = interval.start; i < interval.end; i++) { - NominalFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(i); - NominalFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(i); - uint32 numIndices = indicesEnd - indexIterator; - - for (uint32 j = 0; j < numIndices; j++) { - uint32 index = indexIterator[j]; - coverageMaskIterator[index] = indicatorValue; - statistics.removeCoveredStatistic(index); - } - } - - updateCoverageMaskAndStatisticsBasedOnMissingFeatureVector(view, coverageMaskIterator, indicatorValue, - statistics); - } else { - coverageMask.indicatorValue = indicatorValue; - statistics.resetCoveredStatistics(); - - // Retain the indices in the range [interval.start, interval.end) and set the corresponding values in the given - // `coverageMask` to `indicatorValue` to mark them as covered... - for (uint32 i = interval.start; i < interval.end; i++) { - NominalFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(i); - NominalFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(i); - uint32 numIndices = indicesEnd - indexIterator; - - for (uint32 j = 0; j < numIndices; j++) { - uint32 index = indexIterator[j]; - coverageMaskIterator[index] = indicatorValue; - statistics.addCoveredStatistic(index); - } - } - } -} - template static inline std::unique_ptr createFilteredNominalFeatureVectorDecorator( const View& view, std::unique_ptr& existing, const CoverageMask& coverageMask) { @@ -97,39 +50,3 @@ static inline std::unique_ptr createFilteredNominalFeatureVector return std::make_unique(); } - -/** - * An abstract base class for all decorators that provide access to the values and indices of training examples stored - * in an `AllocatedNominalFeatureVector`. 
- */ -class AbstractNominalFeatureVectorDecorator : public AbstractFeatureVectorDecorator { - public: - - /** - * @param firstView A reference to an object of type `AllocatedNominalFeatureVector` - * @param secondView A reference to an object of type `AllocatedMissingFeatureVector` - */ - AbstractNominalFeatureVectorDecorator(AllocatedNominalFeatureVector&& firstView, - AllocatedMissingFeatureVector&& secondView) - : AbstractFeatureVectorDecorator(std::move(firstView), - std::move(secondView)) {} - - /** - * @param other A reference to an object of type `AbstractNominalFeatureVectorDecorator` that should be copied - */ - AbstractNominalFeatureVectorDecorator(const AbstractNominalFeatureVectorDecorator& other) - : AbstractNominalFeatureVectorDecorator( - AllocatedNominalFeatureVector(other.view.firstView.numValues, - other.view.firstView.indptr[other.view.firstView.numValues], - other.view.firstView.majorityValue), - AllocatedMissingFeatureVector()) {} - - virtual ~AbstractNominalFeatureVectorDecorator() override {} - - void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, - uint32 indicatorValue, - IWeightedStatistics& statistics) const override final { - updateCoverageMaskAndStatisticsBasedOnNominalFeatureVector(*this, interval, coverageMask, indicatorValue, - statistics); - } -}; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp index ba7eb562b6..2fc7a2aec2 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp @@ -3,6 +3,7 @@ */ #pragma once +#include "feature_vector_decorator_binned_common.hpp" #include "feature_vector_decorator_nominal_common.hpp" #include @@ -78,8 +79,8 @@ class OrdinalFeatureVectorView final : public AbstractFeatureVectorDecorator( + *this, interval, 
coverageMask, indicatorValue, statistics); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, @@ -142,8 +143,9 @@ class AllocatedOrdinalFeatureVectorView final : public AbstractFeatureVectorDeco void updateCoverageMaskAndStatistics(const Interval& interval, CoverageMask& coverageMask, uint32 indicatorValue, IWeightedStatistics& statistics) const override final { - updateCoverageMaskAndStatisticsBasedOnNominalFeatureVector(*this, interval, coverageMask, indicatorValue, - statistics); + updateCoverageMaskAndStatisticsBasedOnBinnedFeatureVector( + *this, interval, coverageMask, indicatorValue, statistics); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, @@ -178,7 +180,7 @@ class AllocatedOrdinalFeatureVectorView final : public AbstractFeatureVectorDeco * Provides random read and write access, as well as read and write access via iterators, to the values and indicies of * training examples stored in an `AllocatedNominalFeatureVector`. */ -class OrdinalFeatureVectorDecorator final : public AbstractNominalFeatureVectorDecorator { +class OrdinalFeatureVectorDecorator final : public AbstractBinnedFeatureVectorDecorator { public: /** @@ -187,13 +189,14 @@ class OrdinalFeatureVectorDecorator final : public AbstractNominalFeatureVectorD */ OrdinalFeatureVectorDecorator(AllocatedNominalFeatureVector&& firstView, AllocatedMissingFeatureVector&& secondView) - : AbstractNominalFeatureVectorDecorator(std::move(firstView), std::move(secondView)) {} + : AbstractBinnedFeatureVectorDecorator(std::move(firstView), + std::move(secondView)) {} /** * @param other A reference to an object of type `OrdinalFeatureVectorDecorator` that should be copied */ OrdinalFeatureVectorDecorator(const OrdinalFeatureVectorDecorator& other) - : AbstractNominalFeatureVectorDecorator(other) {} + : AbstractBinnedFeatureVectorDecorator(other) {} /** * @param other A reference to an object of type `OrdinalFeatureVectorView` that should be copied From 
850ba50c3275f66f7d0cbc1e6f492185bc66bdbc Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 28 Feb 2024 21:19:45 +0100 Subject: [PATCH 48/53] Add function "createFilteredBinnedFeatureVectorView. --- .../input/feature_vector_decorator_binned.hpp | 87 ++- .../input/feature_vector_decorator_binned.cpp | 556 ++++++++++++++++++ 2 files changed, 637 insertions(+), 6 deletions(-) diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp index 08b7a3292b..08971fdf13 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp @@ -6,6 +6,47 @@ #include "feature_vector_binned_allocated.hpp" #include "feature_vector_decorator_binned_common.hpp" +#include + +template +static inline std::optional createFilteredBinnedFeatureVectorView( + const Decorator& decorator, std::unique_ptr& existing, const Interval& interval) { + const BinnedFeatureVector& featureVector = decorator.getView().firstView; + uint32 start; + uint32 end; + + if (interval.inverse) { + if (interval.start > 0) { + start = 0; + end = interval.start; + } else { + start = interval.end; + end = featureVector.numBins; + } + } else { + start = interval.start; + + if (start > 0) { + end = featureVector.numBins; + } else { + end = interval.end; + } + } + + uint32 numFilteredBins = end - start; + + if (numFilteredBins > 0) { + uint32 sparseBinIndex = featureVector.sparseBinIndex; + sparseBinIndex = sparseBinIndex >= start ? sparseBinIndex - start : 0; + sparseBinIndex = sparseBinIndex >= numFilteredBins ? 
numFilteredBins - 1 : sparseBinIndex; + return BinnedFeatureVector(&featureVector.thresholds[start], featureVector.indices, + &featureVector.indptr[start], numFilteredBins, + featureVector.indptr[featureVector.numBins], sparseBinIndex); + } + + return {}; +} + template static inline std::unique_ptr createFilteredBinnedFeatureVectorDecorator( const View& view, std::unique_ptr& existing, const CoverageMask& coverageMask) { @@ -100,8 +141,14 @@ class BinnedFeatureVectorView final : public AbstractFeatureVectorDecorator createFilteredFeatureVector(std::unique_ptr& existing, const Interval& interval) const override { - // TODO Implement - return nullptr; + std::optional filteredFeatureVector = + createFilteredBinnedFeatureVectorView(*this, existing, interval); + + if (filteredFeatureVector) { + return std::make_unique(std::move(*filteredFeatureVector)); + } + + return std::make_unique(); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, @@ -158,8 +205,22 @@ class AllocatedBinnedFeatureVectorView final : public AbstractFeatureVectorDecor std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, const Interval& interval) const override { - // TODO Implement - return nullptr; + std::optional filteredFeatureVector = + createFilteredBinnedFeatureVectorView(*this, existing, interval); + + if (filteredFeatureVector) { + AllocatedBinnedFeatureVectorView* existingView = + dynamic_cast(existing.get()); + + if (existingView) { + return std::make_unique(std::move(existingView->allocatedView), + std::move(*filteredFeatureVector)); + } + + return std::make_unique(std::move(*filteredFeatureVector)); + } + + return std::make_unique(); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, @@ -234,8 +295,22 @@ class BinnedFeatureVectorDecorator final : public AbstractBinnedFeatureVectorDec std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, const Interval& interval) const override { - // TODO Implement - return 
nullptr; + std::optional filteredFeatureVector = + createFilteredBinnedFeatureVectorView(*this, existing, interval); + + if (filteredFeatureVector) { + BinnedFeatureVectorDecorator* existingDecorator = + dynamic_cast(existing.get()); + + if (existingDecorator) { + return std::make_unique( + std::move(existingDecorator->view.firstView), std::move(*filteredFeatureVector)); + } + + return std::make_unique(std::move(*filteredFeatureVector)); + } + + return std::make_unique(); } std::unique_ptr createFilteredFeatureVector(std::unique_ptr& existing, diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binned.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binned.cpp index 9b69854a20..dd61d9eb6c 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binned.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_decorator_binned.cpp @@ -5,6 +5,562 @@ #include +TEST(BinnedFeatureVectorDecoratorTest, updateCoverageMaskAndStatistics) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0); + + for (uint32 i = 0; i < numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + WeightedStatistics statistics; + uint32 numExamples = numMinorityExamples + 15; + + for (uint32 i = 0; i < numExamples; i++) { + statistics.addCoveredStatistic(i); + } + + 
BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); + Interval interval(2, numBins); + CoverageMask coverageMask(numExamples); + uint32 indicatorValue = 1; + decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); + EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); + const BinnedFeatureVector& binnedFeatureVector = decorator.getView().firstView; + + for (uint32 i = 0; i < interval.start; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_FALSE(coverageMask[index]); + EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = interval.start; i < interval.end; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_TRUE(coverageMask[index]); + EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = interval.end; i < numBins; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_FALSE(coverageMask[index]); + EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = numMinorityExamples; i < numExamples; i++) { + EXPECT_FALSE(coverageMask[i]); + EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); + } +} + +TEST(BinnedFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsInverse) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + 
AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0); + + for (uint32 i = 0; i < numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + AllocatedMissingFeatureVector missingFeatureVector; + uint32 numMissingExamples = 5; + + for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingExamples; i++) { + missingFeatureVector.set(i, true); + } + + WeightedStatistics statistics; + uint32 numExamples = numMinorityExamples + numMissingExamples + 15; + + for (uint32 i = 0; i < numExamples; i++) { + statistics.addCoveredStatistic(i); + } + + BinnedFeatureVectorDecorator decorator(std::move(featureVector), std::move(missingFeatureVector)); + Interval interval(2, numBins, true); + CoverageMask coverageMask(numExamples); + uint32 indicatorValue = 1; + decorator.updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); + EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); + const BinnedFeatureVector& binnedFeatureVector = decorator.getView().firstView; + + for (uint32 i = 0; i < interval.start; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_TRUE(coverageMask[index]); + EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = interval.start; i < interval.end; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_FALSE(coverageMask[index]); + EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = 
interval.end; i < numBins; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_TRUE(coverageMask[index]); + EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingExamples; i++) { + EXPECT_FALSE(coverageMask[i]); + EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); + } + + for (uint32 i = numMinorityExamples + numMissingExamples; i < numExamples; i++) { + EXPECT_TRUE(coverageMask[i]); + EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); + } +} + +TEST(BinnedFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromView) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0); + + for (uint32 i = 0; i < numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + WeightedStatistics statistics; + uint32 numExamples = numMinorityExamples + 15; + + for (uint32 i = 0; i < numExamples; i++) { + statistics.addCoveredStatistic(i); + } + + BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); + Interval interval(2, numBins); + CoverageMask coverageMask(numExamples); + uint32 indicatorValue = 1; + std::unique_ptr existing; + 
decorator.createFilteredFeatureVector(existing, Interval(0, numBins)) + ->updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); + EXPECT_EQ(coverageMask.indicatorValue, indicatorValue); + const BinnedFeatureVector& binnedFeatureVector = decorator.getView().firstView; + + for (uint32 i = 0; i < interval.start; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_FALSE(coverageMask[index]); + EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = interval.start; i < interval.end; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_TRUE(coverageMask[index]); + EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = interval.end; i < numBins; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_FALSE(coverageMask[index]); + EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = numMinorityExamples; i < numExamples; i++) { + EXPECT_FALSE(coverageMask[i]); + EXPECT_FALSE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); + } +} + +TEST(BinnedFeatureVectorDecoratorTest, updateCoverageMaskAndStatisticsFromViewInverse) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator 
= featureVector.indices_begin(0); + + for (uint32 i = 0; i < numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + WeightedStatistics statistics; + uint32 numExamples = numMinorityExamples + 15; + + for (uint32 i = 0; i < numExamples; i++) { + statistics.addCoveredStatistic(i); + } + + BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); + Interval interval(2, numBins, true); + CoverageMask coverageMask(numExamples); + uint32 indicatorValue = 1; + std::unique_ptr existing; + decorator.createFilteredFeatureVector(existing, Interval(0, numBins)) + ->updateCoverageMaskAndStatistics(interval, coverageMask, indicatorValue, statistics); + EXPECT_EQ(coverageMask.indicatorValue, (uint32) 0); + const BinnedFeatureVector& binnedFeatureVector = decorator.getView().firstView; + + for (uint32 i = 0; i < interval.start; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_TRUE(coverageMask[index]); + EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = interval.start; i < interval.end; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_FALSE(coverageMask[index]); + EXPECT_FALSE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + } + + for (uint32 i = interval.end; i < numBins; i++) { + for (auto it = binnedFeatureVector.indices_cbegin(i); it != binnedFeatureVector.indices_cend(i); it++) { + uint32 index = *it; + EXPECT_TRUE(coverageMask[index]); + EXPECT_TRUE(statistics.coveredStatistics.find(index) != statistics.coveredStatistics.end()); + } + 
} + + for (uint32 i = numMinorityExamples; i < numExamples; i++) { + EXPECT_TRUE(coverageMask[i]); + EXPECT_TRUE(statistics.coveredStatistics.find(i) != statistics.coveredStatistics.end()); + } +} + +TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromIndices) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0); + + for (uint32 i = 0; i < numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); + std::unique_ptr existing; + Interval interval(2, numBins); + std::unique_ptr filtered = decorator.createFilteredFeatureVector(existing, interval); + const BinnedFeatureVectorView* filteredDecorator = dynamic_cast(filtered.get()); + EXPECT_TRUE(filteredDecorator != nullptr); + + if (filteredDecorator) { + // Check filtered indices... 
+ const BinnedFeatureVector& filteredFeatureVector = filteredDecorator->getView().firstView; + EXPECT_EQ(filteredFeatureVector.numBins, interval.end - interval.start); + BinnedFeatureVector::threshold_const_iterator thresholdsBegin = filteredFeatureVector.thresholds_cbegin(); + + for (uint32 i = 0; i < filteredFeatureVector.numBins; i++) { + if (i < filteredFeatureVector.numBins - 1) { + EXPECT_EQ(thresholdsBegin[i], (int32) (interval.start + i)); + } + + BinnedFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector.indices_cbegin(i); + BinnedFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector.indices_cend(i); + uint32 numIndices = indicesEnd - indicesBegin; + EXPECT_EQ(numIndices, numExamplesPerBin); + + for (uint32 j = 0; j < numIndices; j++) { + EXPECT_EQ(indicesBegin[j], ((i + interval.start) * numExamplesPerBin) + j); + } + } + } +} + +TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromViewWithIndices) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0); + + for (uint32 i = 0; i < numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); + std::unique_ptr existing; + Interval interval(2, numBins); + std::unique_ptr filtered = decorator.createFilteredFeatureVector(existing, Interval(0, numBins)) 
+ ->createFilteredFeatureVector(existing, interval); + const BinnedFeatureVectorView* filteredDecorator = dynamic_cast(filtered.get()); + EXPECT_TRUE(filteredDecorator != nullptr); + + if (filteredDecorator) { + // Check filtered indices... + const BinnedFeatureVector& filteredFeatureVector = filteredDecorator->getView().firstView; + EXPECT_EQ(filteredFeatureVector.numBins, interval.end - interval.start); + BinnedFeatureVector::threshold_const_iterator thresholdsBegin = filteredFeatureVector.thresholds_cbegin(); + + for (uint32 i = 0; i < filteredFeatureVector.numBins; i++) { + if (i < filteredFeatureVector.numBins - 1) { + EXPECT_EQ(thresholdsBegin[i], (int32) (interval.start + i)); + } + + BinnedFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector.indices_cbegin(i); + BinnedFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector.indices_cend(i); + uint32 numIndices = indicesEnd - indicesBegin; + EXPECT_EQ(numIndices, numExamplesPerBin); + + for (uint32 j = 0; j < numIndices; j++) { + EXPECT_EQ(indicesBegin[j], ((i + interval.start) * numExamplesPerBin) + j); + } + } + } +} + +TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromIndicesInverse) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0); + + for (uint32 i = 0; i < numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + 
BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); + std::unique_ptr existing; + Interval interval(2, numBins, true); + std::unique_ptr filtered = decorator.createFilteredFeatureVector(existing, interval); + const BinnedFeatureVectorView* filteredDecorator = dynamic_cast(filtered.get()); + EXPECT_TRUE(filteredDecorator != nullptr); + + if (filteredDecorator) { + // Check filtered indices... + const BinnedFeatureVector& filteredFeatureVector = filteredDecorator->getView().firstView; + EXPECT_EQ(filteredFeatureVector.numBins, interval.end - interval.start); + BinnedFeatureVector::threshold_const_iterator thresholdsBegin = filteredFeatureVector.thresholds_cbegin(); + + for (uint32 i = 0; i < filteredFeatureVector.numBins; i++) { + EXPECT_EQ(thresholdsBegin[i], (int32) i); + BinnedFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector.indices_cbegin(i); + BinnedFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector.indices_cend(i); + uint32 numIndices = indicesEnd - indicesBegin; + EXPECT_EQ(numIndices, numExamplesPerBin); + + for (uint32 j = 0; j < numIndices; j++) { + EXPECT_EQ(indicesBegin[j], (i * numExamplesPerBin) + j); + } + } + } +} + +TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromViewWithIndicesInverse) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0); + + for (uint32 i = 0; i < numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; 
j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); + std::unique_ptr existing; + Interval interval(2, numBins, true); + std::unique_ptr filtered = decorator.createFilteredFeatureVector(existing, Interval(0, numBins)) + ->createFilteredFeatureVector(existing, interval); + const BinnedFeatureVectorView* filteredDecorator = dynamic_cast(filtered.get()); + EXPECT_TRUE(filteredDecorator != nullptr); + + if (filteredDecorator) { + // Check filtered indices... + const BinnedFeatureVector& filteredFeatureVector = filteredDecorator->getView().firstView; + EXPECT_EQ(filteredFeatureVector.numBins, interval.end - interval.start); + BinnedFeatureVector::threshold_const_iterator thresholdsBegin = filteredFeatureVector.thresholds_cbegin(); + + for (uint32 i = 0; i < filteredFeatureVector.numBins; i++) { + EXPECT_EQ(thresholdsBegin[i], (int32) i); + BinnedFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector.indices_cbegin(i); + BinnedFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector.indices_cend(i); + uint32 numIndices = indicesEnd - indicesBegin; + EXPECT_EQ(numIndices, numExamplesPerBin); + + for (uint32 j = 0; j < numIndices; j++) { + EXPECT_EQ(indicesBegin[j], (i * numExamplesPerBin) + j); + } + } + } +} + +TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromViewWithCoverageMask) { + uint32 numBins = 4; + uint32 numExamplesPerBin = 10; + uint32 numMinorityExamples = numBins * numExamplesPerBin; + AllocatedBinnedFeatureVector featureVector(numBins, numMinorityExamples, 0); + AllocatedBinnedFeatureVector::threshold_iterator thresholdIterator = featureVector.thresholds; + AllocatedBinnedFeatureVector::index_iterator indptrIterator = featureVector.indptr; + AllocatedBinnedFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0); + + for (uint32 i = 0; i < 
numBins; i++) { + if (i < numBins - 1) { + thresholdIterator[i] = i; + } + + indptrIterator[i] = i * numExamplesPerBin; + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + indexIterator[index] = index; + } + } + + CoverageMask coverageMask(numMinorityExamples); + uint32 indicatorValue = 1; + coverageMask.indicatorValue = indicatorValue; + CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); + + for (uint32 i = 0; i < numMinorityExamples; i++) { + if (i % 2 == 0) { + coverageMaskIterator[i] = indicatorValue; + } + } + + BinnedFeatureVectorDecorator decorator(std::move(featureVector), AllocatedMissingFeatureVector()); + std::unique_ptr existing; + std::unique_ptr filtered = decorator.createFilteredFeatureVector(existing, Interval(0, numBins)) + ->createFilteredFeatureVector(existing, coverageMask); + const BinnedFeatureVectorDecorator* filteredDecorator = + dynamic_cast(filtered.get()); + EXPECT_TRUE(filteredDecorator != nullptr); + + if (filteredDecorator) { + // Check filtered indices... 
+ const BinnedFeatureVector& filteredFeatureVector = filteredDecorator->getView().firstView; + + for (uint32 i = 0; i < numBins; i++) { + BinnedFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector.indices_cbegin(i); + BinnedFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector.indices_cend(i); + uint32 numIndices = indicesEnd - indicesBegin; + EXPECT_EQ(numIndices, numExamplesPerBin / 2); + + std::unordered_set indices; + + for (auto it = indicesBegin; it != indicesEnd; it++) { + indices.emplace(*it); + } + + for (uint32 j = 0; j < numExamplesPerBin; j++) { + uint32 index = (i * numExamplesPerBin) + j; + + if (index % 2 == 0) { + EXPECT_TRUE(indices.find(index) != indices.end()); + } else { + EXPECT_TRUE(indices.find(index) == indices.end()); + } + } + } + } +} + TEST(BinnedFeatureVectorDecoratorTest, createFilteredFeatureVectorFromCoverageMask) { uint32 numBins = 3; uint32 numExamplesPerBin = 10; From 4c2a3ea8a45045d96d95cb5a6209eb33153bea03 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 28 Feb 2024 21:29:33 +0100 Subject: [PATCH 49/53] Add utility function "getStartAndEndOfOpenInterval". --- .../include/mlrl/common/input/interval.hpp | 34 ++++++++++++++++++- .../input/feature_vector_decorator_binned.hpp | 24 ++----------- .../feature_vector_decorator_numerical.hpp | 24 ++----------- .../feature_vector_decorator_ordinal.hpp | 24 ++----------- 4 files changed, 42 insertions(+), 64 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/interval.hpp b/cpp/subprojects/common/include/mlrl/common/input/interval.hpp index 0af2e4a72c..0915ba600f 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/interval.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/interval.hpp @@ -3,7 +3,7 @@ */ #pragma once -#include "mlrl/common/data/types.hpp" +#include "mlrl/common/data/tuple.hpp" /** * Specifies the boundaries of an interval that includes/excludes certain elements in a vector. 
@@ -51,3 +51,35 @@ struct Interval { return *this; } }; + +/** + * Returns the start and end index of an open interval `[0, interval.end]` or `[interval.start, maxIndex]`, depending on + * a given `Interval`. + * + * @param interval A reference to an object of type `Interval` + * @param maxIndex The maximum index of an open interval + * @return A `Tuple` that stores the start and end index + */ +static inline Tuple getStartAndEndOfOpenInterval(const Interval& interval, uint32 maxIndex) { + Tuple tuple; + + if (interval.inverse) { + if (interval.start > 0) { + tuple.first = 0; + tuple.second = interval.start; + } else { + tuple.first = interval.end; + tuple.second = maxIndex; + } + } else { + tuple.first = interval.start; + + if (tuple.first > 0) { + tuple.second = maxIndex; + } else { + tuple.second = interval.end; + } + } + + return tuple; +} diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp index 08971fdf13..8f6075b51d 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_binned.hpp @@ -12,27 +12,9 @@ template static inline std::optional createFilteredBinnedFeatureVectorView( const Decorator& decorator, std::unique_ptr& existing, const Interval& interval) { const BinnedFeatureVector& featureVector = decorator.getView().firstView; - uint32 start; - uint32 end; - - if (interval.inverse) { - if (interval.start > 0) { - start = 0; - end = interval.start; - } else { - start = interval.end; - end = featureVector.numBins; - } - } else { - start = interval.start; - - if (start > 0) { - end = featureVector.numBins; - } else { - end = interval.end; - } - } - + Tuple tuple = getStartAndEndOfOpenInterval(interval, featureVector.numBins); + uint32 start = tuple.first; + uint32 end = tuple.second; uint32 numFilteredBins = end - start; if 
(numFilteredBins > 0) { diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp index 4a43705797..aa6107d445 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_numerical.hpp @@ -13,27 +13,9 @@ template static inline std::optional createFilteredNumericalFeatureVectorView( const Decorator& decorator, std::unique_ptr& existing, const Interval& interval) { const NumericalFeatureVector& featureVector = decorator.getView().firstView; - uint32 start = interval.start; - uint32 end = interval.end; - - if (interval.inverse) { - if (interval.start > 0) { - start = 0; - end = interval.start; - } else { - start = interval.end; - end = featureVector.numElements; - } - } else { - start = interval.start; - - if (start > 0) { - end = featureVector.numElements; - } else { - end = interval.end; - } - } - + Tuple tuple = getStartAndEndOfOpenInterval(interval, featureVector.numElements); + uint32 start = tuple.first; + uint32 end = tuple.second; uint32 numFilteredElements = end - start; if (numFilteredElements > 0 diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp index 2fc7a2aec2..791bcf78e0 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_decorator_ordinal.hpp @@ -12,27 +12,9 @@ template static inline std::optional createFilteredOrdinalFeatureVectorView( const Decorator& decorator, std::unique_ptr& existing, const Interval& interval) { const NominalFeatureVector& featureVector = decorator.getView().firstView; - uint32 start; - uint32 end; - - if (interval.inverse) { - if (interval.start > 0) { - start = 
0; - end = interval.start; - } else { - start = interval.end; - end = featureVector.numValues; - } - } else { - start = interval.start; - - if (start > 0) { - end = featureVector.numValues; - } else { - end = interval.end; - } - } - + Tuple tuple = getStartAndEndOfOpenInterval(interval, featureVector.numValues); + uint32 start = tuple.first; + uint32 end = tuple.second; uint32 numFilteredValues = end - start; if (numFilteredValues > 0) { From f6041b152b21aa078b839a6a717f70e30ca47469 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 28 Feb 2024 23:06:46 +0100 Subject: [PATCH 50/53] Adjust expected output of integration tests. --- ...-equal-frequency_binary-features-dense.txt | 26 +++++++-------- ...equal-frequency_binary-features-sparse.txt | 28 ++++++++-------- ...equal-frequency_nominal-features-dense.txt | 32 +++++++++---------- ...qual-frequency_nominal-features-sparse.txt | 32 +++++++++---------- ...ual-frequency_numerical-features-dense.txt | 32 +++++++++---------- ...al-frequency_numerical-features-sparse.txt | 32 +++++++++---------- ...ning-equal-width_binary-features-dense.txt | 26 +++++++-------- ...ing-equal-width_binary-features-sparse.txt | 28 ++++++++-------- ...ing-equal-width_nominal-features-dense.txt | 32 +++++++++---------- ...ng-equal-width_nominal-features-sparse.txt | 32 +++++++++---------- ...g-equal-width_numerical-features-dense.txt | 32 +++++++++---------- ...-equal-width_numerical-features-sparse.txt | 32 +++++++++---------- 12 files changed, 182 insertions(+), 182 deletions(-) diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-dense.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-dense.txt index 6b545ff8e0..db6fe9a443 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-dense.txt +++ 
b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-dense.txt @@ -12,20 +12,20 @@ DEBUG A sparse matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 62.02 -Example-wise Jaccard 29.62 +Example-wise F1 62.64 +Example-wise Jaccard 31.28 Example-wise Precision 61.51 -Example-wise Recall 33.49 -Hamming Accuracy 94.62 -Hamming Loss 5.38 -Macro F1 12.52 -Macro Jaccard 6.73 -Macro Precision 92.05 -Macro Recall 8.11 -Micro F1 45.02 -Micro Jaccard 29.05 -Micro Precision 64.19 -Micro Recall 34.67 +Example-wise Recall 36.12 +Hamming Accuracy 94.68 +Hamming Loss 5.32 +Macro F1 14.35 +Macro Jaccard 7.81 +Macro Precision 88.88 +Macro Recall 9.27 +Micro F1 47.2 +Micro Jaccard 30.89 +Micro Precision 63.9 +Micro Recall 37.42 Subset 0/1 Loss 96.98 Subset Accuracy 3.02 diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-sparse.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-sparse.txt index 7a6d9a3ac0..ace0681ecc 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-sparse.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-sparse.txt @@ -12,20 +12,20 @@ DEBUG A sparse matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 61.06 -Example-wise Jaccard 29.58 -Example-wise Precision 62.75 -Example-wise Recall 33.2 -Hamming Accuracy 94.62 -Hamming Loss 5.38 -Macro F1 9.81 -Macro Jaccard 6.25 -Macro Precision 94.94 -Macro Recall 7.58 -Micro F1 44.85 -Micro Jaccard 28.91 -Micro Precision 64.39 -Micro Recall 34.41 +Example-wise F1 62.64 +Example-wise Jaccard 31.28 +Example-wise Precision 61.51 +Example-wise Recall 36.12 +Hamming Accuracy 94.68 +Hamming Loss 5.32 +Macro F1 14.35 
+Macro Jaccard 7.81 +Macro Precision 88.88 +Macro Recall 9.27 +Micro F1 47.2 +Micro Jaccard 30.89 +Micro Precision 63.9 +Micro Recall 37.42 Subset 0/1 Loss 96.98 Subset Accuracy 3.02 diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-dense.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-dense.txt index 21c06a8213..75fc6f45cd 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-dense.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-dense.txt @@ -12,21 +12,21 @@ DEBUG A dense matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 71.82 -Example-wise Jaccard 49.49 -Example-wise Precision 71.85 -Example-wise Recall 57.74 -Hamming Accuracy 77.81 -Hamming Loss 22.19 -Macro F1 60.28 -Macro Jaccard 43.89 -Macro Precision 67.51 -Macro Recall 55.34 -Micro F1 62.01 -Micro Jaccard 44.94 -Micro Precision 68.27 -Micro Recall 56.8 -Subset 0/1 Loss 75 -Subset Accuracy 25 +Example-wise F1 71.67 +Example-wise Jaccard 49.15 +Example-wise Precision 73.64 +Example-wise Recall 56.8 +Hamming Accuracy 78.4 +Hamming Loss 21.6 +Macro F1 59.79 +Macro Jaccard 43.82 +Macro Precision 69.16 +Macro Recall 53.66 +Micro F1 62.09 +Micro Jaccard 45.02 +Micro Precision 70.51 +Micro Recall 55.47 +Subset 0/1 Loss 76.02 +Subset Accuracy 23.98 INFO Successfully finished after diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-sparse.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-sparse.txt index e003954c4e..2e59815687 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-sparse.txt +++ 
b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-sparse.txt @@ -12,21 +12,21 @@ DEBUG A dense matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 70.36 -Example-wise Jaccard 48.17 -Example-wise Precision 73.89 -Example-wise Recall 55.53 -Hamming Accuracy 77.72 -Hamming Loss 22.28 -Macro F1 58.29 -Macro Jaccard 42.16 -Macro Precision 68.8 -Macro Recall 51.65 -Micro F1 60.42 -Micro Jaccard 43.29 -Micro Precision 69.69 -Micro Recall 53.33 -Subset 0/1 Loss 75.51 -Subset Accuracy 24.49 +Example-wise F1 67.86 +Example-wise Jaccard 46.6 +Example-wise Precision 76.79 +Example-wise Recall 53.32 +Hamming Accuracy 78.74 +Hamming Loss 21.26 +Macro F1 58.93 +Macro Jaccard 42.71 +Macro Precision 73.26 +Macro Recall 51.04 +Micro F1 61.18 +Micro Jaccard 44.07 +Micro Precision 73.23 +Micro Recall 52.53 +Subset 0/1 Loss 77.55 +Subset Accuracy 22.45 INFO Successfully finished after diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-dense.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-dense.txt index 1c1bc1b52a..2bbcc0a900 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-dense.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-dense.txt @@ -12,21 +12,21 @@ DEBUG A dense matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 68.16 -Example-wise Jaccard 50.55 -Example-wise Precision 79.59 -Example-wise Recall 57.74 -Hamming Accuracy 80.27 -Hamming Loss 19.73 -Macro F1 61.71 -Macro Jaccard 46.62 -Macro Precision 73.01 -Macro Recall 55.9 -Micro F1 65.06 -Micro Jaccard 48.21 -Micro Precision 74.74 -Micro Recall 57.6 -Subset 0/1 Loss 74.49 -Subset 
Accuracy 25.51 +Example-wise F1 66.68 +Example-wise Jaccard 49.57 +Example-wise Precision 80.61 +Example-wise Recall 56.46 +Hamming Accuracy 80.1 +Hamming Loss 19.9 +Macro F1 59.83 +Macro Jaccard 44.79 +Macro Precision 74.18 +Macro Recall 53.49 +Micro F1 64 +Micro Jaccard 47.06 +Micro Precision 75.64 +Micro Recall 55.47 +Subset 0/1 Loss 75 +Subset Accuracy 25 INFO Successfully finished after diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-sparse.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-sparse.txt index b4eced308f..29d8316f25 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-sparse.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-sparse.txt @@ -12,21 +12,21 @@ DEBUG A dense matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 66.56 -Example-wise Jaccard 49.53 -Example-wise Precision 80.27 -Example-wise Recall 57.57 -Hamming Accuracy 80.36 -Hamming Loss 19.64 -Macro F1 63.53 -Macro Jaccard 47.3 -Macro Precision 76 -Macro Recall 56.14 -Micro F1 64.95 -Micro Jaccard 48.09 -Micro Precision 75.35 -Micro Recall 57.07 -Subset 0/1 Loss 76.53 -Subset Accuracy 23.47 +Example-wise F1 66.68 +Example-wise Jaccard 48.81 +Example-wise Precision 79.42 +Example-wise Recall 55.7 +Hamming Accuracy 79.59 +Hamming Loss 20.41 +Macro F1 61.17 +Macro Jaccard 45.09 +Macro Precision 73.55 +Macro Recall 54.13 +Micro F1 63.53 +Micro Jaccard 46.55 +Micro Precision 73.85 +Micro Recall 55.73 +Subset 0/1 Loss 75.51 +Subset Accuracy 24.49 INFO Successfully finished after diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-dense.txt 
b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-dense.txt index 6b545ff8e0..db6fe9a443 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-dense.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-dense.txt @@ -12,20 +12,20 @@ DEBUG A sparse matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 62.02 -Example-wise Jaccard 29.62 +Example-wise F1 62.64 +Example-wise Jaccard 31.28 Example-wise Precision 61.51 -Example-wise Recall 33.49 -Hamming Accuracy 94.62 -Hamming Loss 5.38 -Macro F1 12.52 -Macro Jaccard 6.73 -Macro Precision 92.05 -Macro Recall 8.11 -Micro F1 45.02 -Micro Jaccard 29.05 -Micro Precision 64.19 -Micro Recall 34.67 +Example-wise Recall 36.12 +Hamming Accuracy 94.68 +Hamming Loss 5.32 +Macro F1 14.35 +Macro Jaccard 7.81 +Macro Precision 88.88 +Macro Recall 9.27 +Micro F1 47.2 +Micro Jaccard 30.89 +Micro Precision 63.9 +Micro Recall 37.42 Subset 0/1 Loss 96.98 Subset Accuracy 3.02 diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-sparse.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-sparse.txt index 7a6d9a3ac0..ace0681ecc 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-sparse.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-sparse.txt @@ -12,20 +12,20 @@ DEBUG A sparse matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 61.06 -Example-wise Jaccard 29.58 -Example-wise Precision 62.75 -Example-wise Recall 33.2 -Hamming Accuracy 94.62 -Hamming Loss 5.38 -Macro F1 9.81 -Macro Jaccard 6.25 -Macro Precision 94.94 -Macro Recall 7.58 -Micro F1 44.85 
-Micro Jaccard 28.91 -Micro Precision 64.39 -Micro Recall 34.41 +Example-wise F1 62.64 +Example-wise Jaccard 31.28 +Example-wise Precision 61.51 +Example-wise Recall 36.12 +Hamming Accuracy 94.68 +Hamming Loss 5.32 +Macro F1 14.35 +Macro Jaccard 7.81 +Macro Precision 88.88 +Macro Recall 9.27 +Micro F1 47.2 +Micro Jaccard 30.89 +Micro Precision 63.9 +Micro Recall 37.42 Subset 0/1 Loss 96.98 Subset Accuracy 3.02 diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-dense.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-dense.txt index 21c06a8213..75fc6f45cd 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-dense.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-dense.txt @@ -12,21 +12,21 @@ DEBUG A dense matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 71.82 -Example-wise Jaccard 49.49 -Example-wise Precision 71.85 -Example-wise Recall 57.74 -Hamming Accuracy 77.81 -Hamming Loss 22.19 -Macro F1 60.28 -Macro Jaccard 43.89 -Macro Precision 67.51 -Macro Recall 55.34 -Micro F1 62.01 -Micro Jaccard 44.94 -Micro Precision 68.27 -Micro Recall 56.8 -Subset 0/1 Loss 75 -Subset Accuracy 25 +Example-wise F1 71.67 +Example-wise Jaccard 49.15 +Example-wise Precision 73.64 +Example-wise Recall 56.8 +Hamming Accuracy 78.4 +Hamming Loss 21.6 +Macro F1 59.79 +Macro Jaccard 43.82 +Macro Precision 69.16 +Macro Recall 53.66 +Micro F1 62.09 +Micro Jaccard 45.02 +Micro Precision 70.51 +Micro Recall 55.47 +Subset 0/1 Loss 76.02 +Subset Accuracy 23.98 INFO Successfully finished after diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-sparse.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-sparse.txt 
index e003954c4e..2e59815687 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-sparse.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-sparse.txt @@ -12,21 +12,21 @@ DEBUG A dense matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 70.36 -Example-wise Jaccard 48.17 -Example-wise Precision 73.89 -Example-wise Recall 55.53 -Hamming Accuracy 77.72 -Hamming Loss 22.28 -Macro F1 58.29 -Macro Jaccard 42.16 -Macro Precision 68.8 -Macro Recall 51.65 -Micro F1 60.42 -Micro Jaccard 43.29 -Micro Precision 69.69 -Micro Recall 53.33 -Subset 0/1 Loss 75.51 -Subset Accuracy 24.49 +Example-wise F1 67.86 +Example-wise Jaccard 46.6 +Example-wise Precision 76.79 +Example-wise Recall 53.32 +Hamming Accuracy 78.74 +Hamming Loss 21.26 +Macro F1 58.93 +Macro Jaccard 42.71 +Macro Precision 73.26 +Macro Recall 51.04 +Micro F1 61.18 +Micro Jaccard 44.07 +Micro Precision 73.23 +Micro Recall 52.53 +Subset 0/1 Loss 77.55 +Subset Accuracy 22.45 INFO Successfully finished after diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-dense.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-dense.txt index fe0902805f..9f035f3ce9 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-dense.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-dense.txt @@ -12,21 +12,21 @@ DEBUG A dense matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 71.6 -Example-wise Jaccard 54.51 -Example-wise Precision 81.12 -Example-wise Recall 61.9 -Hamming Accuracy 81.97 -Hamming Loss 18.03 -Macro F1 66.42 -Macro Jaccard 50.44 -Macro Precision 78.82 -Macro 
Recall 59.49 -Micro F1 68.36 -Micro Jaccard 51.93 -Micro Precision 77.63 -Micro Recall 61.07 -Subset 0/1 Loss 71.94 -Subset Accuracy 28.06 +Example-wise F1 63.64 +Example-wise Jaccard 44.73 +Example-wise Precision 78.91 +Example-wise Recall 50.85 +Hamming Accuracy 78.4 +Hamming Loss 21.6 +Macro F1 58.23 +Macro Jaccard 42.68 +Macro Precision 71.01 +Macro Recall 50.77 +Micro F1 60.06 +Micro Jaccard 42.92 +Micro Precision 73.18 +Micro Recall 50.93 +Subset 0/1 Loss 77.55 +Subset Accuracy 22.45 INFO Successfully finished after diff --git a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-sparse.txt b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-sparse.txt index 91186255cb..f938da8d18 100644 --- a/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-sparse.txt +++ b/python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-sparse.txt @@ -12,21 +12,21 @@ DEBUG A dense matrix is used to store the predicted labels INFO Successfully predicted in INFO Evaluation result for test data: -Example-wise F1 71.09 -Example-wise Jaccard 51.28 -Example-wise Precision 78.4 -Example-wise Recall 58.16 -Hamming Accuracy 80.61 -Hamming Loss 19.39 -Macro F1 62.69 -Macro Jaccard 47.34 -Macro Precision 73.96 -Macro Recall 56.39 -Micro F1 65.66 -Micro Jaccard 48.88 -Micro Precision 75.43 -Micro Recall 58.13 -Subset 0/1 Loss 73.98 -Subset Accuracy 26.02 +Example-wise F1 63.64 +Example-wise Jaccard 44.73 +Example-wise Precision 78.91 +Example-wise Recall 50.85 +Hamming Accuracy 78.4 +Hamming Loss 21.6 +Macro F1 58.23 +Macro Jaccard 42.68 +Macro Precision 71.01 +Macro Recall 50.77 +Micro F1 60.06 +Micro Jaccard 42.92 +Micro Precision 73.18 +Micro Recall 50.93 +Subset 0/1 Loss 77.55 +Subset Accuracy 22.45 INFO Successfully finished after From fa69f1a92405563bbb6c46947aa0f1da1ccb0743 Mon Sep 17 00:00:00 2001 From: Michael Rapp 
Date: Wed, 28 Feb 2024 23:12:22 +0100 Subject: [PATCH 51/53] Update changelog. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a761c5c109..53898ff777 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ This release comes with several API changes. For an updated overview of the avai ### Quality-of-Life Improvements +- The implementation of feature binning has been reworked in a way that helps avoiding redundant code. - The documentation has been updated to a more modern theme supporting light and dark theme variants. - A build option that allows to disable multi-threading support via OpenMP at compile-time has been added. - The groundwork for GPU support was laid. It can be disabled at compile-time via a build option. From 03bb8d110b7995210ae2580533c283f829d4fcad Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Thu, 29 Feb 2024 23:26:52 +0100 Subject: [PATCH 52/53] Fix types of local variables. --- .../rule_refinement/feature_based_search_binned_common.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp index 8ee34ee3ad..cf7d8a1c16 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp @@ -19,8 +19,8 @@ template static inline uint32 addAllToSubset(IWeightedStatisticsSubset& statisticsSubset, const FeatureVector& featureVector, uint32 index) { - NominalFeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(index); - NominalFeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(index); + typename FeatureVector::index_const_iterator indexIterator = featureVector.indices_cbegin(index); + typename 
FeatureVector::index_const_iterator indicesEnd = featureVector.indices_cend(index); uint32 numIndices = indicesEnd - indexIterator; uint32 numCovered = 0; From f6b61490fe1385b72fa887178ec0a0598aea1c46 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Thu, 29 Feb 2024 23:29:46 +0100 Subject: [PATCH 53/53] Fix typos in comment. --- .../rule_refinement/feature_based_search_binned_common.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp index cf7d8a1c16..962c5b7df1 100644 --- a/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp +++ b/cpp/subprojects/common/src/mlrl/common/rule_refinement/feature_based_search_binned_common.hpp @@ -6,13 +6,13 @@ #include "mlrl/common/statistics/statistics_subset_weighted.hpp" /** - * Adds all examples corresonding to a single bin in a given feature vector to a given `IWeightedStatisticsSubset`, if + * Adds all examples corresponding to a single bin in a given feature vector to a given `IWeightedStatisticsSubset`, if * they have non-zero weights. * * @tparam FeatureVector The type of the feature vector * @param statisticsSubset A reference to an object of type `IWeightedStatisticsSubset` * @param featureVector A reference to an object of template type `FeatureVector`´that stores the indices of the - * examples that corresond to individual bins + * examples that correspond to individual bins * @param index The index of the bin * @return The number of examples with non-zero weights */