Skip to content

Commit

Permalink
Merge pull request #251 from mrapp-ke/thresholds
Browse files Browse the repository at this point in the history
Auslagerung des Zugriffs auf Thresholds in eigene Klasse
  • Loading branch information
michael-rapp authored Oct 2, 2020
2 parents 9b8caaf + 08bf030 commit 8a10663
Show file tree
Hide file tree
Showing 24 changed files with 1,105 additions and 803 deletions.
10 changes: 6 additions & 4 deletions python/boomer/boosting/boosting_learners.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from boomer.common.rules import ModelBuilder, RuleListBuilder
from boomer.common.sequential_rule_induction import SequentialRuleInduction
from boomer.common.statistics import StatisticsProviderFactory
from boomer.common.thresholds import ExactThresholdsFactory

from boomer.common.rule_learners import INSTANCE_SUB_SAMPLING_BAGGING, FEATURE_SUB_SAMPLING_RANDOM, \
HEAD_REFINEMENT_SINGLE
Expand Down Expand Up @@ -158,11 +159,12 @@ def _create_sequential_rule_induction(self, num_labels: int) -> SequentialRuleIn
rule_evaluation = self.__create_rule_evaluation(loss_function, l2_regularization_weight)
num_threads = create_num_threads(self.num_threads)
statistics_provider_factory = self.__create_statistics_provider_factory(loss_function, rule_evaluation)
thresholds_factory = ExactThresholdsFactory()
rule_induction = TopDownGreedyRuleInduction()
return SequentialRuleInduction(statistics_provider_factory, rule_induction, default_rule_head_refinement,
head_refinement, stopping_criteria, label_sub_sampling, instance_sub_sampling,
feature_sub_sampling, pruning, shrinkage, min_coverage, max_conditions,
max_head_refinements, num_threads)
return SequentialRuleInduction(statistics_provider_factory, thresholds_factory, rule_induction,
default_rule_head_refinement, head_refinement, stopping_criteria,
label_sub_sampling, instance_sub_sampling, feature_sub_sampling, pruning,
shrinkage, min_coverage, max_conditions, max_head_refinements, num_threads)

def __create_l2_regularization_weight(self) -> float:
l2_regularization_weight = float(self.l2_regularization_weight)
Expand Down
2 changes: 0 additions & 2 deletions python/boomer/boosting/example_wise_statistics.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ cdef class ExampleWiseStatisticsProvider(StatisticsProvider):

# Attributes:

cdef shared_ptr[AbstractExampleWiseStatistics] statistics_ptr

cdef ExampleWiseRuleEvaluation rule_evaluation

# Functions:
Expand Down
5 changes: 3 additions & 2 deletions python/boomer/boosting/example_wise_statistics.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ cdef class ExampleWiseStatisticsProvider(StatisticsProvider):
:param rule_evaluation: The `ExampleWiseRuleEvaluation` to switch to when invoking the function
`switch_rule_evaluation`
"""
self.statistics_ptr = shared_ptr[AbstractExampleWiseStatistics](statistics_factory.create())
self.statistics_ptr = shared_ptr[AbstractStatistics](statistics_factory.create())
self.rule_evaluation = rule_evaluation

cdef AbstractStatistics* get(self):
Expand All @@ -63,7 +63,8 @@ cdef class ExampleWiseStatisticsProvider(StatisticsProvider):
cdef void switch_rule_evaluation(self):
cdef ExampleWiseRuleEvaluation rule_evaluation = self.rule_evaluation
cdef shared_ptr[IExampleWiseRuleEvaluation] rule_evaluation_ptr = rule_evaluation.rule_evaluation_ptr
self.statistics_ptr.get().setRuleEvaluation(rule_evaluation_ptr)
dynamic_pointer_cast[AbstractExampleWiseStatistics, AbstractStatistics](self.statistics_ptr).get().setRuleEvaluation(
rule_evaluation_ptr)


cdef class ExampleWiseStatisticsProviderFactory(StatisticsProviderFactory):
Expand Down
2 changes: 0 additions & 2 deletions python/boomer/boosting/label_wise_statistics.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ cdef class LabelWiseStatisticsProvider(StatisticsProvider):

# Attributes:

cdef shared_ptr[AbstractLabelWiseStatistics] statistics_ptr

cdef LabelWiseRuleEvaluation rule_evaluation

# Functions:
Expand Down
5 changes: 3 additions & 2 deletions python/boomer/boosting/label_wise_statistics.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ cdef class LabelWiseStatisticsProvider(StatisticsProvider):
:param rule_evaluation: The `LabelWiseRuleEvaluation` to switch to when invoking the function
`switch_rule_evaluation`
"""
self.statistics_ptr = shared_ptr[AbstractLabelWiseStatistics](statistics_factory.create())
self.statistics_ptr = <shared_ptr[AbstractStatistics]>(statistics_factory.create())
self.rule_evaluation = rule_evaluation

cdef AbstractStatistics* get(self):
Expand All @@ -62,7 +62,8 @@ cdef class LabelWiseStatisticsProvider(StatisticsProvider):
cdef void switch_rule_evaluation(self):
cdef LabelWiseRuleEvaluation rule_evaluation = self.rule_evaluation
cdef shared_ptr[ILabelWiseRuleEvaluation] rule_evaluation_ptr = rule_evaluation.rule_evaluation_ptr
self.statistics_ptr.get().setRuleEvaluation(rule_evaluation_ptr)
dynamic_pointer_cast[AbstractLabelWiseStatistics, AbstractStatistics](self.statistics_ptr).get().setRuleEvaluation(
rule_evaluation_ptr)


cdef class LabelWiseStatisticsProviderFactory(StatisticsProviderFactory):
Expand Down
28 changes: 11 additions & 17 deletions python/boomer/common/cpp/input_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,25 +55,22 @@ uint32 DenseFeatureMatrixImpl::getNumCols() {
return numFeatures_;
}

void DenseFeatureMatrixImpl::fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) {
void DenseFeatureMatrixImpl::fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) {
// The number of elements to be returned
uint32 numElements = this->getNumRows();
// The array that stores the indices
IndexedFloat32* sortedArray = (IndexedFloat32*) malloc(numElements * sizeof(IndexedFloat32));
IndexedFloat32* array = (IndexedFloat32*) malloc(numElements * sizeof(IndexedFloat32));
// The first element in `x_` that corresponds to the given feature index
uint32 offset = featureIndex * numElements;

for (uint32 i = 0; i < numElements; i++) {
sortedArray[i].index = i;
sortedArray[i].value = x_[offset + i];
array[i].index = i;
array[i].value = x_[offset + i];
}

// Sort the array...
qsort(sortedArray, numElements, sizeof(IndexedFloat32), &tuples::compareIndexedFloat32);

// Update the given struct...
indexedArray->numElements = numElements;
indexedArray->data = sortedArray;
indexedArray->data = array;
}

CscFeatureMatrixImpl::CscFeatureMatrixImpl(uint32 numExamples, uint32 numFeatures, const float32* xData,
Expand All @@ -93,33 +90,30 @@ uint32 CscFeatureMatrixImpl::getNumCols() {
return numFeatures_;
}

void CscFeatureMatrixImpl::fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) {
void CscFeatureMatrixImpl::fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) {
    // The index of the first element in `xData_` and `xRowIndices_` that corresponds to the given feature index
uint32 start = xColIndices_[featureIndex];
// The index of the last element in `xData_` and `xRowIndices_` that corresponds to the given feature index
uint32 end = xColIndices_[featureIndex + 1];
// The number of elements to be returned
uint32 numElements = end - start;
// The array that stores the indices
IndexedFloat32* sortedArray = NULL;
IndexedFloat32* array = NULL;

if (numElements > 0) {
sortedArray = (IndexedFloat32*) malloc(numElements * sizeof(IndexedFloat32));
array = (IndexedFloat32*) malloc(numElements * sizeof(IndexedFloat32));
uint32 i = 0;

for (uint32 j = start; j < end; j++) {
sortedArray[i].index = xRowIndices_[j];
sortedArray[i].value = xData_[j];
array[i].index = xRowIndices_[j];
array[i].value = xData_[j];
i++;
}

// Sort the array...
qsort(sortedArray, numElements, sizeof(IndexedFloat32), &tuples::compareIndexedFloat32);
}

// Update the given struct...
indexedArray->numElements = numElements;
indexedArray->data = sortedArray;
indexedArray->data = array;
}

DokNominalFeatureVectorImpl::DokNominalFeatureVectorImpl(BinaryDokVector* vector) {
Expand Down
11 changes: 5 additions & 6 deletions python/boomer/common/cpp/input_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,14 @@ class IFeatureMatrix : virtual public IMatrix {
virtual ~IFeatureMatrix() { };

/**
* Fetches the indices of the training examples, as well as their feature values, for a specific feature, sorts
* them in ascending order by the feature values and stores them in a given struct of type
* `IndexedFloat32Array`.
* Fetches the indices of the training examples, as well as their feature values, for a specific feature and
* stores them in a given struct of type `IndexedFloat32Array`.
*
* @param featureIndex The index of the feature
* @param indexedArray A pointer to a struct of type `IndexedFloat32Array`, which should be used to store the
* indices and feature values
*/
virtual void fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) = 0;
virtual void fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) = 0;

};

Expand Down Expand Up @@ -141,7 +140,7 @@ class DenseFeatureMatrixImpl : virtual public IFeatureMatrix {

uint32 getNumCols() override;

void fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) override;
void fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) override;

};

Expand Down Expand Up @@ -183,7 +182,7 @@ class CscFeatureMatrixImpl : virtual public IFeatureMatrix {

uint32 getNumCols() override;

void fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) override;
void fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) override;

};

Expand Down
Loading

0 comments on commit 8a10663

Please sign in to comment.