Skip to content

Commit

Permalink
Merge pull request #251 from mrapp-ke/thresholds
Browse files Browse the repository at this point in the history
Auslagerung des Zugriffs auf Thresholds in eigene Klasse
  • Loading branch information
michael-rapp authored Oct 2, 2020
2 parents 9b8caaf + 08bf030 commit 8a10663
Show file tree
Hide file tree
Showing 24 changed files with 1,105 additions and 803 deletions.
10 changes: 6 additions & 4 deletions python/boomer/boosting/boosting_learners.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from boomer.common.rules import ModelBuilder, RuleListBuilder
from boomer.common.sequential_rule_induction import SequentialRuleInduction
from boomer.common.statistics import StatisticsProviderFactory
from boomer.common.thresholds import ExactThresholdsFactory

from boomer.common.rule_learners import INSTANCE_SUB_SAMPLING_BAGGING, FEATURE_SUB_SAMPLING_RANDOM, \
HEAD_REFINEMENT_SINGLE
Expand Down Expand Up @@ -158,11 +159,12 @@ def _create_sequential_rule_induction(self, num_labels: int) -> SequentialRuleIn
rule_evaluation = self.__create_rule_evaluation(loss_function, l2_regularization_weight)
num_threads = create_num_threads(self.num_threads)
statistics_provider_factory = self.__create_statistics_provider_factory(loss_function, rule_evaluation)
thresholds_factory = ExactThresholdsFactory()
rule_induction = TopDownGreedyRuleInduction()
return SequentialRuleInduction(statistics_provider_factory, rule_induction, default_rule_head_refinement,
head_refinement, stopping_criteria, label_sub_sampling, instance_sub_sampling,
feature_sub_sampling, pruning, shrinkage, min_coverage, max_conditions,
max_head_refinements, num_threads)
return SequentialRuleInduction(statistics_provider_factory, thresholds_factory, rule_induction,
default_rule_head_refinement, head_refinement, stopping_criteria,
label_sub_sampling, instance_sub_sampling, feature_sub_sampling, pruning,
shrinkage, min_coverage, max_conditions, max_head_refinements, num_threads)

def __create_l2_regularization_weight(self) -> float:
l2_regularization_weight = float(self.l2_regularization_weight)
Expand Down
2 changes: 0 additions & 2 deletions python/boomer/boosting/example_wise_statistics.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ cdef class ExampleWiseStatisticsProvider(StatisticsProvider):

# Attributes:

cdef shared_ptr[AbstractExampleWiseStatistics] statistics_ptr

cdef ExampleWiseRuleEvaluation rule_evaluation

# Functions:
Expand Down
5 changes: 3 additions & 2 deletions python/boomer/boosting/example_wise_statistics.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ cdef class ExampleWiseStatisticsProvider(StatisticsProvider):
:param rule_evaluation: The `ExampleWiseRuleEvaluation` to switch to when invoking the function
`switch_rule_evaluation`
"""
self.statistics_ptr = shared_ptr[AbstractExampleWiseStatistics](statistics_factory.create())
self.statistics_ptr = shared_ptr[AbstractStatistics](statistics_factory.create())
self.rule_evaluation = rule_evaluation

cdef AbstractStatistics* get(self):
Expand All @@ -63,7 +63,8 @@ cdef class ExampleWiseStatisticsProvider(StatisticsProvider):
cdef void switch_rule_evaluation(self):
cdef ExampleWiseRuleEvaluation rule_evaluation = self.rule_evaluation
cdef shared_ptr[IExampleWiseRuleEvaluation] rule_evaluation_ptr = rule_evaluation.rule_evaluation_ptr
self.statistics_ptr.get().setRuleEvaluation(rule_evaluation_ptr)
dynamic_pointer_cast[AbstractExampleWiseStatistics, AbstractStatistics](self.statistics_ptr).get().setRuleEvaluation(
rule_evaluation_ptr)


cdef class ExampleWiseStatisticsProviderFactory(StatisticsProviderFactory):
Expand Down
2 changes: 0 additions & 2 deletions python/boomer/boosting/label_wise_statistics.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ cdef class LabelWiseStatisticsProvider(StatisticsProvider):

# Attributes:

cdef shared_ptr[AbstractLabelWiseStatistics] statistics_ptr

cdef LabelWiseRuleEvaluation rule_evaluation

# Functions:
Expand Down
5 changes: 3 additions & 2 deletions python/boomer/boosting/label_wise_statistics.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ cdef class LabelWiseStatisticsProvider(StatisticsProvider):
:param rule_evaluation: The `LabelWiseRuleEvaluation` to switch to when invoking the function
`switch_rule_evaluation`
"""
self.statistics_ptr = shared_ptr[AbstractLabelWiseStatistics](statistics_factory.create())
self.statistics_ptr = <shared_ptr[AbstractStatistics]>(statistics_factory.create())
self.rule_evaluation = rule_evaluation

cdef AbstractStatistics* get(self):
Expand All @@ -62,7 +62,8 @@ cdef class LabelWiseStatisticsProvider(StatisticsProvider):
cdef void switch_rule_evaluation(self):
cdef LabelWiseRuleEvaluation rule_evaluation = self.rule_evaluation
cdef shared_ptr[ILabelWiseRuleEvaluation] rule_evaluation_ptr = rule_evaluation.rule_evaluation_ptr
self.statistics_ptr.get().setRuleEvaluation(rule_evaluation_ptr)
dynamic_pointer_cast[AbstractLabelWiseStatistics, AbstractStatistics](self.statistics_ptr).get().setRuleEvaluation(
rule_evaluation_ptr)


cdef class LabelWiseStatisticsProviderFactory(StatisticsProviderFactory):
Expand Down
28 changes: 11 additions & 17 deletions python/boomer/common/cpp/input_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,25 +55,22 @@ uint32 DenseFeatureMatrixImpl::getNumCols() {
return numFeatures_;
}

void DenseFeatureMatrixImpl::fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) {
void DenseFeatureMatrixImpl::fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) {
// The number of elements to be returned
uint32 numElements = this->getNumRows();
// The array that stores the indices
IndexedFloat32* sortedArray = (IndexedFloat32*) malloc(numElements * sizeof(IndexedFloat32));
IndexedFloat32* array = (IndexedFloat32*) malloc(numElements * sizeof(IndexedFloat32));
// The first element in `x_` that corresponds to the given feature index
uint32 offset = featureIndex * numElements;

for (uint32 i = 0; i < numElements; i++) {
sortedArray[i].index = i;
sortedArray[i].value = x_[offset + i];
array[i].index = i;
array[i].value = x_[offset + i];
}

// Sort the array...
qsort(sortedArray, numElements, sizeof(IndexedFloat32), &tuples::compareIndexedFloat32);

// Update the given struct...
indexedArray->numElements = numElements;
indexedArray->data = sortedArray;
indexedArray->data = array;
}

CscFeatureMatrixImpl::CscFeatureMatrixImpl(uint32 numExamples, uint32 numFeatures, const float32* xData,
Expand All @@ -93,33 +90,30 @@ uint32 CscFeatureMatrixImpl::getNumCols() {
return numFeatures_;
}

void CscFeatureMatrixImpl::fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) {
void CscFeatureMatrixImpl::fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) {
    // The index of the first element in `xData_` and `xRowIndices_` that corresponds to the given feature index
uint32 start = xColIndices_[featureIndex];
// The index of the last element in `xData_` and `xRowIndices_` that corresponds to the given feature index
uint32 end = xColIndices_[featureIndex + 1];
// The number of elements to be returned
uint32 numElements = end - start;
// The array that stores the indices
IndexedFloat32* sortedArray = NULL;
IndexedFloat32* array = NULL;

if (numElements > 0) {
sortedArray = (IndexedFloat32*) malloc(numElements * sizeof(IndexedFloat32));
array = (IndexedFloat32*) malloc(numElements * sizeof(IndexedFloat32));
uint32 i = 0;

for (uint32 j = start; j < end; j++) {
sortedArray[i].index = xRowIndices_[j];
sortedArray[i].value = xData_[j];
array[i].index = xRowIndices_[j];
array[i].value = xData_[j];
i++;
}

// Sort the array...
qsort(sortedArray, numElements, sizeof(IndexedFloat32), &tuples::compareIndexedFloat32);
}

// Update the given struct...
indexedArray->numElements = numElements;
indexedArray->data = sortedArray;
indexedArray->data = array;
}

DokNominalFeatureVectorImpl::DokNominalFeatureVectorImpl(BinaryDokVector* vector) {
Expand Down
11 changes: 5 additions & 6 deletions python/boomer/common/cpp/input_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,14 @@ class IFeatureMatrix : virtual public IMatrix {
virtual ~IFeatureMatrix() { };

/**
* Fetches the indices of the training examples, as well as their feature values, for a specific feature, sorts
* them in ascending order by the feature values and stores them in a given struct of type
* `IndexedFloat32Array`.
* Fetches the indices of the training examples, as well as their feature values, for a specific feature and
* stores them in a given struct of type `IndexedFloat32Array`.
*
* @param featureIndex The index of the feature
* @param indexedArray A pointer to a struct of type `IndexedFloat32Array`, which should be used to store the
* indices and feature values
*/
virtual void fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) = 0;
virtual void fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) = 0;

};

Expand Down Expand Up @@ -141,7 +140,7 @@ class DenseFeatureMatrixImpl : virtual public IFeatureMatrix {

uint32 getNumCols() override;

void fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) override;
void fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) override;

};

Expand Down Expand Up @@ -183,7 +182,7 @@ class CscFeatureMatrixImpl : virtual public IFeatureMatrix {

uint32 getNumCols() override;

void fetchSortedFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) override;
void fetchFeatureValues(uint32 featureIndex, IndexedFloat32Array* indexedArray) override;

};

Expand Down
Loading

0 comments on commit 8a10663

Please sign in to comment.