Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Erstellen von Histogrammen auf Basis von Statistiken #249

Merged
merged 28 commits into from
Sep 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2508e0b
Added Struct Bin
LukasEberle Sep 24, 2020
5ddaac5
Added Interfaces
LukasEberle Sep 24, 2020
d572c12
Updated binning.h
LukasEberle Sep 24, 2020
1b14900
Added Constructors in binning.h
LukasEberle Sep 24, 2020
7cc3b18
Refined Code skeleton
LukasEberle Sep 24, 2020
24b3940
Added some comments
LukasEberle Sep 24, 2020
270089f
Corrected some added nested class on example-/label_wise_statistics
LukasEberle Sep 25, 2020
80f279e
Implemented basic structure
LukasEberle Sep 26, 2020
dab6688
Improved Example Wise Method
LukasEberle Sep 28, 2020
f4938d4
Improved Label Wise Method
LukasEberle Sep 28, 2020
5a41cad
Added Destructors to the bin implementations
LukasEberle Sep 28, 2020
456379c
Changed malloc to calloc to zero initialize the arrays
LukasEberle Sep 28, 2020
1d435a4
Implemented the more trivial feedback
LukasEberle Sep 29, 2020
5169d98
Corrected matrix size
LukasEberle Sep 29, 2020
86e8d00
Corrected matrix size
LukasEberle Sep 29, 2020
f77a7f1
Corrected Syntax Errors
LukasEberle Sep 29, 2020
e98265f
First onBinUpdate Draft
LukasEberle Sep 29, 2020
12edb29
First onBinUpdate Draft
LukasEberle Sep 29, 2020
4821de9
Improved first Draft
LukasEberle Sep 29, 2020
f68e2ea
Corrected a mistake, where a index started at 1 instead of 0
LukasEberle Sep 29, 2020
92fe377
Finished histogram-creation
LukasEberle Sep 29, 2020
d73e734
Merge branch 'approximate-conditions' into histogram-creation
michael-rapp Sep 29, 2020
0452e03
Add author.
michael-rapp Sep 29, 2020
66b4c9b
Add comments.
michael-rapp Sep 29, 2020
5bfbafc
Add comments.
michael-rapp Sep 29, 2020
438a243
Format code.
michael-rapp Sep 29, 2020
e033941
Change order of functions.
michael-rapp Sep 29, 2020
689591b
Edit TODO.
michael-rapp Sep 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions python/boomer/boosting/cpp/example_wise_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,37 @@ PredictionCandidate* DenseExampleWiseStatisticsImpl::StatisticsSubsetImpl::calcu
return prediction_;
}

/**
 * Creates a histogram builder whose per-bin gradient and Hessian buffers are zero-initialized.
 */
DenseExampleWiseStatisticsImpl::HistogramBuilderImpl::HistogramBuilderImpl(DenseExampleWiseStatisticsImpl* statistics,
                                                                           uint32 numBins)
    : statistics_(statistics), numBins_(numBins) {
    // Each bin stores one gradient per column and a triangular number of Hessians
    uint32 gradientsPerBin = statistics_->getNumCols();
    uint32 hessiansPerBin = linalg::triangularNumber(gradientsPerBin);
    // `calloc` is used on purpose, because the sums are accumulated in-place and must start at zero
    gradients_ = static_cast<float64*>(calloc(numBins_ * gradientsPerBin, sizeof(float64)));
    hessians_ = static_cast<float64*>(calloc(numBins_ * hessiansPerBin, sizeof(float64)));
}

/**
 * Adds the gradients and Hessians of a single row of the original statistics to the bin with the given index.
 *
 * @param binIndex      The index of the bin the statistics should be added to
 * @param indexedValue  A pointer to a struct of type `IndexedFloat32`, whose `index` selects the row of the original
 *                      statistics that should be added
 */
void DenseExampleWiseStatisticsImpl::HistogramBuilderImpl::onBinUpdate(uint32 binIndex, IndexedFloat32* indexedValue) {
    uint32 numGradients = statistics_->getNumCols();
    // There are more Hessians than gradients per row (and per bin), so the two arrays need separate strides. Using
    // the gradient stride for the Hessians reads the wrong elements and leaves part of each bin untouched.
    uint32 numHessians = linalg::triangularNumber(numGradients);
    uint32 index = indexedValue->index;

    // Accumulate the gradients...
    uint32 gradientOffset = index * numGradients;
    uint32 binGradientOffset = binIndex * numGradients;

    for (uint32 c = 0; c < numGradients; c++) {
        gradients_[binGradientOffset + c] += statistics_->gradients_[gradientOffset + c];
    }

    // Accumulate the Hessians...
    uint32 hessianOffset = index * numHessians;
    uint32 binHessianOffset = binIndex * numHessians;

    for (uint32 c = 0; c < numHessians; c++) {
        hessians_[binHessianOffset + c] += statistics_->hessians_[hessianOffset + c];
    }
}

/**
 * Creates a new `DenseExampleWiseStatisticsImpl` that stores the per-bin gradients and Hessians.
 */
AbstractStatistics* DenseExampleWiseStatisticsImpl::HistogramBuilderImpl::build() {
    // Everything except the gradients and Hessians is taken over from the original statistics. The per-bin arrays
    // are passed on as raw pointers.
    AbstractStatistics* histogram = new DenseExampleWiseStatisticsImpl(
        statistics_->lossFunctionPtr_, statistics_->ruleEvaluationPtr_, statistics_->lapackPtr_,
        statistics_->labelMatrixPtr_, gradients_, hessians_, statistics_->currentScores_);
    return histogram;
}

DenseExampleWiseStatisticsImpl::DenseExampleWiseStatisticsImpl(
std::shared_ptr<IExampleWiseLoss> lossFunctionPtr,
std::shared_ptr<IExampleWiseRuleEvaluation> ruleEvaluationPtr, std::shared_ptr<Lapack> lapackPtr,
Expand Down Expand Up @@ -230,6 +261,10 @@ void DenseExampleWiseStatisticsImpl::applyPrediction(uint32 statisticIndex, Pred
&gradients_[offset], &hessians_[statisticIndex * numHessians]);
}

/**
 * Creates a new histogram builder that reads from this object and aggregates into `numBins` bins.
 */
AbstractStatistics::IHistogramBuilder* DenseExampleWiseStatisticsImpl::buildHistogram(uint32 numBins) {
    HistogramBuilderImpl* histogramBuilder = new HistogramBuilderImpl(this, numBins);
    return histogramBuilder;
}

DenseExampleWiseStatisticsFactoryImpl::DenseExampleWiseStatisticsFactoryImpl(
std::shared_ptr<IExampleWiseLoss> lossFunctionPtr,
std::shared_ptr<IExampleWiseRuleEvaluation> ruleEvaluationPtr, std::shared_ptr<Lapack> lapackPtr,
Expand Down
34 changes: 34 additions & 0 deletions python/boomer/boosting/cpp/example_wise_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* loss function that is applied example-wise.
*
* @author Michael Rapp ([email protected])
* @author Lukas Johannes Eberle ([email protected])
*/
#pragma once

Expand Down Expand Up @@ -121,6 +122,37 @@ namespace boosting {

};

/**
* Allows to build a histogram based on the gradients and Hessians that are stored by an instance of the
* class `DenseExampleWiseStatisticsImpl`.
*
* The constructor allocates zero-initialized arrays for the bins, `onBinUpdate` adds values of the original
* statistics to a bin and `build` passes the arrays to a newly created `DenseExampleWiseStatisticsImpl`.
*
* NOTE(review): no destructor is declared, so the arrays do not appear to be released if `build` is never
* called -- confirm who is responsible for freeing them.
*/
class HistogramBuilderImpl : virtual public IHistogramBuilder {

private:

/** The statistics, the gradients and Hessians are read from. */
DenseExampleWiseStatisticsImpl* statistics_;

/** The number of bins, as passed to the constructor. */
uint32 numBins_;

/** The per-bin gradients; allocated with `numBins * getNumCols()` elements. */
float64* gradients_;

/** The per-bin Hessians; allocated with `numBins * linalg::triangularNumber(getNumCols())` elements. */
float64* hessians_;

public:

/**
* @param statistics A pointer to an object of type `DenseExampleWiseStatisticsImpl` that stores
* the gradients and Hessians
* @param numBins The number of bins the histogram should consist of
*/
HistogramBuilderImpl(DenseExampleWiseStatisticsImpl* statistics, uint32 numBins);

/**
* Adds statistics of the row `indexedValue->index` of the original statistics to the bin at `binIndex`.
*
* @param binIndex The index of the bin that should be updated
* @param indexedValue A pointer to a struct of type `IndexedFloat32`; only its `index` is read
*/
void onBinUpdate(uint32 binIndex, IndexedFloat32* indexedValue) override;

/**
* Creates a new `DenseExampleWiseStatisticsImpl` from the per-bin arrays.
*
* @return A pointer to an object of type `AbstractStatistics` that has been created
*/
AbstractStatistics* build() override;

};

std::shared_ptr<IExampleWiseLoss> lossFunctionPtr_;

std::shared_ptr<Lapack> lapackPtr_;
Expand Down Expand Up @@ -172,6 +204,8 @@ namespace boosting {

void applyPrediction(uint32 statisticIndex, Prediction* prediction) override;

/**
* Creates a new `HistogramBuilderImpl` that reads from this object and uses `numBins` bins.
*/
IHistogramBuilder* buildHistogram(uint32 numBins) override;

};

/**
Expand Down
33 changes: 33 additions & 0 deletions python/boomer/boosting/cpp/label_wise_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,35 @@ LabelWisePredictionCandidate* DenseLabelWiseStatisticsImpl::StatisticsSubsetImpl
return prediction_;
}

/**
 * Creates a histogram builder whose per-bin gradient and Hessian buffers are zero-initialized.
 */
DenseLabelWiseStatisticsImpl::HistogramBuilderImpl::HistogramBuilderImpl(DenseLabelWiseStatisticsImpl* statistics,
                                                                         uint32 numBins)
    : statistics_(statistics), numBins_(numBins) {
    // One gradient and one Hessian is stored per bin and column
    uint32 numElements = numBins_ * statistics_->getNumCols();
    // `calloc` is used on purpose, because the sums are accumulated in-place and must start at zero
    gradients_ = static_cast<float64*>(calloc(numElements, sizeof(float64)));
    hessians_ = static_cast<float64*>(calloc(numElements, sizeof(float64)));
}

/**
 * Adds the gradients and Hessians of a single row of the original statistics to the bin with the given index.
 *
 * @param binIndex      The index of the bin the statistics should be added to
 * @param indexedValue  A pointer to a struct of type `IndexedFloat32`, whose `index` selects the row of the original
 *                      statistics that should be added
 */
void DenseLabelWiseStatisticsImpl::HistogramBuilderImpl::onBinUpdate(uint32 binIndex, IndexedFloat32* indexedValue) {
    const uint32 numCols = statistics_->getNumCols();
    const uint32 rowOffset = indexedValue->index * numCols;
    const uint32 binOffset = binIndex * numCols;

    // Gradients and Hessians share the same layout, so a single pass updates both
    for (uint32 i = 0; i < numCols; i++) {
        const uint32 from = rowOffset + i;
        const uint32 to = binOffset + i;
        gradients_[to] += statistics_->gradients_[from];
        hessians_[to] += statistics_->hessians_[from];
    }
}

/**
 * Creates a new `DenseLabelWiseStatisticsImpl` that stores the per-bin gradients and Hessians.
 */
AbstractStatistics* DenseLabelWiseStatisticsImpl::HistogramBuilderImpl::build() {
    // Everything except the gradients and Hessians is taken over from the original statistics. The per-bin arrays
    // are passed on as raw pointers.
    AbstractStatistics* histogram = new DenseLabelWiseStatisticsImpl(
        statistics_->lossFunctionPtr_, statistics_->ruleEvaluationPtr_, statistics_->labelMatrixPtr_, gradients_,
        hessians_, statistics_->currentScores_);
    return histogram;
}

DenseLabelWiseStatisticsImpl::DenseLabelWiseStatisticsImpl(std::shared_ptr<ILabelWiseLoss> lossFunctionPtr,
std::shared_ptr<ILabelWiseRuleEvaluation> ruleEvaluationPtr,
std::shared_ptr<IRandomAccessLabelMatrix> labelMatrixPtr,
Expand Down Expand Up @@ -164,6 +193,10 @@ void DenseLabelWiseStatisticsImpl::applyPrediction(uint32 statisticIndex, Predic
}
}

/**
 * Creates a new histogram builder that reads from this object and aggregates into `numBins` bins.
 */
AbstractStatistics::IHistogramBuilder* DenseLabelWiseStatisticsImpl::buildHistogram(uint32 numBins) {
    HistogramBuilderImpl* histogramBuilder = new HistogramBuilderImpl(this, numBins);
    return histogramBuilder;
}

DenseLabelWiseStatisticsFactoryImpl::DenseLabelWiseStatisticsFactoryImpl(
std::shared_ptr<ILabelWiseLoss> lossFunctionPtr, std::shared_ptr<ILabelWiseRuleEvaluation> ruleEvaluationPtr,
std::shared_ptr<IRandomAccessLabelMatrix> labelMatrixPtr) {
Expand Down
34 changes: 34 additions & 0 deletions python/boomer/boosting/cpp/label_wise_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* function that is applied label-wise.
*
* @author Michael Rapp ([email protected])
* @author Lukas Johannes Eberle ([email protected])
*/
#pragma once

Expand Down Expand Up @@ -103,6 +104,37 @@ namespace boosting {

};

/**
* Allows to build a histogram based on the gradients and Hessians that are stored by an instance of the
* class `DenseLabelWiseStatisticsImpl`.
*
* The constructor allocates zero-initialized arrays for the bins, `onBinUpdate` adds values of the original
* statistics to a bin and `build` passes the arrays to a newly created `DenseLabelWiseStatisticsImpl`.
*
* NOTE(review): no destructor is declared, so the arrays do not appear to be released if `build` is never
* called -- confirm who is responsible for freeing them.
*/
class HistogramBuilderImpl : virtual public AbstractStatistics::IHistogramBuilder {

private:

/** The statistics, the gradients and Hessians are read from. */
DenseLabelWiseStatisticsImpl* statistics_;

/** The number of bins, as passed to the constructor. */
uint32 numBins_;

/** The per-bin gradients; allocated with `numBins * getNumCols()` elements. */
float64* gradients_;

/** The per-bin Hessians; allocated with `numBins * getNumCols()` elements. */
float64* hessians_;

public:

/**
* @param statistics A pointer to an object of type `DenseLabelWiseStatisticsImpl` that stores
* the gradients and Hessians
* @param numBins The number of bins the histogram should consist of
*/
HistogramBuilderImpl(DenseLabelWiseStatisticsImpl* statistics, uint32 numBins);

/**
* Adds the gradients and Hessians of the row `indexedValue->index` of the original statistics to the bin at
* `binIndex`.
*
* @param binIndex The index of the bin that should be updated
* @param indexedValue A pointer to a struct of type `IndexedFloat32`; only its `index` is read
*/
void onBinUpdate(uint32 binIndex, IndexedFloat32* indexedValue) override;

/**
* Creates a new `DenseLabelWiseStatisticsImpl` from the per-bin arrays.
*
* @return A pointer to an object of type `AbstractStatistics` that has been created
*/
AbstractStatistics* build() override;

};

std::shared_ptr<ILabelWiseLoss> lossFunctionPtr_;

std::shared_ptr<IRandomAccessLabelMatrix> labelMatrixPtr_;
Expand Down Expand Up @@ -148,6 +180,8 @@ namespace boosting {

void applyPrediction(uint32 statisticIndex, Prediction* prediction) override;

/**
* Creates a new `HistogramBuilderImpl` that reads from this object and uses `numBins` bins.
*/
AbstractStatistics::IHistogramBuilder* buildHistogram(uint32 numBins) override;

};

/**
Expand Down
1 change: 0 additions & 1 deletion python/boomer/common/cpp/binning.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#pragma once

#include "arrays.h"
#include "statistics.h"
#include "tuples.h"


Expand Down
30 changes: 30 additions & 0 deletions python/boomer/common/cpp/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
* Implements classes that provide access to statistics about the labels of training examples.
*
* @author Michael Rapp ([email protected])
* @author Lukas Johannes Eberle ([email protected])
*/
#pragma once

#include "arrays.h"
#include "predictions.h"
#include "data.h"
#include "binning.h"
#include <memory>


Expand Down Expand Up @@ -153,6 +155,26 @@ class AbstractStatistics : virtual public IMatrix {

public:

/**
 * Defines an interface for all classes that allow to build histograms by aggregating the statistics that
 * correspond to the same bins. As an `IBinningObserver`, an implementation is notified about the assignment of
 * values to bins and can afterwards be asked for the resulting histogram via `build`.
 */
class IHistogramBuilder : virtual public IBinningObserver {

    public:

        virtual ~IHistogramBuilder() = default;

        /**
         * Creates and returns a new instance of the class `AbstractStatistics` that stores the histogram that
         * has been built.
         *
         * @return A pointer to an object of type `AbstractStatistics` that has been created
         */
        virtual AbstractStatistics* build() = 0;

};

/**
* @param numStatistics The number of statistics
*/
Expand Down Expand Up @@ -252,6 +274,14 @@ class AbstractStatistics : virtual public IMatrix {
*/
virtual void applyPrediction(uint32 statisticIndex, Prediction* prediction) = 0;

/**
* Creates and returns a new instance of the class `IHistogramBuilder` that allows to build a histogram based on
* the statistics.
*
* @param numBins The number of bins the histogram should consist of
* @return A pointer to an object of type `IHistogramBuilder` that has been created
*/
virtual IHistogramBuilder* buildHistogram(uint32 numBins) = 0;

uint32 getNumRows() override;

uint32 getNumCols() override;
Expand Down
9 changes: 9 additions & 0 deletions python/boomer/common/cpp/tuples.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Provides type definitions of tuples, as well as corresponding utility functions.
*
* @author Michael Rapp ([email protected])
* @author Lukas Johannes Eberle ([email protected])
*/
#pragma once

Expand Down Expand Up @@ -46,6 +47,14 @@ struct IndexedFloat64 {
float64 value;
};

/**
* A struct that stores all necessary information of a group of examples to calculate thresholds.
*/
struct Bin {
// Presumably the number of examples that have been assigned to the bin -- confirm against the binning code
uint32 numExamples;
// Presumably the smallest feature value of the examples in the bin -- confirm against the binning code
float32 minValue;
// Presumably the largest feature value of the examples in the bin -- confirm against the binning code
float32 maxValue;
};

namespace tuples {

Expand Down
5 changes: 5 additions & 0 deletions python/boomer/seco/cpp/label_wise_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ void DenseLabelWiseStatisticsImpl::applyPrediction(uint32 statisticIndex, Predic
}
}

/**
 * Histograms are not supported by this implementation yet.
 *
 * @param numBins   The number of bins (currently ignored)
 * @return          Always a null pointer, i.e., callers must not dereference the result without checking it
 */
AbstractStatistics::IHistogramBuilder* DenseLabelWiseStatisticsImpl::buildHistogram(uint32 numBins) {
    // TODO Support creation of histograms
    return nullptr;
}

DenseLabelWiseStatisticsFactoryImpl::DenseLabelWiseStatisticsFactoryImpl(
std::shared_ptr<ILabelWiseRuleEvaluation> ruleEvaluationPtr,
std::shared_ptr<IRandomAccessLabelMatrix> labelMatrixPtr) {
Expand Down
2 changes: 2 additions & 0 deletions python/boomer/seco/cpp/label_wise_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ namespace seco {

void applyPrediction(uint32 statisticIndex, Prediction* prediction) override;

/**
* Histograms are not supported yet; the implementation currently returns a null pointer.
*/
IHistogramBuilder* buildHistogram(uint32 numBins) override;

};

/**
Expand Down