Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Erstellen von Histogrammen auf Basis von Statistiken #249

Merged
merged 28 commits into from
Sep 29, 2020
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2508e0b
Added Struct Bin
LukasEberle Sep 24, 2020
5ddaac5
Added Interfaces
LukasEberle Sep 24, 2020
d572c12
Updated binning.h
LukasEberle Sep 24, 2020
1b14900
Added Constructors in binning.h
LukasEberle Sep 24, 2020
7cc3b18
Refined Code skeleton
LukasEberle Sep 24, 2020
24b3940
Added some comments
LukasEberle Sep 24, 2020
270089f
Corrected some added nested class on example-/label_wise_statistics
LukasEberle Sep 25, 2020
80f279e
Implemented basic structure
LukasEberle Sep 26, 2020
dab6688
Improved Example Wise Method
LukasEberle Sep 28, 2020
f4938d4
Improved Label Wise Method
LukasEberle Sep 28, 2020
5a41cad
Added Destructors to the bin implementations
LukasEberle Sep 28, 2020
456379c
Changed malloc to calloc to zero initialize the arrays
LukasEberle Sep 28, 2020
1d435a4
Implemented the more trivial feedback
LukasEberle Sep 29, 2020
5169d98
Corrected matrix size
LukasEberle Sep 29, 2020
86e8d00
Corrected matrix size
LukasEberle Sep 29, 2020
f77a7f1
Corrected Syntax Errors
LukasEberle Sep 29, 2020
e98265f
First onBinUpdate Draft
LukasEberle Sep 29, 2020
12edb29
First onBinUpdate Draft
LukasEberle Sep 29, 2020
4821de9
Improved first Draft
LukasEberle Sep 29, 2020
f68e2ea
Corrected a mistake, where a index started at 1 instead of 0
LukasEberle Sep 29, 2020
92fe377
Finished histogram-creation
LukasEberle Sep 29, 2020
d73e734
Merge branch 'approximate-conditions' into histogram-creation
michael-rapp Sep 29, 2020
0452e03
Add author.
michael-rapp Sep 29, 2020
66b4c9b
Add comments.
michael-rapp Sep 29, 2020
5bfbafc
Add comments.
michael-rapp Sep 29, 2020
438a243
Format code.
michael-rapp Sep 29, 2020
e033941
Change order of functions.
michael-rapp Sep 29, 2020
689591b
Edit TODO.
michael-rapp Sep 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions python/boomer/boosting/cpp/example_wise_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,31 @@ AbstractExampleWiseStatistics* DenseExampleWiseStatisticsFactoryImpl::create() {
return new DenseExampleWiseStatisticsImpl(lossFunctionPtr_, ruleEvaluationPtr_, lapackPtr_, labelMatrixPtr_,
gradients, hessians, currentScores);
}

/**
 * Creates a new histogram builder and allocates its per-bin arrays.
 *
 * @param statistics A pointer to the statistics the builder refers to. The pointer is only stored; it is not freed
 *                   by the destructor of this class
 * @param numBins    The number of bins, i.e., the number of elements that are allocated for each array
 */
DenseExampleWiseStatisticsImpl::DenseExampleWiseStatisticsBinsImpl::DenseExampleWiseStatisticsBinsImpl(
        DenseExampleWiseStatisticsImpl* statistics, uint32 numBins) {
    statistics_ = statistics;
    numBins_ = numBins;
    // Assign to the members. Declaring locals with the same names at this point would shadow the members, leak the
    // allocations and leave the members uninitialized for `onBinUpdate` and the destructor.
    gradients_ = static_cast<float64*>(calloc(numBins_, sizeof(float64)));
    hessians_ = static_cast<float64*>(calloc(numBins_, sizeof(float64)));
    // Zero-initialized (instead of `malloc`), such that the counter `numExamples` of every bin starts at 0.
    bins = static_cast<Bin*>(calloc(numBins_, sizeof(Bin)));
}

/**
 * Releases the three arrays that are referenced by the members `gradients_`, `hessians_` and `bins`. The pointer
 * `statistics_` is not freed.
 */
DenseExampleWiseStatisticsImpl::DenseExampleWiseStatisticsBinsImpl::~DenseExampleWiseStatisticsBinsImpl() {
    // The arrays are independent of each other, the order in which they are released does not matter.
    free(bins);
    free(hessians_);
    free(gradients_);
}

/**
 * Registers a single value as belonging to the bin at index `binIndex`.
 *
 * Overwrites `indexedValue->index` with `binIndex`, increments the bin's counter `numExamples` and updates the
 * bin's `minValue` and `maxValue`, such that they enclose `indexedValue->value`.
 *
 * @param binIndex      The index of the bin, the value has been assigned to. It is not range-checked
 * @param indexedValue  A pointer to the value that has been assigned to the bin. Its `index` is replaced by
 *                      `binIndex`
 */
void DenseExampleWiseStatisticsImpl::DenseExampleWiseStatisticsBinsImpl::onBinUpdate(uint32 binIndex,
                                                                                     IndexedFloat32* indexedValue) {
    indexedValue->index = binIndex;
    Bin& bin = bins[binIndex];
    const auto value = indexedValue->value;

    if (bin.numExamples == 0) {
        // The first value of a bin is its minimum as well as its maximum. Comparing it against the bin's initial
        // contents would, e.g., leave `minValue` untouched whenever the value raises `maxValue`.
        // NOTE(review): this requires `numExamples` to be 0 for a bin that has not been updated yet -- confirm that
        // the bins are zero-initialized when they are allocated.
        bin.minValue = value;
        bin.maxValue = value;
    } else if (bin.maxValue < value) {
        bin.maxValue = value;
    } else if (value < bin.minValue) {
        bin.minValue = value;
    }

    bin.numExamples++;
}

/**
 * Returns the statistics this builder has been created with.
 *
 * @return The pointer that is stored in `statistics_`, converted to a pointer of type `AbstractStatistics`
 */
AbstractStatistics* DenseExampleWiseStatisticsImpl::DenseExampleWiseStatisticsBinsImpl::build() {
    AbstractStatistics* result = statistics_;
    return result;
}
27 changes: 27 additions & 0 deletions python/boomer/boosting/cpp/example_wise_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "../../common/cpp/arrays.h"
#include "../../common/cpp/statistics.h"
#include "../../common/cpp/binning.h"
#include "example_wise_rule_evaluation.h"
#include "example_wise_losses.h"
#include "statistics.h"
Expand Down Expand Up @@ -121,6 +122,32 @@ namespace boosting {

};

class DenseExampleWiseStatisticsBinsImpl : virtual public IHistogramBuilder {
michael-rapp marked this conversation as resolved.
Show resolved Hide resolved

private:

DenseExampleWiseStatisticsImpl* statistics_;

uint32 numBins_;

float64* gradients_;

float64* hessians_;

Bin* bins;

public:

DenseExampleWiseStatisticsBinsImpl(DenseExampleWiseStatisticsImpl* statistics, uint32 numBins);

~DenseExampleWiseStatisticsBinsImpl();

void onBinUpdate(uint32 binIndex, IndexedFloat32* indexedValue) override;

AbstractStatistics* build();
michael-rapp marked this conversation as resolved.
Show resolved Hide resolved

};

std::shared_ptr<IExampleWiseLoss> lossFunctionPtr_;

std::shared_ptr<Lapack> lapackPtr_;
Expand Down
28 changes: 28 additions & 0 deletions python/boomer/boosting/cpp/label_wise_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,31 @@ AbstractLabelWiseStatistics* DenseLabelWiseStatisticsFactoryImpl::create() {
return new DenseLabelWiseStatisticsImpl(lossFunctionPtr_, ruleEvaluationPtr_, labelMatrixPtr_, gradients, hessians,
currentScores);
}

/**
 * Creates a new histogram builder and allocates its per-bin arrays.
 *
 * @param statistics A pointer to the statistics the builder refers to. The pointer is only stored; it is not freed
 *                   by the destructor of this class
 * @param numBins    The number of bins, i.e., the number of elements that are allocated for each array
 */
DenseLabelWiseStatisticsImpl::DenseLabelWiseStatisticsBinsImpl::DenseLabelWiseStatisticsBinsImpl(
        DenseLabelWiseStatisticsImpl* statistics, uint32 numBins) {
    statistics_ = statistics;
    numBins_ = numBins;
    // Assign to the members. Declaring locals with the same names at this point would shadow the members, leak the
    // allocations and leave the members uninitialized for `onBinUpdate` and the destructor.
    gradients_ = static_cast<float64*>(calloc(numBins_, sizeof(float64)));
    hessians_ = static_cast<float64*>(calloc(numBins_, sizeof(float64)));
    // Zero-initialized (instead of `malloc`), such that the counter `numExamples` of every bin starts at 0.
    bins = static_cast<Bin*>(calloc(numBins_, sizeof(Bin)));
}

/**
 * Releases the three arrays that are referenced by the members `gradients_`, `hessians_` and `bins`. The pointer
 * `statistics_` is not freed.
 */
DenseLabelWiseStatisticsImpl::DenseLabelWiseStatisticsBinsImpl::~DenseLabelWiseStatisticsBinsImpl() {
    // The arrays are independent of each other, the order in which they are released does not matter.
    free(bins);
    free(hessians_);
    free(gradients_);
}

/**
 * Registers a single value as belonging to the bin at index `binIndex`.
 *
 * Overwrites `indexedValue->index` with `binIndex`, increments the bin's counter `numExamples` and updates the
 * bin's `minValue` and `maxValue`, such that they enclose `indexedValue->value`.
 *
 * @param binIndex      The index of the bin, the value has been assigned to. It is not range-checked
 * @param indexedValue  A pointer to the value that has been assigned to the bin. Its `index` is replaced by
 *                      `binIndex`
 */
void DenseLabelWiseStatisticsImpl::DenseLabelWiseStatisticsBinsImpl::onBinUpdate(uint32 binIndex,
                                                                                 IndexedFloat32* indexedValue) {
    indexedValue->index = binIndex;
    Bin& bin = bins[binIndex];
    const auto value = indexedValue->value;

    if (bin.numExamples == 0) {
        // The first value of a bin is its minimum as well as its maximum. Comparing it against the bin's initial
        // contents would, e.g., leave `minValue` untouched whenever the value raises `maxValue`.
        // NOTE(review): this requires `numExamples` to be 0 for a bin that has not been updated yet -- confirm that
        // the bins are zero-initialized when they are allocated.
        bin.minValue = value;
        bin.maxValue = value;
    } else if (bin.maxValue < value) {
        bin.maxValue = value;
    } else if (value < bin.minValue) {
        bin.minValue = value;
    }

    bin.numExamples++;
}

/**
 * Returns the statistics this builder has been created with.
 *
 * @return The pointer that is stored in `statistics_`, converted to a pointer of type `AbstractStatistics`
 */
AbstractStatistics* DenseLabelWiseStatisticsImpl::DenseLabelWiseStatisticsBinsImpl::build() {
    AbstractStatistics* result = statistics_;
    return result;
}
27 changes: 27 additions & 0 deletions python/boomer/boosting/cpp/label_wise_statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "../../common/cpp/arrays.h"
#include "../../common/cpp/statistics.h"
#include "../../common/cpp/binning.h"
#include "label_wise_rule_evaluation.h"
#include "label_wise_losses.h"
#include "statistics.h"
Expand Down Expand Up @@ -103,6 +104,32 @@ namespace boosting {

};

class DenseLabelWiseStatisticsBinsImpl : virtual public IHistogramBuilder {
michael-rapp marked this conversation as resolved.
Show resolved Hide resolved

private:

DenseLabelWiseStatisticsImpl* statistics_;

uint32 numBins_;

float64* gradients_;

float64* hessians_;

Bin* bins;

public:

DenseLabelWiseStatisticsBinsImpl(DenseLabelWiseStatisticsImpl* statistics, uint32 numBins);

~DenseLabelWiseStatisticsBinsImpl();

void onBinUpdate(uint32 binIndex, IndexedFloat32* indexedValue) override;

AbstractStatistics* build();
michael-rapp marked this conversation as resolved.
Show resolved Hide resolved

};

std::shared_ptr<ILabelWiseLoss> lossFunctionPtr_;

std::shared_ptr<IRandomAccessLabelMatrix> labelMatrixPtr_;
Expand Down
11 changes: 11 additions & 0 deletions python/boomer/common/cpp/binning.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,17 @@ class IBinning {

};

/**
* Defines an Interface for methods, which generate histograms, for bins.
*/
class IHistogramBuilder : virtual public IBinningObserver {
michael-rapp marked this conversation as resolved.
Show resolved Hide resolved

public:

virtual AbstractStatistics* build() = 0;
michael-rapp marked this conversation as resolved.
Show resolved Hide resolved

};

/**
* Assigns floating point values to bins in a way such that each bin contains approximately the same number of values.
*/
Expand Down
8 changes: 8 additions & 0 deletions python/boomer/common/cpp/tuples.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ struct IndexedFloat64 {
float64 value;
};

/**
 * A struct that stores all necessary information of a group of examples to calculate thresholds.
 */
struct Bin {
// The number of examples that belong to the group
uint32 numExamples;
// The smallest value in the group
float32 minValue;
// The largest value in the group
float32 maxValue;
};

namespace tuples {

Expand Down