diff --git a/cpp/subprojects/boosting/include/mlrl/boosting/data/vector_statistic_decomposable_bit.hpp b/cpp/subprojects/boosting/include/mlrl/boosting/data/vector_statistic_decomposable_bit.hpp new file mode 100644 index 000000000..f9d90b75b --- /dev/null +++ b/cpp/subprojects/boosting/include/mlrl/boosting/data/vector_statistic_decomposable_bit.hpp @@ -0,0 +1,183 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/boosting/data/view_statistic_decomposable_bit.hpp" +#include "mlrl/common/data/view_composite.hpp" +#include "mlrl/common/indices/index_vector_complete.hpp" +#include "mlrl/common/indices/index_vector_partial.hpp" + +namespace boosting { + + /** + * A one-dimensional vector that stores aggregated gradients and Hessians that have been calculated using a + * decomposable loss function in a pair of bit vectors. For each element in the vector a single gradient and + * Hessian is stored. + */ + class BitDecomposableStatisticVector final + : public CompositeView, AllocatedBitVector> { + public: + + /** + * @param numElements The number of gradients and Hessians in the vector + * @param numBitsPerElement The number of bits per element in the bit vector + * @param init True, if all gradients and Hessians in the vector should be initialized with + * zero, false otherwise + */ + BitDecomposableStatisticVector(uint32 numElements, uint32 numBitsPerElement, bool init = false); + + /** + * @param other A reference to an object of type `BitDecomposableStatisticVector` to be copied + */ + BitDecomposableStatisticVector(const BitDecomposableStatisticVector& other); + + /** + * Returns the number of gradients and Hessians in the vector. + * + * @return The number of gradients and Hessians + */ + uint32 getNumElements() const; + + /** + * Returns the number of bits per gradient or Hessian in the vector. 
+ * + * @return The number of bits per gradient or Hessian + */ + uint32 getNumBitsPerElement() const; + + /** + * Adds all gradients and Hessians in another vector to this vector. + * + * @param vector A reference to an object of type `BitDecomposableStatisticVector` that stores the gradients + * and Hessians to be added to this vector + */ + void add(const BitDecomposableStatisticVector& vector); + + /** + * Adds all gradients and Hessians in a single row of a `BitDecomposableStatisticView` to this vector. + * + * @param view A reference to an object of type `BitDecomposableStatisticView` that stores the gradients + * and Hessians to be added to this vector + * @param row The index of the row to be added to this vector + */ + void add(const BitDecomposableStatisticView& view, uint32 row); + + /** + * Adds all gradients and Hessians in a single row of a `BitDecomposableStatisticView` to this vector. The + * gradients and Hessians to be added are multiplied by a specific weight. + * + * @param view A reference to an object of type `BitDecomposableStatisticView` that stores the gradients + * and Hessians to be added to this vector + * @param row The index of the row to be added to this vector + * @param weight The weight, the gradients and Hessians should be multiplied by + */ + void add(const BitDecomposableStatisticView& view, uint32 row, float64 weight); + + /** + * Removes all gradients and Hessians in a single row of a `BitDecomposableStatisticView` from this vector. + * + * @param view A reference to an object of type `BitDecomposableStatisticView` that stores the gradients + * and Hessians to be removed from this vector + * @param row The index of the row to be removed from this vector + */ + void remove(const BitDecomposableStatisticView& view, uint32 row); + + /** + * Removes all gradients and Hessians in a single row of a `BitDecomposableStatisticView` from this vector. The + * gradients and Hessians to be removed are multiplied by a specific weight. 
+ * + * @param view A reference to an object of type `BitDecomposableStatisticView` that stores the gradients and + * Hessians to be removed from this vector + * @param row The index of the row to be removed from this vector + * @param weight The weight, the gradients and Hessians should be multiplied by + */ + void remove(const BitDecomposableStatisticView& view, uint32 row, float64 weight); + + /** + * Adds certain gradients and Hessians in a single row of a `BitDecomposableStatisticView`, whose positions + * are given as a `CompleteIndexVector`, to this vector. + * + * @param view A reference to an object of type `BitDecomposableStatisticView` that stores the gradients and + * Hessians to be added to this vector + * @param row The index of the row to be added to this vector + * @param indices A reference to a `CompleteIndexVector` that provides access to the indices + */ + void addToSubset(const BitDecomposableStatisticView& view, uint32 row, const CompleteIndexVector& indices); + + /** + * Adds certain gradients and Hessians in a single row of a `BitDecomposableStatisticView`, whose positions + * are given as a `PartialIndexVector`, to this vector. + * + * @param view A reference to an object of type `BitDecomposableStatisticView` that stores the gradients and + * Hessians to be added to this vector + * @param row The index of the row to be added to this vector + * @param indices A reference to a `PartialIndexVector` that provides access to the indices + */ + void addToSubset(const BitDecomposableStatisticView& view, uint32 row, const PartialIndexVector& indices); + + /** + * Adds certain gradients and Hessians in a single row of a `BitDecomposableStatisticView`, whose positions + * are given as a `CompleteIndexVector`, to this vector. The gradients and Hessians to be added are + * multiplied by a specific weight. 
+ * + * @param view A reference to an object of type `BitDecomposableStatisticView` that stores the gradients and + * Hessians to be added to this vector + * @param row The index of the row to be added to this vector + * @param indices A reference to a `CompleteIndexVector` that provides access to the indices + * @param weight The weight, the gradients and Hessians should be multiplied by + */ + void addToSubset(const BitDecomposableStatisticView& view, uint32 row, const CompleteIndexVector& indices, + float64 weight); + + /** + * Adds certain gradients and Hessians in a single row of a `BitDecomposableStatisticView`, whose positions + * are given as a `PartialIndexVector`, to this vector. The gradients and Hessians to be added are + * multiplied by a specific weight. + * + * @param view A reference to an object of type `BitDecomposableStatisticView` that stores the gradients and + * Hessians to be added to this vector + * @param row The index of the row to be added to this vector + * @param indices A reference to a `PartialIndexVector` that provides access to the indices + * @param weight The weight, the gradients and Hessians should be multiplied by + */ + void addToSubset(const BitDecomposableStatisticView& view, uint32 row, const PartialIndexVector& indices, + float64 weight); + + /** + * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients + * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that + * correspond to the positions provided by a `CompleteIndexVector`. 
+ * + * @param first A reference to an object of type `BitDecomposableStatisticVector` that stores the + * gradients and Hessians in the first vector + * @param firstIndices A reference to an object of type `CompleteIndexVector` that provides access to the + * indices of the gradients and Hessians in the first vector to be considered + * @param second A reference to an object of type `BitDecomposableStatisticVector` that stores the + * gradients and Hessians in the second vector + */ + void difference(const BitDecomposableStatisticVector& first, const CompleteIndexVector& firstIndices, + const BitDecomposableStatisticVector& second); + + /** + * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients + * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that + * correspond to the positions provided by a `PartialIndexVector`. + * + * @param first A reference to an object of type `BitDecomposableStatisticVector` that stores the + * gradients and Hessians in the first vector + * @param firstIndices A reference to an object of type `PartialIndexVector` that provides access to the + * indices of the gradients and Hessians in the first vector to be considered + * @param second A reference to an object of type `BitDecomposableStatisticVector` that stores the + * gradients and Hessians in the second vector + */ + void difference(const BitDecomposableStatisticVector& first, const PartialIndexVector& firstIndices, + const BitDecomposableStatisticVector& second); + + /** + * Sets all gradients and Hessians stored in the vector to zero. 
+ */ + void clear(); + }; + +} diff --git a/cpp/subprojects/boosting/meson.build b/cpp/subprojects/boosting/meson.build index 4b878820e..d43affcb3 100644 --- a/cpp/subprojects/boosting/meson.build +++ b/cpp/subprojects/boosting/meson.build @@ -7,6 +7,7 @@ source_files = [ 'src/mlrl/boosting/binning/label_binning_no.cpp', 'src/mlrl/boosting/data/matrix_c_contiguous_numeric.cpp', 'src/mlrl/boosting/data/matrix_sparse_set_numeric.cpp', + 'src/mlrl/boosting/data/vector_statistic_decomposable_bit.cpp', 'src/mlrl/boosting/data/vector_statistic_decomposable_dense.cpp', 'src/mlrl/boosting/data/vector_statistic_decomposable_sparse.cpp', 'src/mlrl/boosting/data/vector_statistic_non_decomposable_dense.cpp', diff --git a/cpp/subprojects/boosting/src/mlrl/boosting/data/vector_statistic_decomposable_bit.cpp b/cpp/subprojects/boosting/src/mlrl/boosting/data/vector_statistic_decomposable_bit.cpp new file mode 100644 index 000000000..257563694 --- /dev/null +++ b/cpp/subprojects/boosting/src/mlrl/boosting/data/vector_statistic_decomposable_bit.cpp @@ -0,0 +1,141 @@ +#include "mlrl/boosting/data/vector_statistic_decomposable_bit.hpp" + +namespace boosting { + + static inline void copyInternally(const BitVector& firstView, BitVector& secondView) { /* copies the underlying array of `firstView` into `secondView` */ + typename BitVector::const_iterator begin = firstView.cbegin(); + uint32 arraySize = firstView.cend() - begin; + util::copyView(begin, secondView.begin(), arraySize); + } + + static inline void addInternally(BitVector& firstView, const BitVector& secondView) { /* adds `secondView` to `firstView` by processing the underlying arrays as a whole */ + typename BitVector::iterator begin = firstView.begin(); + uint32 arraySize = firstView.end() - begin; + util::addToView(begin, secondView.cbegin(), arraySize); + } + + static inline void addInternally(BitVector& firstView, const BitVector& secondView, + const PartialIndexVector& indices) { /* adds `secondView[indices[i]]` to the i-th element of `firstView` */ + uint32 numIndices = indices.getNumElements(); + + for (uint32 i = 0; i < numIndices; i++) { + uint32 index = indices[i]; + uint32 value = secondView[index]; + firstView.set(i, firstView[i] + value); /* accumulate onto the current value; `set(i, value)` alone would overwrite it */ + } + } + 
+ + static inline void removeInternally(BitVector& firstView, const BitVector& secondView) { /* removes `secondView` from `firstView` by processing the underlying arrays as a whole */ + typename BitVector::iterator begin = firstView.begin(); + uint32 arraySize = firstView.end() - begin; /* length of the underlying array as spanned by begin()/end() */ + util::removeFromView(begin, secondView.cbegin(), arraySize); + } + + static inline void differenceInternally(BitVector& firstView, const BitVector& secondView, + const BitVector& thirdView) { /* writes `secondView - thirdView` into `firstView`, processing the underlying arrays as a whole */ + typename BitVector::iterator begin = firstView.begin(); + uint32 arraySize = firstView.end() - begin; + util::setViewToDifference(begin, secondView.cbegin(), thirdView.cbegin(), arraySize); + } + + static inline void differenceInternally(BitVector& firstView, const BitVector& secondView, + const PartialIndexVector& indices, const BitVector& thirdView) { /* element-wise variant: only `secondView` is accessed through `indices` */ + uint32 numIndices = indices.getNumElements(); + + for (uint32 i = 0; i < numIndices; i++) { + uint32 index = indices[i]; + uint32 firstValue = secondView[index]; /* minuend, looked up via the index vector */ + uint32 secondValue = thirdView[i]; /* subtrahend, read at the running position */ + firstView.set(i, firstValue - secondValue); /* NOTE(review): unsigned subtraction wraps around if secondValue > firstValue - confirm that callers rule this out */ + } + } + + BitDecomposableStatisticVector::BitDecomposableStatisticVector(uint32 numElements, uint32 numBitsPerElement, + bool init) + : CompositeView, AllocatedBitVector>( + AllocatedBitVector(numElements, numBitsPerElement, init), + AllocatedBitVector(numElements, numBitsPerElement, init)) {} + + BitDecomposableStatisticVector::BitDecomposableStatisticVector(const BitDecomposableStatisticVector& other) + : BitDecomposableStatisticVector(other.getNumElements(), other.getNumBitsPerElement()) { /* allocates with `init` left at its default (false), then copies both views from `other` */ + copyInternally(other.firstView, this->firstView); + copyInternally(other.secondView, this->secondView); + } + + uint32 BitDecomposableStatisticVector::getNumElements() const { + return this->firstView.numElements; /* both views are created with the same number of elements, so the first one is representative */ + } + + uint32 BitDecomposableStatisticVector::getNumBitsPerElement() const { + return this->firstView.numBitsPerElement; + } + + void BitDecomposableStatisticVector::add(const BitDecomposableStatisticVector& vector) { + addInternally(this->firstView, vector.firstView); + 
addInternally(this->secondView, vector.secondView); + } + + void BitDecomposableStatisticVector::add(const BitDecomposableStatisticView& view, uint32 row) { + addInternally(this->firstView, view.firstView[row]); + addInternally(this->secondView, view.secondView[row]); + } + + void BitDecomposableStatisticVector::add(const BitDecomposableStatisticView& view, uint32 row, float64 weight) { + // TODO Implement the weighted addition; until then every call throws (NOTE(review): `std::runtime_error` needs <stdexcept>, which this file does not include directly) + throw std::runtime_error("not implemented"); + } + + void BitDecomposableStatisticVector::remove(const BitDecomposableStatisticView& view, uint32 row) { + removeInternally(this->firstView, view.firstView[row]); + removeInternally(this->secondView, view.secondView[row]); + } + + void BitDecomposableStatisticVector::remove(const BitDecomposableStatisticView& view, uint32 row, float64 weight) { + // TODO Implement the weighted removal; until then every call throws + throw std::runtime_error("not implemented"); + } + + void BitDecomposableStatisticVector::addToSubset(const BitDecomposableStatisticView& view, uint32 row, + const CompleteIndexVector& indices) { /* `indices` is not consulted: the whole row is added */ + addInternally(this->firstView, view.firstView[row]); + addInternally(this->secondView, view.secondView[row]); + } + + void BitDecomposableStatisticVector::addToSubset(const BitDecomposableStatisticView& view, uint32 row, + const PartialIndexVector& indices) { /* only the positions of the row that are listed in `indices` are read */ + addInternally(this->firstView, view.firstView[row], indices); + addInternally(this->secondView, view.secondView[row], indices); + } + + void BitDecomposableStatisticVector::addToSubset(const BitDecomposableStatisticView& view, uint32 row, + const CompleteIndexVector& indices, float64 weight) { + // TODO Implement the weighted addition for complete index vectors; until then every call throws + throw std::runtime_error("not implemented"); + } + + void BitDecomposableStatisticVector::addToSubset(const BitDecomposableStatisticView& view, uint32 row, + const PartialIndexVector& indices, float64 weight) { + // TODO Implement the weighted addition for partial index vectors; until then every call throws + throw std::runtime_error("not implemented"); + } + + void BitDecomposableStatisticVector::difference(const BitDecomposableStatisticVector& first, + const 
CompleteIndexVector& firstIndices, + const BitDecomposableStatisticVector& second) { /* `firstIndices` is not consulted here; presumably a complete index vector selects every position - TODO confirm */ + differenceInternally(this->firstView, first.firstView, second.firstView); + differenceInternally(this->secondView, first.secondView, second.secondView); + } + + void BitDecomposableStatisticVector::difference(const BitDecomposableStatisticVector& first, + const PartialIndexVector& firstIndices, + const BitDecomposableStatisticVector& second) { /* `firstIndices` is applied to `first` only; `second` is read position by position */ + differenceInternally(this->firstView, first.firstView, firstIndices, second.firstView); + differenceInternally(this->secondView, first.secondView, firstIndices, second.secondView); + } + + void BitDecomposableStatisticVector::clear() { + this->firstView.clear(); + this->secondView.clear(); + } + +} diff --git a/cpp/subprojects/common/include/mlrl/common/data/view_vector_bit.hpp b/cpp/subprojects/common/include/mlrl/common/data/view_vector_bit.hpp index 8a0e17090..f5d2bf034 100644 --- a/cpp/subprojects/common/include/mlrl/common/data/view_vector_bit.hpp +++ b/cpp/subprojects/common/include/mlrl/common/data/view_vector_bit.hpp @@ -77,11 +77,49 @@ class MLRLCOMMON_API BitVector : public BitView { typedef T type; /** - * Sets all values stored in the bit vector to zero. + * An iterator that provides read-only access to the view's underlying array. */ - void clear() { - util::setViewToZeros(BaseView::array, - util::getBitArraySize(numElements, BitView::numBitsPerElement)); + typedef const typename BaseView::value_type* const_iterator; + + /** + * An iterator that provides access to the view's underlying array and allows modifying it. + */ + typedef typename BaseView::value_type* iterator; + + /** + * Returns a `const_iterator` to the beginning of the view's underlying array. + * + * @return A `const_iterator` to the beginning + */ + const_iterator cbegin() const { + return BaseView::array; + } + + /** + * Returns a `const_iterator` to the end of the view's underlying array. 
+ * + * @return A `const_iterator` to the end, located `util::getBitArraySize(...)` positions behind the beginning + */ + const_iterator cend() const { + return &BaseView::array[util::getBitArraySize(numElements, BitView::numBitsPerElement)]; + } + + /** + * Returns an `iterator` to the beginning of the view's underlying array. + * + * @return An `iterator` to the beginning + */ + iterator begin() { + return BaseView::array; + } + + /** + * Returns an `iterator` to the end of the view's underlying array. + * + * @return An `iterator` to the end, located `util::getBitArraySize(...)` positions behind the beginning + */ + iterator end() { + return &BaseView::array[util::getBitArraySize(numElements, BitView::numBitsPerElement)]; } /** @@ -111,6 +149,15 @@ class MLRLCOMMON_API BitVector : public BitView { BaseView::array[offset] &= ~bitMask; BaseView::array[offset] |= value << numShifts; } + + /** + * Sets all values stored in the bit vector to zero by zeroing the view's underlying array. + */ + void clear() { + iterator begin = this->begin(); /* the local shares its name with the member function; `this->` selects the function */ + uint32 arraySize = util::getBitArraySize(numElements, BitView::numBitsPerElement); + util::setViewToZeros(begin, arraySize); + } }; /** @@ -175,6 +222,15 @@ class BitVectorDecorator : public BitVector { virtual ~BitVectorDecorator() override {} + /** + * Returns the number of bits per element in the bit vector, as stored by the decorated view. + * + * @return The number of bits per element + */ + uint32 getNumBitsPerElement() const { + return this->view.numBitsPerElement; + } + /** * Returns the value of the element at a specific position. *