diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp new file mode 100644 index 0000000000..5590cd527e --- /dev/null +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp @@ -0,0 +1,22 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/input/feature_vector_nominal.hpp" + +/** + * A feature vector that stores the indices of all examples that are associated with the minority value, i.e., the least + * frequent value, of a binary feature. + */ +class BinaryFeatureVector final : public NominalFeatureVector { + public: + + /** + * @param numElements The number of elements in the vector, i.e., the number of examples associated with the + * minority value + * @param minorityValue The minority value, i.e., the least frequent value, of the binary feature + * @param majorityValue The majority value, i.e., the most frequent value, of the binary feature + */ + BinaryFeatureVector(uint32 numElements, int32 minorityValue, int32 majorityValue); +}; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp index 6ee68925c7..753922acd6 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp @@ -6,10 +6,10 @@ #include "mlrl/common/input/feature_vector_common.hpp" /** - * A feature vector that stores the indices of the examples that are associated with each value, except for the minority - * value, i.e., the least frequent value, of a nominal feature. + * A feature vector that stores the indices of the examples that are associated with each value, except for the majority + * value, i.e., the most frequent value, of a nominal feature. 
*/ -class NominalFeatureVector final : public AbstractFeatureVector { +class NominalFeatureVector : public AbstractFeatureVector { private: int32* values_; @@ -20,16 +20,17 @@ class NominalFeatureVector final : public AbstractFeatureVector { const uint32 numValues_; - const int32 minorityValue_; + const int32 majorityValue_; public: /** - * @param numValues The number of distinct values of the nominal feature, excluding the minority value - * @param numExamples The total number of examples - * @param minorityValue The minority value, i.e., the least frequent value, of the nominal feature + * @param numValues The number of distinct values of the nominal feature, excluding the majority value + * @param numElements The number of elements in the vector, i.e., the number of examples not associated with + * the majority value + * @param majorityValue The majority value, i.e., the most frequent value, of the nominal feature */ - NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 minorityValue); + NominalFeatureVector(uint32 numValues, uint32 numElements, int32 majorityValue); ~NominalFeatureVector() override; @@ -45,13 +46,13 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * An iterator that provides access to the indices of the examples that are associated with each value of the - * nominal feature and allows to modify them. + * nominal feature, except for the majority value, and allows to modify them. */ typedef uint32* index_iterator; /** * An iterator that provides read-only access to the indices of the examples that are associated with each value - * of the nominal feature. + * of the nominal feature, except for the majority value. 
*/ typedef const uint32* index_const_iterator; @@ -127,7 +128,7 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * Returns an `indptr_iterator` to the beginning of the indices that specify the first element in the array of - * example indices that corresponds to each value of the nominal feature. + * example indices that corresponds to each value of the nominal feature, except for the majority value. * * @return An `indptr_iterator` to the beginning */ @@ -135,18 +136,18 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * Returns an `indptr_iterator` to the end of the indices that specify the first element in the array of example - * indices that corresponds to each value of the nominal feature. + * indices that corresponds to each value of the nominal feature, except for the majority value. * * @return An `indptr_iterator` to the end */ indptr_iterator indptr_end(); /** - * Returns the minority value, i.e., the least frequent value, of the nominal feature. + * Returns the majority value, i.e., the most frequent value, of the nominal feature. * - * @return The minority value + * @return The majority value */ - int32 getMinorityValue() const; + int32 getMajorityValue() const; uint32 getNumElements() const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp index 2d2d44513b..ae741f5c73 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp @@ -7,7 +7,8 @@ #include "mlrl/common/input/feature_vector_common.hpp" /** - * A feature vector that stores the values of training examples for a certain numerical feature. + * A feature vector that stores the values of training examples for a certain numerical feature, except for the examples + * associated with a sparse value. 
*/ class NumericalFeatureVector final : public AbstractFeatureVector { private: @@ -19,7 +20,7 @@ class NumericalFeatureVector final : public AbstractFeatureVector { public: /** - * @param numElements The number of elements in the vector + * @param numElements The number of elements in the vector, excluding those associated with the sparse value * @param sparseValue The value of sparse elements not explicitly stored in the vector */ NumericalFeatureVector(uint32 numElements, float32 sparseValue); diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index e539c878c4..88d0ea250d 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -40,6 +40,7 @@ source_files = [ 'src/mlrl/common/input/feature_type_numerical.cpp', 'src/mlrl/common/input/feature_type_ordinal.cpp', 'src/mlrl/common/input/feature_vector.cpp', + 'src/mlrl/common/input/feature_vector_binary.cpp', 'src/mlrl/common/input/feature_vector_common.cpp', 'src/mlrl/common/input/feature_vector_equal.cpp', 'src/mlrl/common/input/feature_vector_nominal.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp new file mode 100644 index 0000000000..9fd48c7917 --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp @@ -0,0 +1,7 @@ +#include "mlrl/common/input/feature_vector_binary.hpp" + +BinaryFeatureVector::BinaryFeatureVector(uint32 numElements, int32 minorityValue, int32 majorityValue) + : NominalFeatureVector(1, numElements, majorityValue) { + this->values_begin()[0] = minorityValue; + this->indptr_begin()[0] = 0; +} diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp index 0ec53f4e6f..f4fead5202 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp +++ 
b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp @@ -1,9 +1,9 @@ #include "mlrl/common/input/feature_vector_nominal.hpp" -NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 minorityValue) - : values_(new int32[numValues]), indices_(new uint32[numExamples]), indptr_(new uint32[numValues + 1]), - numValues_(numValues), minorityValue_(minorityValue) { - indptr_[numValues] = numExamples; +NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numElements, int32 majorityValue) + : values_(new int32[numValues]), indices_(new uint32[numElements]), indptr_(new uint32[numValues + 1]), + numValues_(numValues), majorityValue_(majorityValue) { + indptr_[numValues] = numElements; } NominalFeatureVector::~NominalFeatureVector() { @@ -52,8 +52,8 @@ NominalFeatureVector::indptr_iterator NominalFeatureVector::indptr_end() { return &indptr_[numValues_]; } -int32 NominalFeatureVector::getMinorityValue() const { - return minorityValue_; +int32 NominalFeatureVector::getMajorityValue() const { + return majorityValue_; } uint32 NominalFeatureVector::getNumElements() const {