From cb571e537fa1961394f01eb489c1467fce76c0e7 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 25 Sep 2023 13:56:27 +0200 Subject: [PATCH 1/5] Edit comments. --- .../mlrl/common/input/feature_vector_nominal.hpp | 10 +++++----- .../mlrl/common/input/feature_vector_numerical.hpp | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp index 6ee68925c7..2fcb65c86a 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp @@ -26,7 +26,7 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * @param numValues The number of distinct values of the nominal feature, excluding the minority value - * @param numExamples The total number of examples + * @param numExamples The total number of examples, excluding those associated with the minority value * @param minorityValue The minority value, i.e., the least frequent value, of the nominal feature */ NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 minorityValue); @@ -45,13 +45,13 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * An iterator that provides access to the indices of the examples that are associated with each value of the - * nominal feature and allows to modify them. + * nominal feature, except for the minority value, and allows to modify them. */ typedef uint32* index_iterator; /** * An iterator that provides read-only access to the indices of the examples that are associated with each value - * of the nominal feature. + * of the nominal feature, except for the minority value. 
*/ typedef const uint32* index_const_iterator; @@ -127,7 +127,7 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * Returns an `indptr_iterator` to the beginning of the indices that specify the first element in the array of - * example indices that corresponds to each value of the nominal feature. + * example indices that corresponds to each value of the nominal feature, except for the minority value. * * @return An `indptr_iterator` to the beginning */ @@ -135,7 +135,7 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * Returns an `indptr_iterator` to the end of the indices that specify the first element in the array of example - * indices that corresponds to each value of the nominal feature. + * indices that corresponds to each value of the nominal feature, except for the minority value. * * @return An `indptr_iterator` to the end */ diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp index 2d2d44513b..ae741f5c73 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp @@ -7,7 +7,8 @@ #include "mlrl/common/input/feature_vector_common.hpp" /** - * A feature vector that stores the values of training examples for a certain numerical feature. + * A feature vector that stores the values of training examples for a certain numerical feature, except for the examples + * associated with a sparse value. 
*/ class NumericalFeatureVector final : public AbstractFeatureVector { private: @@ -19,7 +20,7 @@ class NumericalFeatureVector final : public AbstractFeatureVector { public: /** - * @param numElements The number of elements in the vector + * @param numElements The number of elements in the vector, excluding those associated with the sparse value * @param sparseValue The value of sparse elements not explicitly stored in the vector */ NumericalFeatureVector(uint32 numElements, float32 sparseValue); From 552f10683dd96b1f1dae171f8f1f2d3d99503861 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 25 Sep 2023 14:01:16 +0200 Subject: [PATCH 2/5] The class NominalFeatureVector now stores the majority value, instead of the minority value, of a nominal feature. --- .../common/input/feature_vector_nominal.hpp | 28 +++++++++---------- .../common/input/feature_vector_nominal.cpp | 8 +++--- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp index 2fcb65c86a..2529272357 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp @@ -6,8 +6,8 @@ #include "mlrl/common/input/feature_vector_common.hpp" /** - * A feature vector that stores the indices of the examples that are associated with each value, except for the minority - * value, i.e., the least frequent value, of a nominal feature. + * A feature vector that stores the indices of the examples that are associated with each value, except for the majority + * value, i.e., the most frequent value, of a nominal feature. 
*/ class NominalFeatureVector final : public AbstractFeatureVector { private: @@ -20,16 +20,16 @@ class NominalFeatureVector final : public AbstractFeatureVector { const uint32 numValues_; - const int32 minorityValue_; + const int32 majorityValue_; public: /** - * @param numValues The number of distinct values of the nominal feature, excluding the minority value - * @param numExamples The total number of examples, excluding those associated with the minority value - * @param minorityValue The minority value, i.e., the least frequent value, of the nominal feature + * @param numValues The number of distinct values of the nominal feature, excluding the majority value + * @param numExamples The total number of examples, excluding those associated with the majority value + * @param majorityValue The majority value, i.e., the most frequent value, of the nominal feature */ - NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 minorityValue); + NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue); ~NominalFeatureVector() override; @@ -45,13 +45,13 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * An iterator that provides access to the indices of the examples that are associated with each value of the - * nominal feature, except for the minority value, and allows to modify them. + * nominal feature, except for the majority value, and allows to modify them. */ typedef uint32* index_iterator; /** * An iterator that provides read-only access to the indices of the examples that are associated with each value - * of the nominal feature, except for the minority value. + * of the nominal feature, except for the majority value. 
*/ typedef const uint32* index_const_iterator; @@ -127,7 +127,7 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * Returns an `indptr_iterator` to the beginning of the indices that specify the first element in the array of - * example indices that corresponds to each value of the nominal feature, except for the minority value. + * example indices that corresponds to each value of the nominal feature, except for the majority value. * * @return An `indptr_iterator` to the beginning */ @@ -135,18 +135,18 @@ class NominalFeatureVector final : public AbstractFeatureVector { /** * Returns an `indptr_iterator` to the end of the indices that specify the first element in the array of example - * indices that corresponds to each value of the nominal feature, except for the minority value. + * indices that corresponds to each value of the nominal feature, except for the majority value. * * @return An `indptr_iterator` to the end */ indptr_iterator indptr_end(); /** - * Returns the minority value, i.e., the least frequent value, of the nominal feature. + * Returns the majority value, i.e., the most frequent value, of the nominal feature. 
* - * @return The minority value + * @return The majority value */ - int32 getMinorityValue() const; + int32 getMajorityValue() const; uint32 getNumElements() const override; }; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp index 0ec53f4e6f..5c1a7286f2 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp @@ -1,8 +1,8 @@ #include "mlrl/common/input/feature_vector_nominal.hpp" -NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 minorityValue) +NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue) : values_(new int32[numValues]), indices_(new uint32[numExamples]), indptr_(new uint32[numValues + 1]), - numValues_(numValues), minorityValue_(minorityValue) { + numValues_(numValues), majorityValue_(majorityValue) { indptr_[numValues] = numExamples; } @@ -52,8 +52,8 @@ NominalFeatureVector::indptr_iterator NominalFeatureVector::indptr_end() { return &indptr_[numValues_]; } -int32 NominalFeatureVector::getMinorityValue() const { - return minorityValue_; +int32 NominalFeatureVector::getMajorityValue() const { + return majorityValue_; } uint32 NominalFeatureVector::getNumElements() const { From e2c5b25baeb1595e464093cf405ac78de0d8a479 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 25 Sep 2023 14:01:59 +0200 Subject: [PATCH 3/5] Remove final keyword from class NominalFeatureVector. 
--- .../common/include/mlrl/common/input/feature_vector_nominal.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp index 2529272357..2d92e556e6 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp @@ -9,7 +9,7 @@ * A feature vector that stores the indices of the examples that are associated with each value, except for the majority * value, i.e., the most frequent value, of a nominal feature. */ -class NominalFeatureVector final : public AbstractFeatureVector { +class NominalFeatureVector : public AbstractFeatureVector { private: int32* values_; From 5ce971e0e2fd62243f2b25d91d0ea3a9894e0087 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 25 Sep 2023 14:08:42 +0200 Subject: [PATCH 4/5] Add class BinaryFeatureVector. 
--- .../common/input/feature_vector_binary.hpp | 21 +++++++++++++++++++ cpp/subprojects/common/meson.build | 1 + .../common/input/feature_vector_binary.cpp | 7 +++++++ 3 files changed, 29 insertions(+) create mode 100644 cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp create mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp new file mode 100644 index 0000000000..f0edeb619e --- /dev/null +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp @@ -0,0 +1,21 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/input/feature_vector_nominal.hpp" + +/** + * A feature vector that stores the indices of all examples that are associated with the minority value, i.e., the least + * frequent value, of a binary feature. 
+ */ +class BinaryFeatureVector final : public NominalFeatureVector { + public: + + /** + * @param numExamples The number of examples associated with the minority value + * @param minorityValue The minority value, i.e., the least frequent value, of the binary feature + * @param majorityValue The majority value, i.e., the most frequent value, of the binary feature + */ + BinaryFeatureVector(uint32 numExamples, int32 minorityValue, int32 majorityValue); +}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index e539c878c4..88d0ea250d 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -40,6 +40,7 @@ source_files = [ 'src/mlrl/common/input/feature_type_numerical.cpp', 'src/mlrl/common/input/feature_type_ordinal.cpp', 'src/mlrl/common/input/feature_vector.cpp', + 'src/mlrl/common/input/feature_vector_binary.cpp', 'src/mlrl/common/input/feature_vector_common.cpp', 'src/mlrl/common/input/feature_vector_equal.cpp', 'src/mlrl/common/input/feature_vector_nominal.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp new file mode 100644 index 0000000000..ff95ef0409 --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp @@ -0,0 +1,7 @@ +#include "mlrl/common/input/feature_vector_binary.hpp" + +BinaryFeatureVector::BinaryFeatureVector(uint32 numExamples, int32 minorityValue, int32 majorityValue) + : NominalFeatureVector(1, numExamples, majorityValue) { + this->values_begin()[0] = minorityValue; + this->indptr_begin()[0] = 0; +} From 6f3469f4b1f16f062500a6a9ef8ed303c1e846fa Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Mon, 25 Sep 2023 14:11:37 +0200 Subject: [PATCH 5/5] Rename constructor arguments. 
--- .../include/mlrl/common/input/feature_vector_binary.hpp | 5 +++-- .../include/mlrl/common/input/feature_vector_nominal.hpp | 5 +++-- .../common/src/mlrl/common/input/feature_vector_binary.cpp | 4 ++-- .../common/src/mlrl/common/input/feature_vector_nominal.cpp | 6 +++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp index f0edeb619e..5590cd527e 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp @@ -13,9 +13,10 @@ class BinaryFeatureVector final : public NominalFeatureVector { public: /** - * @param numExamples The number of examples associated with the minority value + * @param numElements The number of elements in the vector, i.e., the number of examples associated with the + * minority value * @param minorityValue The minority value, i.e., the least frequent value, of the binary feature * @param majorityValue The majority value, i.e., the most frequent value, of the binary feature */ - BinaryFeatureVector(uint32 numExamples, int32 minorityValue, int32 majorityValue); + BinaryFeatureVector(uint32 numElements, int32 minorityValue, int32 majorityValue); }; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp index 2d92e556e6..753922acd6 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp @@ -26,10 +26,11 @@ class NominalFeatureVector : public AbstractFeatureVector { /** * @param numValues The number of distinct values of the nominal feature, excluding the majority value - * @param numExamples The total number of examples, excluding those associated 
with the majority value + * @param numElements The number of elements in the vector, i.e., the number of examples not associated with + * the majority value * @param majorityValue The majority value, i.e., the most frequent value, of the nominal feature */ - NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue); + NominalFeatureVector(uint32 numValues, uint32 numElements, int32 majorityValue); ~NominalFeatureVector() override; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp index ff95ef0409..9fd48c7917 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/input/feature_vector_binary.hpp" -BinaryFeatureVector::BinaryFeatureVector(uint32 numExamples, int32 minorityValue, int32 majorityValue) - : NominalFeatureVector(1, numExamples, majorityValue) { +BinaryFeatureVector::BinaryFeatureVector(uint32 numElements, int32 minorityValue, int32 majorityValue) + : NominalFeatureVector(1, numElements, majorityValue) { this->values_begin()[0] = minorityValue; this->indptr_begin()[0] = 0; } diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp index 5c1a7286f2..f4fead5202 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp @@ -1,9 +1,9 @@ #include "mlrl/common/input/feature_vector_nominal.hpp" -NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue) - : values_(new int32[numValues]), indices_(new uint32[numExamples]), indptr_(new uint32[numValues + 1]), +NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numElements, int32 
majorityValue) + : values_(new int32[numValues]), indices_(new uint32[numElements]), indptr_(new uint32[numValues + 1]), numValues_(numValues), majorityValue_(majorityValue) { - indptr_[numValues] = numExamples; + indptr_[numValues] = numElements; } NominalFeatureVector::~NominalFeatureVector() {