From 60a3cce1e2857c797b47ddc9f0fb66584ba618c1 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 19 Sep 2023 20:25:21 +0200 Subject: [PATCH 1/4] Add class IFeatureVector. --- .../include/mlrl/common/input/feature_vector.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp index 8bc2feb680..956b97f2b8 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp @@ -6,6 +6,16 @@ #include "mlrl/common/data/vector_sparse_array.hpp" #include "mlrl/common/input/missing_feature_vector.hpp" +/** + * Defines an interface for all one-dimensional vectors that store the values of training examples for a certain + * feature. It extends `IOneDimensionalView` and declares nothing but a virtual destructor. + */ +class IFeatureVector : public IOneDimensionalView { + public: + + virtual ~IFeatureVector() override {}; +}; + /** * An one-dimensional sparse vector that stores the values of training examples for a certain feature, as well as the * indices of examples with missing feature values. From 5578af05edcc4c806b6de6de0c0f02df5bbfb287 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 19 Sep 2023 20:30:14 +0200 Subject: [PATCH 2/4] Add class AbstractFeatureVector. 
--- .../common/input/feature_vector_common.hpp | 59 +++++++++++++++++++ cpp/subprojects/common/meson.build | 1 + .../common/input/feature_vector_common.cpp | 0 3 files changed, 60 insertions(+) create mode 100644 cpp/subprojects/common/include/mlrl/common/input/feature_vector_common.hpp create mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_common.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_common.hpp new file mode 100644 index 0000000000..8241587c17 --- /dev/null +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_common.hpp @@ -0,0 +1,59 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/data/vector_dok_binary.hpp" +#include "mlrl/common/input/feature_vector.hpp" + +#include + +/** + * An abstract base class for all feature vectors that store the values of training examples for a certain feature. It + * allows keeping track of the indices of examples with missing feature values and holds a `BinaryDokVector` member for this purpose. + */ +class AbstractFeatureVector : public IFeatureVector { + private: + + BinaryDokVector missingIndices_; + + public: + + virtual ~AbstractFeatureVector() override {}; + + /** + * An iterator that provides read-only access to the indices of examples with missing feature values. It is an alias for `BinaryDokVector::index_const_iterator`. + */ + typedef BinaryDokVector::index_const_iterator missing_index_const_iterator; + + /** + * Returns a `missing_index_const_iterator` to the beginning of the indices of examples with missing feature + * values. + * + * @return A `missing_index_const_iterator` to the beginning + */ + missing_index_const_iterator missing_indices_cbegin() const; + + /** + * Returns a `missing_index_const_iterator` to the end of the indices of examples with missing feature values. 
+ * + * @return A `missing_index_const_iterator` to the end + */ + missing_index_const_iterator missing_indices_cend() const; + + /** + * Sets whether the example at a specific index is missing a feature value or not. + * + * @param index The index of the example + * @param missing True, if the example at the given index is missing a feature value, false otherwise + */ + void setMissing(uint32 index, bool missing); + + /** + * Returns whether the example at a specific index is missing a feature value or not. + * + * @param index The index of the example + * @return True, if the example at the given index is missing a feature value, false otherwise + */ + bool isMissing(uint32 index) const; +}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index e4e19c0220..fa637ab6df 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -40,6 +40,7 @@ source_files = [ 'src/mlrl/common/input/feature_type_numerical.cpp', 'src/mlrl/common/input/feature_type_ordinal.cpp', 'src/mlrl/common/input/feature_vector.cpp', + 'src/mlrl/common/input/feature_vector_common.cpp', 'src/mlrl/common/input/label_matrix_c_contiguous.cpp', 'src/mlrl/common/input/label_matrix_csc.cpp', 'src/mlrl/common/input/label_matrix_csr.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp new file mode 100644 index 0000000000..e69de29bb2 From 9450cad113dfd0d909420717877bd1a942c95b61 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 19 Sep 2023 20:33:44 +0200 Subject: [PATCH 3/4] Add class NumericalFeatureVector. 
--- .../common/input/feature_vector_numerical.hpp | 86 +++++++++++++++++++ cpp/subprojects/common/meson.build | 1 + .../common/input/feature_vector_numerical.cpp | 28 ++++++ 3 files changed, 115 insertions(+) create mode 100644 cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp create mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_vector_numerical.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp new file mode 100644 index 0000000000..2d2d44513b --- /dev/null +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp @@ -0,0 +1,86 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/data/vector_sparse_array.hpp" +#include "mlrl/common/input/feature_vector_common.hpp" + +/** + * A feature vector that stores the values of training examples for a certain numerical feature. It wraps a `SparseArrayVector` and additionally keeps the value of sparse elements. + */ +class NumericalFeatureVector final : public AbstractFeatureVector { + private: + + SparseArrayVector vector_; + + const float32 sparseValue_; + + public: + + /** + * @param numElements The number of elements in the vector + * @param sparseValue The value of sparse elements not explicitly stored in the vector + */ + NumericalFeatureVector(uint32 numElements, float32 sparseValue); + + /** + * An iterator that provides access to the feature values in the vector and allows modifying them. + */ + typedef SparseArrayVector::iterator iterator; + + /** + * An iterator that provides read-only access to the feature values in the vector. + */ + typedef SparseArrayVector::const_iterator const_iterator; + + /** + * Returns an `iterator` to the beginning of the vector. + * + * @return An `iterator` to the beginning + */ + iterator begin(); + + /** + * Returns an `iterator` to the end of the vector. 
+ * + * @return An `iterator` to the end + */ + iterator end(); + + /** + * Returns a `const_iterator` to the beginning of the vector. + * + * @return A `const_iterator` to the beginning + */ + const_iterator cbegin() const; + + /** + * Returns a `const_iterator` to the end of the vector. + * + * @return A `const_iterator` to the end + */ + const_iterator cend() const; + + /** + * Returns the value of sparse elements not explicitly stored in the vector. + * + * @return The value of sparse elements + */ + float32 getSparseValue() const; + + /** + * Sorts the elements in the vector in ascending order based on their values. + */ + void sortByValues(); + + /** + * Sets the number of elements in the vector. + * + * @param numElements The number of elements to be set + * @param freeMemory True, if unused memory should be freed, if possible, false otherwise + */ + void setNumElements(uint32 numElements, bool freeMemory); + + uint32 getNumElements() const override; +}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index fa637ab6df..f82b9c7b68 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -41,6 +41,7 @@ source_files = [ 'src/mlrl/common/input/feature_type_ordinal.cpp', 'src/mlrl/common/input/feature_vector.cpp', 'src/mlrl/common/input/feature_vector_common.cpp', + 'src/mlrl/common/input/feature_vector_numerical.cpp', 'src/mlrl/common/input/label_matrix_c_contiguous.cpp', 'src/mlrl/common/input/label_matrix_csc.cpp', 'src/mlrl/common/input/label_matrix_csr.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_numerical.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_numerical.cpp new file mode 100644 index 0000000000..9a5bb7a7d7 --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_numerical.cpp @@ -0,0 +1,28 @@ +#include "mlrl/common/input/feature_vector_numerical.hpp" + 
+NumericalFeatureVector::NumericalFeatureVector(uint32 numElements, float32 sparseValue) + : vector_(SparseArrayVector(numElements)), sparseValue_(sparseValue) {} + +NumericalFeatureVector::iterator NumericalFeatureVector::begin() { + return vector_.begin(); +} + +NumericalFeatureVector::iterator NumericalFeatureVector::end() { + return vector_.end(); +} + +NumericalFeatureVector::const_iterator NumericalFeatureVector::cbegin() const { + return vector_.cbegin(); +} + +NumericalFeatureVector::const_iterator NumericalFeatureVector::cend() const { + return vector_.cend(); +} + +void NumericalFeatureVector::setNumElements(uint32 numElements, bool freeMemory) { + return vector_.setNumElements(numElements, freeMemory); +} + +uint32 NumericalFeatureVector::getNumElements() const { + return vector_.getNumElements(); +} From 50b604f77fa569126bba92b7e3ddb6c425145d9c Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 19 Sep 2023 20:53:16 +0200 Subject: [PATCH 4/4] Add class EqualFeatureVector. --- .../mlrl/common/input/feature_vector_equal.hpp | 16 ++++++++++++++++ cpp/subprojects/common/meson.build | 1 + .../mlrl/common/input/feature_vector_equal.cpp | 5 +++++ 3 files changed, 22 insertions(+) create mode 100644 cpp/subprojects/common/include/mlrl/common/input/feature_vector_equal.hpp create mode 100644 cpp/subprojects/common/src/mlrl/common/input/feature_vector_equal.cpp diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_equal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_equal.hpp new file mode 100644 index 0000000000..1644bb515b --- /dev/null +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_equal.hpp @@ -0,0 +1,16 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/input/feature_vector_common.hpp" + +/** + * A feature vector that does not actually store any values. 
It is used in cases where all training examples have the + * same value for a certain feature. Its `getNumElements` function always returns 0. + */ +class EqualFeatureVector final : public AbstractFeatureVector { + public: + + uint32 getNumElements() const override; +}; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index f82b9c7b68..a42180a003 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -41,6 +41,7 @@ source_files = [ 'src/mlrl/common/input/feature_type_ordinal.cpp', 'src/mlrl/common/input/feature_vector.cpp', 'src/mlrl/common/input/feature_vector_common.cpp', + 'src/mlrl/common/input/feature_vector_equal.cpp', 'src/mlrl/common/input/feature_vector_numerical.cpp', 'src/mlrl/common/input/label_matrix_c_contiguous.cpp', 'src/mlrl/common/input/label_matrix_csc.cpp', 'src/mlrl/common/input/label_matrix_csr.cpp', diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_equal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_equal.cpp new file mode 100644 index 0000000000..a5f4e703b9 --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_equal.cpp @@ -0,0 +1,5 @@ +#include "mlrl/common/input/feature_vector_equal.hpp" + +uint32 EqualFeatureVector::getNumElements() const { + return 0; +}