diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp
index 8bc2feb680..956b97f2b8 100644
--- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp
+++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector.hpp
@@ -6,6 +6,16 @@
 #include "mlrl/common/data/vector_sparse_array.hpp"
 #include "mlrl/common/input/missing_feature_vector.hpp"
 
+/**
+ * Defines an interface for all one-dimensional vectors that store the values of training examples for a certain
+ * feature.
+ */
+class IFeatureVector : public IOneDimensionalView {
+    public:
+
+        virtual ~IFeatureVector() override {};
+};
+
 /**
  * An one-dimensional sparse vector that stores the values of training examples for a certain feature, as well as the
  * indices of examples with missing feature values.
diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_common.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_common.hpp
new file mode 100644
index 0000000000..8241587c17
--- /dev/null
+++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_common.hpp
@@ -0,0 +1,59 @@
+/*
+ * @author Michael Rapp (michael.rapp.ml@gmail.com)
+ */
+#pragma once
+
+#include "mlrl/common/data/vector_dok_binary.hpp"
+#include "mlrl/common/input/feature_vector.hpp"
+
+#include  // NOTE(review): no header name follows this directive (possibly lost in extraction) - confirm the intended header
+
+/**
+ * An abstract base class for all feature vectors that store the values of training examples for a certain feature. It
+ * allows keeping track of the indices of examples with missing feature values.
+ */
+class AbstractFeatureVector : public IFeatureVector {
+    private:
+
+        BinaryDokVector missingIndices_;  // presumably the indices of examples with missing values - confirm in the .cpp
+
+    public:
+
+        virtual ~AbstractFeatureVector() override {};
+
+        /**
+         * An iterator that provides read-only access to the indices of examples with missing feature values.
+         */
+        typedef BinaryDokVector::index_const_iterator missing_index_const_iterator;
+
+        /**
+         * Returns a `missing_index_const_iterator` to the beginning of the indices of examples with missing feature
+         * values.
+         *
+         * @return A `missing_index_const_iterator` to the beginning
+         */
+        missing_index_const_iterator missing_indices_cbegin() const;
+
+        /**
+         * Returns a `missing_index_const_iterator` to the end of the indices of examples with missing feature values.
+         *
+         * @return A `missing_index_const_iterator` to the end
+         */
+        missing_index_const_iterator missing_indices_cend() const;
+
+        /**
+         * Sets whether the example at a specific index is missing a feature value or not.
+         *
+         * @param index     The index of the example
+         * @param missing   True, if the example at the given index is missing a feature value, false otherwise
+         */
+        void setMissing(uint32 index, bool missing);
+
+        /**
+         * Returns whether the example at a specific index is missing a feature value or not.
+         *
+         * @param index The index of the example
+         * @return      True, if the example at the given index is missing a feature value, false otherwise
+         */
+        bool isMissing(uint32 index) const;
+};
diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp
new file mode 100644
index 0000000000..2d2d44513b
--- /dev/null
+++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_numerical.hpp
@@ -0,0 +1,86 @@
+/*
+ * @author Michael Rapp (michael.rapp.ml@gmail.com)
+ */
+#pragma once
+
+#include "mlrl/common/data/vector_sparse_array.hpp"
+#include "mlrl/common/input/feature_vector_common.hpp"
+
+/**
+ * A feature vector that stores the values of training examples for a certain numerical feature.
+ */
+class NumericalFeatureVector final : public AbstractFeatureVector {
+    private:
+
+        SparseArrayVector vector_;  // storage that begin()/end()/cbegin()/cend() and the size accessors delegate to
+
+        const float32 sparseValue_;  // the `sparseValue` given to the constructor; never modified afterwards
+
+    public:
+
+        /**
+         * @param numElements   The number of elements in the vector
+         * @param sparseValue   The value of sparse elements not explicitly stored in the vector
+         */
+        NumericalFeatureVector(uint32 numElements, float32 sparseValue);
+
+        /**
+         * An iterator that provides access to the feature values in the vector and allows modifying them.
+         */
+        typedef SparseArrayVector::iterator iterator;
+
+        /**
+         * An iterator that provides read-only access to the feature values in the vector.
+         */
+        typedef SparseArrayVector::const_iterator const_iterator;
+
+        /**
+         * Returns an `iterator` to the beginning of the vector.
+         *
+         * @return An `iterator` to the beginning
+         */
+        iterator begin();
+
+        /**
+         * Returns an `iterator` to the end of the vector.
+         *
+         * @return An `iterator` to the end
+         */
+        iterator end();
+
+        /**
+         * Returns a `const_iterator` to the beginning of the vector.
+         *
+         * @return A `const_iterator` to the beginning
+         */
+        const_iterator cbegin() const;
+
+        /**
+         * Returns a `const_iterator` to the end of the vector.
+         *
+         * @return A `const_iterator` to the end
+         */
+        const_iterator cend() const;
+
+        /**
+         * Returns the value of sparse elements not explicitly stored in the vector.
+         *
+         * @return The value of sparse elements
+         */
+        float32 getSparseValue() const;
+
+        /**
+         * Sorts the elements in the vector in ascending order based on their values.
+         */
+        void sortByValues();
+
+        /**
+         * Sets the number of elements in the vector.
+         *
+         * @param numElements   The number of elements to be set
+         * @param freeMemory    True, if unused memory should be freed, if possible, false otherwise
+         */
+        void setNumElements(uint32 numElements, bool freeMemory);
+
+        uint32 getNumElements() const override;  // number of elements in `vector_`; defined in feature_vector_numerical.cpp
+};
diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build
index e4e19c0220..f82b9c7b68 100644
--- a/cpp/subprojects/common/meson.build
+++ b/cpp/subprojects/common/meson.build
@@ -40,6 +40,8 @@ source_files = [
     'src/mlrl/common/input/feature_type_numerical.cpp',
     'src/mlrl/common/input/feature_type_ordinal.cpp',
     'src/mlrl/common/input/feature_vector.cpp',
+    'src/mlrl/common/input/feature_vector_common.cpp',
+    'src/mlrl/common/input/feature_vector_numerical.cpp',
     'src/mlrl/common/input/label_matrix_c_contiguous.cpp',
     'src/mlrl/common/input/label_matrix_csc.cpp',
     'src/mlrl/common/input/label_matrix_csr.cpp',
diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_numerical.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_numerical.cpp
new file mode 100644
index 0000000000..9a5bb7a7d7
--- /dev/null
+++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_numerical.cpp
@@ -0,0 +1,28 @@
+#include "mlrl/common/input/feature_vector_numerical.hpp"
+
+NumericalFeatureVector::NumericalFeatureVector(uint32 numElements, float32 sparseValue)
+    : vector_(SparseArrayVector(numElements)), sparseValue_(sparseValue) {}
+
+NumericalFeatureVector::iterator NumericalFeatureVector::begin() {
+    return vector_.begin();
+}
+
+NumericalFeatureVector::iterator NumericalFeatureVector::end() {
+    return vector_.end();
+}
+
+NumericalFeatureVector::const_iterator NumericalFeatureVector::cbegin() const {
+    return vector_.cbegin();
+}
+
+NumericalFeatureVector::const_iterator NumericalFeatureVector::cend() const {
+    return vector_.cend();  // read-only end iterator of the wrapped `vector_`
+}
+
+void NumericalFeatureVector::setNumElements(uint32 numElements, bool freeMemory) {
+    vector_.setNumElements(numElements, freeMemory);  // forwarded unchanged; no `return` since the function is void
+}
+
+uint32 NumericalFeatureVector::getNumElements() const {
+    return vector_.getNumElements();  // size as reported by the wrapped `vector_`
+}