diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_type.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_type.hpp index ed06ffb052..16ddbaaa0e 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_type.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_type.hpp @@ -3,6 +3,10 @@ */ #pragma once +#include "mlrl/common/data/view_csc.hpp" +#include "mlrl/common/data/view_fortran_contiguous.hpp" +#include "mlrl/common/input/feature_vector.hpp" + /** * Defines an interface for all classes that represent the type of a feature. */ @@ -16,6 +20,7 @@ class IFeatureType { * * @return True, if the feature is ordinal, false otherwise */ + // TODO Remove virtual bool isOrdinal() const = 0; /** @@ -23,5 +28,30 @@ class IFeatureType { * * @return True, if the feature is nominal, false otherwise */ + // TODO Remove virtual bool isNominal() const = 0; + + /** + * Creates and returns a feature vector that stores the feature values taken from a given Fortran-contiguous + * matrix for a certain feature. + * + * @param featureIndex The index of the feature + * @param featureMatrix A reference to an object of type `FortranContiguousConstView` that provides column-wise + * access to the feature values + * @return An unique pointer to an object of type `IFeatureVector` that has been created + */ + virtual std::unique_ptr createFeatureVector( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const = 0; + + /** + * Creates and returns a feature vector that stores the feature values taken from a given CSC matrix for a + * certain feature. + * + * @param featureIndex The index of the feature + * @param featureMatrix A reference to an object of type `CscConstView` that provides column-wise access to the + * feature values + * @return An unique pointer to an object of type `IFeatureVector` that has been created + */ + virtual std::unique_ptr createFeatureVector( + uint32 featureIndex, const CscConstView& featureMatrix) const = 0; }; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_type_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_type_nominal.hpp index 168d04ec13..b9b708f1a5 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_type_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_type_nominal.hpp @@ -14,4 +14,10 @@ class NominalFeatureType final : public IFeatureType { bool isOrdinal() const override; bool isNominal() const override; + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const override; + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const CscConstView& featureMatrix) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_type_numerical.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_type_numerical.hpp index f8bd0eccb6..5b4d9383f0 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_type_numerical.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_type_numerical.hpp @@ -14,4 +14,10 @@ class NumericalFeatureType final : public IFeatureType { bool isOrdinal() const override; bool isNominal() const override; + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const override; + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const CscConstView& featureMatrix) const override; }; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_type_ordinal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_type_ordinal.hpp index cb8e77152f..39f46947e0 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_type_ordinal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_type_ordinal.hpp @@ -14,4 +14,10 @@ class OrdinalFeatureType final : public IFeatureType { bool isOrdinal() const override; bool isNominal() const override; + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const override; + + std::unique_ptr createFeatureVector( + uint32 featureIndex, const CscConstView& featureMatrix) const override; }; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp index bbb52d00c2..3bd9661e33 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp @@ -7,3 +7,15 @@ bool NominalFeatureType::isOrdinal() const { bool NominalFeatureType::isNominal() const { return true; } + +std::unique_ptr NominalFeatureType::createFeatureVector( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const { + // TODO Implement + return nullptr; +} + +std::unique_ptr NominalFeatureType::createFeatureVector( + uint32 featureIndex, const CscConstView& featureMatrix) const { + // TODO Implement + return nullptr; +} diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp index 8b58664979..c115fbab5a 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_numerical.cpp @@ -1,5 +1,76 @@ #include "mlrl/common/input/feature_type_numerical.hpp" +#include "mlrl/common/input/feature_vector_equal.hpp" +#include "mlrl/common/input/feature_vector_numerical.hpp" +#include "mlrl/common/iterator/index_iterator.hpp" + +#include + +template +static inline std::unique_ptr createNumericalFeatureVector(IndexIterator indexIterator, + ValueIterator valueIterator, + uint32 numElements) { + std::unique_ptr featureVectorPtr = std::make_unique(numElements, 0); + NumericalFeatureVector::iterator vectorIterator = featureVectorPtr->begin(); + uint32 n = 0; + + for (uint32 i = 0; i < numElements; i++) { + uint32 index = indexIterator[i]; + float32 value = valueIterator[i]; + + if (std::isnan(value)) { + featureVectorPtr->setMissing(index, true); + } else { + IndexedValue& entry = vectorIterator[n]; + entry.index = index; + entry.value = value; + n++; + } + } + + featureVectorPtr->setNumElements(n, true); + return featureVectorPtr; +} + +static inline std::unique_ptr createNumericalFeatureVector( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) { + FortranContiguousConstView::value_const_iterator valueIterator = + featureMatrix.values_cbegin(featureIndex); + uint32 numElements = featureMatrix.getNumRows(); + return createNumericalFeatureVector(valueIterator, IndexIterator(), numElements); +} + +static inline std::unique_ptr createNumericalFeatureVector( + uint32 featureIndex, const CscConstView& featureMatrix) { + CscConstView::index_const_iterator indexIterator = featureMatrix.indices_cbegin(featureIndex); + CscConstView::index_const_iterator indicesEnd = featureMatrix.indices_cend(featureIndex); + CscConstView::value_const_iterator valueIterator = featureMatrix.values_cbegin(featureIndex); + uint32 numElements = indicesEnd - indexIterator; + return createNumericalFeatureVector(indexIterator, valueIterator, numElements); +} + +template +static inline std::unique_ptr createFeatureVectorInternally(uint32 featureIndex, + const FeatureMatrix& featureMatrix) { + std::unique_ptr featureVectorPtr = + createNumericalFeatureVector(featureIndex, featureMatrix); + + // Sort the feature values... + std::sort(featureVectorPtr->begin(), featureVectorPtr->end(), IndexedValue::CompareValue()); + + // Check if all feature values are equal... + NumericalFeatureVector::const_iterator iterator = featureVectorPtr->cbegin(); + uint32 numElements = featureVectorPtr->getNumElements(); + float32 minValue = iterator[0].value; + float32 maxValue = iterator[numElements - 1].value; + + if (isEqual(minValue, maxValue)) { + return std::make_unique(); + } + + return featureVectorPtr; +} + bool NumericalFeatureType::isOrdinal() const { return false; } @@ -7,3 +78,13 @@ bool NumericalFeatureType::isOrdinal() const { bool NumericalFeatureType::isNominal() const { return false; } + +std::unique_ptr NumericalFeatureType::createFeatureVector( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const { + return createFeatureVectorInternally(featureIndex, featureMatrix); +} + +std::unique_ptr NumericalFeatureType::createFeatureVector( + uint32 featureIndex, const CscConstView& featureMatrix) const { + return createFeatureVectorInternally(featureIndex, featureMatrix); +} diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp index a61646c9b8..d616c12658 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp @@ -7,3 +7,15 @@ bool OrdinalFeatureType::isOrdinal() const { bool OrdinalFeatureType::isNominal() const { return false; } + +std::unique_ptr OrdinalFeatureType::createFeatureVector( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const { + // TODO Implement + return nullptr; +} + +std::unique_ptr OrdinalFeatureType::createFeatureVector( + uint32 featureIndex, const CscConstView& featureMatrix) const { + // TODO Implement + return nullptr; +} diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp index e69de29bb2..cf367e8141 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_common.cpp @@ -0,0 +1,17 @@ +#include "mlrl/common/input/feature_vector_common.hpp" + +AbstractFeatureVector::missing_index_const_iterator AbstractFeatureVector::missing_indices_cbegin() const { + return missingIndices_.indices_cbegin(); +} + +AbstractFeatureVector::missing_index_const_iterator AbstractFeatureVector::missing_indices_cend() const { + return missingIndices_.indices_cend(); +} + +void AbstractFeatureVector::setMissing(uint32 index, bool missing) { + missingIndices_.set(index, missing); +} + +bool AbstractFeatureVector::isMissing(uint32 index) const { + return missingIndices_[index]; +}