From e22feb01d54e01fc03927157fe096eefc3ea0f46 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 3 Oct 2023 13:42:18 +0200 Subject: [PATCH 1/6] Change visibility of member "numValues_" of the class NominalFeatureVector from protected to private. --- .../include/mlrl/common/input/feature_vector_nominal.hpp | 9 ++------- .../src/mlrl/common/input/feature_vector_ordinal.cpp | 4 ++-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp index ea80b74466..753922acd6 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp @@ -18,15 +18,10 @@ class NominalFeatureVector : public AbstractFeatureVector { uint32* indptr_; - const int32 majorityValue_; - - protected: - - /** - * The number of distinct values of the nominal feature, excluding the majority value. 
- */ const uint32 numValues_; + const int32 majorityValue_; + public: /** diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp index ba3bb98c65..e6fb752896 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp @@ -12,7 +12,7 @@ OrdinalFeatureVector::index_iterator OrdinalFeatureVector::order_begin(uint32 in } OrdinalFeatureVector::index_iterator OrdinalFeatureVector::order_end(uint32 index) { - return &order_[numValues_]; + return &order_[this->getNumElements()]; } OrdinalFeatureVector::index_const_iterator OrdinalFeatureVector::order_cbegin(uint32 index) const { @@ -20,5 +20,5 @@ OrdinalFeatureVector::index_const_iterator OrdinalFeatureVector::order_cbegin(ui } OrdinalFeatureVector::index_const_iterator OrdinalFeatureVector::order_cend(uint32 index) const { - return &order_[numValues_]; + return &order_[this->getNumElements()]; } From afcf3770bc92d3bb1d4997ea96d06c696fd81e07 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 3 Oct 2023 14:55:42 +0200 Subject: [PATCH 2/6] Rename constructor argument. 
--- .../mlrl/common/input/feature_vector_binary.hpp | 10 +++++----- .../src/mlrl/common/input/feature_vector_binary.cpp | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp index 5590cd527e..bc51f22402 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_binary.hpp @@ -13,10 +13,10 @@ class BinaryFeatureVector final : public NominalFeatureVector { public: /** - * @param numElements The number of elements in the vector, i.e., the number of examples associated with the - * minority value - * @param minorityValue The minority value, i.e., the least frequent value, of the binary feature - * @param majorityValue The majority value, i.e., the most frequent value, of the binary feature + * @param numMinorityExamples The number of elements in the vector, i.e., the number of examples associated + * with the minority value + * @param minorityValue The minority value, i.e., the least frequent value, of the binary feature + * @param majorityValue The majority value, i.e., the most frequent value, of the binary feature */ - BinaryFeatureVector(uint32 numElements, int32 minorityValue, int32 majorityValue); + BinaryFeatureVector(uint32 numMinorityExamples, int32 minorityValue, int32 majorityValue); }; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp index 9fd48c7917..11a99d79a9 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/input/feature_vector_binary.hpp" -BinaryFeatureVector::BinaryFeatureVector(uint32 numElements, int32 minorityValue, int32 
majorityValue) - : NominalFeatureVector(1, numElements, majorityValue) { +BinaryFeatureVector::BinaryFeatureVector(uint32 numMinorityExamples, int32 minorityValue, int32 majorityValue) + : NominalFeatureVector(1, numMinorityExamples, majorityValue) { this->values_begin()[0] = minorityValue; this->indptr_begin()[0] = 0; } From 77633a8ab923b0175a7a1d089b0f8fe5645b45d0 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 3 Oct 2023 14:58:13 +0200 Subject: [PATCH 3/6] Rename constructor arguments. --- .../include/mlrl/common/input/feature_vector_nominal.hpp | 4 ++-- .../include/mlrl/common/input/feature_vector_ordinal.hpp | 4 ++-- .../src/mlrl/common/input/feature_vector_nominal.cpp | 8 ++++---- .../src/mlrl/common/input/feature_vector_ordinal.cpp | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp index 753922acd6..e44ab70957 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp @@ -26,11 +26,11 @@ class NominalFeatureVector : public AbstractFeatureVector { /** * @param numValues The number of distinct values of the nominal feature, excluding the majority value - * @param numElements The number of elements in the vector, i.e., the number of examples not associated with + * @param numExamples The number of elements in the vector, i.e., the number of examples not associated with * the majority value * @param majorityValue The majority value, i.e., the most frequent value, of the nominal feature */ - NominalFeatureVector(uint32 numValues, uint32 numElements, int32 majorityValue); + NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue); ~NominalFeatureVector() override; diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_ordinal.hpp 
b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_ordinal.hpp index ae56c0516c..20dc8f2918 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_ordinal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_ordinal.hpp @@ -18,11 +18,11 @@ class OrdinalFeatureVector : public NominalFeatureVector { /** * @param numValues The number of distinct values of the ordinal feature, excluding the majority value - * @param numElements The number of elements in the vector, i.e., the number of examples not associated with + * @param numExamples The number of elements in the vector, i.e., the number of examples not associated with * the majority value * @param majorityValue The majority value, i.e., the most frequent value, of the ordinal feature */ - OrdinalFeatureVector(uint32 numValues, uint32 numElements, int32 majorityValue); + OrdinalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue); ~OrdinalFeatureVector() override; diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp index 458b5a3c3a..5c1a7286f2 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp @@ -1,9 +1,9 @@ #include "mlrl/common/input/feature_vector_nominal.hpp" -NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numElements, int32 majorityValue) - : values_(new int32[numValues]), indices_(new uint32[numElements]), indptr_(new uint32[numValues + 1]), - majorityValue_(majorityValue), numValues_(numValues) { - indptr_[numValues] = numElements; +NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue) + : values_(new int32[numValues]), indices_(new uint32[numExamples]), indptr_(new uint32[numValues + 1]), + numValues_(numValues), majorityValue_(majorityValue) { + 
indptr_[numValues] = numExamples; } NominalFeatureVector::~NominalFeatureVector() { diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp index e6fb752896..8fdfe09795 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp @@ -1,7 +1,7 @@ #include "mlrl/common/input/feature_vector_ordinal.hpp" -OrdinalFeatureVector::OrdinalFeatureVector(uint32 numValues, uint32 numElements, int32 majorityValue) - : NominalFeatureVector(numValues, numElements, majorityValue), order_(new uint32[numValues]) {} +OrdinalFeatureVector::OrdinalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue) + : NominalFeatureVector(numValues, numExamples, majorityValue), order_(new uint32[numValues]) {} OrdinalFeatureVector::~OrdinalFeatureVector() { delete[] order_; From c2bd09ab4c4429f5377c335ac1e8a22c74551246 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 3 Oct 2023 17:07:29 +0200 Subject: [PATCH 4/6] Implement "createFeatureVector" functions of the class NominalFeatureType. 
--- cpp/subprojects/common/meson.build | 1 + .../common/input/feature_type_nominal.cpp | 240 +++++++++- .../common/input/feature_type_nominal.cpp | 436 ++++++++++++++++++ 3 files changed, 673 insertions(+), 4 deletions(-) create mode 100644 cpp/subprojects/common/test/mlrl/common/input/feature_type_nominal.cpp diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 8d50170a5a..5fa541d4c5 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -125,6 +125,7 @@ source_files = [ # Test files test_files = [ + 'test/mlrl/common/input/feature_type_nominal.cpp', 'test/mlrl/common/input/feature_type_numerical.cpp', 'test/mlrl/common/info.cpp' ] diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp index 3bd9661e33..673165cac8 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp @@ -1,5 +1,239 @@ #include "mlrl/common/input/feature_type_nominal.hpp" +#include "mlrl/common/data/tuple.hpp" +#include "mlrl/common/input/feature_vector_binary.hpp" +#include "mlrl/common/input/feature_vector_equal.hpp" +#include "mlrl/common/iterator/index_iterator.hpp" + +#include +#include + +template +static inline uint32 createMapping(ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping) { + uint32 numExamples = 0; + uint32 numValues = 0; + + for (uint32 i = 0; i < numElements; i++) { + float32 value = valueIterator[i]; + + if (!std::isnan(value)) { + int32 nominalValue = (int32) value; + auto it = mapping.find(nominalValue); + + if (it != mapping.end()) { + it->second.second++; + } else { + mapping.emplace(nominalValue, Tuple {numValues, 1}); + numValues++; + } + + numExamples++; + } + } + + return numExamples; +} + +static inline Tuple getMinorityAndMajorityValue(const std::unordered_map>& 
mapping) { + auto it = mapping.cbegin(); + auto& firstEntry = *it; + int firstFrequency = firstEntry.second.second; + it++; + auto& secondEntry = *it; + int secondFrequency = secondEntry.second.second; + Tuple tuple; + + if (firstFrequency > secondFrequency) { + tuple.first = secondEntry.first; + tuple.second = firstEntry.first; + } else { + tuple.first = firstEntry.first; + tuple.second = secondEntry.first; + } + + return tuple; +} + +static inline int32 getMajorityValue(const std::unordered_map>& mapping) { + auto it = mapping.cbegin(); + auto& firstEntry = *it; + int32 majorityValue = firstEntry.first; + uint32 majorityValueFrequency = firstEntry.second.second; + + for (; it != mapping.cend(); it++) { + auto& entry = *it; + uint32 frequency = entry.second.second; + + if (frequency > majorityValueFrequency) { + majorityValue = entry.first; + majorityValueFrequency = frequency; + } + } + + return majorityValue; +} + +template +static inline std::unique_ptr createNominalFeatureVector( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping, uint32 numValues, uint32 numExamples, int32 majorityValue) { + std::unique_ptr featureVectorPtr = + std::make_unique(numValues, numExamples, majorityValue); + NominalFeatureVector::value_iterator vectorValueIterator = featureVectorPtr->values_begin(); + NominalFeatureVector::index_iterator vectorIndptrIterator = featureVectorPtr->indptr_begin(); + uint32 offset = 0; + uint32 n = 0; + + for (auto it = mapping.begin(); it != mapping.end(); it++) { + auto& entry = *it; + int32 value = entry.first; + + if (value != majorityValue) { + vectorValueIterator[n] = value; + vectorIndptrIterator[n] = offset; + Tuple& tuple = entry.second; + tuple.first = n; + offset += tuple.second; + n++; + } + } + + for (uint32 i = 0; i < numElements; i++) { + uint32 index = indexIterator[i]; + float32 value = valueIterator[i]; + + if (std::isnan(value)) { + featureVectorPtr->setMissing(index, true); 
+ } else { + int32 nominalValue = (int32) value; + + if (nominalValue != majorityValue) { + Tuple& tuple = mapping.at(nominalValue); + uint32 numRemaining = tuple.second - 1; + tuple.second = numRemaining; + NominalFeatureVector::index_iterator vectorIndexIterator = featureVectorPtr->indices_begin(tuple.first); + vectorIndexIterator[numRemaining] = index; + } + } + } + + return featureVectorPtr; +} + +template +static inline std::unique_ptr createNominalFeatureVector( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping, uint32 numValues, uint32 numExamples, bool sparse) { + int32 majorityValue; + uint32 numMajorityExamples; + + if (sparse) { + majorityValue = 0; + numMajorityExamples = 0; + } else { + majorityValue = getMajorityValue(mapping); + numMajorityExamples = mapping.at(majorityValue).second; + } + + return createNominalFeatureVector(indexIterator, valueIterator, numElements, mapping, numValues - 1, + numExamples - numMajorityExamples, majorityValue); +} + +template +static inline std::unique_ptr createBinaryFeatureVector( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + const std::unordered_map>& mapping, int32 minorityValue, int32 majorityValue) { + const Tuple& tuple = mapping.at(minorityValue); + uint32 numMinorityExamples = tuple.second; + std::unique_ptr featureVectorPtr = + std::make_unique(numMinorityExamples, minorityValue, majorityValue); + BinaryFeatureVector::index_iterator vectorIndexIterator = featureVectorPtr->indices_begin(0); + uint32 n = 0; + + for (uint32 i = 0; i < numElements; i++) { + uint32 index = indexIterator[i]; + float32 value = valueIterator[i]; + + if (std::isnan(value)) { + featureVectorPtr->setMissing(index, true); + } else { + int32 nominalValue = (int32) value; + + if (nominalValue == minorityValue) { + vectorIndexIterator[n] = index; + n++; + } + } + } + + return featureVectorPtr; +} + +template +static inline std::unique_ptr 
createBinaryFeatureVector( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping, bool sparse) { + int32 minorityValue; + int32 majorityValue; + + if (sparse) { + minorityValue = (*mapping.cbegin()).first; + majorityValue = 0; + } else { + const Tuple tuple = getMinorityAndMajorityValue(mapping); + minorityValue = tuple.first; + majorityValue = tuple.second; + } + + return createBinaryFeatureVector(indexIterator, valueIterator, numElements, mapping, minorityValue, majorityValue); +} + +template +static inline std::unique_ptr createFeatureVectorInternally( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping, uint32 numValues, uint32 numExamples, bool sparse) { + if (numValues > 2) { + return createNominalFeatureVector(indexIterator, valueIterator, numElements, mapping, numValues, numExamples, + sparse); + } else if (numValues > 1) { + return createBinaryFeatureVector(indexIterator, valueIterator, numElements, mapping, sparse); + } else { + return std::make_unique(); + } +} + +static inline std::unique_ptr createFeatureVectorInternally( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) { + FortranContiguousConstView::value_const_iterator valueIterator = + featureMatrix.values_cbegin(featureIndex); + uint32 numElements = featureMatrix.getNumRows(); + std::unordered_map> mapping; + uint32 numExamples = createMapping(valueIterator, numElements, mapping); + uint32 numValues = (uint32) mapping.size(); + return createFeatureVectorInternally(IndexIterator(), valueIterator, numElements, mapping, numValues, numExamples, + false); +} + +static inline std::unique_ptr createFeatureVectorInternally( + uint32 featureIndex, const CscConstView& featureMatrix) { + CscConstView::index_const_iterator indexIterator = featureMatrix.indices_cbegin(featureIndex); + CscConstView::value_const_iterator valuesBegin = 
featureMatrix.values_cbegin(featureIndex); + CscConstView::value_const_iterator valuesEnd = featureMatrix.values_cend(featureIndex); + uint32 numElements = valuesEnd - valuesBegin; + std::unordered_map> mapping; + uint32 numExamples = createMapping(valuesBegin, numElements, mapping); + uint32 numValues = (uint32) mapping.size(); + bool sparse = numElements < featureMatrix.getNumRows(); + + if (sparse) { + numValues++; + } + + return createFeatureVectorInternally(indexIterator, valuesBegin, numElements, mapping, numValues, numExamples, + sparse); +} + bool NominalFeatureType::isOrdinal() const { return false; } @@ -10,12 +244,10 @@ bool NominalFeatureType::isNominal() const { std::unique_ptr NominalFeatureType::createFeatureVector( uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const { - // TODO Implement - return nullptr; + return createFeatureVectorInternally(featureIndex, featureMatrix); } std::unique_ptr NominalFeatureType::createFeatureVector( uint32 featureIndex, const CscConstView& featureMatrix) const { - // TODO Implement - return nullptr; + return createFeatureVectorInternally(featureIndex, featureMatrix); } diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_type_nominal.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_type_nominal.cpp new file mode 100644 index 0000000000..8ed0b45aa7 --- /dev/null +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_type_nominal.cpp @@ -0,0 +1,436 @@ +#include "mlrl/common/input/feature_type_nominal.hpp" + +#include "mlrl/common/input/feature_vector_binary.hpp" +#include "mlrl/common/input/feature_vector_equal.hpp" +#include "mlrl/common/input/feature_vector_nominal.hpp" + +#include + +TEST(NominalFeatureTypeTest, createNominalFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... 
+ uint32 numExamples = 8; + float32* features = new float32[numExamples]; + features[0] = 1.0; + features[1] = 0.0; + features[2] = NAN; + features[3] = 1.0; + features[4] = 0.0; + features[5] = NAN; + features[6] = -1.0; + features[7] = 0.0; + FortranContiguousConstView view(numExamples, 1, features); + + // Create feature vector... + std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const NominalFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 2); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + std::unordered_set values; + + for (auto it = featureVector->values_cbegin(); it != featureVector->values_cend(); it++) { + values.emplace(*it); + } + + EXPECT_TRUE(values.find(-1) != values.end()); + EXPECT_TRUE(values.find(1) != values.end()); + + // Check indices associated with the feature values... + for (uint32 i = 0; i < 2; i++) { + int32 value = featureVector->values_cbegin()[i]; + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(i); it != featureVector->indices_cend(i); it++) { + indices.emplace(*it); + } + + if (value == -1) { + EXPECT_EQ(indices.size(), 1); + EXPECT_TRUE(indices.find(6) != indices.end()); + } else { + EXPECT_EQ(indices.size(), 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + } + } + + delete[] features; +} + +TEST(NominalFeatureTypeTest, createBinaryFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... 
+ uint32 numExamples = 7; + float32* features = new float32[numExamples]; + features[0] = 1.0; + features[1] = 0.0; + features[2] = NAN; + features[3] = 1.0; + features[4] = 0.0; + features[5] = NAN; + features[6] = 0.0; + FortranContiguousConstView view(numExamples, 1, features); + + // Create feature vector... + std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const BinaryFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 1); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + int32 minorityValue = featureVector->values_cbegin()[0]; + EXPECT_EQ(minorityValue, (int32) 1); + + // Check indices associated with the feature values... + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(0); it != featureVector->indices_cend(0); it++) { + indices.emplace(*it); + } + + EXPECT_EQ(indices.size(), 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + + delete[] features; +} + +TEST(NominalFeatureTypeTest, createEqualFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... + uint32 numExamples = 2; + float32* features = new float32[numExamples]; + features[0] = 0.0; + features[1] = 0.0; + FortranContiguousConstView view(numExamples, 1, features); + + // Create feature vector... + std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... 
+ const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + delete[] features; +} + +TEST(NominalFeatureTypeTest, createNominalFeatureVectorFromDenseCscView) { + // Initialize feature matrix... + uint32 numDense = 8; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 1.0; + rowIndices[0] = 0; + data[1] = 0.0; + rowIndices[1] = 1; + data[2] = NAN; + rowIndices[2] = 2; + data[3] = 1.0; + rowIndices[3] = 3; + data[4] = 0.0; + rowIndices[4] = 4; + data[5] = NAN; + rowIndices[5] = 5; + data[6] = -1.0; + rowIndices[6] = 6; + data[7] = 0.0; + rowIndices[7] = 7; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const NominalFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 2); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + std::unordered_set values; + + for (auto it = featureVector->values_cbegin(); it != featureVector->values_cend(); it++) { + values.emplace(*it); + } + + EXPECT_TRUE(values.find(-1) != values.end()); + EXPECT_TRUE(values.find(1) != values.end()); + + // Check indices associated with the feature values... 
+ for (uint32 i = 0; i < 2; i++) { + int32 value = featureVector->values_cbegin()[i]; + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(i); it != featureVector->indices_cend(i); it++) { + indices.emplace(*it); + } + + if (value == -1) { + EXPECT_EQ(indices.size(), 1); + EXPECT_TRUE(indices.find(6) != indices.end()); + } else { + EXPECT_EQ(indices.size(), 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + } + } + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(NominalFeatureTypeTest, createBinaryFeatureVectorFromDenseCscView) { + // Initialize feature matrix... + uint32 numDense = 7; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 1.0; + rowIndices[0] = 0; + data[1] = 0.0; + rowIndices[1] = 1; + data[2] = NAN; + rowIndices[2] = 2; + data[3] = 1.0; + rowIndices[3] = 3; + data[4] = 0.0; + rowIndices[4] = 4; + data[5] = NAN; + rowIndices[5] = 5; + data[6] = 0.0; + rowIndices[6] = 6; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const BinaryFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 1); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + int32 minorityValue = featureVector->values_cbegin()[0]; + EXPECT_EQ(minorityValue, (int32) 1); + + // Check indices associated with the feature values... 
+ std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(0); it != featureVector->indices_cend(0); it++) { + indices.emplace(*it); + } + + EXPECT_EQ(indices.size(), 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(NominalFeatureTypeTest, createEqualFeatureVectorFromDenseCscView) { + // Initialize feature matrix... + uint32 numDense = 2; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 0.0; + rowIndices[0] = 0; + data[1] = 0.0; + rowIndices[1] = 1; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(NominalFeatureTypeTest, createNominalFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 5; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 1.0; + rowIndices[0] = 0; + data[1] = NAN; + rowIndices[1] = 2; + data[2] = 1.0; + rowIndices[2] = 3; + data[3] = NAN; + rowIndices[3] = 5; + data[4] = -1.0; + rowIndices[4] = 6; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense + 3, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... 
+ const NominalFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 2); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + std::unordered_set values; + + for (auto it = featureVector->values_cbegin(); it != featureVector->values_cend(); it++) { + values.emplace(*it); + } + + EXPECT_TRUE(values.find(-1) != values.end()); + EXPECT_TRUE(values.find(1) != values.end()); + + // Check indices associated with the feature values... + for (uint32 i = 0; i < 2; i++) { + int32 value = featureVector->values_cbegin()[i]; + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(i); it != featureVector->indices_cend(i); it++) { + indices.emplace(*it); + } + + if (value == -1) { + EXPECT_EQ(indices.size(), 1); + EXPECT_TRUE(indices.find(6) != indices.end()); + } else { + EXPECT_EQ(indices.size(), 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + } + } + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(NominalFeatureTypeTest, createBinaryFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 4; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 1.0; + rowIndices[0] = 0; + data[1] = NAN; + rowIndices[1] = 2; + data[2] = 1.0; + rowIndices[2] = 3; + data[3] = NAN; + rowIndices[3] = 5; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense + 3, 1, data, rowIndices, indptr); + + // Create feature vector... 
+ std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const BinaryFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 1); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + int32 minorityValue = featureVector->values_cbegin()[0]; + EXPECT_EQ(minorityValue, (int32) 1); + + // Check indices associated with the feature values... + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(0); it != featureVector->indices_cend(0); it++) { + indices.emplace(*it); + } + + EXPECT_EQ(indices.size(), 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(NominalFeatureTypeTest, createEqualFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 0; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense + 3, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = NominalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} From a74e29708d7433e2fe08b44e9a3e34a53258b241 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 3 Oct 2023 17:12:49 +0200 Subject: [PATCH 5/6] Add explicit casts. 
--- .../mlrl/common/input/feature_type_nominal.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_type_nominal.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_type_nominal.cpp index 8ed0b45aa7..7198a0646e 100644 --- a/cpp/subprojects/common/test/mlrl/common/input/feature_type_nominal.cpp +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_type_nominal.cpp @@ -55,10 +55,10 @@ TEST(NominalFeatureTypeTest, createNominalFeatureVectorFromFortranContiguousView } if (value == -1) { - EXPECT_EQ(indices.size(), 1); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 1); EXPECT_TRUE(indices.find(6) != indices.end()); } else { - EXPECT_EQ(indices.size(), 2); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); EXPECT_TRUE(indices.find(0) != indices.end()); EXPECT_TRUE(indices.find(3) != indices.end()); } @@ -106,7 +106,7 @@ TEST(NominalFeatureTypeTest, createBinaryFeatureVectorFromFortranContiguousView) indices.emplace(*it); } - EXPECT_EQ(indices.size(), 2); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); EXPECT_TRUE(indices.find(0) != indices.end()); EXPECT_TRUE(indices.find(3) != indices.end()); @@ -192,10 +192,10 @@ TEST(NominalFeatureTypeTest, createNominalFeatureVectorFromDenseCscView) { } if (value == -1) { - EXPECT_EQ(indices.size(), 1); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 1); EXPECT_TRUE(indices.find(6) != indices.end()); } else { - EXPECT_EQ(indices.size(), 2); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); EXPECT_TRUE(indices.find(0) != indices.end()); EXPECT_TRUE(indices.find(3) != indices.end()); } @@ -256,7 +256,7 @@ TEST(NominalFeatureTypeTest, createBinaryFeatureVectorFromDenseCscView) { indices.emplace(*it); } - EXPECT_EQ(indices.size(), 2); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); EXPECT_TRUE(indices.find(0) != indices.end()); 
EXPECT_TRUE(indices.find(3) != indices.end()); @@ -346,10 +346,10 @@ TEST(NominalFeatureTypeTest, createNominalFeatureVectorFromCscView) { } if (value == -1) { - EXPECT_EQ(indices.size(), 1); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 1); EXPECT_TRUE(indices.find(6) != indices.end()); } else { - EXPECT_EQ(indices.size(), 2); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); EXPECT_TRUE(indices.find(0) != indices.end()); EXPECT_TRUE(indices.find(3) != indices.end()); } @@ -404,7 +404,7 @@ TEST(NominalFeatureTypeTest, createBinaryFeatureVectorFromCscView) { indices.emplace(*it); } - EXPECT_EQ(indices.size(), 2); + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); EXPECT_TRUE(indices.find(0) != indices.end()); EXPECT_TRUE(indices.find(3) != indices.end()); From 6f021e7bb2ec29b9f1b3b3de6b0d4612f8ab0c5f Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Tue, 3 Oct 2023 17:21:13 +0200 Subject: [PATCH 6/6] Replace usage of function "find" with "emplace". --- .../src/mlrl/common/input/feature_type_nominal.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp index 673165cac8..de14942f37 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp @@ -19,13 +19,14 @@ static inline uint32 createMapping(ValueIterator valueIterator, uint32 numElemen if (!std::isnan(value)) { int32 nominalValue = (int32) value; - auto it = mapping.find(nominalValue); + auto it = mapping.emplace(nominalValue, Tuple {numValues, 1}); - if (it != mapping.end()) { - it->second.second++; - } else { - mapping.emplace(nominalValue, Tuple {numValues, 1}); + if (it.second) { numValues++; + } else { + auto& entry = *(it.first); + Tuple& tuple = entry.second; + tuple.second++; } numExamples++;