diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_ordinal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_ordinal.hpp index 20dc8f2918..953d0d5937 100644 --- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_ordinal.hpp +++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_ordinal.hpp @@ -10,10 +10,6 @@ * value, i.e., the most frequent value, of an ordinal feature. */ class OrdinalFeatureVector : public NominalFeatureVector { - private: - - uint32* order_; - public: /** @@ -23,39 +19,4 @@ class OrdinalFeatureVector : public NominalFeatureVector { * @param majorityValue The majority value, i.e., the most frequent value, of the ordinal feature */ OrdinalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue); - - ~OrdinalFeatureVector() override; - - /** - * Returns an `index_iterator` to the beginning of the ordered indices of the values of the ordinal feature. - * - * @param index The index of the value - * @return An `index_iterator` to the beginning - */ - index_iterator order_begin(uint32 index); - - /** - * Returns an `index_iterator` to the end of the ordered indices of the values of the ordinal feature. - * - * @param index The index of the value - * @return An `index_iterator` to the end - */ - index_iterator order_end(uint32 index); - - /** - * Returns an `index_const_iterator` to the beginning of the ordered indices of the values of the ordinal - * feature. - * - * @param index The index of the value - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator order_cbegin(uint32 index) const; - - /** - * Returns an `index_const_iterator` to the end of the ordered indices of the values of the ordinal feature. - * - * @param index The index of the value - * @return An `index_const_iterator` to the end - */ - index_const_iterator order_cend(uint32 index) const; }; diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build index 5fa541d4c5..a181587745 100644 --- a/cpp/subprojects/common/meson.build +++ b/cpp/subprojects/common/meson.build @@ -127,6 +127,7 @@ source_files = [ test_files = [ 'test/mlrl/common/input/feature_type_nominal.cpp', 'test/mlrl/common/input/feature_type_numerical.cpp', + 'test/mlrl/common/input/feature_type_ordinal.cpp', 'test/mlrl/common/info.cpp' ] diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp index 0f4d2fd79a..0d047d67f7 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp @@ -1,79 +1,9 @@ #include "mlrl/common/input/feature_type_nominal.hpp" -#include "mlrl/common/data/tuple.hpp" -#include "mlrl/common/input/feature_vector_binary.hpp" +#include "feature_type_nominal_common.hpp" #include "mlrl/common/input/feature_vector_equal.hpp" #include "mlrl/common/iterator/index_iterator.hpp" -#include - -template -static inline uint32 createMapping(ValueIterator valueIterator, uint32 numElements, - std::unordered_map>& mapping) { - uint32 numExamples = 0; - uint32 numValues = 0; - - for (uint32 i = 0; i < numElements; i++) { - float32 value = valueIterator[i]; - - if (!std::isnan(value)) { - int32 nominalValue = (int32) value; - auto it = mapping.emplace(nominalValue, Tuple {numValues, 1}); - - if (it.second) { - numValues++; - } else { - auto& entry = *(it.first); - Tuple& tuple = entry.second; - tuple.second++; - } - - numExamples++; - } - } - - return numExamples; -} - -static inline Tuple getMinorityAndMajorityValue(const std::unordered_map>& mapping) { - auto it = mapping.cbegin(); - auto& firstEntry = *it; - int firstFrequency = firstEntry.second.second; - it++; - auto& secondEntry = *it; - int secondFrequency = secondEntry.second.second; - Tuple tuple; - - if (firstFrequency > secondFrequency) { - tuple.first = secondEntry.first; - tuple.second = firstEntry.first; - } else { - tuple.first = firstEntry.first; - tuple.second = secondEntry.first; - } - - return tuple; -} - -static inline int32 getMajorityValue(const std::unordered_map>& mapping) { - auto it = mapping.cbegin(); - auto& firstEntry = *it; - int32 majorityValue = firstEntry.first; - uint32 majorityValueFrequency = firstEntry.second.second; - - for (; it != mapping.cend(); it++) { - auto& entry = *it; - uint32 frequency = entry.second.second; - - if (frequency > majorityValueFrequency) { - majorityValue = entry.first; - majorityValueFrequency = frequency; - } - } - - return majorityValue; -} - template static inline std::unique_ptr createNominalFeatureVector( IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, @@ -140,55 +70,6 @@ static inline std::unique_ptr createNominalFeatureVector( numExamples - numMajorityExamples, majorityValue); } -template -static inline std::unique_ptr createBinaryFeatureVector( - IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, - const std::unordered_map>& mapping, int32 minorityValue, int32 majorityValue) { - const Tuple& tuple = mapping.at(minorityValue); - uint32 numMinorityExamples = tuple.second; - std::unique_ptr featureVectorPtr = - std::make_unique(numMinorityExamples, minorityValue, majorityValue); - BinaryFeatureVector::index_iterator vectorIndexIterator = featureVectorPtr->indices_begin(0); - uint32 n = 0; - - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indexIterator[i]; - float32 value = valueIterator[i]; - - if (std::isnan(value)) { - featureVectorPtr->setMissing(index, true); - } else { - int32 nominalValue = (int32) value; - - if (nominalValue == minorityValue) { - vectorIndexIterator[n] = index; - n++; - } - } - } - - return featureVectorPtr; -} - -template -static inline std::unique_ptr createBinaryFeatureVector( - IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, - std::unordered_map>& mapping, bool sparse) { - int32 minorityValue; - int32 majorityValue; - - if (sparse) { - minorityValue = (*mapping.cbegin()).first; - majorityValue = 0; - } else { - const Tuple tuple = getMinorityAndMajorityValue(mapping); - minorityValue = tuple.first; - majorityValue = tuple.second; - } - - return createBinaryFeatureVector(indexIterator, valueIterator, numElements, mapping, minorityValue, majorityValue); -} - template static inline std::unique_ptr createFeatureVectorInternally( IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal_common.hpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal_common.hpp new file mode 100644 index 0000000000..8a49f381dc --- /dev/null +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal_common.hpp @@ -0,0 +1,126 @@ +/* + * @author Michael Rapp (michael.rapp.ml@gmail.com) + */ +#pragma once + +#include "mlrl/common/data/tuple.hpp" +#include "mlrl/common/input/feature_vector_binary.hpp" +#include "mlrl/common/input/feature_vector_equal.hpp" + +#include + +template +static inline uint32 createMapping(ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping) { + uint32 numExamples = 0; + uint32 numValues = 0; + + for (uint32 i = 0; i < numElements; i++) { + float32 value = valueIterator[i]; + + if (!std::isnan(value)) { + int32 nominalValue = (int32) value; + auto it = mapping.emplace(nominalValue, Tuple {numValues, 1}); + + if (it.second) { + numValues++; + } else { + auto& entry = *(it.first); + Tuple& tuple = entry.second; + tuple.second++; + } + + numExamples++; + } + } + + return numExamples; +} + +static inline int32 getMajorityValue(const std::unordered_map>& mapping) { + auto it = mapping.cbegin(); + auto& firstEntry = *it; + int32 majorityValue = firstEntry.first; + uint32 majorityValueFrequency = firstEntry.second.second; + + for (; it != mapping.cend(); it++) { + auto& entry = *it; + uint32 frequency = entry.second.second; + + if (frequency > majorityValueFrequency) { + majorityValue = entry.first; + majorityValueFrequency = frequency; + } + } + + return majorityValue; +} + +static inline Tuple getMinorityAndMajorityValue(const std::unordered_map>& mapping) { + auto it = mapping.cbegin(); + auto& firstEntry = *it; + int firstFrequency = firstEntry.second.second; + it++; + auto& secondEntry = *it; + int secondFrequency = secondEntry.second.second; + Tuple tuple; + + if (firstFrequency > secondFrequency) { + tuple.first = secondEntry.first; + tuple.second = firstEntry.first; + } else { + tuple.first = firstEntry.first; + tuple.second = secondEntry.first; + } + + return tuple; +} + +template +static inline std::unique_ptr createBinaryFeatureVector( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + const std::unordered_map>& mapping, int32 minorityValue, int32 majorityValue) { + const Tuple& tuple = mapping.at(minorityValue); + uint32 numMinorityExamples = tuple.second; + std::unique_ptr featureVectorPtr = + std::make_unique(numMinorityExamples, minorityValue, majorityValue); + BinaryFeatureVector::index_iterator vectorIndexIterator = featureVectorPtr->indices_begin(0); + uint32 n = 0; + + for (uint32 i = 0; i < numElements; i++) { + uint32 index = indexIterator[i]; + float32 value = valueIterator[i]; + + if (std::isnan(value)) { + featureVectorPtr->setMissing(index, true); + } else { + int32 nominalValue = (int32) value; + + if (nominalValue == minorityValue) { + vectorIndexIterator[n] = index; + n++; + } + } + } + + return featureVectorPtr; +} + +template +static inline std::unique_ptr createBinaryFeatureVector( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping, bool sparse) { + int32 minorityValue; + int32 majorityValue; + + if (sparse) { + minorityValue = (*mapping.cbegin()).first; + majorityValue = 0; + } else { + const Tuple tuple = getMinorityAndMajorityValue(mapping); + minorityValue = tuple.first; + majorityValue = tuple.second; + } + + return createBinaryFeatureVector(indexIterator, valueIterator, numElements, mapping, minorityValue, majorityValue); +} diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp index d616c12658..637dff5ff2 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_type_ordinal.cpp @@ -1,5 +1,132 @@ #include "mlrl/common/input/feature_type_ordinal.hpp" +#include "feature_type_nominal_common.hpp" +#include "mlrl/common/data/vector_sparse_array.hpp" +#include "mlrl/common/input/feature_vector_ordinal.hpp" +#include "mlrl/common/iterator/index_iterator.hpp" + +#include + +template +static inline std::unique_ptr createOrdinalFeatureVector( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping, uint32 numValues, uint32 numExamples, int32 majorityValue) { + DenseVector sortedValues(numValues); + uint32 n = 0; + + for (auto it = mapping.cbegin(); it != mapping.cend(); it++) { + auto& entry = *it; + int32 value = entry.first; + + if (value != majorityValue) { + sortedValues[n] = value; + n++; + } + } + + std::sort(sortedValues.begin(), sortedValues.end(), std::less()); + + std::unique_ptr featureVectorPtr = + std::make_unique(numValues, numExamples, majorityValue); + OrdinalFeatureVector::value_iterator vectorValueIterator = featureVectorPtr->values_begin(); + OrdinalFeatureVector::index_iterator vectorIndptrIterator = featureVectorPtr->indptr_begin(); + uint32 offset = 0; + + for (uint32 i = 0; i < numValues; i++) { + int32 value = sortedValues[i]; + vectorValueIterator[i] = value; + vectorIndptrIterator[i] = offset; + Tuple& tuple = mapping.at(value); + tuple.first = i; + offset += tuple.second; + } + + for (uint32 i = 0; i < numElements; i++) { + uint32 index = indexIterator[i]; + float32 value = valueIterator[i]; + + if (std::isnan(value)) { + featureVectorPtr->setMissing(index, true); + } else { + int32 nominalValue = (int32) value; + + if (nominalValue != majorityValue) { + Tuple& tuple = mapping.at(nominalValue); + uint32 numRemaining = tuple.second - 1; + tuple.second = numRemaining; + OrdinalFeatureVector::index_iterator vectorIndexIterator = featureVectorPtr->indices_begin(tuple.first); + vectorIndexIterator[numRemaining] = index; + } + } + } + + return featureVectorPtr; +} + +template +static inline std::unique_ptr createOrdinalFeatureVector( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping, uint32 numValues, uint32 numExamples, bool sparse) { + int32 majorityValue; + uint32 numMajorityExamples; + + if (sparse) { + majorityValue = 0; + numMajorityExamples = 0; + } else { + majorityValue = getMajorityValue(mapping); + numMajorityExamples = mapping.at(majorityValue).second; + } + + return createOrdinalFeatureVector(indexIterator, valueIterator, numElements, mapping, numValues - 1, + numExamples - numMajorityExamples, majorityValue); +} + +template +static inline std::unique_ptr createFeatureVectorInternally( + IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements, + std::unordered_map>& mapping, uint32 numValues, uint32 numExamples, bool sparse) { + if (numValues > 2) { + return createOrdinalFeatureVector(indexIterator, valueIterator, numElements, mapping, numValues, numExamples, + sparse); + } else if (numValues > 1) { + return createBinaryFeatureVector(indexIterator, valueIterator, numElements, mapping, sparse); + } else { + return std::make_unique(); + } +} + +static inline std::unique_ptr createFeatureVectorInternally( + uint32 featureIndex, const FortranContiguousConstView& featureMatrix) { + FortranContiguousConstView::value_const_iterator valueIterator = + featureMatrix.values_cbegin(featureIndex); + uint32 numElements = featureMatrix.getNumRows(); + std::unordered_map> mapping; + uint32 numExamples = createMapping(valueIterator, numElements, mapping); + uint32 numValues = (uint32) mapping.size(); + return createFeatureVectorInternally(IndexIterator(), valueIterator, numElements, mapping, numValues, numExamples, + false); +} + +static inline std::unique_ptr createFeatureVectorInternally( + uint32 featureIndex, const CscConstView& featureMatrix) { + CscConstView::index_const_iterator indexIterator = featureMatrix.indices_cbegin(featureIndex); + CscConstView::value_const_iterator valuesBegin = featureMatrix.values_cbegin(featureIndex); + CscConstView::value_const_iterator valuesEnd = featureMatrix.values_cend(featureIndex); + uint32 numElements = valuesEnd - valuesBegin; + std::unordered_map> mapping; + uint32 numExamples = createMapping(valuesBegin, numElements, mapping); + uint32 numValues = (uint32) mapping.size(); + bool sparse = numElements < featureMatrix.getNumRows(); + + if (sparse) { + numValues++; + } + + return createFeatureVectorInternally(indexIterator, valuesBegin, numElements, mapping, numValues, numExamples, + sparse); +} + bool OrdinalFeatureType::isOrdinal() const { return true; } @@ -10,12 +137,10 @@ bool OrdinalFeatureType::isNominal() const { std::unique_ptr OrdinalFeatureType::createFeatureVector( uint32 featureIndex, const FortranContiguousConstView& featureMatrix) const { - // TODO Implement - return nullptr; + return createFeatureVectorInternally(featureIndex, featureMatrix); } std::unique_ptr OrdinalFeatureType::createFeatureVector( uint32 featureIndex, const CscConstView& featureMatrix) const { - // TODO Implement - return nullptr; + return createFeatureVectorInternally(featureIndex, featureMatrix); } diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp index 8fdfe09795..ef5c8a0388 100644 --- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp +++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_ordinal.cpp @@ -1,24 +1,4 @@ #include "mlrl/common/input/feature_vector_ordinal.hpp" OrdinalFeatureVector::OrdinalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue) - : NominalFeatureVector(numValues, numExamples, majorityValue), order_(new uint32[numValues]) {} - -OrdinalFeatureVector::~OrdinalFeatureVector() { - delete[] order_; -} - -OrdinalFeatureVector::index_iterator OrdinalFeatureVector::order_begin(uint32 index) { - return order_; -} - -OrdinalFeatureVector::index_iterator OrdinalFeatureVector::order_end(uint32 index) { - return &order_[this->getNumElements()]; -} - -OrdinalFeatureVector::index_const_iterator OrdinalFeatureVector::order_cbegin(uint32 index) const { - return order_; -} - -OrdinalFeatureVector::index_const_iterator OrdinalFeatureVector::order_cend(uint32 index) const { - return &order_[this->getNumElements()]; -} + : NominalFeatureVector(numValues, numExamples, majorityValue) {} diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_type_ordinal.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_type_ordinal.cpp new file mode 100644 index 0000000000..6d4ac140c8 --- /dev/null +++ b/cpp/subprojects/common/test/mlrl/common/input/feature_type_ordinal.cpp @@ -0,0 +1,421 @@ +#include "mlrl/common/input/feature_type_ordinal.hpp" + +#include "mlrl/common/input/feature_vector_binary.hpp" +#include "mlrl/common/input/feature_vector_equal.hpp" +#include "mlrl/common/input/feature_vector_ordinal.hpp" + +#include + +TEST(OrdinalFeatureTypeTest, createOrdinalFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... + uint32 numExamples = 8; + float32* features = new float32[numExamples]; + features[0] = 1.0; + features[1] = 0.0; + features[2] = NAN; + features[3] = 1.0; + features[4] = 0.0; + features[5] = NAN; + features[6] = -1.0; + features[7] = 0.0; + FortranContiguousConstView view(numExamples, 1, features); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const OrdinalFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 2); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + OrdinalFeatureVector::value_const_iterator valueIterator = featureVector->values_cbegin(); + EXPECT_EQ(valueIterator[0], (int32) -1); + EXPECT_EQ(valueIterator[1], (int32) 1); + + // Check indices associated with the feature values... + for (uint32 i = 0; i < 2; i++) { + int32 value = valueIterator[i]; + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(i); it != featureVector->indices_cend(i); it++) { + indices.emplace(*it); + } + + if (value == -1) { + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 1); + EXPECT_TRUE(indices.find(6) != indices.end()); + } else { + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + } + } + + delete[] features; +} + +TEST(OrdinalFeatureTypeTest, createBinaryFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... + uint32 numExamples = 7; + float32* features = new float32[numExamples]; + features[0] = 1.0; + features[1] = 0.0; + features[2] = NAN; + features[3] = 1.0; + features[4] = 0.0; + features[5] = NAN; + features[6] = 0.0; + FortranContiguousConstView view(numExamples, 1, features); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const BinaryFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 1); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + int32 minorityValue = featureVector->values_cbegin()[0]; + EXPECT_EQ(minorityValue, (int32) 1); + + // Check indices associated with the feature values... + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(0); it != featureVector->indices_cend(0); it++) { + indices.emplace(*it); + } + + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + + delete[] features; +} + +TEST(OrdinalFeatureTypeTest, createEqualFeatureVectorFromFortranContiguousView) { + // Initialize feature matrix... + uint32 numExamples = 2; + float32* features = new float32[numExamples]; + features[0] = 0.0; + features[1] = 0.0; + FortranContiguousConstView view(numExamples, 1, features); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + delete[] features; +} + +TEST(OrdinalFeatureTypeTest, createOrdinalFeatureVectorFromDenseCscView) { + // Initialize feature matrix... + uint32 numDense = 8; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 1.0; + rowIndices[0] = 0; + data[1] = 0.0; + rowIndices[1] = 1; + data[2] = NAN; + rowIndices[2] = 2; + data[3] = 1.0; + rowIndices[3] = 3; + data[4] = 0.0; + rowIndices[4] = 4; + data[5] = NAN; + rowIndices[5] = 5; + data[6] = -1.0; + rowIndices[6] = 6; + data[7] = 0.0; + rowIndices[7] = 7; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const OrdinalFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 2); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + OrdinalFeatureVector::value_const_iterator valueIterator = featureVector->values_cbegin(); + EXPECT_EQ(valueIterator[0], (int32) -1); + EXPECT_EQ(valueIterator[1], (int32) 1); + + // Check indices associated with the feature values... + for (uint32 i = 0; i < 2; i++) { + int32 value = valueIterator[i]; + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(i); it != featureVector->indices_cend(i); it++) { + indices.emplace(*it); + } + + if (value == -1) { + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 1); + EXPECT_TRUE(indices.find(6) != indices.end()); + } else { + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + } + } + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(OrdinalFeatureTypeTest, createBinaryFeatureVectorFromDenseCscView) { + // Initialize feature matrix... + uint32 numDense = 7; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 1.0; + rowIndices[0] = 0; + data[1] = 0.0; + rowIndices[1] = 1; + data[2] = NAN; + rowIndices[2] = 2; + data[3] = 1.0; + rowIndices[3] = 3; + data[4] = 0.0; + rowIndices[4] = 4; + data[5] = NAN; + rowIndices[5] = 5; + data[6] = 0.0; + rowIndices[6] = 6; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const BinaryFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 1); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + int32 minorityValue = featureVector->values_cbegin()[0]; + EXPECT_EQ(minorityValue, (int32) 1); + + // Check indices associated with the feature values... + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(0); it != featureVector->indices_cend(0); it++) { + indices.emplace(*it); + } + + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(OrdinalFeatureTypeTest, createEqualFeatureVectorFromDenseCscView) { + // Initialize feature matrix... + uint32 numDense = 2; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 0.0; + rowIndices[0] = 0; + data[1] = 0.0; + rowIndices[1] = 1; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(OrdinalFeatureTypeTest, createOrdinalFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 5; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 1.0; + rowIndices[0] = 0; + data[1] = NAN; + rowIndices[1] = 2; + data[2] = 1.0; + rowIndices[2] = 3; + data[3] = NAN; + rowIndices[3] = 5; + data[4] = -1.0; + rowIndices[4] = 6; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense + 3, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const OrdinalFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 2); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + OrdinalFeatureVector::value_const_iterator valueIterator = featureVector->values_cbegin(); + EXPECT_EQ(valueIterator[0], (int32) -1); + EXPECT_EQ(valueIterator[1], (int32) 1); + + // Check indices associated with the feature values... + for (uint32 i = 0; i < 2; i++) { + int32 value = valueIterator[i]; + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(i); it != featureVector->indices_cend(i); it++) { + indices.emplace(*it); + } + + if (value == -1) { + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 1); + EXPECT_TRUE(indices.find(6) != indices.end()); + } else { + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + } + } + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(OrdinalFeatureTypeTest, createBinaryFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 4; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + data[0] = 1.0; + rowIndices[0] = 0; + data[1] = NAN; + rowIndices[1] = 2; + data[2] = 1.0; + rowIndices[2] = 3; + data[3] = NAN; + rowIndices[3] = 5; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense + 3, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const BinaryFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + // Check dimensionality of feature vector... + EXPECT_FLOAT_EQ(featureVector->getMajorityValue(), (int32) 0); + EXPECT_EQ(featureVector->getNumElements(), (uint32) 1); + + // Check for missing feature values.... + EXPECT_TRUE(featureVector->isMissing(2)); + EXPECT_TRUE(featureVector->isMissing(5)); + + // Check for regular feature values... + int32 minorityValue = featureVector->values_cbegin()[0]; + EXPECT_EQ(minorityValue, (int32) 1); + + // Check indices associated with the feature values... + std::unordered_set indices; + + for (auto it = featureVector->indices_cbegin(0); it != featureVector->indices_cend(0); it++) { + indices.emplace(*it); + } + + EXPECT_EQ(indices.size(), (std::unordered_set::size_type) 2); + EXPECT_TRUE(indices.find(0) != indices.end()); + EXPECT_TRUE(indices.find(3) != indices.end()); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +} + +TEST(OrdinalFeatureTypeTest, createEqualFeatureVectorFromCscView) { + // Initialize feature matrix... + uint32 numDense = 0; + float32* data = new float32[numDense]; + uint32* rowIndices = new uint32[numDense]; + uint32* indptr = new uint32[2]; + indptr[0] = 0; + indptr[1] = numDense; + CscConstView view(numDense + 3, 1, data, rowIndices, indptr); + + // Create feature vector... + std::unique_ptr featureVectorPtr = OrdinalFeatureType().createFeatureVector(0, view); + + // Check type of feature vector... + const EqualFeatureVector* featureVector = dynamic_cast(featureVectorPtr.get()); + EXPECT_TRUE(featureVector != nullptr); + + delete[] data; + delete[] rowIndices; + delete[] indptr; +}