diff --git a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp
index 727bf06681..805fcf60b3 100644
--- a/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp
+++ b/cpp/subprojects/common/include/mlrl/common/input/feature_vector_nominal.hpp
@@ -14,14 +14,24 @@ class NominalFeatureVector : public AbstractFeatureVector {
 
         int32* values_;
 
-        uint32* indices_;
-
-        uint32* indptr_;
-
         const uint32 numValues_;
 
         const int32 majorityValue_;
 
+    protected:
+
+        /**
+         * A pointer to an array that stores the indices of all examples not associated with the majority value. The
+         * array is allocated via `malloc`, which allows subclasses to shrink it via `realloc`.
+         */
+        uint32* indices_;
+
+        /**
+         * A pointer to an array that stores the indices of the first element in `indices_` that corresponds to a
+         * certain value.
+         */
+        uint32* indptr_;
+
     public:
 
         /**
diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build
index a181587745..8935e30291 100644
--- a/cpp/subprojects/common/meson.build
+++ b/cpp/subprojects/common/meson.build
@@ -128,6 +128,8 @@ test_files = [
     'test/mlrl/common/input/feature_type_nominal.cpp',
     'test/mlrl/common/input/feature_type_numerical.cpp',
     'test/mlrl/common/input/feature_type_ordinal.cpp',
+    'test/mlrl/common/input/feature_vector_binary.cpp',
+    'test/mlrl/common/input/feature_vector_equal.cpp',
     'test/mlrl/common/info.cpp'
 ]
 
diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp
index 66b7146f4d..25b4cb6353 100644
--- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp
+++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_binary.cpp
@@ -1,5 +1,9 @@
 #include "mlrl/common/input/feature_vector_binary.hpp"
 
+#include "mlrl/common/input/feature_vector_equal.hpp"
+
+#include <cstdlib>
+
 BinaryFeatureVector::BinaryFeatureVector(uint32 numMinorityExamples, int32 minorityValue, int32 majorityValue)
     : NominalFeatureVector(1, numMinorityExamples, majorityValue) {
     this->values_begin()[0] = minorityValue;
@@ -8,12 +12,73 @@ BinaryFeatureVector::BinaryFeatureVector(uint32 numMinorityExamples, int32 minor
 
 std::unique_ptr<IFeatureVector> BinaryFeatureVector::createFilteredFeatureVector(
   std::unique_ptr<IFeatureVector>& existing, uint32 start, uint32 end) const {
-    // TODO Implement
-    return nullptr;
+    return std::make_unique<EqualFeatureVector>();
 }
 
 std::unique_ptr<IFeatureVector> BinaryFeatureVector::createFilteredFeatureVector(
   std::unique_ptr<IFeatureVector>& existing, const CoverageMask& coverageMask) const {
-    // TODO Implement
-    return nullptr;
+    index_const_iterator indexIterator = this->indices_cbegin(0);
+    index_const_iterator indicesEnd = this->indices_cend(0);
+    uint32 maxIndices = indicesEnd - indexIterator;
+    std::unique_ptr<BinaryFeatureVector> filteredFeatureVectorPtr;
+    BinaryFeatureVector* existingPtr = dynamic_cast<BinaryFeatureVector*>(existing.get());
+
+    if (existingPtr) {
+        // Take over the ownership of the existing vector, which is modified in-place
+        existing.release();
+        filteredFeatureVectorPtr = std::unique_ptr<BinaryFeatureVector>(existingPtr);
+
+        // Filter the indices of examples with missing feature values...
+        for (auto it = filteredFeatureVectorPtr->missing_indices_cbegin();
+             it != filteredFeatureVectorPtr->missing_indices_cend();) {
+            uint32 index = *it;
+            it++;
+
+            if (!coverageMask.isCovered(index)) {
+                filteredFeatureVectorPtr->setMissing(index, false);
+            }
+        }
+    } else {
+        filteredFeatureVectorPtr =
+          std::make_unique<BinaryFeatureVector>(maxIndices, this->values_cbegin()[0], this->getMajorityValue());
+
+        // Add the indices of examples with missing feature values...
+        for (auto it = this->missing_indices_cbegin(); it != this->missing_indices_cend(); it++) {
+            uint32 index = *it;
+
+            if (coverageMask.isCovered(index)) {
+                filteredFeatureVectorPtr->setMissing(index, true);
+            }
+        }
+    }
+
+    // Filter the indices of examples associated with the minority value. If the existing vector is this vector itself,
+    // the array is filtered in-place, which is safe, because the write position `n` never exceeds `i`.
+    // NOTE(review): presumably, the array of a reused vector always holds at least `maxIndices` elements -- confirm
+    index_iterator filteredIndexIterator = filteredFeatureVectorPtr->indices_begin(0);
+    uint32 n = 0;
+
+    for (uint32 i = 0; i < maxIndices; i++) {
+        uint32 index = indexIterator[i];
+
+        if (coverageMask.isCovered(index)) {
+            filteredIndexIterator[n] = index;
+            n++;
+        }
+    }
+
+    if (n > 0) {
+        // Shrink the array to the number of covered examples. If `realloc` fails, it returns a null pointer and leaves
+        // the original array untouched. In this case, the larger array is kept instead of overwriting (and leaking) it
+        uint32* shrunkIndices = static_cast<uint32*>(realloc(filteredFeatureVectorPtr->indices_, n * sizeof(uint32)));
+
+        if (shrunkIndices) {
+            filteredFeatureVectorPtr->indices_ = shrunkIndices;
+        }
+
+        filteredFeatureVectorPtr->indptr_[1] = n;
+        return filteredFeatureVectorPtr;
+    }
+
+    return std::make_unique<EqualFeatureVector>();
 }
diff --git a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp
index bdad091bdf..75cdd30f86 100644
--- a/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp
+++ b/cpp/subprojects/common/src/mlrl/common/input/feature_vector_nominal.cpp
@@ -1,15 +1,29 @@
 #include "mlrl/common/input/feature_vector_nominal.hpp"
 
+#include <cstdlib>
+#include <new>
+
 NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue)
-    : values_(new int32[numValues]), indices_(new uint32[numExamples]), indptr_(new uint32[numValues + 1]),
-      numValues_(numValues), majorityValue_(majorityValue) {
+    : values_(static_cast<int32*>(malloc(numValues * sizeof(int32)))), numValues_(numValues),
+      majorityValue_(majorityValue), indices_(static_cast<uint32*>(malloc(numExamples * sizeof(uint32)))),
+      indptr_(static_cast<uint32*>(malloc((numValues + 1) * sizeof(uint32)))) {
+    // Unlike `new[]`, `malloc` reports a failure by returning a null pointer. For a size of zero, it may legitimately
+    // return a null pointer as well, so `values_` and `indices_` are only checked if they should not be empty
+    if (!indptr_ || (numValues > 0 && !values_) || (numExamples > 0 && !indices_)) {
+        // The destructor is not invoked if the constructor throws, so the arrays must be freed here
+        free(values_);
+        free(indices_);
+        free(indptr_);
+        throw std::bad_alloc();
+    }
+
     indptr_[numValues] = numExamples;
 }
 
 NominalFeatureVector::~NominalFeatureVector() {
-    delete[] values_;
-    delete[] indices_;
-    delete[] indptr_;
+    free(values_);
+    free(indices_);
+    free(indptr_);
 }
 
 NominalFeatureVector::value_iterator NominalFeatureVector::values_begin() {
diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_binary.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_binary.cpp
new file mode 100644
index 0000000000..7ed55b58d0
--- /dev/null
+++
b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_binary.cpp
@@ -0,0 +1,156 @@
+#include "mlrl/common/input/feature_vector_binary.hpp"
+
+#include "mlrl/common/input/feature_vector_equal.hpp"
+
+#include <gtest/gtest.h>
+#include <unordered_set>
+
+TEST(BinaryFeatureVectorTest, createFilteredFeatureVectorFromIndices) {
+    BinaryFeatureVector featureVector(10, 0, 1);
+    std::unique_ptr<IFeatureVector> existing;
+    std::unique_ptr<IFeatureVector> filtered = featureVector.createFilteredFeatureVector(existing, 0, 1);
+    const EqualFeatureVector* filteredFeatureVector = dynamic_cast<const EqualFeatureVector*>(filtered.get());
+    EXPECT_TRUE(filteredFeatureVector != nullptr);
+}
+
+TEST(BinaryFeatureVectorTest, createFilteredFeatureVectorFromCoverageMask) {
+    uint32 numMinorityExamples = 10;
+    BinaryFeatureVector featureVector(numMinorityExamples, 0, 1);
+    BinaryFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0);
+
+    for (uint32 i = 0; i < numMinorityExamples; i++) {
+        indexIterator[i] = i;
+    }
+
+    uint32 numMissingIndices = 10;
+
+    for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingIndices; i++) {
+        featureVector.setMissing(i, true);
+    }
+
+    CoverageMask coverageMask(numMinorityExamples + numMissingIndices);
+    uint32 indicatorValue = 1;
+    coverageMask.setIndicatorValue(indicatorValue);
+    CoverageMask::iterator coverageMaskIterator = coverageMask.begin();
+
+    for (uint32 i = 0; i < numMinorityExamples + numMissingIndices; i++) {
+        if (i % 2 == 0) {
+            coverageMaskIterator[i] = indicatorValue;
+        }
+    }
+
+    std::unique_ptr<IFeatureVector> existing;
+    std::unique_ptr<IFeatureVector> filtered = featureVector.createFilteredFeatureVector(existing, coverageMask);
+    const BinaryFeatureVector* filteredFeatureVector = dynamic_cast<const BinaryFeatureVector*>(filtered.get());
+    // Abort the test if the cast failed, because the pointer is dereferenced below
+    ASSERT_TRUE(filteredFeatureVector != nullptr);
+
+    // Check filtered indices...
+    BinaryFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector->indices_cbegin(0);
+    BinaryFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector->indices_cend(0);
+    uint32 numIndices = indicesEnd - indicesBegin;
+    EXPECT_EQ(numIndices, numMinorityExamples / 2);
+    std::unordered_set<uint32> indices;
+
+    for (auto it = indicesBegin; it != indicesEnd; it++) {
+        indices.emplace(*it);
+    }
+
+    for (uint32 i = 0; i < numMinorityExamples; i++) {
+        if (i % 2 == 0) {
+            EXPECT_TRUE(indices.find(i) != indices.end());
+        } else {
+            EXPECT_TRUE(indices.find(i) == indices.end());
+        }
+    }
+
+    // Check missing indices...
+    for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingIndices; i++) {
+        if (i % 2 == 0) {
+            EXPECT_TRUE(filteredFeatureVector->isMissing(i));
+        } else {
+            EXPECT_FALSE(filteredFeatureVector->isMissing(i));
+        }
+    }
+}
+
+TEST(BinaryFeatureVectorTest, createFilteredFeatureVectorFromCoverageMaskUsingExisting) {
+    uint32 numMinorityExamples = 10;
+    std::unique_ptr<BinaryFeatureVector> featureVector =
+      std::make_unique<BinaryFeatureVector>(numMinorityExamples, 0, 1);
+    BinaryFeatureVector::index_iterator indexIterator = featureVector->indices_begin(0);
+
+    for (uint32 i = 0; i < numMinorityExamples; i++) {
+        indexIterator[i] = i;
+    }
+
+    uint32 numMissingIndices = 10;
+
+    for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingIndices; i++) {
+        featureVector->setMissing(i, true);
+    }
+
+    CoverageMask coverageMask(numMinorityExamples + numMissingIndices);
+    uint32 indicatorValue = 1;
+    coverageMask.setIndicatorValue(indicatorValue);
+    CoverageMask::iterator coverageMaskIterator = coverageMask.begin();
+
+    for (uint32 i = 0; i < numMinorityExamples + numMissingIndices; i++) {
+        if (i % 2 == 0) {
+            coverageMaskIterator[i] = indicatorValue;
+        }
+    }
+
+    std::unique_ptr<IFeatureVector> existing = std::move(featureVector);
+    std::unique_ptr<IFeatureVector> filtered = existing->createFilteredFeatureVector(existing, coverageMask);
+    const BinaryFeatureVector* filteredFeatureVector = dynamic_cast<const BinaryFeatureVector*>(filtered.get());
+    // Abort the test if the cast failed, because the pointer is dereferenced below
+    ASSERT_TRUE(filteredFeatureVector != nullptr);
+    EXPECT_TRUE(existing.get() == nullptr);
+
+    // Check filtered indices...
+    BinaryFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector->indices_cbegin(0);
+    BinaryFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector->indices_cend(0);
+    uint32 numIndices = indicesEnd - indicesBegin;
+    EXPECT_EQ(numIndices, numMinorityExamples / 2);
+    std::unordered_set<uint32> indices;
+
+    for (auto it = indicesBegin; it != indicesEnd; it++) {
+        indices.emplace(*it);
+    }
+
+    for (uint32 i = 0; i < numMinorityExamples; i++) {
+        if (i % 2 == 0) {
+            EXPECT_TRUE(indices.find(i) != indices.end());
+        } else {
+            EXPECT_TRUE(indices.find(i) == indices.end());
+        }
+    }
+
+    // Check missing indices...
+    for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingIndices; i++) {
+        if (i % 2 == 0) {
+            EXPECT_TRUE(filteredFeatureVector->isMissing(i));
+        } else {
+            EXPECT_FALSE(filteredFeatureVector->isMissing(i));
+        }
+    }
+}
+
+TEST(BinaryFeatureVectorTest, createFilteredFeatureVectorFromCoverageMaskReturnsEqualFeatureVector) {
+    uint32 numMinorityExamples = 10;
+    BinaryFeatureVector featureVector(numMinorityExamples, 0, 1);
+    BinaryFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0);
+
+    for (uint32 i = 0; i < numMinorityExamples; i++) {
+        indexIterator[i] = i;
+    }
+
+    CoverageMask coverageMask(numMinorityExamples);
+    coverageMask.setIndicatorValue(1);
+
+    std::unique_ptr<IFeatureVector> existing;
+    std::unique_ptr<IFeatureVector> filtered = featureVector.createFilteredFeatureVector(existing, coverageMask);
+    const EqualFeatureVector* filteredFeatureVector = dynamic_cast<const EqualFeatureVector*>(filtered.get());
+    EXPECT_TRUE(filteredFeatureVector != nullptr);
+}
diff --git a/cpp/subprojects/common/test/mlrl/common/input/feature_vector_equal.cpp b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_equal.cpp
new file mode 100644
index 0000000000..2fe1ccc497
--- /dev/null
+++
b/cpp/subprojects/common/test/mlrl/common/input/feature_vector_equal.cpp
@@ -0,0 +1,39 @@
+#include "mlrl/common/input/feature_vector_equal.hpp"
+
+#include <gtest/gtest.h>
+
+TEST(EqualFeatureVectorTest, createFilteredFeatureVectorFromIndices) {
+    EqualFeatureVector featureVector;
+    std::unique_ptr<IFeatureVector> existing;
+    std::unique_ptr<IFeatureVector> filtered = featureVector.createFilteredFeatureVector(existing, 0, 1);
+    const EqualFeatureVector* filteredFeatureVector = dynamic_cast<const EqualFeatureVector*>(filtered.get());
+    EXPECT_TRUE(filteredFeatureVector != nullptr);
+}
+
+TEST(EqualFeatureVectorTest, createFilteredFeatureVectorFromIndicesUsingExisting) {
+    EqualFeatureVector featureVector;
+    std::unique_ptr<IFeatureVector> existing = std::make_unique<EqualFeatureVector>();
+    std::unique_ptr<IFeatureVector> filtered = featureVector.createFilteredFeatureVector(existing, 0, 1);
+    const EqualFeatureVector* filteredFeatureVector = dynamic_cast<const EqualFeatureVector*>(filtered.get());
+    EXPECT_TRUE(filteredFeatureVector != nullptr);
+    EXPECT_TRUE(existing.get() == nullptr);
+}
+
+TEST(EqualFeatureVectorTest, createFilteredFeatureVectorFromCoverageMask) {
+    EqualFeatureVector featureVector;
+    std::unique_ptr<IFeatureVector> existing;
+    CoverageMask coverageMask(10);
+    std::unique_ptr<IFeatureVector> filtered = featureVector.createFilteredFeatureVector(existing, coverageMask);
+    const EqualFeatureVector* filteredFeatureVector = dynamic_cast<const EqualFeatureVector*>(filtered.get());
+    EXPECT_TRUE(filteredFeatureVector != nullptr);
+}
+
+TEST(EqualFeatureVectorTest, createFilteredFeatureVectorFromCoverageMaskUsingExisting) {
+    EqualFeatureVector featureVector;
+    std::unique_ptr<IFeatureVector> existing = std::make_unique<EqualFeatureVector>();
+    CoverageMask coverageMask(10);
+    std::unique_ptr<IFeatureVector> filtered = featureVector.createFilteredFeatureVector(existing, coverageMask);
+    const EqualFeatureVector* filteredFeatureVector = dynamic_cast<const EqualFeatureVector*>(filtered.get());
+    EXPECT_TRUE(filteredFeatureVector != nullptr);
+    EXPECT_TRUE(existing.get() == nullptr);
+}