Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filterung eines BinaryFeatureVector #789

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,23 @@ class NominalFeatureVector : public AbstractFeatureVector {

int32* values_;

uint32* indices_;

uint32* indptr_;

const uint32 numValues_;

const int32 majorityValue_;

protected:

/**
* A pointer to an array that stores the indices of all examples not associated with the majority value.
*/
uint32* indices_;

/**
* A pointer to an array that stores the indices of the first element in `indices_` that corresponds to a
* certain value.
*/
uint32* indptr_;

public:

/**
Expand Down
2 changes: 2 additions & 0 deletions cpp/subprojects/common/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ test_files = [
'test/mlrl/common/input/feature_type_nominal.cpp',
'test/mlrl/common/input/feature_type_numerical.cpp',
'test/mlrl/common/input/feature_type_ordinal.cpp',
'test/mlrl/common/input/feature_vector_binary.cpp',
'test/mlrl/common/input/feature_vector_equal.cpp',
'test/mlrl/common/info.cpp'
]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "mlrl/common/input/feature_vector_binary.hpp"

#include "mlrl/common/input/feature_vector_equal.hpp"

BinaryFeatureVector::BinaryFeatureVector(uint32 numMinorityExamples, int32 minorityValue, int32 majorityValue)
: NominalFeatureVector(1, numMinorityExamples, majorityValue) {
this->values_begin()[0] = minorityValue;
Expand All @@ -8,12 +10,63 @@ BinaryFeatureVector::BinaryFeatureVector(uint32 numMinorityExamples, int32 minor

/**
 * Creates a filtered feature vector for a range of positions.
 *
 * The arguments are currently not inspected: a newly allocated `EqualFeatureVector` is returned unconditionally and
 * `existing` is left untouched.
 *
 * @param existing  A reference to a unique pointer that may hold an existing feature vector (unused)
 * @param start     The start of the range (unused)
 * @param end       The end of the range (unused)
 * @return          A unique pointer to a new object of type `EqualFeatureVector`
 */
std::unique_ptr<IFeatureVector> BinaryFeatureVector::createFilteredFeatureVector(
  std::unique_ptr<IFeatureVector>& existing, uint32 start, uint32 end) const {
    // The former placeholder `return nullptr;` preceded this statement and made it unreachable
    return std::make_unique<EqualFeatureVector>();
}

/**
 * Creates a feature vector that only retains the examples of this vector that are covered according to a given
 * `CoverageMask`.
 *
 * If `existing` holds a `BinaryFeatureVector`, ownership of that object is taken (`existing` is empty afterwards) and
 * it is reused as the result: its missing indices that are not covered are unset. Otherwise, a new
 * `BinaryFeatureVector` is allocated and the covered missing indices of this vector are copied into it. In both
 * cases, the covered indices of the examples associated with the minority value are written to the result.
 *
 * NOTE(review): when `existing` is reused, up to as many indices as this vector stores are written into its index
 * array. This assumes that array is at least that large (e.g., because `existing` is this very object) - TODO
 * confirm against the callers.
 *
 * @param existing      A reference to a unique pointer that may hold an existing feature vector to be reused
 * @param coverageMask  The `CoverageMask` that is queried via `isCovered` for each example index
 * @return              A unique pointer to the filtered `BinaryFeatureVector` or, if none of the examples associated
 *                      with the minority value is covered, to a new `EqualFeatureVector`
 */
std::unique_ptr<IFeatureVector> BinaryFeatureVector::createFilteredFeatureVector(
  std::unique_ptr<IFeatureVector>& existing, const CoverageMask& coverageMask) const {
    index_const_iterator indexIterator = this->indices_cbegin(0);
    index_const_iterator indicesEnd = this->indices_cend(0);
    uint32 maxIndices = indicesEnd - indexIterator;
    std::unique_ptr<BinaryFeatureVector> filteredFeatureVectorPtr;
    BinaryFeatureVector* existingPtr = dynamic_cast<BinaryFeatureVector*>(existing.get());

    if (existingPtr) {
        // Transfer ownership of the existing vector to the result pointer...
        existing.release();
        filteredFeatureVectorPtr = std::unique_ptr<BinaryFeatureVector>(existingPtr);

        // Filter the indices of examples with missing feature values...
        for (auto it = filteredFeatureVectorPtr->missing_indices_cbegin();
             it != filteredFeatureVectorPtr->missing_indices_cend();) {
            uint32 index = *it;
            // The iterator is advanced before the entry is possibly unset, presumably to keep it valid
            it++;

            if (!coverageMask.isCovered(index)) {
                filteredFeatureVectorPtr->setMissing(index, false);
            }
        }
    } else {
        filteredFeatureVectorPtr =
          std::make_unique<BinaryFeatureVector>(maxIndices, this->values_cbegin()[0], this->getMajorityValue());

        // Add the indices of examples with missing feature values...
        for (auto it = this->missing_indices_cbegin(); it != this->missing_indices_cend(); it++) {
            uint32 index = *it;

            if (coverageMask.isCovered(index)) {
                filteredFeatureVectorPtr->setMissing(index, true);
            }
        }
    }

    // Filter the indices of examples associated with the minority value...
    index_iterator filteredIndexIterator = filteredFeatureVectorPtr->indices_begin(0);
    uint32 n = 0;

    for (uint32 i = 0; i < maxIndices; i++) {
        uint32 index = indexIterator[i];

        if (coverageMask.isCovered(index)) {
            // `n <= i` always holds, so this also works if source and target are the same array
            filteredIndexIterator[n] = index;
            n++;
        }
    }

    if (n > 0) {
        // Shrink the index array to the number of retained indices. If `realloc` fails, it returns a null pointer
        // and leaves the original (larger) array intact, so we simply keep using that array instead of leaking it.
        uint32* shrunkIndices = static_cast<uint32*>(realloc(filteredFeatureVectorPtr->indices_, n * sizeof(uint32)));

        if (shrunkIndices) {
            filteredFeatureVectorPtr->indices_ = shrunkIndices;
        }

        filteredFeatureVectorPtr->indptr_[1] = n;
        return filteredFeatureVectorPtr;
    }

    // None of the examples associated with the minority value is covered
    return std::make_unique<EqualFeatureVector>();
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
#include "mlrl/common/input/feature_vector_nominal.hpp"

#include <cstdlib>

/**
 * Allocates the arrays of a nominal feature vector.
 *
 * All arrays are allocated via `malloc`, such that they can be resized via `realloc` and must be released via `free`
 * (never via `delete[]`). The last element of `indptr_` is initialized with `numExamples`; all other elements of the
 * arrays are left uninitialized.
 *
 * @param numValues     The number of elements to be allocated for `values_` (`indptr_` gets one element more)
 * @param numExamples   The number of elements to be allocated for `indices_`
 * @param majorityValue The majority value to be stored
 */
NominalFeatureVector::NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue)
    : values_(static_cast<int32*>(malloc(numValues * sizeof(int32)))), numValues_(numValues),
      majorityValue_(majorityValue), indices_(static_cast<uint32*>(malloc(numExamples * sizeof(uint32)))),
      indptr_(static_cast<uint32*>(malloc((numValues + 1) * sizeof(uint32)))) {
    indptr_[numValues] = numExamples;
}

/**
 * Releases the arrays allocated by the constructor. As they stem from `malloc`, they are released exactly once via
 * `free`.
 */
NominalFeatureVector::~NominalFeatureVector() {
    free(values_);
    free(indices_);
    free(indptr_);
}

NominalFeatureVector::value_iterator NominalFeatureVector::values_begin() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#include "mlrl/common/input/feature_vector_binary.hpp"

#include "mlrl/common/input/feature_vector_equal.hpp"

#include <gtest/gtest.h>

// Filtering a `BinaryFeatureVector` by a range of positions is expected to yield an `EqualFeatureVector`.
TEST(BinaryFeatureVectorTest, createFilteredFeatureVectorFromIndices) {
    BinaryFeatureVector vector(10, 0, 1);
    std::unique_ptr<IFeatureVector> existing;
    std::unique_ptr<IFeatureVector> result = vector.createFilteredFeatureVector(existing, 0, 1);

    // The cast only succeeds if the result has the expected dynamic type
    const auto* equalFeatureVector = dynamic_cast<const EqualFeatureVector*>(result.get());
    EXPECT_TRUE(equalFeatureVector != nullptr);
}

// Filtering a `BinaryFeatureVector` via a `CoverageMask`, without an existing vector to reuse, is expected to yield a
// `BinaryFeatureVector` that only contains the minority and missing indices the assertions below treat as covered
// (the even ones).
TEST(BinaryFeatureVectorTest, createFilteredFeatureVectorFromCoverageMask) {
    uint32 numMinorityExamples = 10;
    BinaryFeatureVector featureVector(numMinorityExamples, 0, 1);
    BinaryFeatureVector::index_iterator indexIterator = featureVector.indices_begin(0);

    // The examples 0..9 are associated with the minority value...
    for (uint32 i = 0; i < numMinorityExamples; i++) {
        indexIterator[i] = i;
    }

    uint32 numMissingIndices = 10;

    // ...and the examples 10..19 have missing feature values
    for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingIndices; i++) {
        featureVector.setMissing(i, true);
    }

    // Set the mask entries of all even examples to the indicator value
    CoverageMask coverageMask(numMinorityExamples + numMissingIndices);
    uint32 indicatorValue = 1;
    coverageMask.setIndicatorValue(indicatorValue);
    CoverageMask::iterator coverageMaskIterator = coverageMask.begin();

    for (uint32 i = 0; i < numMinorityExamples + numMissingIndices; i++) {
        if (i % 2 == 0) {
            coverageMaskIterator[i] = indicatorValue;
        }
    }

    std::unique_ptr<IFeatureVector> existing;
    std::unique_ptr<IFeatureVector> filtered = featureVector.createFilteredFeatureVector(existing, coverageMask);
    const BinaryFeatureVector* filteredFeatureVector = dynamic_cast<const BinaryFeatureVector*>(filtered.get());
    // Fatal assertion: the pointer is dereferenced below, so the test must stop here if the cast failed
    ASSERT_TRUE(filteredFeatureVector != nullptr);

    // Check filtered indices...
    BinaryFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector->indices_cbegin(0);
    BinaryFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector->indices_cend(0);
    uint32 numIndices = indicesEnd - indicesBegin;
    EXPECT_EQ(numIndices, numMinorityExamples / 2);
    std::unordered_set<uint32> indices;

    for (auto it = indicesBegin; it != indicesEnd; it++) {
        indices.emplace(*it);
    }

    for (uint32 i = 0; i < numMinorityExamples; i++) {
        if (i % 2 == 0) {
            EXPECT_TRUE(indices.find(i) != indices.end());
        } else {
            EXPECT_TRUE(indices.find(i) == indices.end());
        }
    }

    // Check missing indices...
    for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingIndices; i++) {
        if (i % 2 == 0) {
            EXPECT_TRUE(filteredFeatureVector->isMissing(i));
        } else {
            EXPECT_FALSE(filteredFeatureVector->isMissing(i));
        }
    }
}

// Filtering a `BinaryFeatureVector` via a `CoverageMask`, while passing the vector itself as the existing vector, is
// expected to take ownership of the existing vector and to yield a `BinaryFeatureVector` that only contains the
// minority and missing indices the assertions below treat as covered (the even ones).
TEST(BinaryFeatureVectorTest, createFilteredFeatureVectorFromCoverageMaskUsingExisting) {
    uint32 numMinorityExamples = 10;
    std::unique_ptr<BinaryFeatureVector> featureVector =
      std::make_unique<BinaryFeatureVector>(numMinorityExamples, 0, 1);
    BinaryFeatureVector::index_iterator indexIterator = featureVector->indices_begin(0);

    // The examples 0..9 are associated with the minority value...
    for (uint32 i = 0; i < numMinorityExamples; i++) {
        indexIterator[i] = i;
    }

    uint32 numMissingIndices = 10;

    // ...and the examples 10..19 have missing feature values
    for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingIndices; i++) {
        featureVector->setMissing(i, true);
    }

    // Set the mask entries of all even examples to the indicator value
    CoverageMask coverageMask(numMinorityExamples + numMissingIndices);
    uint32 indicatorValue = 1;
    coverageMask.setIndicatorValue(indicatorValue);
    CoverageMask::iterator coverageMaskIterator = coverageMask.begin();

    for (uint32 i = 0; i < numMinorityExamples + numMissingIndices; i++) {
        if (i % 2 == 0) {
            coverageMaskIterator[i] = indicatorValue;
        }
    }

    // The vector to be filtered is also handed over as the existing vector
    std::unique_ptr<IFeatureVector> existing = std::move(featureVector);
    std::unique_ptr<IFeatureVector> filtered = existing->createFilteredFeatureVector(existing, coverageMask);
    const BinaryFeatureVector* filteredFeatureVector = dynamic_cast<const BinaryFeatureVector*>(filtered.get());
    // Fatal assertion: the pointer is dereferenced below, so the test must stop here if the cast failed
    ASSERT_TRUE(filteredFeatureVector != nullptr);
    EXPECT_TRUE(existing.get() == nullptr);

    // Check filtered indices...
    BinaryFeatureVector::index_const_iterator indicesBegin = filteredFeatureVector->indices_cbegin(0);
    BinaryFeatureVector::index_const_iterator indicesEnd = filteredFeatureVector->indices_cend(0);
    uint32 numIndices = indicesEnd - indicesBegin;
    EXPECT_EQ(numIndices, numMinorityExamples / 2);
    std::unordered_set<uint32> indices;

    for (auto it = indicesBegin; it != indicesEnd; it++) {
        indices.emplace(*it);
    }

    for (uint32 i = 0; i < numMinorityExamples; i++) {
        if (i % 2 == 0) {
            EXPECT_TRUE(indices.find(i) != indices.end());
        } else {
            EXPECT_TRUE(indices.find(i) == indices.end());
        }
    }

    // Check missing indices...
    for (uint32 i = numMinorityExamples; i < numMinorityExamples + numMissingIndices; i++) {
        if (i % 2 == 0) {
            EXPECT_TRUE(filteredFeatureVector->isMissing(i));
        } else {
            EXPECT_FALSE(filteredFeatureVector->isMissing(i));
        }
    }
}

// Filtering a `BinaryFeatureVector` via a `CoverageMask` whose entries are never set to the indicator value is
// expected to yield an `EqualFeatureVector`.
TEST(BinaryFeatureVectorTest, createFilteredFeatureVectorFromCoverageMaskReturnsEqualFeatureVector) {
    uint32 numExamples = 10;
    BinaryFeatureVector vector(numExamples, 0, 1);
    BinaryFeatureVector::index_iterator minorityIndices = vector.indices_begin(0);

    // Associate the examples 0..9 with the minority value
    for (uint32 exampleIndex = 0; exampleIndex < numExamples; exampleIndex++) {
        minorityIndices[exampleIndex] = exampleIndex;
    }

    // Only the indicator value is changed, no entry of the mask is modified
    CoverageMask mask(numExamples);
    mask.setIndicatorValue(1);

    std::unique_ptr<IFeatureVector> existing;
    std::unique_ptr<IFeatureVector> result = vector.createFilteredFeatureVector(existing, mask);
    const auto* equalFeatureVector = dynamic_cast<const EqualFeatureVector*>(result.get());
    EXPECT_TRUE(equalFeatureVector != nullptr);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include "mlrl/common/input/feature_vector_equal.hpp"

#include <gtest/gtest.h>

// Filtering an `EqualFeatureVector` by a range of positions, without an existing vector to reuse, is expected to yield
// an `EqualFeatureVector`.
TEST(EqualFeatureVectorTest, createFilteredFeatureVectorFromIndices) {
    EqualFeatureVector vector;
    std::unique_ptr<IFeatureVector> existing;
    std::unique_ptr<IFeatureVector> result = vector.createFilteredFeatureVector(existing, 0, 1);

    // The cast only succeeds if the result has the expected dynamic type
    const auto* equalFeatureVector = dynamic_cast<const EqualFeatureVector*>(result.get());
    EXPECT_TRUE(equalFeatureVector != nullptr);
}

// Filtering an `EqualFeatureVector` by a range of positions, with an existing `EqualFeatureVector` handed over, is
// expected to yield an `EqualFeatureVector` and to leave the pointer to the existing vector empty.
TEST(EqualFeatureVectorTest, createFilteredFeatureVectorFromIndicesUsingExisting) {
    EqualFeatureVector vector;
    std::unique_ptr<IFeatureVector> existing = std::make_unique<EqualFeatureVector>();
    std::unique_ptr<IFeatureVector> result = vector.createFilteredFeatureVector(existing, 0, 1);

    // The cast only succeeds if the result has the expected dynamic type
    const auto* equalFeatureVector = dynamic_cast<const EqualFeatureVector*>(result.get());
    EXPECT_TRUE(equalFeatureVector != nullptr);

    // Ownership of the existing vector must have been taken over
    EXPECT_TRUE(existing.get() == nullptr);
}

// Filtering an `EqualFeatureVector` via a `CoverageMask`, without an existing vector to reuse, is expected to yield an
// `EqualFeatureVector`.
TEST(EqualFeatureVectorTest, createFilteredFeatureVectorFromCoverageMask) {
    EqualFeatureVector vector;
    CoverageMask mask(10);
    std::unique_ptr<IFeatureVector> existing;
    std::unique_ptr<IFeatureVector> result = vector.createFilteredFeatureVector(existing, mask);

    // The cast only succeeds if the result has the expected dynamic type
    const auto* equalFeatureVector = dynamic_cast<const EqualFeatureVector*>(result.get());
    EXPECT_TRUE(equalFeatureVector != nullptr);
}

// Filtering an `EqualFeatureVector` via a `CoverageMask`, with an existing `EqualFeatureVector` handed over, is
// expected to yield an `EqualFeatureVector` and to leave the pointer to the existing vector empty.
TEST(EqualFeatureVectorTest, createFilteredFeatureVectorFromCoverageMaskUsingExisting) {
    EqualFeatureVector vector;
    CoverageMask mask(10);
    std::unique_ptr<IFeatureVector> existing = std::make_unique<EqualFeatureVector>();
    std::unique_ptr<IFeatureVector> result = vector.createFilteredFeatureVector(existing, mask);

    // The cast only succeeds if the result has the expected dynamic type
    const auto* equalFeatureVector = dynamic_cast<const EqualFeatureVector*>(result.get());
    EXPECT_TRUE(equalFeatureVector != nullptr);

    // Ownership of the existing vector must have been taken over
    EXPECT_TRUE(existing.get() == nullptr);
}