Skip to content

Commit

Permalink
Merge pull request #787 from mrapp-ke/create-ordinal-feature-vector
Browse files Browse the repository at this point in the history
Erzeugen eines OrdinalFeatureVector
  • Loading branch information
michael-rapp authored Oct 12, 2023
2 parents 6337019 + 5104e06 commit bcceffa
Show file tree
Hide file tree
Showing 7 changed files with 679 additions and 184 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
* value, i.e., the most frequent value, of an ordinal feature.
*/
class OrdinalFeatureVector : public NominalFeatureVector {
private:

uint32* order_;

public:

/**
Expand All @@ -23,39 +19,4 @@ class OrdinalFeatureVector : public NominalFeatureVector {
* @param majorityValue The majority value, i.e., the most frequent value, of the ordinal feature
*/
OrdinalFeatureVector(uint32 numValues, uint32 numExamples, int32 majorityValue);

~OrdinalFeatureVector() override;

/**
* Returns an `index_iterator` to the beginning of the ordered indices of the values of the ordinal feature.
*
* @param index The index of the value
* @return An `index_iterator` to the beginning
*/
index_iterator order_begin(uint32 index);

/**
* Returns an `index_iterator` to the end of the ordered indices of the values of the ordinal feature.
*
* @param index The index of the value
* @return An `index_iterator` to the end
*/
index_iterator order_end(uint32 index);

/**
* Returns an `index_const_iterator` to the beginning of the ordered indices of the values of the ordinal
* feature.
*
* @param index The index of the value
* @return An `index_const_iterator` to the beginning
*/
index_const_iterator order_cbegin(uint32 index) const;

/**
* Returns an `index_const_iterator` to the end of the ordered indices of the values of the ordinal feature.
*
* @param index The index of the value
* @return An `index_const_iterator` to the end
*/
index_const_iterator order_cend(uint32 index) const;
};
1 change: 1 addition & 0 deletions cpp/subprojects/common/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ source_files = [
test_files = [
'test/mlrl/common/input/feature_type_nominal.cpp',
'test/mlrl/common/input/feature_type_numerical.cpp',
'test/mlrl/common/input/feature_type_ordinal.cpp',
'test/mlrl/common/info.cpp'
]

Expand Down
121 changes: 1 addition & 120 deletions cpp/subprojects/common/src/mlrl/common/input/feature_type_nominal.cpp
Original file line number Diff line number Diff line change
@@ -1,79 +1,9 @@
#include "mlrl/common/input/feature_type_nominal.hpp"

#include "mlrl/common/data/tuple.hpp"
#include "mlrl/common/input/feature_vector_binary.hpp"
#include "feature_type_nominal_common.hpp"
#include "mlrl/common/input/feature_vector_equal.hpp"
#include "mlrl/common/iterator/index_iterator.hpp"

#include <unordered_map>

/**
 * Fills `mapping` with one entry per distinct, non-NaN value found among the first `numElements` elements of
 * `valueIterator`. Each value is converted to `int32` and used as the key. The associated tuple stores, as its first
 * component, the number of distinct values that had been inserted before the value was first encountered and, as its
 * second component, how often the value has been encountered.
 *
 * @tparam ValueIterator    The type of the iterator that provides random access to the feature values
 * @param valueIterator     An iterator that provides access to the feature values
 * @param numElements       The number of elements to be read from `valueIterator`
 * @param mapping           A reference to the map, the entries should be added to
 * @return                  The number of elements that are not NaN
 */
template<typename ValueIterator>
static inline uint32 createMapping(ValueIterator valueIterator, uint32 numElements,
                                   std::unordered_map<int32, Tuple<uint32>>& mapping) {
    uint32 numDistinctValues = 0;
    uint32 numPresentValues = 0;

    for (uint32 pos = 0; pos < numElements; ++pos) {
        const float32 rawValue = valueIterator[pos];

        // NaN values are neither counted nor added to the mapping
        if (std::isnan(rawValue)) {
            continue;
        }

        const auto insertion =
          mapping.emplace(static_cast<int32>(rawValue), Tuple<uint32> {numDistinctValues, 1});

        if (insertion.second) {
            // The value has been encountered for the first time
            ++numDistinctValues;
        } else {
            // The value is already known, only its counter must be increased
            insertion.first->second.second++;
        }

        ++numPresentValues;
    }

    return numPresentValues;
}

/**
 * Compares the frequencies of the first two entries of `mapping` (in the map's iteration order) and returns their
 * keys as a tuple, where the first component is the less frequent (minority) value and the second component is the
 * more frequent (majority) value. If both frequencies are equal, the entry that comes first in iteration order is
 * reported as the minority value.
 *
 * The given map must contain at least two entries. Otherwise, the behavior is undefined, because an iterator that
 * does not refer to an element is dereferenced.
 *
 * @param mapping   A reference to a map that stores a tuple for each value, where the second component of the tuple
 *                  is the frequency of the value
 * @return          A tuple that stores the minority value, followed by the majority value
 */
static inline Tuple<int32> getMinorityAndMajorityValue(const std::unordered_map<int32, Tuple<uint32>>& mapping) {
    auto it = mapping.cbegin();
    const auto& firstEntry = *it;
    // The frequencies are kept as `uint32`, as stored in the map. Converting them to `int` would yield an
    // implementation-defined (typically negative) value for frequencies of 2^31 or more and invert the comparison
    const uint32 firstFrequency = firstEntry.second.second;
    ++it;
    const auto& secondEntry = *it;
    const uint32 secondFrequency = secondEntry.second.second;
    Tuple<int32> tuple;

    if (firstFrequency > secondFrequency) {
        tuple.first = secondEntry.first;
        tuple.second = firstEntry.first;
    } else {
        tuple.first = firstEntry.first;
        tuple.second = secondEntry.first;
    }

    return tuple;
}

/**
 * Returns the key of the entry in `mapping` with the greatest frequency, i.e., the greatest second component of the
 * associated tuple. If several entries share the greatest frequency, the one that comes first in the map's iteration
 * order is returned.
 *
 * The given map must not be empty, because its first entry is accessed unconditionally.
 *
 * @param mapping   A reference to a map that stores a tuple for each value, where the second component of the tuple
 *                  is the frequency of the value
 * @return          The most frequent value
 */
static inline int32 getMajorityValue(const std::unordered_map<int32, Tuple<uint32>>& mapping) {
    const auto& head = *mapping.cbegin();
    int32 bestValue = head.first;
    uint32 bestFrequency = head.second.second;

    for (const auto& candidate : mapping) {
        const uint32 candidateFrequency = candidate.second.second;

        // A strict comparison keeps the earliest entry in case of ties
        if (candidateFrequency > bestFrequency) {
            bestValue = candidate.first;
            bestFrequency = candidateFrequency;
        }
    }

    return bestValue;
}

template<typename IndexIterator, typename ValueIterator>
static inline std::unique_ptr<NominalFeatureVector> createNominalFeatureVector(
IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements,
Expand Down Expand Up @@ -140,55 +70,6 @@ static inline std::unique_ptr<NominalFeatureVector> createNominalFeatureVector(
numExamples - numMajorityExamples, majorityValue);
}

template<typename IndexIterator, typename ValueIterator>
static inline std::unique_ptr<BinaryFeatureVector> createBinaryFeatureVector(
IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements,
const std::unordered_map<int32, Tuple<uint32>>& mapping, int32 minorityValue, int32 majorityValue) {
const Tuple<uint32>& tuple = mapping.at(minorityValue);
uint32 numMinorityExamples = tuple.second;
std::unique_ptr<BinaryFeatureVector> featureVectorPtr =
std::make_unique<BinaryFeatureVector>(numMinorityExamples, minorityValue, majorityValue);
BinaryFeatureVector::index_iterator vectorIndexIterator = featureVectorPtr->indices_begin(0);
uint32 n = 0;

for (uint32 i = 0; i < numElements; i++) {
uint32 index = indexIterator[i];
float32 value = valueIterator[i];

if (std::isnan(value)) {
featureVectorPtr->setMissing(index, true);
} else {
int32 nominalValue = (int32) value;

if (nominalValue == minorityValue) {
vectorIndexIterator[n] = index;
n++;
}
}
}

return featureVectorPtr;
}

template<typename IndexIterator, typename ValueIterator>
static inline std::unique_ptr<BinaryFeatureVector> createBinaryFeatureVector(
IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements,
std::unordered_map<int32, Tuple<uint32>>& mapping, bool sparse) {
int32 minorityValue;
int32 majorityValue;

if (sparse) {
minorityValue = (*mapping.cbegin()).first;
majorityValue = 0;
} else {
const Tuple<int32> tuple = getMinorityAndMajorityValue(mapping);
minorityValue = tuple.first;
majorityValue = tuple.second;
}

return createBinaryFeatureVector(indexIterator, valueIterator, numElements, mapping, minorityValue, majorityValue);
}

template<typename IndexIterator, typename ValueIterator>
static inline std::unique_ptr<IFeatureVector> createFeatureVectorInternally(
IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* @author Michael Rapp ([email protected])
*/
#pragma once

#include "mlrl/common/data/tuple.hpp"
#include "mlrl/common/input/feature_vector_binary.hpp"
#include "mlrl/common/input/feature_vector_equal.hpp"

#include <unordered_map>

/**
 * Fills `mapping` with one entry per distinct, non-NaN value found among the first `numElements` elements of
 * `valueIterator`. Each value is converted to `int32` and used as the key. The associated tuple stores, as its first
 * component, the number of distinct values that had been inserted before the value was first encountered and, as its
 * second component, how often the value has been encountered.
 *
 * @tparam ValueIterator    The type of the iterator that provides random access to the feature values
 * @param valueIterator     An iterator that provides access to the feature values
 * @param numElements       The number of elements to be read from `valueIterator`
 * @param mapping           A reference to the map, the entries should be added to
 * @return                  The number of elements that are not NaN
 */
template<typename ValueIterator>
static inline uint32 createMapping(ValueIterator valueIterator, uint32 numElements,
                                   std::unordered_map<int32, Tuple<uint32>>& mapping) {
    uint32 numDistinctValues = 0;
    uint32 numPresentValues = 0;

    for (uint32 pos = 0; pos < numElements; ++pos) {
        const float32 rawValue = valueIterator[pos];

        // NaN values are neither counted nor added to the mapping
        if (std::isnan(rawValue)) {
            continue;
        }

        const auto insertion =
          mapping.emplace(static_cast<int32>(rawValue), Tuple<uint32> {numDistinctValues, 1});

        if (insertion.second) {
            // The value has been encountered for the first time
            ++numDistinctValues;
        } else {
            // The value is already known, only its counter must be increased
            insertion.first->second.second++;
        }

        ++numPresentValues;
    }

    return numPresentValues;
}

/**
 * Returns the key of the entry in `mapping` with the greatest frequency, i.e., the greatest second component of the
 * associated tuple. If several entries share the greatest frequency, the one that comes first in the map's iteration
 * order is returned.
 *
 * The given map must not be empty, because its first entry is accessed unconditionally.
 *
 * @param mapping   A reference to a map that stores a tuple for each value, where the second component of the tuple
 *                  is the frequency of the value
 * @return          The most frequent value
 */
static inline int32 getMajorityValue(const std::unordered_map<int32, Tuple<uint32>>& mapping) {
    const auto& head = *mapping.cbegin();
    int32 bestValue = head.first;
    uint32 bestFrequency = head.second.second;

    for (const auto& candidate : mapping) {
        const uint32 candidateFrequency = candidate.second.second;

        // A strict comparison keeps the earliest entry in case of ties
        if (candidateFrequency > bestFrequency) {
            bestValue = candidate.first;
            bestFrequency = candidateFrequency;
        }
    }

    return bestValue;
}

/**
 * Compares the frequencies of the first two entries of `mapping` (in the map's iteration order) and returns their
 * keys as a tuple, where the first component is the less frequent (minority) value and the second component is the
 * more frequent (majority) value. If both frequencies are equal, the entry that comes first in iteration order is
 * reported as the minority value.
 *
 * The given map must contain at least two entries. Otherwise, the behavior is undefined, because an iterator that
 * does not refer to an element is dereferenced.
 *
 * @param mapping   A reference to a map that stores a tuple for each value, where the second component of the tuple
 *                  is the frequency of the value
 * @return          A tuple that stores the minority value, followed by the majority value
 */
static inline Tuple<int32> getMinorityAndMajorityValue(const std::unordered_map<int32, Tuple<uint32>>& mapping) {
    auto it = mapping.cbegin();
    const auto& firstEntry = *it;
    // The frequencies are kept as `uint32`, as stored in the map. Converting them to `int` would yield an
    // implementation-defined (typically negative) value for frequencies of 2^31 or more and invert the comparison
    const uint32 firstFrequency = firstEntry.second.second;
    ++it;
    const auto& secondEntry = *it;
    const uint32 secondFrequency = secondEntry.second.second;
    Tuple<int32> tuple;

    if (firstFrequency > secondFrequency) {
        tuple.first = secondEntry.first;
        tuple.second = firstEntry.first;
    } else {
        tuple.first = firstEntry.first;
        tuple.second = secondEntry.first;
    }

    return tuple;
}

template<typename IndexIterator, typename ValueIterator>
static inline std::unique_ptr<BinaryFeatureVector> createBinaryFeatureVector(
IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements,
const std::unordered_map<int32, Tuple<uint32>>& mapping, int32 minorityValue, int32 majorityValue) {
const Tuple<uint32>& tuple = mapping.at(minorityValue);
uint32 numMinorityExamples = tuple.second;
std::unique_ptr<BinaryFeatureVector> featureVectorPtr =
std::make_unique<BinaryFeatureVector>(numMinorityExamples, minorityValue, majorityValue);
BinaryFeatureVector::index_iterator vectorIndexIterator = featureVectorPtr->indices_begin(0);
uint32 n = 0;

for (uint32 i = 0; i < numElements; i++) {
uint32 index = indexIterator[i];
float32 value = valueIterator[i];

if (std::isnan(value)) {
featureVectorPtr->setMissing(index, true);
} else {
int32 nominalValue = (int32) value;

if (nominalValue == minorityValue) {
vectorIndexIterator[n] = index;
n++;
}
}
}

return featureVectorPtr;
}

template<typename IndexIterator, typename ValueIterator>
static inline std::unique_ptr<BinaryFeatureVector> createBinaryFeatureVector(
IndexIterator indexIterator, ValueIterator valueIterator, uint32 numElements,
std::unordered_map<int32, Tuple<uint32>>& mapping, bool sparse) {
int32 minorityValue;
int32 majorityValue;

if (sparse) {
minorityValue = (*mapping.cbegin()).first;
majorityValue = 0;
} else {
const Tuple<int32> tuple = getMinorityAndMajorityValue(mapping);
minorityValue = tuple.first;
majorityValue = tuple.second;
}

return createBinaryFeatureVector(indexIterator, valueIterator, numElements, mapping, minorityValue, majorityValue);
}
Loading

0 comments on commit bcceffa

Please sign in to comment.