Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Neue Klasse NominalFeatureVector #775

Merged
merged 8 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/subprojects/common/include/mlrl/common/data/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cmath>
#include <limits>

// Aliases for the integral types that are used throughout the project.
// NOTE(review): The width of `long int` depends on the platform's data model (e.g., it is 32 bits wide on 64-bit
// Windows) — confirm that `int64` is only expected to be 64 bits wide on platforms where this holds.
using int32 = int;
using int64 = long int;
using uint8 = unsigned char;
using uint32 = unsigned int;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@
#include "mlrl/common/data/vector_sparse_array.hpp"
#include "mlrl/common/input/missing_feature_vector.hpp"

/**
* Defines an interface for all one-dimensional vectors that store the values of training examples for a certain
* feature.
*/
class IFeatureVector : public IOneDimensionalView {
public:

virtual ~IFeatureVector() override {};
};

/**
* A one-dimensional sparse vector that stores the values of training examples for a certain feature, as well as the
* indices of examples with missing feature values.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* @author Michael Rapp ([email protected])
*/
#pragma once

#include "mlrl/common/data/vector_dok_binary.hpp"
#include "mlrl/common/input/feature_vector.hpp"

#include <memory>

/**
* An abstract base class for all feature vectors that store the values of training examples for a certain feature. It
* allows to keep track of the indices of examples with missing feature values.
*/
class AbstractFeatureVector : public IFeatureVector {
private:

BinaryDokVector missingIndices_;

public:

virtual ~AbstractFeatureVector() override {};

/**
* An iterator that provides read-only access to the indices of examples with missing feature values.
*/
typedef BinaryDokVector::index_const_iterator missing_index_const_iterator;

/**
* Returns a `missing_index_const_iterator` to the beginning of the indices of examples with missing feature
* values.
*
* @return A `missing_index_const_iterator` to the beginning
*/
missing_index_const_iterator missing_indices_cbegin() const;

/**
* Returns a `missing_index_const_iterator` to the end of the indices of examples with missing feature values.
*
* @return A `missing_index_const_iterator` to the end
*/
missing_index_const_iterator missing_indices_cend() const;

/**
* Sets whether the example at a specific index is missing a feature value or not.
*
* @param index The index of the example
* @param missing True, if the example at the given index is missing a feature value, false otherwise
*/
void setMissing(uint32 index, bool missing);

/**
* Returns whether the example at a specific index is missing a feature value or not.
*
* @param index The index of the example
* @return True, if the example at the given index is missing a feature value, false otherwise
*/
bool isMissing(uint32 index) const;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* @author Michael Rapp ([email protected])
*/
#pragma once

#include "mlrl/common/input/feature_vector_common.hpp"

/**
 * A placeholder feature vector that does not store any values at all. It is meant for features for which every
 * training example has the same value.
 */
class EqualFeatureVector final : public AbstractFeatureVector {
    public:

        /**
         * Returns the number of elements that are stored by the vector.
         *
         * @return The number of elements
         */
        uint32 getNumElements() const override;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* @author Michael Rapp ([email protected])
*/
#pragma once

#include "mlrl/common/input/feature_vector_common.hpp"

/**
* A feature vector that stores the indices of the examples that are associated with each value, except for the minority
* value, i.e., the least frequent value, of a nominal feature.
*/
class NominalFeatureVector final : public AbstractFeatureVector {
private:

int32* values_;

uint32* indices_;

uint32* indptr_;

const uint32 numValues_;

const int32 minorityValue_;

public:

/**
* @param numValues The number of distinct values of the nominal feature, excluding the minority value
* @param numExamples The total number of examples
* @param minorityValue The minority value, i.e., the least frequent value, of the nominal feature
*/
NominalFeatureVector(uint32 numValues, uint32 numExamples, int32 minorityValue);

~NominalFeatureVector() override;

/**
* An iterator that provides access to the values of the nominal feature and allows to modify them.
*/
typedef int32* value_iterator;

/**
* An iterator that provides read-only access to the values of the nominal feature.
*/
typedef const int32* value_const_iterator;

/**
* An iterator that provides access to the indices of the examples that are associated with each value of the
* nominal feature and allows to modify them.
*/
typedef uint32* index_iterator;

/**
* An iterator that provides read-only access to the indices of the examples that are associated with each value
* of the nominal feature.
*/
typedef const uint32* index_const_iterator;

/**
* An iterator that provides access to the indices that specify the first element in the array of example
* indices that corresponds to each value of the nominal feature.
*/
typedef uint32* indptr_iterator;

/**
* Returns a `value_iterator` to the beginning of the values of the nominal feature.
*
* @return A `value_iterator` to the beginning
*/
value_iterator values_begin();

/**
* Returns a `value_iterator` to the end of the values of the nominal feature.
*
* @return A `value_iterator` to the end
*/
value_iterator values_end();

/**
* Returns a `value_const_iterator` to the beginning of the values of the nominal feature.
*
* @return A `value_const_iterator` to the beginning
*/
value_const_iterator values_cbegin() const;

/**
* Returns a `value_const_iterator` to the end of the value of the nominal feature.
*
* @return A `value_const_iterator` to the end
*/
value_const_iterator values_cend() const;

/**
* Returns an `index_iterator` to the beginning of the indices of the examples that are associated with a
* specific value of the nominal feature.
*
* @param index The index of the value
* @return An `index_iterator` to the beginning
*/
index_iterator indices_begin(uint32 index);

/**
* Returns an `index_iterator` to the end of the indices of the examples that are associated with a specific
* value of the nominal feature.
*
* @param index The index of the value
* @return An `index_iterator` to the end
*/
index_iterator indices_end(uint32 index);

/**
* Returns an `index_const_iterator` to the beginning of the indices of the examples that are associated with a
* specific value of the nominal feature.
*
* @param index The index of the value
* @return An `index_const_iterator` to the beginning
*/
index_const_iterator indices_cbegin(uint32 index) const;

/**
* Returns an `index_const_iterator` to the end of the indices of the examples that are associated with a
* specific value of the nominal feature.
*
* @param index The index of the value
* @return An `index_const_iterator` to the end
*/
index_const_iterator indices_cend(uint32 index) const;

/**
* Returns an `indptr_iterator` to the beginning of the indices that specify the first element in the array of
* example indices that corresponds to each value of the nominal feature.
*
* @return An `indptr_iterator` to the beginning
*/
indptr_iterator indptr_begin();

/**
* Returns an `indptr_iterator` to the end of the indices that specify the first element in the array of example
* indices that corresponds to each value of the nominal feature.
*
* @return An `indptr_iterator` to the end
*/
indptr_iterator indptr_end();

/**
* Returns the minority value, i.e., the least frequent value, of the nominal feature.
*
* @return The minority value
*/
int32 getMinorityValue() const;

uint32 getNumElements() const override;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* @author Michael Rapp ([email protected])
*/
#pragma once

#include "mlrl/common/data/vector_sparse_array.hpp"
#include "mlrl/common/input/feature_vector_common.hpp"

/**
 * A feature vector for numerical features. It stores the feature values of training examples for an individual
 * feature.
 */
class NumericalFeatureVector final : public AbstractFeatureVector {
    private:

        SparseArrayVector<float32> vector_;

        const float32 sparseValue_;

    public:

        /**
         * The type of an iterator that grants access to the feature values in the vector and allows modifying them.
         */
        using iterator = SparseArrayVector<float32>::iterator;

        /**
         * The type of an iterator that grants read-only access to the feature values in the vector.
         */
        using const_iterator = SparseArrayVector<float32>::const_iterator;

        /**
         * @param numElements   The number of elements in the vector
         * @param sparseValue   The value of sparse elements that are not explicitly stored in the vector
         */
        NumericalFeatureVector(uint32 numElements, float32 sparseValue);

        /**
         * Provides an `iterator` that points to the beginning of the vector.
         *
         * @return An `iterator` to the beginning
         */
        iterator begin();

        /**
         * Provides an `iterator` that points to the end of the vector.
         *
         * @return An `iterator` to the end
         */
        iterator end();

        /**
         * Provides a `const_iterator` that points to the beginning of the vector.
         *
         * @return A `const_iterator` to the beginning
         */
        const_iterator cbegin() const;

        /**
         * Provides a `const_iterator` that points to the end of the vector.
         *
         * @return A `const_iterator` to the end
         */
        const_iterator cend() const;

        /**
         * Provides the value of sparse elements that are not explicitly stored in the vector.
         *
         * @return The value of sparse elements
         */
        float32 getSparseValue() const;

        /**
         * Rearranges the elements in the vector, such that they are sorted in ascending order by their values.
         */
        void sortByValues();

        /**
         * Changes the number of elements in the vector.
         *
         * @param numElements   The number of elements to be set
         * @param freeMemory    True, if unused memory should be freed, if possible, false otherwise
         */
        void setNumElements(uint32 numElements, bool freeMemory);

        /**
         * Returns the number of elements in the vector.
         *
         * @return The number of elements
         */
        uint32 getNumElements() const override;
};
4 changes: 4 additions & 0 deletions cpp/subprojects/common/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ source_files = [
'src/mlrl/common/input/feature_type_numerical.cpp',
'src/mlrl/common/input/feature_type_ordinal.cpp',
'src/mlrl/common/input/feature_vector.cpp',
'src/mlrl/common/input/feature_vector_common.cpp',
'src/mlrl/common/input/feature_vector_equal.cpp',
'src/mlrl/common/input/feature_vector_nominal.cpp',
'src/mlrl/common/input/feature_vector_numerical.cpp',
'src/mlrl/common/input/label_matrix_c_contiguous.cpp',
'src/mlrl/common/input/label_matrix_csc.cpp',
'src/mlrl/common/input/label_matrix_csr.cpp',
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "mlrl/common/input/feature_vector_equal.hpp"

/**
 * Returns the number of elements in the vector, which is always zero.
 *
 * @return The number of elements, i.e., zero
 */
uint32 EqualFeatureVector::getNumElements() const {
    constexpr uint32 numStoredElements = 0;
    return numStoredElements;
}
Loading
Loading