Skip to content

Commit

Permalink
ADD: bump
Browse files Browse the repository at this point in the history
  • Loading branch information
T-K-233 committed Jun 13, 2024
1 parent 13524c3 commit f32de3c
Show file tree
Hide file tree
Showing 37 changed files with 622 additions and 425 deletions.
2 changes: 1 addition & 1 deletion example/fast-depth/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ int main() {
init(model);

printf("setting input data...\n");
// NN_fill_F32(&model->x, 0.0);
// NN_fill(&model->x, 0.0);
memcpy((uint8_t *)model->x.data, (uint8_t *)model_input_data, (size_t)model_input_end - (size_t)model_input_start);

// cycles = READ_CSR("mcycle");
Expand Down
10 changes: 5 additions & 5 deletions nn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ option(RVV "Use RISCV vector extension implementation" OFF)
add_library(nn
src/nn_tensor.c
src/nn_print.c
src/abs/nn_abs.c
src/nn_abs.c
src/nn_add.c
src/batchnorm2d/nn_batchnorm2d.c
src/conv2d/nn_conv2d.c
Expand All @@ -16,11 +16,11 @@ add_library(nn
src/nn_linear.c
src/nn_matmul.c
src/matrixnorm/nn_matrixnorm.c
src/max/nn_max.c
src/maximum/nn_maximum.c
src/nn_max.c
src/nn_maximum.c
src/maxpool2d/nn_maxpool2d.c
src/min/nn_min.c
src/minimum/nn_minimum.c
src/nn_min.c
src/nn_minimum.c
src/mul/nn_mul.c
src/neg/nn_neg.c
src/relu/nn_relu.c
Expand Down
56 changes: 56 additions & 0 deletions nn/inc/kernel/abs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#ifndef __NN__ABS_H
#define __NN__ABS_H

#include <stddef.h>
#include <stdint.h>
#include <math.h>

#ifdef AVX
#include <immintrin.h>
#endif

#ifdef RVV
#include <riscv_vector.h>
#endif

/**
 * Computes the element-wise absolute value of a float array.
 *
 * y[i] = |x[i]| for every i in [0, n). Nothing is read or written when n is 0.
 *
 * @param n: number of elements to process
 * @param y: output array, must hold at least n elements
 * @param x: input array, must hold at least n elements
 */
static inline void NN__abs_F32(size_t n, float *y, float *x) {
#if defined(AVX)
  // Mask to clear the sign bit
  const __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));

  // Number of float lanes in one 256-bit register
  const size_t vl = 8;

  // Only full vectors go through the unmasked 8-lane load/store. Running it
  // on a partial tail would read past the end of x and write past the end
  // of y.
  while (n >= vl) {
    // Load input values into an AVX register
    __m256 vec_x = _mm256_loadu_ps(x);

    // Compute the absolute values
    __m256 vec_y = _mm256_and_ps(vec_x, mask);

    // Store the result
    _mm256_storeu_ps(y, vec_y);

    x += vl;
    y += vl;
    n -= vl;
  }

  // Fewer than 8 elements remain: finish them one at a time
  for (size_t i = 0; i < n; i += 1) {
    y[i] = fabsf(x[i]);
  }
#elif defined(RVV)
  while (n > 0) {
    // vsetvl returns how many elements this iteration handles (at most n)
    size_t vl = __riscv_vsetvl_e32m1(n);
    vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x, vl);
    vfloat32m1_t vec_y = __riscv_vfabs_v_f32m1(vec_x, vl);
    __riscv_vse32_v_f32m1(y, vec_y, vl);
    x += vl;
    y += vl;
    n -= vl;
  }
#else
  for (size_t i = 0; i < n; i += 1) {
    y[i] = fabsf(x[i]);
  }
#endif
}

#endif // __NN__ABS_H
35 changes: 35 additions & 0 deletions nn/inc/kernel/max.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#ifndef __NN__MAX_H
#define __NN__MAX_H

#include <stddef.h>
#include <stdint.h>
#include <float.h>

#ifdef RVV
#include <riscv_vector.h>
#endif

/**
 * Finds the largest value in a float array.
 *
 * The result is -FLT_MAX when n is 0 or when no element compares equal to
 * itself (i.e. every element is NaN).
 *
 * @param n: number of elements in x
 * @param s: where the maximum is stored
 * @param x: input array, must hold at least n elements
 */
static inline void NN__max_F32(size_t n, float *s, float *x) {
  float max = -FLT_MAX;

  // Seed the running maximum with the first non-NaN element. A fixed
  // -FLT_MAX seed would be returned for input made up solely of -INFINITY,
  // because -INFINITY never compares greater than -FLT_MAX.
  for (size_t i = 0; i < n; i += 1) {
    if (x[i] == x[i]) {
      max = x[i];
      break;
    }
  }

#ifdef RVV
  // Element 0 of vec_max carries the running maximum between iterations
  vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(max, 1);
  while (n > 0) {
    size_t vl = __riscv_vsetvl_e32m1(n);
    vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x, vl);
    vec_max = __riscv_vfredmax_vs_f32m1_f32m1(vec_x, vec_max, vl);
    x += vl;
    n -= vl;
  }
  max = __riscv_vfmv_f_s_f32m1_f32(vec_max);
#else
  for (size_t i = 0; i < n; i += 1) {
    float val = x[i];
    // A NaN val never satisfies val > max, so it leaves max untouched
    max = val > max ? val : max;
  }
#endif

  *s = max;
}

#endif // __NN__MAX_H
35 changes: 35 additions & 0 deletions nn/inc/kernel/min.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#ifndef __NN__MIN_H
#define __NN__MIN_H

#include <stddef.h>
#include <stdint.h>
#include <float.h>

#ifdef RVV
#include <riscv_vector.h>
#endif

/**
 * Finds the smallest value in a float array.
 *
 * The result is FLT_MAX when n is 0 or when no element compares equal to
 * itself (i.e. every element is NaN).
 *
 * @param n: number of elements in x
 * @param s: where the minimum is stored
 * @param x: input array, must hold at least n elements
 */
static inline void NN__min_F32(size_t n, float *s, float *x) {
  float min = FLT_MAX;

  // Seed the running minimum with the first non-NaN element. A fixed
  // FLT_MAX seed would be returned for input made up solely of +INFINITY,
  // because +INFINITY never compares less than FLT_MAX.
  for (size_t i = 0; i < n; i += 1) {
    if (x[i] == x[i]) {
      min = x[i];
      break;
    }
  }

#ifdef RVV
  // Element 0 of vec_min carries the running minimum between iterations
  vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(min, 1);
  while (n > 0) {
    size_t vl = __riscv_vsetvl_e32m1(n);
    vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x, vl);
    vec_min = __riscv_vfredmin_vs_f32m1_f32m1(vec_x, vec_min, vl);
    x += vl;
    n -= vl;
  }
  min = __riscv_vfmv_f_s_f32m1_f32(vec_min);
#else
  for (size_t i = 0; i < n; i += 1) {
    float val = x[i];
    // A NaN val never satisfies val < min, so it leaves min untouched
    min = val < min ? val : min;
  }
#endif

  *s = min;
}

#endif // __NN__MIN_H
9 changes: 1 addition & 8 deletions nn/inc/nn_abs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#define __NN_ABS_H

#include <assert.h>
#include <math.h>

#include "nn_tensor.h"
#include "kernel/abs.h"


/**
Expand All @@ -17,12 +17,5 @@
*/
void NN_abs(Tensor *out, Tensor *input);

void NN_abs_F32(Tensor *out, Tensor *input);


void NN_abs_F32_AVX(Tensor *out, Tensor *input);

void NN_abs_F32_RVV(Tensor *out, Tensor *input);


#endif // __NN_ABS_H
4 changes: 2 additions & 2 deletions nn/inc/nn_add.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ void NN_add(Tensor *out, Tensor *a, Tensor *b);
* @param in: the input tensor
* @param scalar: scalar value
*/
void NN_add1_F32(Tensor *out, Tensor *in, float scalar);
void NN_add1(Tensor *out, Tensor *in, float scalar);

/**
* Element-wise addition
Expand All @@ -52,7 +52,7 @@ void NN_addInplace(Tensor *b, Tensor *a);
* @param b: the target tensor
* @param scalar: scalar value
*/
void NN_addInplace1_F32(Tensor *b, float scalar);
void NN_addInplace1(Tensor *b, float scalar);

void NN_add_1D(Tensor *out, Tensor *a, Tensor *b);

Expand Down
6 changes: 1 addition & 5 deletions nn/inc/nn_fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@
* @param tensor: the input tensor
* @param value: scalar value
*/
void NN_fill_F32(Tensor *tensor, float value);

void NN_fill_I32(Tensor *tensor, int32_t value);

void NN_fill_I8(Tensor *tensor, int8_t value);
void NN_fill(Tensor *tensor, float value);

Tensor *NN_zeros(size_t ndim, const size_t *shape, DataType dtype);

Expand Down
16 changes: 2 additions & 14 deletions nn/inc/nn_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// fundamental operations
//

// Copies the first n floats of x into y, in ascending index order.
// Does nothing when n <= 0.
inline static void NN__cpy_F32 (const int n, float *y, const float *x) {
  int k = 0;
  while (k < n) {
    y[k] = x[k];
    k += 1;
  }
}

// Writes the negation of each of the first n floats of x into y,
// in ascending index order. Does nothing when n <= 0.
inline static void NN__neg_F32 (const int n, float *y, const float *x) {
  int k = 0;
  while (k < n) {
    y[k] = -x[k];
    k += 1;
  }
}
// Element-wise product: z[k] = x[k] * y[k] for k in [0, n),
// in ascending index order. Does nothing when n <= 0.
inline static void NN__mul_F32 (const int n, float *z, const float *x, const float *y) {
  int k = 0;
  while (k < n) {
    const float lhs = x[k];
    const float rhs = y[k];
    z[k] = lhs * rhs;
    k += 1;
  }
}
// Element-wise quotient: z[k] = x[k] / y[k] for k in [0, n),
// in ascending index order. Does nothing when n <= 0.
inline static void NN__div_F32 (const int n, float *z, const float *x, const float *y) {
  int k = 0;
  while (k < n) {
    const float num = x[k];
    const float den = y[k];
    z[k] = num / den;
    k += 1;
  }
}
Expand Down Expand Up @@ -349,7 +349,7 @@ inline static void NN__div_F32 (const int n, float *z, const float *x, const flo
// inline static void NN__sqr_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = x[i]*x[i]; }
// inline static void NN__sqrt_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = sqrtf(x[i]); }
// inline static void NN__log_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = logf(x[i]); }
// inline static void NN__abs_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = fabsf(x[i]); }

// inline static void NN__sgn_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
// inline static void NN__step_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
// inline static void NN__tanh_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = tanhf(x[i]); }
Expand Down Expand Up @@ -754,18 +754,6 @@ inline static void NN__div_F32 (const int n, float *z, const float *x, const flo
// *s = sum;
// }

// inline static void NN__max_F32(const int n, float *s, const float *x) {
// #ifndef GGML_USE_ACCELERATE
// float max = -INFINITY;
// for (int i = 0; i < n; i += 1) {
// max = MAX(max, x[i]);
// }
// *s = max;
// #else
// vDSP_maxv(x, 1, s, n);
// #endif
// }

// inline static void NN__norm_inv_F32(const int n, float *s, const float *x) {
// NN__norm_F32(n, s, x);
// *s = 1.f/(*s);
Expand Down
5 changes: 1 addition & 4 deletions nn/inc/nn_max.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <float.h>

#include "nn_tensor.h"
#include "kernel/max.h"


/**
Expand All @@ -14,9 +15,5 @@
*/
float NN_max(Tensor *tensor);

float NN_max_F32(Tensor *tensor);

float NN_max_F32_RVV(Tensor *tensor);


#endif // __NN_MAX_H
9 changes: 3 additions & 6 deletions nn/inc/nn_maximum.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
#define __NN_MAXIMUM_H

#include <assert.h>
#include <float.h>
#ifdef RVV
#include <riscv_vector.h>
#endif

#include "nn_tensor.h"

Expand All @@ -16,10 +18,5 @@
*/
void NN_maximum(Tensor *out, Tensor *a, Tensor *b);

void NN_maximum_F32(Tensor *out, Tensor *a, Tensor *b);


void NN_maximum_F32_RVV(Tensor *out, Tensor *a, Tensor *b);


#endif // __NN_MAXIMUM_H
6 changes: 1 addition & 5 deletions nn/inc/nn_min.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <float.h>

#include "nn_tensor.h"
#include "kernel/min.h"


/**
Expand All @@ -14,10 +15,5 @@
*/
float NN_min(Tensor *tensor);

float NN_min_F32(Tensor *tensor);


float NN_min_F32_RVV(Tensor *tensor);


#endif // __NN_MIN_H
8 changes: 3 additions & 5 deletions nn/inc/nn_minimum.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#define __NN_MINIMUM_H

#include <assert.h>
#ifdef RVV
#include <riscv_vector.h>
#endif

#include "nn_tensor.h"

Expand All @@ -15,10 +18,5 @@
*/
void NN_minimum(Tensor *out, Tensor *a, Tensor *b);

void NN_minimum_F32(Tensor *out, Tensor *a, Tensor *b);


void NN_minimum_F32_RVV(Tensor *out, Tensor *a, Tensor *b);


#endif // __NN_MINIMUM_H
2 changes: 2 additions & 0 deletions nn/inc/nn_print.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef __NN_PRINT_H
#define __NN_PRINT_H

#include <math.h>

#include "nn_tensor.h"


Expand Down
14 changes: 0 additions & 14 deletions nn/src/abs/nn_abs.c

This file was deleted.

36 changes: 0 additions & 36 deletions nn/src/abs/nn_abs_avx.c

This file was deleted.

Loading

0 comments on commit f32de3c

Please sign in to comment.