diff --git a/example/fast-depth/main.c b/example/fast-depth/main.c index bbda924..978ca5d 100644 --- a/example/fast-depth/main.c +++ b/example/fast-depth/main.c @@ -86,7 +86,7 @@ int main() { init(model); printf("setting input data...\n"); - // NN_fill_F32(&model->x, 0.0); + // NN_fill(&model->x, 0.0); memcpy((uint8_t *)model->x.data, (uint8_t *)model_input_data, (size_t)model_input_end - (size_t)model_input_start); // cycles = READ_CSR("mcycle"); diff --git a/nn/CMakeLists.txt b/nn/CMakeLists.txt index ebd481d..ea5780b 100644 --- a/nn/CMakeLists.txt +++ b/nn/CMakeLists.txt @@ -6,7 +6,7 @@ option(RVV "Use RISCV vector extension implementation" OFF) add_library(nn src/nn_tensor.c src/nn_print.c - src/abs/nn_abs.c + src/nn_abs.c src/nn_add.c src/batchnorm2d/nn_batchnorm2d.c src/conv2d/nn_conv2d.c @@ -16,11 +16,11 @@ add_library(nn src/nn_linear.c src/nn_matmul.c src/matrixnorm/nn_matrixnorm.c - src/max/nn_max.c - src/maximum/nn_maximum.c + src/nn_max.c + src/nn_maximum.c src/maxpool2d/nn_maxpool2d.c - src/min/nn_min.c - src/minimum/nn_minimum.c + src/nn_min.c + src/nn_minimum.c src/mul/nn_mul.c src/neg/nn_neg.c src/relu/nn_relu.c diff --git a/nn/inc/kernel/abs.h b/nn/inc/kernel/abs.h new file mode 100644 index 0000000..afc8699 --- /dev/null +++ b/nn/inc/kernel/abs.h @@ -0,0 +1,56 @@ +#ifndef __NN__ABS_H +#define __NN__ABS_H + +#include +#include +#include + +#ifdef AVX + #include +#endif + +#ifdef RVV + #include +#endif + +static inline void NN__abs_F32(size_t n, float *y, float *x) { + #if defined(AVX) + // Mask to clear the sign bit + __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)); + + size_t vl = 8; + + while (n > 0) { + size_t count = n < vl ? n : vl; + + // Load input values into an AVX register + __m256 vec_x = _mm256_loadu_ps(x); + + // Compute the absolute values + __m256 vec_y = _mm256_and_ps(vec_x, mask); + + // Store the result + _mm256_storeu_ps(y, vec_y); + + x += count; + y += count; + n -= count; + } + #elif defined(RVV) + while (n > 0) { + size_t vl = __riscv_vsetvl_e32m1(n); + vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x, vl); + vfloat32m1_t vec_y = __riscv_vfabs_v_f32m1(vec_x, vl); + __riscv_vse32_v_f32m1(y, vec_y, vl); + x += vl; + y += vl; + n -= vl; + } + #else + for (size_t i = 0; i < n; i += 1) { + y[i] = fabsf(x[i]); + } + #endif +} + +#endif // __NN__ABS_H diff --git a/nn/inc/kernel/max.h b/nn/inc/kernel/max.h new file mode 100644 index 0000000..4c33d84 --- /dev/null +++ b/nn/inc/kernel/max.h @@ -0,0 +1,35 @@ +#ifndef __NN__MAX_H +#define __NN__MAX_H + +#include +#include +#include + +#ifdef RVV + #include +#endif + +static inline void NN__max_F32(size_t n, float *s, float *x) { + float max = -FLT_MAX; + + #ifdef RVV + vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(max, 1); + while (n > 0) { + size_t vl = __riscv_vsetvl_e32m1(n); + vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x, vl); + vec_max = __riscv_vfredmax_vs_f32m1_f32m1(vec_x, vec_max, vl); + x += vl; + n -= vl; + } + max = __riscv_vfmv_f_s_f32m1_f32(vec_max); + #else + for (size_t i = 0; i < n; i += 1) { + float val = x[i]; + max = val > max ? val : max; + } + #endif + + *s = max; +} + +#endif // __NN__MAX_H diff --git a/nn/inc/kernel/min.h b/nn/inc/kernel/min.h new file mode 100644 index 0000000..44f2b0f --- /dev/null +++ b/nn/inc/kernel/min.h @@ -0,0 +1,35 @@ +#ifndef __NN__MIN_H +#define __NN__MIN_H + +#include +#include +#include + +#ifdef RVV + #include +#endif + +static inline void NN__min_F32(size_t n, float *s, float *x) { + float min = FLT_MAX; + + #ifdef RVV + vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(min, 1); + while (n > 0) { + size_t vl = __riscv_vsetvl_e32m1(n); + vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x, vl); + vec_min = __riscv_vfredmin_vs_f32m1_f32m1(vec_x, vec_min, vl); + x += vl; + n -= vl; + } + min = __riscv_vfmv_f_s_f32m1_f32(vec_min); + #else + for (size_t i = 0; i < n; i += 1) { + float val = x[i]; + min = val < min ? val : min; + } + #endif + + *s = min; +} + +#endif // __NN__MIN_H diff --git a/nn/inc/nn_abs.h b/nn/inc/nn_abs.h index 0d78fc8..2f047a3 100644 --- a/nn/inc/nn_abs.h +++ b/nn/inc/nn_abs.h @@ -2,9 +2,9 @@ #define __NN_ABS_H #include -#include #include "nn_tensor.h" +#include "kernel/abs.h" /** @@ -17,12 +17,5 @@ */ void NN_abs(Tensor *out, Tensor *input); -void NN_abs_F32(Tensor *out, Tensor *input); - - -void NN_abs_F32_AVX(Tensor *out, Tensor *input); - -void NN_abs_F32_RVV(Tensor *out, Tensor *input); - #endif // __NN_ABS_H diff --git a/nn/inc/nn_add.h b/nn/inc/nn_add.h index 6087bf8..9d488e0 100644 --- a/nn/inc/nn_add.h +++ b/nn/inc/nn_add.h @@ -32,7 +32,7 @@ void NN_add(Tensor *out, Tensor *a, Tensor *b); * @param in: the input tensor * @param scalar: scalar value */ -void NN_add1_F32(Tensor *out, Tensor *in, float scalar); +void NN_add1(Tensor *out, Tensor *in, float scalar); /** * Element-wise addition @@ -52,7 +52,7 @@ void NN_addInplace(Tensor *b, Tensor *a); * @param b: the target tensor * @param scalar: scalar value */ -void NN_addInplace1_F32(Tensor *b, float scalar); +void NN_addInplace1(Tensor *b, float scalar); void NN_add_1D(Tensor *out, Tensor *a, Tensor *b); diff --git a/nn/inc/nn_fill.h b/nn/inc/nn_fill.h index 8e534c7..51ea231 100644 --- a/nn/inc/nn_fill.h +++ b/nn/inc/nn_fill.h @@ -13,11 +13,7 @@ * @param tensor: the input tensor * @param value: scalar value */ -void NN_fill_F32(Tensor *tensor, float value); - -void NN_fill_I32(Tensor *tensor, int32_t value); - -void NN_fill_I8(Tensor *tensor, int8_t value); +void NN_fill(Tensor *tensor, float value); Tensor *NN_zeros(size_t ndim, const size_t *shape, DataType dtype); diff --git a/nn/inc/nn_math.h b/nn/inc/nn_math.h index 41b49c6..77c8ba0 100644 --- a/nn/inc/nn_math.h +++ b/nn/inc/nn_math.h @@ -12,7 +12,7 @@ // fundamental operations // -inline static void NN__cpy_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = x[i]; } + inline static void NN__neg_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = -x[i]; } inline static void NN__mul_F32 (const int n, float *z, const float *x, const float *y) { for (int i = 0; i < n; i += 1) z[i] = x[i]*y[i]; } inline static void NN__div_F32 (const int n, float *z, const float *x, const float *y) { for (int i = 0; i < n; i += 1) z[i] = x[i]/y[i]; } @@ -349,7 +349,7 @@ inline static void NN__div_F32 (const int n, float *z, const float *x, const flo // inline static void NN__sqr_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = x[i]*x[i]; } // inline static void NN__sqrt_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = sqrtf(x[i]); } // inline static void NN__log_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = logf(x[i]); } -// inline static void NN__abs_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = fabsf(x[i]); } + // inline static void NN__sgn_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); } // inline static void NN__step_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? 1.f : 0.f; } // inline static void NN__tanh_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = tanhf(x[i]); } @@ -754,18 +754,6 @@ inline static void NN__div_F32 (const int n, float *z, const float *x, const flo // *s = sum; // } -// inline static void NN__max_F32(const int n, float *s, const float *x) { -// #ifndef GGML_USE_ACCELERATE -// float max = -INFINITY; -// for (int i = 0; i < n; i += 1) { -// max = MAX(max, x[i]); -// } -// *s = max; -// #else -// vDSP_maxv(x, 1, s, n); -// #endif -// } - // inline static void NN__norm_inv_F32(const int n, float *s, const float *x) { // NN__norm_F32(n, s, x); // *s = 1.f/(*s); diff --git a/nn/inc/nn_max.h b/nn/inc/nn_max.h index 4d1de86..4cde93e 100644 --- a/nn/inc/nn_max.h +++ b/nn/inc/nn_max.h @@ -5,6 +5,7 @@ #include #include "nn_tensor.h" +#include "kernel/max.h" /** @@ -14,9 +15,5 @@ */ float NN_max(Tensor *tensor); -float NN_max_F32(Tensor *tensor); - -float NN_max_F32_RVV(Tensor *tensor); - #endif // __NN_MAX_H diff --git a/nn/inc/nn_maximum.h b/nn/inc/nn_maximum.h index fa210e8..c7158dd 100644 --- a/nn/inc/nn_maximum.h +++ b/nn/inc/nn_maximum.h @@ -2,7 +2,9 @@ #define __NN_MAXIMUM_H #include -#include +#ifdef RVV + #include +#endif #include "nn_tensor.h" @@ -16,10 +18,5 @@ */ void NN_maximum(Tensor *out, Tensor *a, Tensor *b); -void NN_maximum_F32(Tensor *out, Tensor *a, Tensor *b); - - -void NN_maximum_F32_RVV(Tensor *out, Tensor *a, Tensor *b); - #endif // __NN_MAXIMUM_H diff --git a/nn/inc/nn_min.h b/nn/inc/nn_min.h index 165c27b..b25db8d 100644 --- a/nn/inc/nn_min.h +++ b/nn/inc/nn_min.h @@ -5,6 +5,7 @@ #include #include "nn_tensor.h" +#include "kernel/min.h" /** @@ -14,10 +15,5 @@ */ float NN_min(Tensor *tensor); -float NN_min_F32(Tensor *tensor); - - -float NN_min_F32_RVV(Tensor *tensor); - #endif // __NN_MIN_H diff --git a/nn/inc/nn_minimum.h b/nn/inc/nn_minimum.h index 7bb0808..04e13ce 100644 --- a/nn/inc/nn_minimum.h +++ b/nn/inc/nn_minimum.h @@ -2,6 +2,9 @@ #define __NN_MINIMUM_H #include +#ifdef RVV + #include +#endif #include "nn_tensor.h" @@ -15,10 +18,5 @@ */ void NN_minimum(Tensor *out, Tensor *a, Tensor *b); -void NN_minimum_F32(Tensor *out, Tensor *a, Tensor *b); - - -void NN_minimum_F32_RVV(Tensor *out, Tensor *a, Tensor *b); - #endif // __NN_MINIMUM_H diff --git a/nn/inc/nn_print.h b/nn/inc/nn_print.h index 31c4cbc..cea4297 100644 --- a/nn/inc/nn_print.h +++ b/nn/inc/nn_print.h @@ -1,6 +1,8 @@ #ifndef __NN_PRINT_H #define __NN_PRINT_H +#include + #include "nn_tensor.h" diff --git a/nn/src/abs/nn_abs.c b/nn/src/abs/nn_abs.c deleted file mode 100644 index bdd4d25..0000000 --- a/nn/src/abs/nn_abs.c +++ /dev/null @@ -1,14 +0,0 @@ - -#include "nn_abs.h" - - -void NN_abs_F32(Tensor *out, Tensor *input) { - assert(out->ndim == input->ndim); - assert(input->dtype == DTYPE_F32); - assert(out->dtype == DTYPE_F32); - assert(out->size == input->size); - - for (size_t i = 0; i < out->size; i += 1) { - ((float *)out->data)[i] = fabs(((float *)input->data)[i]); - } -} diff --git a/nn/src/abs/nn_abs_avx.c b/nn/src/abs/nn_abs_avx.c deleted file mode 100644 index 8092943..0000000 --- a/nn/src/abs/nn_abs_avx.c +++ /dev/null @@ -1,36 +0,0 @@ - -#include "nn_abs.h" -#include - -void NN_abs_F32_AVX(Tensor *out, Tensor *input) { - assert(out->ndim == input->ndim); - assert(input->dtype == DTYPE_F32); - assert(out->dtype == DTYPE_F32); - assert(out->size == input->size); - - float *ptr_out = out->data; - float *ptr_in = input->data; - - size_t n = out->shape[0] * out->shape[1]; - size_t vl = 8; - - __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)); // Mask to clear the sign bit - - while (n > 0) { - size_t count = n < vl ? n : vl; - - // Load input values into an AVX register - __m256 vec_in = _mm256_loadu_ps(ptr_in); - - // Compute the absolute values - __m256 vec_out = _mm256_and_ps(vec_in, mask); - - // Store the result - _mm256_storeu_ps(ptr_out, vec_out); - - ptr_in += count; - ptr_out += count; - n -= count; - } -} - diff --git a/nn/src/abs/nn_abs_rvv.c b/nn/src/abs/nn_abs_rvv.c deleted file mode 100644 index 360fcd4..0000000 --- a/nn/src/abs/nn_abs_rvv.c +++ /dev/null @@ -1,25 +0,0 @@ - -#include "nn_abs.h" -#include "riscv_vector.h" - -void NN_abs_F32_RVV(Tensor *out, Tensor *input) { - assert(out->ndim == input->ndim); - assert(input->dtype == DTYPE_F32); - assert(out->dtype == DTYPE_F32); - assert(out->size == input->size); - - float *ptr_out = out->data; - float *ptr_in = input->data; - - size_t n = out->shape[0] * out->shape[1]; - while (n > 0) { - size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_in = __riscv_vle32_v_f32m1(ptr_in, vl); - vfloat32m1_t vec_out = __riscv_vfabs_v_f32m1(vec_in, vl); - __riscv_vse32_v_f32m1(ptr_out, vec_out, vl); - ptr_in += vl; - ptr_out += vl; - n -= vl; - } -} - diff --git a/nn/src/max/nn_max.c b/nn/src/max/nn_max.c deleted file mode 100644 index 5d6cb13..0000000 --- a/nn/src/max/nn_max.c +++ /dev/null @@ -1,18 +0,0 @@ - -#include "nn_max.h" - - -float NN_max_F32(Tensor *tensor) { - assert(tensor->dtype == DTYPE_F32); - - float max = -FLT_MAX; - - for (size_t i = 0; i < tensor->size; i += 1) { - float val = ((float *)tensor->data)[i]; - if (val > max) { - max = val; - } - } - - return max; -} diff --git a/nn/src/max/nn_max_rvv.c b/nn/src/max/nn_max_rvv.c deleted file mode 100644 index 5fe1c54..0000000 --- a/nn/src/max/nn_max_rvv.c +++ /dev/null @@ -1,25 +0,0 @@ - -#include "nn_max.h" -#include "riscv_vector.h" - -float NN_max_F32_RVV(Tensor *tensor) { - assert(tensor->dtype == DTYPE_F32); - - float max = -FLT_MAX; - float *ptr = tensor->data; - - vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(max, 1); - - size_t n = tensor->shape[0] * tensor->shape[1]; - while (n > 0) { - size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_data = __riscv_vle32_v_f32m1(ptr, vl); - vec_max = __riscv_vfredmax_vs_f32m1_f32m1(vec_data, vec_max, vl); - ptr += vl; - n -= vl; - } - max = __riscv_vfmv_f_s_f32m1_f32(vec_max); - - return max; -} - diff --git a/nn/src/maximum/nn_maximum.c b/nn/src/maximum/nn_maximum.c deleted file mode 100644 index b6e1f6d..0000000 --- a/nn/src/maximum/nn_maximum.c +++ /dev/null @@ -1,19 +0,0 @@ - -#include "nn_maximum.h" - - -void NN_maximum_F32(Tensor *out, Tensor *a, Tensor *b) { - assert(b->ndim == a->ndim); - assert(out->ndim == a->ndim); - assert(a->dtype == DTYPE_F32); - assert(b->dtype == DTYPE_F32); - assert(out->dtype == DTYPE_F32); - assert(b->size == a->size); - assert(out->size == a->size); - - for (size_t i = 0; i < out->size; i += 1) { - float a_val = ((float *)a->data)[i]; - float b_val = ((float *)b->data)[i]; - ((float *)out->data)[i] = a_val > b_val ? a_val : b_val; - } -} diff --git a/nn/src/maximum/nn_maximum_rvv.c b/nn/src/maximum/nn_maximum_rvv.c deleted file mode 100644 index c0e0c0f..0000000 --- a/nn/src/maximum/nn_maximum_rvv.c +++ /dev/null @@ -1,31 +0,0 @@ - -#include "nn_maximum.h" -#include "riscv_vector.h" - -void NN_maximum_F32_RVV(Tensor *out, Tensor *a, Tensor *b) { - assert(b->ndim == a->ndim); - assert(out->ndim == a->ndim); - assert(a->dtype == DTYPE_F32); - assert(b->dtype == DTYPE_F32); - assert(out->dtype == DTYPE_F32); - assert(b->size == a->size); - assert(out->size == a->size); - - float *ptr_out = out->data; - float *ptr_a = a->data; - float *ptr_b = b->data; - - size_t n = out->shape[0] * out->shape[1]; - while (n > 0) { - size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_a = __riscv_vle32_v_f32m1(ptr_a, vl); - vfloat32m1_t vec_b = __riscv_vle32_v_f32m1(ptr_b, vl); - vfloat32m1_t vec_out = __riscv_vfmax_vv_f32m1(vec_a, vec_b, vl); - __riscv_vse32_v_f32m1(ptr_out, vec_out, vl); - ptr_a += vl; - ptr_b += vl; - ptr_out += vl; - n -= vl; - } -} - diff --git a/nn/src/min/nn_min.c b/nn/src/min/nn_min.c deleted file mode 100644 index ad2e637..0000000 --- a/nn/src/min/nn_min.c +++ /dev/null @@ -1,18 +0,0 @@ - -#include "nn_min.h" - - -float NN_min_F32(Tensor *tensor) { - assert(tensor->dtype == DTYPE_F32); - - float min = FLT_MAX; - - for (size_t i = 0; i < tensor->size; i += 1) { - float val = ((float *)tensor->data)[i]; - if (val < min) { - min = val; - } - } - - return min; -} diff --git a/nn/src/min/nn_min_rvv.c b/nn/src/min/nn_min_rvv.c deleted file mode 100644 index f39ec03..0000000 --- a/nn/src/min/nn_min_rvv.c +++ /dev/null @@ -1,25 +0,0 @@ - -#include "nn_min.h" -#include "riscv_vector.h" - -float NN_min_F32_RVV(Tensor *tensor) { - assert(tensor->dtype == DTYPE_F32); - - float min = FLT_MAX; - float *ptr = tensor->data; - - vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(min, 1); - - size_t n = tensor->shape[0] * tensor->shape[1]; - while (n > 0) { - size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_data = __riscv_vle32_v_f32m1(ptr, vl); - vec_min = __riscv_vfredmin_vs_f32m1_f32m1(vec_data, vec_min, vl); - ptr += vl; - n -= vl; - } - min = __riscv_vfmv_f_s_f32m1_f32(vec_min); - - return min; -} - diff --git a/nn/src/minimum/nn_minimum.c b/nn/src/minimum/nn_minimum.c deleted file mode 100644 index 15f55b0..0000000 --- a/nn/src/minimum/nn_minimum.c +++ /dev/null @@ -1,19 +0,0 @@ - -#include "nn_minimum.h" - - -void NN_minimum_F32(Tensor *out, Tensor *a, Tensor *b) { - assert(b->ndim == a->ndim); - assert(out->ndim == a->ndim); - assert(a->dtype == DTYPE_F32); - assert(b->dtype == DTYPE_F32); - assert(out->dtype == DTYPE_F32); - assert(b->size == a->size); - assert(out->size == a->size); - - for (size_t i = 0; i < out->size; i += 1) { - float a_val = ((float *)a->data)[i]; - float b_val = ((float *)b->data)[i]; - ((float *)out->data)[i] = a_val < b_val ? a_val : b_val; - } -} diff --git a/nn/src/minimum/nn_minimum_rvv.c b/nn/src/minimum/nn_minimum_rvv.c deleted file mode 100644 index 4fade23..0000000 --- a/nn/src/minimum/nn_minimum_rvv.c +++ /dev/null @@ -1,31 +0,0 @@ - -#include "nn_minimum.h" -#include "riscv_vector.h" - -void NN_minimum_F32_RVV(Tensor *out, Tensor *a, Tensor *b) { - assert(b->ndim == a->ndim); - assert(out->ndim == a->ndim); - assert(a->dtype == DTYPE_F32); - assert(b->dtype == DTYPE_F32); - assert(out->dtype == DTYPE_F32); - assert(b->size == a->size); - assert(out->size == a->size); - - float *ptr_out = out->data; - float *ptr_a = a->data; - float *ptr_b = b->data; - - size_t n = out->shape[0] * out->shape[1]; - while (n > 0) { - size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_a = __riscv_vle32_v_f32m1(ptr_a, vl); - vfloat32m1_t vec_b = __riscv_vle32_v_f32m1(ptr_b, vl); - vfloat32m1_t vec_out = __riscv_vfmin_vv_f32m1(vec_a, vec_b, vl); - __riscv_vse32_v_f32m1(ptr_out, vec_out, vl); - ptr_a += vl; - ptr_b += vl; - ptr_out += vl; - n -= vl; - } -} - diff --git a/nn/src/nn_abs.c b/nn/src/nn_abs.c new file mode 100644 index 0000000..250a235 --- /dev/null +++ b/nn/src/nn_abs.c @@ -0,0 +1,20 @@ + +#include "nn_abs.h" + + +void NN_abs(Tensor *out, Tensor *in) { + assert(out->ndim == in->ndim); + assert(out->dtype == in->dtype); + assert(out->size == in->size); + + switch (out->dtype) { + case DTYPE_F32: + NN_abs_F32(out, in); + return; + + default: + } + printf("[ERROR] Unsupported operation of tensor with dtype %s = |%s|\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(in->dtype) + ); +} diff --git a/nn/src/nn_add.c b/nn/src/nn_add.c index 7ea1c90..b802577 100644 --- a/nn/src/nn_add.c +++ b/nn/src/nn_add.c @@ -29,9 +29,21 @@ void NN_add(Tensor *out, Tensor *a, Tensor *b) { } } -void NN_add1_F32(Tensor *out, Tensor *a, float b) { - assert(out->dtype == DTYPE_F32 && a->dtype == DTYPE_F32); - NN__add1_F32(out->size, (float *)out->data, (float *)a->data, b); +void NN_add1(Tensor *out, Tensor *a, float b) { + assert(out->ndim == a->ndim); + assert(out->dtype == a->dtype); + assert(out->size == a->size); + + switch (out->dtype) { + case DTYPE_F32: + NN__add1_F32(out->size, (float *)out->data, (float *)a->data, b); + return; + + default: + } + printf("[ERROR] Unsupported operation between tensor with dtype %s += %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype) + ); } void NN_addInplace(Tensor *b, Tensor *a) { @@ -49,13 +61,21 @@ void NN_addInplace(Tensor *b, Tensor *a) { } printf("[ERROR] Unsupported operation between tensor with dtype %s += %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) + NN_getDataTypeName(b->dtype), NN_getDataTypeName(a->dtype) ); } -void NN_addInplace1_F32(Tensor *b, float scalar) { - assert(b->dtype == DTYPE_F32); - NN__acc1_F32(b->size, (float *)b->data, scalar); +void NN_addInplace1(Tensor *b, float scalar) { + switch (b->dtype) { + case DTYPE_F32: + NN__acc1_F32(b->size, (float *)b->data, scalar); + return; + default: + } + + printf("[ERROR] Unsupported operation between tensor with dtype %s += float\n", + NN_getDataTypeName(b->dtype) + ); } void NN_add_1D(Tensor *out, Tensor *a, Tensor *b) { @@ -71,8 +91,8 @@ void NN_add_1D(Tensor *out, Tensor *a, Tensor *b) { return; } - printf("[ERROR] Unsupported operation between tensor with dtype %s + %s -> %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype) + printf("[ERROR] Unsupported operation between tensor with dtype %s = %s + %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) ); } @@ -108,8 +128,8 @@ void NN_add_2D(Tensor *out, Tensor *a, Tensor *b) { return; } - printf("[ERROR] Unsupported operation between tensor with dtype %s + %s -> %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype) + printf("[ERROR] Unsupported operation between tensor with dtype %s = %s + %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) ); } @@ -149,8 +169,8 @@ void NN_add_3D(Tensor *out, Tensor *a, Tensor *b) { } } - printf("[ERROR] Unsupported operation between tensor with dtype %s + %s -> %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype) + printf("[ERROR] Unsupported operation between tensor with dtype %s = %s + %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) ); } @@ -196,8 +216,8 @@ void NN_add_4D(Tensor *out, Tensor *a, Tensor *b) { } } - printf("[ERROR] Unsupported operation between tensor with dtype %s + %s -> %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype) + printf("[ERROR] Unsupported operation between tensor with dtype %s = %s + %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) ); } diff --git a/nn/src/nn_fill.c b/nn/src/nn_fill.c index e063d4a..354fdbb 100644 --- a/nn/src/nn_fill.c +++ b/nn/src/nn_fill.c @@ -2,40 +2,26 @@ #include "nn_fill.h" -void NN_fill_F32(Tensor *tensor, float value) { - assert(tensor->dtype == DTYPE_F32); - - NN__fill_F32(tensor->size, (float *)tensor->data, value); -} - -void NN_fill_I32(Tensor *tensor, int32_t value) { - assert(tensor->dtype == DTYPE_I32); - - NN__fill_I32(tensor->size, (int32_t *)tensor->data, value); -} - -void NN_fill_I8(Tensor *tensor, int8_t value) { - assert(tensor->dtype == DTYPE_I8); - - NN__fill_I8(tensor->size, (int8_t *)tensor->data, value); -} - -Tensor *NN_zeros(size_t ndim, const size_t *shape, DataType dtype) { - Tensor *t = NN_tensor(ndim, shape, dtype, NULL); - - switch (dtype) { +void NN_fill(Tensor *tensor, float value) { + switch (tensor->dtype) { case DTYPE_I8: - NN_fill_I8(t, 0); - break; + NN__fill_I8(tensor->size, (int8_t *)tensor->data, (int8_t)value); + return; case DTYPE_I32: - NN_fill_I32(t, 0); - break; + NN__fill_I32(tensor->size, (int32_t *)tensor->data, (int32_t)value); + return; case DTYPE_F32: - NN_fill_F32(t, 0); - break; + NN__fill_F32(tensor->size, (float *)tensor->data, value); + return; default: - printf("[WARNING] Unsupported data type: %d\n", dtype); + printf("[ERROR] Unsupported operation fill to tensor with dtype: %d\n", tensor->dtype); } +} + +Tensor *NN_zeros(size_t ndim, const size_t *shape, DataType dtype) { + Tensor *t = NN_tensor(ndim, shape, dtype, NULL); + + NN_fill(t, 0); return t; } @@ -43,19 +29,7 @@ Tensor *NN_zeros(size_t ndim, const size_t *shape, DataType dtype) { Tensor *NN_ones(size_t ndim, const size_t *shape, DataType dtype) { Tensor *t = NN_tensor(ndim, shape, dtype, NULL); - switch (dtype) { - case DTYPE_I8: - NN_fill_I8(t, 1); - break; - case DTYPE_I32: - NN_fill_I32(t, 1); - break; - case DTYPE_F32: - NN_fill_F32(t, 1); - break; - default: - printf("[WARNING] Unsupported data type: %d\n", dtype); - } + NN_fill(t, 1); return t; } @@ -80,7 +54,7 @@ Tensor *NN_rand(size_t ndim, const size_t *shape, DataType dtype) { } break; default: - printf("[WARNING] Unsupported data type: %d\n", dtype); + printf("[ERROR] Unsupported data type: %d\n", dtype); } return t; diff --git a/nn/src/nn_matmul.c b/nn/src/nn_matmul.c index d819ed4..0d98d76 100644 --- a/nn/src/nn_matmul.c +++ b/nn/src/nn_matmul.c @@ -6,7 +6,9 @@ void NN_matmul(Tensor *out, Tensor *a, Tensor *b) { NN_matmul_F32(out, a, b); return; } - printf("Unsupported operation: %s @ %s -> %s\n", NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype)); + printf("Unsupported operation: %s = %s @ %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) + ); } void NN_matmul_F32(Tensor *out, Tensor *a, Tensor *b) { @@ -36,7 +38,9 @@ void NN_matmulT(Tensor *out, Tensor *a, Tensor *b) { NN_matmulT_F32(out, a, b); return; } - printf("Unsupported operation: %s @ %s -> %s\n", NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype)); + printf("Unsupported operation: %s = %s @ %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) + ); } void NN_matmulT_F32(Tensor *out, Tensor *a, Tensor *b) { diff --git a/nn/src/nn_max.c b/nn/src/nn_max.c new file mode 100644 index 0000000..928d377 --- /dev/null +++ b/nn/src/nn_max.c @@ -0,0 +1,20 @@ + +#include "nn_max.h" + + +float NN_max(Tensor *tensor) { + float max; + + switch (tensor->dtype) { + case DTYPE_F32: + NN__max_F32(tensor->size, &max, (float *)tensor->data); + break; + + default: + printf("[ERROR] Unsupported operation of tensor with dtype max(%s)\n", + NN_getDataTypeName(tensor->dtype) + ); + } + + return max; +} diff --git a/nn/src/nn_maximum.c b/nn/src/nn_maximum.c new file mode 100644 index 0000000..349a709 --- /dev/null +++ b/nn/src/nn_maximum.c @@ -0,0 +1,48 @@ + +#include "nn_maximum.h" + + +void NN_maximum(Tensor *out, Tensor *a, Tensor *b) { + assert(b->ndim == a->ndim); + assert(out->ndim == a->ndim); + assert(a->dtype == DTYPE_F32); + assert(b->dtype == DTYPE_F32); + assert(out->dtype == DTYPE_F32); + assert(b->size == a->size); + assert(out->size == a->size); + + switch (out->dtype) { + case DTYPE_F32: + #ifdef RVV + float *ptr_out = out->data; + float *ptr_a = a->data; + float *ptr_b = b->data; + + size_t n = out->size; + while (n > 0) { + size_t vl = __riscv_vsetvl_e32m1(n); + vfloat32m1_t vec_a = __riscv_vle32_v_f32m1(ptr_a, vl); + vfloat32m1_t vec_b = __riscv_vle32_v_f32m1(ptr_b, vl); + vfloat32m1_t vec_out = __riscv_vfmax_vv_f32m1(vec_a, vec_b, vl); + __riscv_vse32_v_f32m1(ptr_out, vec_out, vl); + ptr_a += vl; + ptr_b += vl; + ptr_out += vl; + n -= vl; + } + #else + for (size_t i = 0; i < out->size; i += 1) { + float a_val = ((float *)a->data)[i]; + float b_val = ((float *)b->data)[i]; + ((float *)out->data)[i] = a_val > b_val ? a_val : b_val; + } + #endif + return; + + default: + } + + printf("[ERROR] Unsupported operation between tensor with dtype %s = max(%s, %s)\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) + ); +} diff --git a/nn/src/nn_min.c b/nn/src/nn_min.c new file mode 100644 index 0000000..c25e7cf --- /dev/null +++ b/nn/src/nn_min.c @@ -0,0 +1,20 @@ + +#include "nn_min.h" + + +float NN_min(Tensor *tensor) { + float min; + + switch (tensor->dtype) { + case DTYPE_F32: + NN__min_F32(tensor->size, &min, (float *)tensor->data); + break; + + default: + printf("[ERROR] Unsupported operation of tensor with dtype min(%s)\n", + NN_getDataTypeName(tensor->dtype) + ); + } + + return min; +} diff --git a/nn/src/nn_minimum.c b/nn/src/nn_minimum.c new file mode 100644 index 0000000..afc3216 --- /dev/null +++ b/nn/src/nn_minimum.c @@ -0,0 +1,48 @@ + +#include "nn_minimum.h" + + +void NN_minimum(Tensor *out, Tensor *a, Tensor *b) { + assert(b->ndim == a->ndim); + assert(out->ndim == a->ndim); + assert(a->dtype == DTYPE_F32); + assert(b->dtype == DTYPE_F32); + assert(out->dtype == DTYPE_F32); + assert(b->size == a->size); + assert(out->size == a->size); + + switch (out->dtype) { + case DTYPE_F32: + #ifdef RVV + float *ptr_out = out->data; + float *ptr_a = a->data; + float *ptr_b = b->data; + + size_t n = out->size; + while (n > 0) { + size_t vl = __riscv_vsetvl_e32m1(n); + vfloat32m1_t vec_a = __riscv_vle32_v_f32m1(ptr_a, vl); + vfloat32m1_t vec_b = __riscv_vle32_v_f32m1(ptr_b, vl); + vfloat32m1_t vec_out = __riscv_vfmin_vv_f32m1(vec_a, vec_b, vl); + __riscv_vse32_v_f32m1(ptr_out, vec_out, vl); + ptr_a += vl; + ptr_b += vl; + ptr_out += vl; + n -= vl; + } + #else + for (size_t i = 0; i < out->size; i += 1) { + float a_val = ((float *)a->data)[i]; + float b_val = ((float *)b->data)[i]; + ((float *)out->data)[i] = a_val < b_val ? a_val : b_val; + } + #endif + return; + + default: + } + + printf("[ERROR] Unsupported operation between tensor with dtype %s = max(%s, %s)\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) + ); +} diff --git a/nn/src/nn_sub.c b/nn/src/nn_sub.c index 0847086..b322b03 100644 --- a/nn/src/nn_sub.c +++ b/nn/src/nn_sub.c @@ -24,7 +24,7 @@ void NN_sub(Tensor *out, Tensor *a, Tensor *b) { return; default: - printf("[ERROR] Unsupported tensor dimension: %d\n", out->ndim); + printf("[ERROR] Unsupported tensor dimension: %lu\n", out->ndim); return; } } @@ -42,8 +42,8 @@ void NN_sub_1D(Tensor *out, Tensor *a, Tensor *b) { return; } - printf("[ERROR] Unsupported operation between tensor with dtype %s + %s -> %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype) + printf("[ERROR] Unsupported operation between tensor with dtype %s = %s - %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) ); } @@ -79,8 +79,8 @@ void NN_sub_2D(Tensor *out, Tensor *a, Tensor *b) { return; } - printf("[ERROR] Unsupported operation between tensor with dtype %s + %s -> %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype) + printf("[ERROR] Unsupported operation between tensor with dtype %s = %s - %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) ); } @@ -109,8 +109,8 @@ void NN_sub_3D(Tensor *out, Tensor *a, Tensor *b) { } } - printf("[ERROR] Unsupported operation between tensor with dtype %s + %s -> %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype) + printf("[ERROR] Unsupported operation between tensor with dtype %s = %s - %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) ); } @@ -144,8 +144,8 @@ void NN_sub_4D(Tensor *out, Tensor *a, Tensor *b) { } } - printf("[ERROR] Unsupported operation between tensor with dtype %s + %s -> %s\n", - NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype), NN_getDataTypeName(out->dtype) + printf("[ERROR] Unsupported operation between tensor with dtype %s = %s - %s\n", + NN_getDataTypeName(out->dtype), NN_getDataTypeName(a->dtype), NN_getDataTypeName(b->dtype) ); } diff --git a/tests/src/generate_test.py b/tests/src/generate_test.py index 288be30..864ed43 100644 --- a/tests/src/generate_test.py +++ b/tests/src/generate_test.py @@ -56,6 +56,33 @@ } """ +def generateImmTestPattern(op, function, inputs, detail=""): + result = function(*[value for name, value in inputs]).item() + + test_template = env.from_string(""" + { + printf("{{ (op + ":").ljust(24) }}"); +{% for name, value in inputs %}{% if (type(value) == torch.Tensor and name != "actual") %} + Tensor *{{ name }} = NN_tensor({{ len(value.shape) }}, (size_t[]){ {{ value.shape | join(", ") }} }, DTYPE_F32, (float[]){ {{ value.numpy().flatten() | join(", ") }} }); +{% elif str(type(value)) == "" %} + float {{ name }} = {{ value }};{% endif %}{% endfor %} + + float golden = {{ result }}; + float actual; + + cycles = READ_CSR("mcycle"); + actual = NN_{{ op }}({{ inputs[0][0] }}); + cycles = READ_CSR("mcycle") - cycles; + printf("%s (%lu cycles)\\n", float_eq(golden, actual, 1e-6) ? "PASS" : "FAIL", cycles); + +{% for name, value in inputs %}{% if (type(value) == torch.Tensor and name != "actual") %} + NN_deleteTensor({{ name }});{% endif %}{% endfor %} + + } +""") + + return test_template.render(op=op, inputs=inputs, result=result, detail=detail) + def generateTestPattern(op, function, inputs, detail=""): result = function(*[value for name, value in inputs]) @@ -99,15 +126,15 @@ def generateTestPattern(op, function, inputs, detail=""): "add", lambda a, b: a + b, ( - ("a", torch.rand((3, 3))), - ("b", torch.rand((3, 3))), + ("a", torch.rand((7, 7))), + ("b", torch.rand((7, 7))), ), ), generateTestPattern( - "add1_F32", + "add1", lambda a, b: a + b, ( - ("a", torch.rand((3, 3))), + ("a", torch.rand((7, 7))), ("v", random.random()), ), ), @@ -115,23 +142,23 @@ def generateTestPattern(op, function, inputs, detail=""): "sub", lambda a, b: a - b, ( - ("a", torch.rand((3, 3))), - ("b", torch.rand((3, 3))), + ("a", torch.rand((7, 7))), + ("b", torch.rand((7, 7))), ), ), generateTestPattern( "addInplace", lambda a, b: a + b, ( - ("actual", torch.zeros((3, 3))), - ("b", torch.rand((3, 3))), + ("actual", torch.zeros((7, 7))), + ("b", torch.rand((7, 7))), ) ), generateTestPattern( - "fill_F32", + "fill", lambda a, v: a.fill_(v), ( - ("actual", torch.zeros((3, 3))), + ("actual", torch.zeros((7, 7))), ("v", random.random()), ), ), @@ -139,18 +166,49 @@ def generateTestPattern(op, function, inputs, detail=""): "matmulT", lambda a, b: a @ b.T, ( - ("a", torch.rand((6, 3))), - ("b", torch.rand((5, 3))), + ("a", torch.rand((6, 7))), + ("b", torch.rand((5, 7))), ), ), generateTestPattern( "matmul", lambda a, b: a @ b, ( - ("a", torch.rand((6, 3))), - ("b", torch.rand((3, 5))), + ("a", torch.rand((6, 7))), + ("b", torch.rand((7, 5))), ), ), + generateTestPattern( + "maximum", + lambda a, b: torch.maximum(a, b), + ( + ("a", torch.rand((7, 7))), + ("b", torch.rand((7, 7))), + ), + ), + generateTestPattern( + "minimum", + lambda a, b: torch.minimum(a, b), + ( + ("a", torch.rand((7, 7))), + ("b", torch.rand((7, 7))), + ), + ), + generateImmTestPattern( + "max", + lambda a: torch.max(a), + ( + ("a", torch.rand((7, 7))), + ), + ), + generateImmTestPattern( + "min", + lambda a: torch.min(a), + ( + ("a", torch.rand((7, 7))), + ), + ), + ] )) diff --git a/tests/src/main.c b/tests/src/main.c index 2f054a6..c4543ba 100644 --- a/tests/src/main.c +++ b/tests/src/main.c @@ -41,13 +41,13 @@ int main() { { printf("add: "); - Tensor *a = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 0.4962566, 0.7682218, 0.08847743, 0.13203049, 0.30742282, 0.6340787, 0.4900934, 0.89644474, 0.45562798 }); + Tensor *a = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.4962566, 0.7682218, 0.08847743, 0.13203049, 0.30742282, 0.6340787, 0.4900934, 0.89644474, 0.45562798, 0.6323063, 0.34889346, 0.4017173, 0.022325754, 0.16885895, 0.29388845, 0.5185218, 0.6976676, 0.8000114, 0.16102946, 0.28226858, 0.68160856, 0.915194, 0.3970999, 0.8741559, 0.41940832, 0.55290705, 0.9527381, 0.03616482, 0.18523103, 0.37341738, 0.30510002, 0.9320004, 0.17591017, 0.26983356, 0.15067977, 0.031719506, 0.20812976, 0.929799, 0.7231092, 0.7423363, 0.5262958, 0.24365824, 0.58459234, 0.03315264, 0.13871688, 0.242235, 0.81546897, 0.7931606, 0.27825248 }); - Tensor *b = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 0.6323063, 0.34889346, 0.4017173, 0.022325754, 0.16885895, 0.29388845, 0.5185218, 0.6976676, 0.8000114 }); + Tensor *b = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.4819588, 0.81978035, 0.99706656, 0.6984411, 0.5675464, 0.83524317, 0.20559883, 0.593172, 0.112347245, 0.15345693, 0.24170822, 0.7262365, 0.7010802, 0.20382375, 0.65105355, 0.774486, 0.43689132, 0.5190908, 0.61585236, 0.8101883, 0.98009706, 0.11468822, 0.31676513, 0.69650495, 0.9142747, 0.93510365, 0.9411784, 0.5995073, 0.06520867, 0.54599625, 0.18719733, 0.034022927, 0.94424623, 0.8801799, 0.0012360215, 0.593586, 0.41577, 0.41771942, 0.27112156, 0.6922781, 0.20384824, 0.68329567, 0.75285405, 0.8579358, 0.6869556, 0.005132377, 0.17565155, 0.7496575, 0.6046507 }); - Tensor *golden = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 1.1285629, 1.1171153, 0.49019474, 0.15435624, 0.47628176, 0.92796713, 1.0086153, 1.5941124, 1.2556393 }); - Tensor *actual = NN_zeros(2, (size_t[]){ 3, 3 }, DTYPE_F32); + Tensor *golden = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.9782154, 1.5880022, 1.085544, 0.8304716, 0.87496924, 1.4693218, 0.69569224, 1.4896168, 0.5679752, 0.7857632, 0.5906017, 1.1279538, 0.72340596, 0.3726827, 0.944942, 1.2930079, 1.1345589, 1.3191022, 0.7768818, 1.0924568, 1.6617056, 1.0298822, 0.71386504, 1.5706608, 1.333683, 1.4880106, 1.8939165, 0.6356721, 0.2504397, 0.9194136, 0.49229735, 0.9660233, 1.1201564, 1.1500134, 0.15191579, 0.62530553, 0.62389976, 1.3475184, 0.99423075, 1.4346144, 0.730144, 0.9269539, 1.3374465, 0.8910884, 0.82567245, 0.24736738, 0.9911205, 1.5428181, 0.88290316 }); + Tensor *actual = NN_zeros(2, (size_t[]){ 7, 7 }, DTYPE_F32); cycles = READ_CSR("mcycle"); NN_add(actual, a, b); @@ -63,17 +63,17 @@ int main() { } { - printf("add1_F32: "); + printf("add1: "); - Tensor *a = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 0.16102946, 0.28226858, 0.68160856, 0.915194, 0.3970999, 0.8741559, 0.41940832, 0.55290705, 0.9527381 }); + Tensor *a = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.10995799, 0.21209025, 0.97037464, 0.83690894, 0.28198743, 0.3741576, 0.023700953, 0.49101293, 0.123470545, 0.11432165, 0.4724502, 0.5750725, 0.29523486, 0.7966888, 0.19573045, 0.95368505, 0.84264994, 0.07835853, 0.37555784, 0.5225613, 0.57295054, 0.61858714, 0.69621414, 0.5299501, 0.25603563, 0.7365945, 0.02037555, 0.20364666, 0.37483507, 0.25644332, 0.32508332, 0.09018916, 0.39364243, 0.6068782, 0.17426711, 0.47434032, 0.8579254, 0.44859987, 0.5138961, 0.45686555, 0.6011907, 0.81791973, 0.9736231, 0.81752795, 0.97470677, 0.46383917, 0.050839245, 0.2629614, 0.8404526 }); float v = 0.8444218515250481; - Tensor *golden = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 1.0054513, 1.1266904, 1.5260304, 1.7596159, 1.2415218, 1.7185777, 1.2638302, 1.3973289, 1.7971599 }); - Tensor *actual = NN_zeros(2, (size_t[]){ 3, 3 }, DTYPE_F32); + Tensor *golden = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.95437986, 1.0565121, 1.8147964, 1.6813308, 1.1264093, 1.2185795, 0.8681228, 1.3354348, 0.9678924, 0.9587435, 1.3168721, 1.4194944, 1.1396568, 1.6411107, 1.0401523, 1.7981069, 1.6870718, 0.9227804, 1.2199798, 1.3669832, 1.4173725, 1.463009, 1.5406361, 1.374372, 1.1004574, 1.5810163, 0.8647974, 1.0480685, 1.2192569, 1.1008651, 1.1695051, 0.934611, 1.2380643, 1.4513001, 1.0186889, 1.3187622, 1.7023473, 1.2930217, 1.358318, 1.3012874, 1.4456125, 1.6623416, 1.8180449, 1.6619499, 1.8191286, 1.308261, 0.8952611, 1.1073833, 1.6848745 }); + Tensor *actual = NN_zeros(2, (size_t[]){ 7, 7 }, DTYPE_F32); cycles = READ_CSR("mcycle"); - NN_add1_F32(actual, a, v); + NN_add1(actual, a, v); cycles = READ_CSR("mcycle") - cycles; printf("%s (%lu cycles)\n", compare(golden, actual) ? "PASS" : "FAIL", cycles); @@ -87,13 +87,13 @@ int main() { { printf("sub: "); - Tensor *a = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 0.03616482, 0.18523103, 0.37341738, 0.30510002, 0.9320004, 0.17591017, 0.26983356, 0.15067977, 0.031719506 }); + Tensor *a = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.49675876, 0.25147682, 0.11684412, 0.032073975, 0.0779959, 0.39858162, 0.774203, 0.77032053, 0.017784059, 0.811891, 0.10874528, 0.39429486, 0.29726368, 0.40369236, 0.40182865, 0.051325023, 0.068281054, 0.42176026, 0.5064661, 0.27286255, 0.6883496, 0.049970806, 0.46625638, 0.9397097, 0.296054, 0.95150155, 0.6810769, 0.048769534, 0.8163487, 0.44230276, 0.27679658, 0.89982665, 0.09595239, 0.55365247, 0.39531565, 0.8570563, 0.63957226, 0.7402527, 0.6765795, 0.37976265, 0.39484727, 0.08795929, 0.77092206, 0.89698905, 0.8421124, 0.14731085, 0.52229995, 0.14753294, 0.22475791 }); - Tensor *b = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 0.20812976, 0.929799, 0.7231092, 0.7423363, 0.5262958, 0.24365824, 0.58459234, 0.03315264, 0.13871688 }); + Tensor *b = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.20864725, 0.6708725, 0.20204341, 0.4890914, 0.52103406, 0.8223115, 0.122039974, 0.15674388, 0.20966923, 0.8499667, 0.3202675, 0.92174435, 0.6808038, 0.563313, 0.496278, 0.40115923, 0.5627332, 0.38582766, 0.49648678, 0.5637965, 0.10889745, 0.23793429, 0.90374637, 0.09422666, 0.4640969, 0.99461937, 0.6806185, 0.5141565, 0.066695035, 0.74768895, 0.14385962, 0.35806787, 0.33224183, 0.4259563, 0.50546914, 0.91240376, 0.5624194, 0.9478464, 0.8058562, 0.18389302, 0.72425205, 0.14655197, 0.28808743, 0.64706135, 0.66509604, 0.875114, 0.33904207, 0.50080043, 0.7574118 }); - Tensor *golden = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ -0.17196494, -0.744568, -0.3496918, -0.43723625, 0.40570462, -0.06774807, -0.31475878, 0.11752713, -0.10699737 }); - Tensor *actual = NN_zeros(2, (size_t[]){ 3, 3 }, DTYPE_F32); + Tensor *golden = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.2881115, -0.4193957, -0.0851993, -0.45701742, -0.44303817, -0.4237299, 0.652163, 0.61357665, -0.19188517, -0.038075686, -0.21152222, -0.5274495, -0.3835401, -0.15962064, -0.09444934, -0.3498342, -0.49445212, 0.0359326, 0.009979308, -0.29093397, 0.57945216, -0.18796349, -0.43749, 0.84548306, -0.1680429, -0.04311782, 0.00045835972, -0.465387, 0.74965364, -0.3053862, 0.13293695, 0.5417588, -0.23628944, 0.12769616, -0.110153496, -0.055347443, 0.07715285, -0.20759374, -0.12927675, 0.19586962, -0.32940477, -0.058592677, 0.48283464, 0.2499277, 0.17701638, -0.7278032, 0.18325788, -0.3532675, -0.53265387 }); + Tensor *actual = NN_zeros(2, (size_t[]){ 7, 7 }, DTYPE_F32); cycles = READ_CSR("mcycle"); NN_sub(actual, a, b); @@ -111,11 +111,11 @@ int main() { { printf("addInplace: "); - Tensor *b = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 0.242235, 0.81546897, 0.7931606, 0.27825248, 0.4819588, 0.81978035, 0.99706656, 0.6984411, 0.5675464 }); + Tensor *b = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.016453922, 0.8614903, 0.08653879, 0.50689125, 0.41499162, 0.23666352, 0.5660855, 0.91345936, 0.35384023, 0.20315295, 0.31508058, 0.0044258237, 0.725697, 0.25986814, 0.16632986, 0.21194929, 0.787478, 0.76478684, 0.8837609, 0.68136156, 0.33302015, 0.36027592, 0.647715, 0.91101736, 0.6359461, 0.26342732, 0.2649613, 0.02726549, 0.608024, 0.21940875, 0.054212093, 0.93843824, 0.1752944, 0.44311923, 0.64324677, 0.51592916, 0.16355914, 0.09583914, 0.8985412, 0.58141935, 0.91481227, 0.3323797, 0.6472777, 0.3856619, 0.47776443, 0.1954779, 0.66910046, 0.65808296, 0.4896857 }); - Tensor *golden = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 0.242235, 0.81546897, 0.7931606, 0.27825248, 0.4819588, 0.81978035, 0.99706656, 0.6984411, 0.5675464 }); - Tensor *actual = NN_zeros(2, (size_t[]){ 3, 3 }, DTYPE_F32); + Tensor *golden = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.016453922, 0.8614903, 0.08653879, 0.50689125, 0.41499162, 0.23666352, 0.5660855, 0.91345936, 0.35384023, 0.20315295, 0.31508058, 0.0044258237, 0.725697, 0.25986814, 0.16632986, 0.21194929, 0.787478, 0.76478684, 0.8837609, 0.68136156, 0.33302015, 0.36027592, 0.647715, 0.91101736, 0.6359461, 0.26342732, 0.2649613, 0.02726549, 0.608024, 0.21940875, 0.054212093, 0.93843824, 0.1752944, 0.44311923, 0.64324677, 0.51592916, 0.16355914, 0.09583914, 0.8985412, 0.58141935, 0.91481227, 0.3323797, 0.6472777, 0.3856619, 0.47776443, 0.1954779, 0.66910046, 0.65808296, 0.4896857 }); + Tensor *actual = NN_zeros(2, (size_t[]){ 7, 7 }, DTYPE_F32); cycles = READ_CSR("mcycle"); NN_addInplace(actual, b); @@ -130,15 +130,15 @@ int main() { } { - printf("fill_F32: "); + printf("fill: "); float v = 0.7579544029403025; - Tensor *golden = NN_tensor(2, (size_t[]){ 3, 3 }, DTYPE_F32, (float[]){ 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544 }); - Tensor *actual = NN_zeros(2, (size_t[]){ 3, 3 }, DTYPE_F32); + Tensor *golden = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544, 0.7579544 }); + Tensor *actual = NN_zeros(2, (size_t[]){ 7, 7 }, DTYPE_F32); cycles = READ_CSR("mcycle"); - NN_fill_F32(actual, v); + NN_fill(actual, v); cycles = READ_CSR("mcycle") - cycles; printf("%s (%lu cycles)\n", compare(golden, actual) ? "PASS" : "FAIL", cycles); @@ -151,12 +151,12 @@ int main() { { printf("matmulT: "); - Tensor *a = NN_tensor(2, (size_t[]){ 6, 3 }, DTYPE_F32, (float[]){ 0.83524317, 0.20559883, 0.593172, 0.112347245, 0.15345693, 0.24170822, 0.7262365, 0.7010802, 0.20382375, 0.65105355, 0.774486, 0.43689132, 0.5190908, 0.61585236, 0.8101883, 0.98009706, 0.11468822, 0.31676513 }); + Tensor *a = NN_tensor(2, (size_t[]){ 6, 7 }, DTYPE_F32, (float[]){ 0.38754892, 0.1917851, 0.8457724, 0.12778795, 0.70483273, 0.33187324, 0.258766, 0.58982253, 0.24027151, 0.6152024, 0.5981904, 0.12875527, 0.5832493, 0.7129646, 0.6979155, 0.43706065, 0.09010619, 0.42292297, 0.67365384, 0.31756145, 0.68979055, 0.8329813, 0.2389242, 0.5049309, 0.7067495, 0.5391889, 0.54176575, 0.5624327, 0.10692614, 0.5392941, 0.8462349, 0.9505569, 0.79387546, 0.5670015, 0.7335071, 0.25676018, 0.08565581, 0.07003945, 0.99880487, 0.8173947, 0.15438312, 0.6956213 }); - Tensor *b = NN_tensor(2, (size_t[]){ 5, 3 }, DTYPE_F32, (float[]){ 0.69650495, 0.9142747, 0.93510365, 0.9411784, 0.5995073, 0.06520867, 0.54599625, 0.18719733, 0.034022927, 0.94424623, 0.8801799, 0.0012360215, 0.593586, 0.41577, 0.41771942 }); + Tensor *b = NN_tensor(2, (size_t[]){ 5, 7 }, DTYPE_F32, (float[]){ 0.8775838, 0.9998074, 0.93719745, 0.8873769, 0.38537037, 0.32452917, 0.9105244, 0.7801898, 0.19911051, 0.9495086, 0.7415793, 0.77256775, 0.18661183, 0.6434499, 0.32471877, 0.8906783, 0.4100297, 0.69465625, 0.5888109, 0.7127341, 0.33008623, 0.7437857, 0.15076452, 0.6129275, 0.16170406, 0.006731212, 0.09847212, 0.89473504, 0.7705178, 0.96910787, 0.9005606, 0.053477287, 0.15878445, 0.4192087, 0.17528385 }); - Tensor *golden = NN_tensor(2, (size_t[]){ 6, 5 }, DTYPE_F32, (float[]){ 1.3244021, 0.94805074, 0.51470864, 0.9703723, 0.82904994, 0.44457445, 0.21349882, 0.098291524, 0.2414519, 0.23145676, 1.3374035, 1.1171119, 0.5346975, 1.3030747, 0.8077131, 1.5700936, 1.1055566, 0.5153189, 1.2969818, 0.8909623, 1.6822176, 0.91059625, 0.42627248, 1.0332117, 0.90260935, 1.0837072, 1.0118583, 0.56737596, 1.0267907, 0.7617748 }); + Tensor *golden = NN_tensor(2, (size_t[]){ 6, 5 }, DTYPE_F32, (float[]){ 2.0528438, 2.0113466, 1.4691894, 1.1251802, 1.5493748, 2.7533019, 2.20283, 1.8001729, 1.6449451, 1.6632531, 2.4999304, 2.0542672, 1.797321, 1.3615949, 1.4260775, 2.9659772, 2.58056, 2.0705404, 1.6395576, 1.7771993, 3.4274392, 2.8903294, 2.6360347, 1.5506982, 1.9102607, 2.261407, 2.1324728, 1.703151, 1.0514295, 0.7137757 }); Tensor *actual = NN_zeros(2, (size_t[]){ 6, 5 }, DTYPE_F32); cycles = READ_CSR("mcycle"); @@ -175,12 +175,12 @@ int main() { { printf("matmul: "); - Tensor *a = NN_tensor(2, (size_t[]){ 6, 3 }, DTYPE_F32, (float[]){ 0.27112156, 0.6922781, 0.20384824, 0.68329567, 0.75285405, 0.8579358, 0.6869556, 0.005132377, 0.17565155, 0.7496575, 0.6046507, 0.10995799, 0.21209025, 0.97037464, 0.83690894, 0.28198743, 0.3741576, 0.023700953 }); + Tensor *a = NN_tensor(2, (size_t[]){ 6, 7 }, DTYPE_F32, (float[]){ 0.84719825, 0.121996105, 0.25604928, 0.016954303, 0.21612722, 0.91123873, 0.90938, 0.85791886, 0.88606364, 0.94459325, 0.3719685, 0.72000104, 0.9454652, 0.6654094, 0.9998382, 0.75933146, 0.81082416, 0.32500392, 0.73991376, 0.5574533, 0.38059133, 0.21814507, 0.21944171, 0.11525959, 0.83566517, 0.8554656, 0.44309366, 0.210657, 0.88645273, 0.81974447, 0.537167, 0.26393235, 0.9595239, 0.70447034, 0.12042731, 0.97854143, 0.8796869, 0.31775457, 0.78107727, 0.21590549, 0.42164284, 0.9245506 }); - Tensor *b = NN_tensor(2, (size_t[]){ 3, 5 }, DTYPE_F32, (float[]){ 0.49101293, 0.123470545, 0.11432165, 0.4724502, 0.5750725, 0.29523486, 0.7966888, 0.19573045, 0.95368505, 0.84264994, 0.07835853, 0.37555784, 0.5225613, 0.57295054, 0.61858714 }); + Tensor *b = NN_tensor(2, (size_t[]){ 7, 5 }, DTYPE_F32, (float[]){ 0.52065957, 0.14639091, 0.33288354, 0.36427742, 0.4035356, 0.5478503, 0.9624148, 0.5267702, 0.19128, 0.52562714, 0.7397436, 0.7480201, 0.04303074, 0.41052878, 0.12842774, 0.2866572, 0.6801467, 0.1449349, 0.68586344, 0.92438906, 0.5327942, 0.16675615, 0.32085752, 0.60918206, 0.11884099, 0.74840516, 0.04606521, 0.01935333, 0.014169693, 0.39856833, 0.83621645, 0.026760519, 0.91559356, 0.29998857, 0.64644206 }); - Tensor *golden = NN_tensor(2, (size_t[]){ 6, 5 }, DTYPE_F32, (float[]){ 0.35348204, 0.6615625, 0.27301818, 0.90510166, 0.86536056, 0.6250023, 1.0063617, 0.673796, 1.5323635, 1.5580451, 0.3525831, 0.15487501, 0.17132716, 0.4300866, 0.5080299, 0.5552216, 0.61557466, 0.26151043, 0.99382263, 1.0086348, 0.45620644, 1.1135812, 0.6515146, 1.5051413, 1.4573545, 0.250781, 0.3418054, 0.11785651, 0.503633, 0.49210823 }); + Tensor *golden = NN_tensor(2, (size_t[]){ 6, 5 }, DTYPE_F32, (float[]){ 2.259774, 0.54684687, 1.2793616, 0.8660709, 1.4312925, 3.3851278, 2.1193447, 1.705457, 1.776533, 2.1696422, 2.7592213, 1.8639714, 1.4114801, 1.6380526, 1.7633034, 1.522168, 1.0664225, 0.7902231, 1.332517, 1.4051012, 2.5228224, 1.6957082, 1.220037, 1.5118933, 1.5742182, 2.6540992, 1.8389752, 1.8399594, 1.6057416, 2.4114716 }); Tensor *actual = NN_zeros(2, (size_t[]){ 6, 5 }, DTYPE_F32); cycles = READ_CSR("mcycle"); @@ -196,4 +196,90 @@ int main() { NN_deleteTensor(actual); } + { + printf("maximum: "); + + Tensor *a = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.52280146, 0.049140453, 0.9146645, 0.7692217, 0.99699783, 0.7526061, 0.1699655, 0.9172919, 0.5268722, 0.73710823, 0.09908545, 0.35618675, 0.009061217, 0.30525374, 0.6078656, 0.10741913, 0.6593821, 0.7684034, 0.56965464, 0.16545832, 0.11234015, 0.3457417, 0.7194791, 0.9931982, 0.7875145, 0.44369537, 0.6753082, 0.009468555, 0.07294935, 0.73330396, 0.2167924, 0.74054784, 0.14703393, 0.25234455, 0.08815551, 0.76092035, 0.44905245, 0.88480055, 0.8094361, 0.7766713, 0.51607805, 0.345411, 0.39128417, 0.5664503, 0.74785477, 0.14970505, 0.91963893, 0.44563496, 0.08102721 }); + + Tensor *b = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.22947109, 0.94240886, 0.9572636, 0.036860168, 0.85264915, 0.7505796, 0.79595923, 0.9232646, 0.23052484, 0.6578879, 0.7046166, 0.35225332, 0.66732657, 0.3561433, 0.80913067, 0.3612727, 0.31360215, 0.6258745, 0.6773468, 0.25571418, 0.54419917, 0.78976786, 0.45025164, 0.65216696, 0.3794065, 0.6752498, 0.1378029, 0.2059856, 0.24620473, 0.95950544, 0.36545795, 0.49863482, 0.25775224, 0.99914503, 0.9883351, 0.122906685, 0.09466505, 0.12100351, 0.49758863, 0.37254804, 0.17272717, 0.32066393, 0.59446543, 0.23875463, 0.61079127, 0.38534206, 0.25771832, 0.56869274, 0.9111291 }); + + + Tensor *golden = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.52280146, 0.94240886, 0.9572636, 0.7692217, 0.99699783, 0.7526061, 0.79595923, 0.9232646, 0.5268722, 0.73710823, 0.7046166, 0.35618675, 0.66732657, 0.3561433, 0.80913067, 0.3612727, 0.6593821, 0.7684034, 0.6773468, 0.25571418, 0.54419917, 0.78976786, 0.7194791, 0.9931982, 0.7875145, 0.6752498, 0.6753082, 0.2059856, 0.24620473, 0.95950544, 0.36545795, 0.74054784, 0.25775224, 0.99914503, 0.9883351, 0.76092035, 0.44905245, 0.88480055, 0.8094361, 0.7766713, 0.51607805, 0.345411, 0.59446543, 0.5664503, 0.74785477, 0.38534206, 0.91963893, 0.56869274, 0.9111291 }); + Tensor *actual = NN_zeros(2, (size_t[]){ 7, 7 }, DTYPE_F32); + + cycles = READ_CSR("mcycle"); + NN_maximum(actual, a, b); + cycles = READ_CSR("mcycle") - cycles; + printf("%s (%lu cycles)\n", compare(golden, actual) ? "PASS" : "FAIL", cycles); + + + NN_deleteTensor(a); + NN_deleteTensor(b); + NN_deleteTensor(golden); + NN_freeTensorData(actual); + NN_deleteTensor(actual); + } + + { + printf("minimum: "); + + Tensor *a = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.16196036, 0.5232172, 0.31561613, 0.99065316, 0.025618374, 0.0206694, 0.9926925, 0.18365502, 0.5958617, 0.45684695, 0.3946715, 0.3883261, 0.8177203, 0.5238985, 0.013192713, 0.20481992, 0.32954985, 0.7516082, 0.17643315, 0.9714598, 0.38863534, 0.410219, 0.891779, 0.75130385, 0.92406017, 0.7892222, 0.34832305, 0.1682638, 0.46279848, 0.9138188, 0.3321901, 0.036315024, 0.7049642, 0.9867357, 0.3576584, 0.08598822, 0.046470165, 0.6252997, 0.46214014, 0.24750638, 0.60106593, 0.6898794, 0.8976595, 0.8881911, 0.42515814, 0.059116423, 0.048188448, 0.9668448, 0.7210276 }); + + Tensor *b = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.7179537, 0.06738949, 0.96300787, 0.97367156, 0.95143014, 0.07820749, 0.3113383, 0.1561181, 0.9734828, 0.28516, 0.27172273, 0.76195645, 0.26870382, 0.25373894, 0.45626426, 0.45194024, 0.11051077, 0.91683406, 0.27943915, 0.67735744, 0.9348918, 0.7521582, 0.57078993, 0.9254285, 0.5672131, 0.2686717, 0.97299975, 0.61834025, 0.012159586, 0.3576542, 0.15941626, 0.9383765, 0.41742706, 0.044237554, 0.46856833, 0.81400645, 0.6299002, 0.6581022, 0.5464366, 0.68640935, 0.378174, 0.3010999, 0.032645762, 0.12333155, 0.71670127, 0.20394331, 0.57173324, 0.6595957, 0.53540194 }); + + + Tensor *golden = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.16196036, 0.06738949, 0.31561613, 0.97367156, 0.025618374, 0.0206694, 0.3113383, 0.1561181, 0.5958617, 0.28516, 0.27172273, 0.3883261, 0.26870382, 0.25373894, 0.013192713, 0.20481992, 0.11051077, 0.7516082, 0.17643315, 0.67735744, 0.38863534, 0.410219, 0.57078993, 0.75130385, 0.5672131, 0.2686717, 0.34832305, 0.1682638, 0.012159586, 0.3576542, 0.15941626, 0.036315024, 0.41742706, 0.044237554, 0.3576584, 0.08598822, 0.046470165, 0.6252997, 0.46214014, 0.24750638, 0.378174, 0.3010999, 0.032645762, 0.12333155, 0.42515814, 0.059116423, 0.048188448, 0.6595957, 0.53540194 }); + Tensor *actual = NN_zeros(2, (size_t[]){ 7, 7 }, DTYPE_F32); + + cycles = READ_CSR("mcycle"); + NN_minimum(actual, a, b); + cycles = READ_CSR("mcycle") - cycles; + printf("%s (%lu cycles)\n", compare(golden, actual) ? "PASS" : "FAIL", cycles); + + + NN_deleteTensor(a); + NN_deleteTensor(b); + NN_deleteTensor(golden); + NN_freeTensorData(actual); + NN_deleteTensor(actual); + } + + { + printf("max: "); + + Tensor *a = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.17582512, 0.9781642, 0.20925027, 0.9112503, 0.10224587, 0.37972575, 0.7719844, 0.29570967, 0.9200215, 0.15592176, 0.080114245, 0.27454042, 0.5808252, 0.96037793, 0.26129955, 0.6788141, 0.37464648, 0.39156884, 0.8676517, 0.112507045, 0.55310667, 0.9702046, 0.4312939, 0.88821906, 0.3460216, 0.9024811, 0.016334832, 0.42793816, 0.4121768, 0.6620425, 0.6961637, 0.88390845, 0.425507, 0.48017246, 0.8424056, 0.36471343, 0.9383168, 0.16709393, 0.44589508, 0.47314453, 0.72310495, 0.84183806, 0.4207481, 0.0857597, 0.7477461, 0.6495659, 0.70084965, 0.19156617, 0.8217978 }); + + + float golden = 0.9781641960144043; + float actual; + + cycles = READ_CSR("mcycle"); + actual = NN_max(a); + cycles = READ_CSR("mcycle") - cycles; + printf("%s (%lu cycles)\n", float_eq(golden, actual, 1e-6) ? "PASS" : "FAIL", cycles); + + + NN_deleteTensor(a); + + } + + { + printf("min: "); + + Tensor *a = NN_tensor(2, (size_t[]){ 7, 7 }, DTYPE_F32, (float[]){ 0.9735775, 0.5433857, 0.032975793, 0.85099494, 0.12927437, 0.61493605, 0.5726589, 0.26598173, 0.6740978, 0.052783668, 0.61387974, 0.18302453, 0.44593316, 0.5643144, 0.9259722, 0.26143986, 0.82031804, 0.4364831, 0.2625361, 0.06460017, 0.04124081, 0.98830533, 0.37530023, 0.5249744, 0.63555616, 0.8398661, 0.92673707, 0.9055086, 0.12955844, 0.4198916, 0.20413119, 0.21432412, 0.6186035, 0.969324, 0.099448025, 0.80260223, 0.24076664, 0.40261286, 0.89688545, 0.38691485, 0.5455279, 0.15048373, 0.92562044, 0.43536508, 0.13430476, 0.64640516, 0.14449131, 0.10324633, 0.5304596 }); + + + float golden = 0.03297579288482666; + float actual; + + cycles = READ_CSR("mcycle"); + actual = NN_min(a); + cycles = READ_CSR("mcycle") - cycles; + printf("%s (%lu cycles)\n", float_eq(golden, actual, 1e-6) ? "PASS" : "FAIL", cycles); + + + NN_deleteTensor(a); + + } + } \ No newline at end of file diff --git a/tests/src/main_fp16.c b/tests/src/main_fp16.c new file mode 100644 index 0000000..3b1b414 --- /dev/null +++ b/tests/src/main_fp16.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include + +#include "nn.h" +// #include "riscv_vector.h" + + +void print_bits_half(float16_t x) { + for(int i = 15; i >= 0; i -= 1) { + printf("%d", ((x>>i)&1)); + if(i == 15 || i == 10) + printf(" "); + if(i == 10) + printf(" "); + } + printf("\n"); +} +void print_bits(float x) { + uint32_t b = *(uint32_t*)&x; + for(int i = 31; i >= 0; i -= 1) { + printf("%d", ((b>>i)&1)); + if(i == 31 || i == 23) + printf(" "); + if(i == 23) + printf(" "); + } + printf("\n"); +} + +uint8_t compareResult(float golden, float actual) { + float diff = fabs(golden - actual); + if (diff > 1e-2) { + printf("FAILED "); + printf("golden: "); + NN_printFloat(golden, 6); + printf("\n"); + printf("actual: "); + NN_printFloat(actual, 6); + printf("\n"); + return 1; + } + printf("PASSED\n"); + return 0; +} + +int main() { + // for (size_t i = 0; i < 100; i += 1) { + // float x = rand() / (float)RAND_MAX * 1000.0f; + + float x = (float)(0x47ca9334); + + float16_t x_compressed = NN_floatToHalf(x); + float x_decompressed = NN_halfToFloat(x_compressed); + + print_bits(x); + print_bits_half(x_compressed); + print_bits(x_decompressed); + + printf("%f\t", x); + printf("%f\n", x_decompressed); + + compareResult(x, x_decompressed); + // } + return 0; +} \ No newline at end of file