From 1f248f9272fce0d67a8a4c7e7039f84f86a516c3 Mon Sep 17 00:00:00 2001 From: "-T.K.-" Date: Sat, 8 Jun 2024 01:00:09 -0700 Subject: [PATCH] ADD: minor fix --- nn/CMakeLists.txt | 3 +++ nn/inc/nn.h | 1 + nn/inc/nn_add.h | 1 + nn/inc/nn_max.h | 8 ++++---- nn/inc/nn_min.h | 8 ++++---- nn/src/add/nn_add.c | 22 +++++++++++----------- nn/src/add/nn_add_rvv.c | 6 +++--- nn/src/matmul/nn_matmul_rvv.c | 4 ++-- nn/src/max/nn_max.c | 32 ++++++++++++++++++++++---------- nn/src/max/nn_max_rvv.c | 12 ++++++------ nn/src/min/nn_min.c | 32 ++++++++++++++++++++++---------- nn/src/min/nn_min_rvv.c | 12 ++++++------ test/src/main.c | 21 ++++++++++++++++++--- 13 files changed, 103 insertions(+), 59 deletions(-) diff --git a/nn/CMakeLists.txt b/nn/CMakeLists.txt index 0d08596..650b637 100644 --- a/nn/CMakeLists.txt +++ b/nn/CMakeLists.txt @@ -25,6 +25,9 @@ set(SOURCES src/min/nn_min.c src/min/nn_min_rvv.c + + src/mul/nn_mul.c + src/mul/nn_mul_rvv.c src/sum/nn_sum.c diff --git a/nn/inc/nn.h b/nn/inc/nn.h index a073245..5b092a5 100644 --- a/nn/inc/nn.h +++ b/nn/inc/nn.h @@ -11,6 +11,7 @@ #include "nn_matmul.h" #include "nn_max.h" #include "nn_min.h" +#include "nn_mul.h" #include "nn_sum.h" diff --git a/nn/inc/nn_add.h b/nn/inc/nn_add.h index 8c23b49..c053342 100644 --- a/nn/inc/nn_add.h +++ b/nn/inc/nn_add.h @@ -5,6 +5,7 @@ #include #include "nn_tensor.h" +#include "nn_print.h" /** diff --git a/nn/inc/nn_max.h b/nn/inc/nn_max.h index 17946d6..7d6e55e 100644 --- a/nn/inc/nn_max.h +++ b/nn/inc/nn_max.h @@ -10,13 +10,13 @@ /** * Returns the maximum value of all elements in the input tensor. * - * @param t: input tensor + * @param tensor: input tensor */ -float NN_max(Tensor *t); +float NN_max(Tensor *tensor); -float NN_max_F32(Tensor *t); +float NN_max_F32(Tensor *tensor); -float NN_max_F32_RVV(Tensor *t); +float NN_max_F32_RVV(Tensor *tensor); #endif // __NN_MAX_H diff --git a/nn/inc/nn_min.h b/nn/inc/nn_min.h index d668275..e0abae3 100644 --- a/nn/inc/nn_min.h +++ b/nn/inc/nn_min.h @@ -10,13 +10,13 @@ /** * Returns the minimum value of all elements in the input tensor. * - * @param t: input tensor + * @param tensor: input tensor */ -float NN_min(Tensor *t); +float NN_min(Tensor *tensor); -float NN_min_F32(Tensor *t); +float NN_min_F32(Tensor *tensor); -float NN_min_F32_RVV(Tensor *t); +float NN_min_F32_RVV(Tensor *tensor); #endif // __NN_MIN_H diff --git a/nn/src/add/nn_add.c b/nn/src/add/nn_add.c index c08fd52..196f431 100644 --- a/nn/src/add/nn_add.c +++ b/nn/src/add/nn_add.c @@ -22,7 +22,7 @@ void NN_add_F32(Tensor *out, Tensor *a, Tensor *b) { assert(a->shape[0] == b->shape[0]); out->dtype = DTYPE_F32; - for (size_t i = 0; indim; i+=1) { + for (size_t i = 0; i < out->ndim; i += 1) { out->shape[i] = a->shape[i] > b->shape[i] ? a->shape[i] : b->shape[i]; } @@ -32,7 +32,7 @@ void NN_add_F32(Tensor *out, Tensor *a, Tensor *b) { switch (out->ndim) { case 1: - for (size_t i = 0; ishape[0]; i+=1) { + for (size_t i = 0; i < out->shape[0]; i += 1) { *((float *)out_ptr) = *((float *)a_ptr) + *((float *)b_ptr); out_ptr += out->strides[0]; a_ptr += a->strides[0]; @@ -40,8 +40,8 @@ void NN_add_F32(Tensor *out, Tensor *a, Tensor *b) { } return; case 2: - for (size_t i = 0; ishape[0]; i+=1) { - for (size_t j = 0; jshape[1]; j+=1) { + for (size_t i = 0; i < out->shape[0]; i += 1) { + for (size_t j = 0; j < out->shape[1]; j += 1) { *((float *)out_ptr) = *((float *)a_ptr) + *((float *)b_ptr); out_ptr += out->strides[1]; a_ptr += a->strides[1]; @@ -56,9 +56,9 @@ void NN_add_F32(Tensor *out, Tensor *a, Tensor *b) { } printf("Unsupported operation between tensor with shape "); - NN_printShape(a->shape); + NN_printShape(a); printf(" and "); - NN_printShape(b->shape); + NN_printShape(b); printf("\n"); } @@ -79,7 +79,7 @@ void NN_add_INT(Tensor *out, Tensor *a, Tensor *b) { switch (out->ndim) { case 1: - for (size_t i = 0; ishape[0]; i+=1) { + for (size_t i = 0; i < out->shape[0]; i += 1) { *((int32_t *)out_ptr) = *((int32_t *)a_ptr) + *((int32_t *)b_ptr); out_ptr += out->strides[0]; a_ptr += a->strides[0]; @@ -87,8 +87,8 @@ void NN_add_INT(Tensor *out, Tensor *a, Tensor *b) { } return; case 2: - for (size_t i = 0; ishape[0]; i+=1) { - for (size_t j = 0; jshape[1]; j+=1) { + for (size_t i = 0; i < out->shape[0]; i += 1) { + for (size_t j = 0; j < out->shape[1]; j += 1) { *((int32_t *)out_ptr) = *((int32_t *)a_ptr) + *((int32_t *)b_ptr); out_ptr += out->strides[1]; a_ptr += a->strides[1]; @@ -103,9 +103,9 @@ void NN_add_INT(Tensor *out, Tensor *a, Tensor *b) { } printf("Unsupported operation between tensor with shape "); - NN_printShape(a->shape); + NN_printShape(a); printf(" and "); - NN_printShape(b->shape); + NN_printShape(b); printf("\n"); } diff --git a/nn/src/add/nn_add_rvv.c b/nn/src/add/nn_add_rvv.c index 7a5f017..5d6ef19 100644 --- a/nn/src/add/nn_add_rvv.c +++ b/nn/src/add/nn_add_rvv.c @@ -19,10 +19,10 @@ void NN_add_F32_RVV(Tensor *out, Tensor *a, Tensor *b) { size_t n = out->shape[0] * out->shape[1]; while (n > 0) { size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_a = __riscv_vlse32_v_f32m1(a_ptr, a->strides[1], vl); - vfloat32m1_t vec_b = __riscv_vlse32_v_f32m1(b_ptr, b->strides[1], vl); + vfloat32m1_t vec_a = __riscv_vlse32_v_f32m1((float *)a_ptr, a->strides[1], vl); + vfloat32m1_t vec_b = __riscv_vlse32_v_f32m1((float *)b_ptr, b->strides[1], vl); vfloat32m1_t vec_out = __riscv_vfadd_vv_f32m1(vec_a, vec_b, vl); - __riscv_vsse32_v_f32m1(out_ptr, out->strides[1], vec_out, vl); + __riscv_vsse32_v_f32m1((float *)out_ptr, out->strides[1], vec_out, vl); a_ptr += vl * a->strides[1]; b_ptr += vl * b->strides[1]; out_ptr += vl * out->strides[1]; diff --git a/nn/src/matmul/nn_matmul_rvv.c b/nn/src/matmul/nn_matmul_rvv.c index f019dcc..70c3a2c 100644 --- a/nn/src/matmul/nn_matmul_rvv.c +++ b/nn/src/matmul/nn_matmul_rvv.c @@ -31,8 +31,8 @@ void NN_matmul_F32_RVV(Tensor *out, Tensor *a, Tensor *b) { size_t n = a->shape[1]; while (n > 0) { size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_a = __riscv_vlse32_v_f32m1(a_ptr_v, a->strides[1], vl); - vfloat32m1_t vec_b = __riscv_vlse32_v_f32m1(b_ptr_v, b->strides[0], vl); + vfloat32m1_t vec_a = __riscv_vlse32_v_f32m1((float *)a_ptr_v, a->strides[1], vl); + vfloat32m1_t vec_b = __riscv_vlse32_v_f32m1((float *)b_ptr_v, b->strides[0], vl); vec_s = __riscv_vfmacc_vv_f32m1(vec_s, vec_a, vec_b, vl); a_ptr_v += vl * a->strides[1]; diff --git a/nn/src/max/nn_max.c b/nn/src/max/nn_max.c index 03cf1aa..c9fcb31 100644 --- a/nn/src/max/nn_max.c +++ b/nn/src/max/nn_max.c @@ -2,19 +2,31 @@ #include "nn_max.h" -float NN_max_F32(Tensor *t) { - assert(t->dtype == DTYPE_F32); +float NN_max_F32(Tensor *tensor) { + assert(tensor->dtype == DTYPE_F32); float max = -FLT_MAX; - float *t_data = (float *)t->data; - - for (size_t i = 0; i < t->shape[0]; i += 1) { - for (size_t j = 0; j < t->shape[1]; j += 1) { - if (t_data[i * t->shape[1] + j] > max) { - max = t_data[i * t->shape[1] + j]; + uint8_t *ptr = tensor->data; + + switch (tensor->ndim) { + case 1: + for (size_t i = 0; i < tensor->shape[0]; i += 1) { + float val = *((float *)ptr); + max = val > max ? val : max; + ptr += tensor->strides[0]; } - } + break; + case 2: + for (size_t i = 0; i < tensor->shape[0]; i += 1) { + for (size_t j = 0; j < tensor->shape[1]; j += 1) { + float val = *((float *)ptr); + max = val > max ? val : max; + ptr += tensor->strides[1]; + } + ptr -= tensor->strides[1] * tensor->shape[1]; + ptr += tensor->strides[0]; + } + break; } - return max; } diff --git a/nn/src/max/nn_max_rvv.c b/nn/src/max/nn_max_rvv.c index 259dab6..eceb516 100644 --- a/nn/src/max/nn_max_rvv.c +++ b/nn/src/max/nn_max_rvv.c @@ -2,20 +2,20 @@ #include "nn_max.h" #include "riscv_vector.h" -float NN_max_F32_RVV(Tensor *t) { - assert(t->dtype == DTYPE_F32); +float NN_max_F32_RVV(Tensor *tensor) { + assert(tensor->dtype == DTYPE_F32); float max = -FLT_MAX; - float *t_data = (float *)t->data; + uint8_t *ptr = tensor->data; vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(max, 1); - size_t n = t->shape[0] * t->shape[1]; + size_t n = tensor->shape[0] * tensor->shape[1]; while (n > 0) { size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_data = __riscv_vle32_v_f32m1(t_data, vl); + vfloat32m1_t vec_data = __riscv_vlse32_v_f32m1((float *)ptr, tensor->strides[1], vl); vec_max = __riscv_vfredmax_vs_f32m1_f32m1(vec_data, vec_max, vl); - t_data += vl; + ptr += vl * tensor->strides[1]; n -= vl; } max = __riscv_vfmv_f_s_f32m1_f32(vec_max); diff --git a/nn/src/min/nn_min.c b/nn/src/min/nn_min.c index d61e727..d7cbab1 100644 --- a/nn/src/min/nn_min.c +++ b/nn/src/min/nn_min.c @@ -2,19 +2,31 @@ #include "nn_min.h" -float NN_min_F32(Tensor *t) { - assert(t->dtype == DTYPE_F32); +float NN_min_F32(Tensor *tensor) { + assert(tensor->dtype == DTYPE_F32); float min = FLT_MAX; - float *t_data = (float *)t->data; - - for (size_t i = 0; i < t->shape[0]; i += 1) { - for (size_t j = 0; j < t->shape[1]; j += 1) { - if (t_data[i * t->shape[1] + j] < min) { - min = t_data[i * t->shape[1] + j]; + uint8_t *ptr = tensor->data; + + switch (tensor->ndim) { + case 1: + for (size_t i = 0; i < tensor->shape[0]; i += 1) { + float val = *((float *)ptr); + min = val < min ? val : min; + ptr += tensor->strides[0]; } - } + break; + case 2: + for (size_t i = 0; i < tensor->shape[0]; i += 1) { + for (size_t j = 0; j < tensor->shape[1]; j += 1) { + float val = *((float *)ptr); + min = val < min ? val : min; + ptr += tensor->strides[1]; + } + ptr -= tensor->strides[1] * tensor->shape[1]; + ptr += tensor->strides[0]; + } + break; } - return min; } diff --git a/nn/src/min/nn_min_rvv.c b/nn/src/min/nn_min_rvv.c index 64c53d7..ee277a1 100644 --- a/nn/src/min/nn_min_rvv.c +++ b/nn/src/min/nn_min_rvv.c @@ -2,20 +2,20 @@ #include "nn_min.h" #include "riscv_vector.h" -float NN_min_F32_RVV(Tensor *t) { - assert(t->dtype == DTYPE_F32); +float NN_min_F32_RVV(Tensor *tensor) { + assert(tensor->dtype == DTYPE_F32); float min = FLT_MAX; - float *t_data = (float *)t->data; + uint8_t *ptr = tensor->data; vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(min, 1); - size_t n = t->shape[0] * t->shape[1]; + size_t n = tensor->shape[0] * tensor->shape[1]; while (n > 0) { size_t vl = __riscv_vsetvl_e32m1(n); - vfloat32m1_t vec_data = __riscv_vle32_v_f32m1(t_data, vl); + vfloat32m1_t vec_data = __riscv_vlse32_v_f32m1((float *)ptr, tensor->strides[1], vl); vec_min = __riscv_vfredmin_vs_f32m1_f32m1(vec_data, vec_min, vl); - t_data += vl; + ptr += vl * tensor->strides[1]; n -= vl; } min = __riscv_vfmv_f_s_f32m1_f32(vec_min); diff --git a/test/src/main.c b/test/src/main.c index 8f08e34..6f49620 100644 --- a/test/src/main.c +++ b/test/src/main.c @@ -74,7 +74,6 @@ int main() { // matvec { - // array gen Tensor *H = NN_rand(2, (size_t[]){N, M}, DTYPE_F32); Tensor *V = NN_rand(2, (size_t[]){M, 1}, DTYPE_F32); Tensor *W = NN_rand(2, (size_t[]){1, M}, DTYPE_F32); @@ -87,7 +86,7 @@ int main() { cycles = READ_CSR("mcycle"); NN_matmul_F32_RVV(actual_vec, H, V); cycles = READ_CSR("mcycle") - cycles; - printf("%s (%lu)\n", compare_2d(golden_vec, actual_vec, N, 1) ? "pass" : "fail", cycles); + printf("%s (%lu)\n", compare_2d(golden_vec->data, actual_vec->data, N, 1) ? "pass" : "fail", cycles); printf("matvec_t:\t"); NN_transpose_F32(H, H); @@ -96,7 +95,7 @@ int main() { cycles = READ_CSR("mcycle"); NN_matmul_F32_RVV(actual_vec, W, H); cycles = READ_CSR("mcycle") - cycles; - printf("%s (%lu)\n", compare_2d(golden_vec, actual_vec, N, 1) ? "pass" : "fail", cycles); + printf("%s (%lu)\n", compare_2d(golden_vec->data, actual_vec->data, N, 1) ? "pass" : "fail", cycles); NN_freeTensorData(H); NN_deleteTensor(H); @@ -130,7 +129,23 @@ int main() { // matmulf { + Tensor *A = NN_rand(2, (size_t[]){M, N}, DTYPE_F32); + Tensor *C = NN_tensor(2, (size_t[]){M, N}, DTYPE_F32, NULL); + Tensor *D = NN_tensor(2, (size_t[]){M, N}, DTYPE_F32, NULL); + + printf("mulf:\t\t"); + NN_mul_F32(C, A, 10.0f); + cycles = READ_CSR("mcycle"); + NN_mul_F32_RVV(D, A, 10.0f); + cycles = READ_CSR("mcycle") - cycles; + printf("%s (%lu)\n", compare_2d(C->data, D->data, M, N) ? "pass" : "fail", cycles); + NN_freeTensorData(A); + NN_deleteTensor(A); + NN_freeTensorData(C); + NN_deleteTensor(C); + NN_freeTensorData(D); + NN_deleteTensor(D); } // matsub