Skip to content

Commit

Permalink
ADD: minor fix
Browse files Browse the repository at this point in the history
  • Loading branch information
T-K-233 committed Jun 8, 2024
1 parent 2a74099 commit 1f248f9
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 59 deletions.
3 changes: 3 additions & 0 deletions nn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ set(SOURCES

src/min/nn_min.c
src/min/nn_min_rvv.c

src/mul/nn_mul.c
src/mul/nn_mul_rvv.c

src/sum/nn_sum.c

Expand Down
1 change: 1 addition & 0 deletions nn/inc/nn.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "nn_matmul.h"
#include "nn_max.h"
#include "nn_min.h"
#include "nn_mul.h"
#include "nn_sum.h"


Expand Down
1 change: 1 addition & 0 deletions nn/inc/nn_add.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <math.h>

#include "nn_tensor.h"
#include "nn_print.h"


/**
Expand Down
8 changes: 4 additions & 4 deletions nn/inc/nn_max.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
/**
* Returns the maximum value of all elements in the input tensor.
*
* @param t: input tensor
* @param tensor: input tensor
*/
float NN_max(Tensor *t);
float NN_max(Tensor *tensor);

float NN_max_F32(Tensor *t);
float NN_max_F32(Tensor *tensor);

float NN_max_F32_RVV(Tensor *t);
float NN_max_F32_RVV(Tensor *tensor);


#endif // __NN_MAX_H
8 changes: 4 additions & 4 deletions nn/inc/nn_min.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
/**
* Returns the minimum value of all elements in the input tensor.
*
* @param t: input tensor
* @param tensor: input tensor
*/
float NN_min(Tensor *t);
float NN_min(Tensor *tensor);

float NN_min_F32(Tensor *t);
float NN_min_F32(Tensor *tensor);

float NN_min_F32_RVV(Tensor *t);
float NN_min_F32_RVV(Tensor *tensor);


#endif // __NN_MIN_H
22 changes: 11 additions & 11 deletions nn/src/add/nn_add.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ void NN_add_F32(Tensor *out, Tensor *a, Tensor *b) {
assert(a->shape[0] == b->shape[0]);

out->dtype = DTYPE_F32;
for (size_t i = 0; i<out->ndim; i+=1) {
for (size_t i = 0; i < out->ndim; i += 1) {
out->shape[i] = a->shape[i] > b->shape[i] ? a->shape[i] : b->shape[i];
}

Expand All @@ -32,16 +32,16 @@ void NN_add_F32(Tensor *out, Tensor *a, Tensor *b) {

switch (out->ndim) {
case 1:
for (size_t i = 0; i<out->shape[0]; i+=1) {
for (size_t i = 0; i < out->shape[0]; i += 1) {
*((float *)out_ptr) = *((float *)a_ptr) + *((float *)b_ptr);
out_ptr += out->strides[0];
a_ptr += a->strides[0];
b_ptr += b->strides[0];
}
return;
case 2:
for (size_t i = 0; i<out->shape[0]; i+=1) {
for (size_t j = 0; j<out->shape[1]; j+=1) {
for (size_t i = 0; i < out->shape[0]; i += 1) {
for (size_t j = 0; j < out->shape[1]; j += 1) {
*((float *)out_ptr) = *((float *)a_ptr) + *((float *)b_ptr);
out_ptr += out->strides[1];
a_ptr += a->strides[1];
Expand All @@ -56,9 +56,9 @@ void NN_add_F32(Tensor *out, Tensor *a, Tensor *b) {
}

printf("Unsupported operation between tensor with shape ");
NN_printShape(a->shape);
NN_printShape(a);
printf(" and ");
NN_printShape(b->shape);
NN_printShape(b);
printf("\n");
}

Expand All @@ -79,16 +79,16 @@ void NN_add_INT(Tensor *out, Tensor *a, Tensor *b) {

switch (out->ndim) {
case 1:
for (size_t i = 0; i<out->shape[0]; i+=1) {
for (size_t i = 0; i < out->shape[0]; i += 1) {
*((int32_t *)out_ptr) = *((int32_t *)a_ptr) + *((int32_t *)b_ptr);
out_ptr += out->strides[0];
a_ptr += a->strides[0];
b_ptr += b->strides[0];
}
return;
case 2:
for (size_t i = 0; i<out->shape[0]; i+=1) {
for (size_t j = 0; j<out->shape[1]; j+=1) {
for (size_t i = 0; i < out->shape[0]; i += 1) {
for (size_t j = 0; j < out->shape[1]; j += 1) {
*((int32_t *)out_ptr) = *((int32_t *)a_ptr) + *((int32_t *)b_ptr);
out_ptr += out->strides[1];
a_ptr += a->strides[1];
Expand All @@ -103,9 +103,9 @@ void NN_add_INT(Tensor *out, Tensor *a, Tensor *b) {
}

printf("Unsupported operation between tensor with shape ");
NN_printShape(a->shape);
NN_printShape(a);
printf(" and ");
NN_printShape(b->shape);
NN_printShape(b);
printf("\n");
}

6 changes: 3 additions & 3 deletions nn/src/add/nn_add_rvv.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ void NN_add_F32_RVV(Tensor *out, Tensor *a, Tensor *b) {
size_t n = out->shape[0] * out->shape[1];
while (n > 0) {
size_t vl = __riscv_vsetvl_e32m1(n);
vfloat32m1_t vec_a = __riscv_vlse32_v_f32m1(a_ptr, a->strides[1], vl);
vfloat32m1_t vec_b = __riscv_vlse32_v_f32m1(b_ptr, b->strides[1], vl);
vfloat32m1_t vec_a = __riscv_vlse32_v_f32m1((float *)a_ptr, a->strides[1], vl);
vfloat32m1_t vec_b = __riscv_vlse32_v_f32m1((float *)b_ptr, b->strides[1], vl);
vfloat32m1_t vec_out = __riscv_vfadd_vv_f32m1(vec_a, vec_b, vl);
__riscv_vsse32_v_f32m1(out_ptr, out->strides[1], vec_out, vl);
__riscv_vsse32_v_f32m1((float *)out_ptr, out->strides[1], vec_out, vl);
a_ptr += vl * a->strides[1];
b_ptr += vl * b->strides[1];
out_ptr += vl * out->strides[1];
Expand Down
4 changes: 2 additions & 2 deletions nn/src/matmul/nn_matmul_rvv.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ void NN_matmul_F32_RVV(Tensor *out, Tensor *a, Tensor *b) {
size_t n = a->shape[1];
while (n > 0) {
size_t vl = __riscv_vsetvl_e32m1(n);
vfloat32m1_t vec_a = __riscv_vlse32_v_f32m1(a_ptr_v, a->strides[1], vl);
vfloat32m1_t vec_b = __riscv_vlse32_v_f32m1(b_ptr_v, b->strides[0], vl);
vfloat32m1_t vec_a = __riscv_vlse32_v_f32m1((float *)a_ptr_v, a->strides[1], vl);
vfloat32m1_t vec_b = __riscv_vlse32_v_f32m1((float *)b_ptr_v, b->strides[0], vl);
vec_s = __riscv_vfmacc_vv_f32m1(vec_s, vec_a, vec_b, vl);

a_ptr_v += vl * a->strides[1];
Expand Down
32 changes: 22 additions & 10 deletions nn/src/max/nn_max.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,31 @@
#include "nn_max.h"


/**
 * Returns the maximum value of all elements in the input tensor.
 *
 * Walks the tensor via its strides (the data pointer is advanced as a
 * uint8_t *, so strides are treated as byte offsets), which makes the
 * reduction correct for non-contiguous views as well.
 *
 * @param tensor: input tensor; must be DTYPE_F32 and 1-D or 2-D
 * @return the largest element; -FLT_MAX if the tensor is empty or has
 *         an unsupported ndim
 */
float NN_max_F32(Tensor *tensor) {
  assert(tensor->dtype == DTYPE_F32);

  float max = -FLT_MAX;
  uint8_t *ptr = tensor->data;

  switch (tensor->ndim) {
    case 1:
      for (size_t i = 0; i < tensor->shape[0]; i += 1) {
        float val = *((float *)ptr);
        max = val > max ? val : max;
        ptr += tensor->strides[0];
      }
      break;
    case 2:
      for (size_t i = 0; i < tensor->shape[0]; i += 1) {
        for (size_t j = 0; j < tensor->shape[1]; j += 1) {
          float val = *((float *)ptr);
          max = val > max ? val : max;
          ptr += tensor->strides[1];
        }
        // rewind to the start of the row, then step to the next row
        ptr -= tensor->strides[1] * tensor->shape[1];
        ptr += tensor->strides[0];
      }
      break;
    default:
      // ndim > 2 is not supported; fall through and return the sentinel
      break;
  }

  return max;
}
12 changes: 6 additions & 6 deletions nn/src/max/nn_max_rvv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
#include "nn_max.h"
#include "riscv_vector.h"

float NN_max_F32_RVV(Tensor *t) {
assert(t->dtype == DTYPE_F32);
float NN_max_F32_RVV(Tensor *tensor) {
assert(tensor->dtype == DTYPE_F32);

float max = -FLT_MAX;
float *t_data = (float *)t->data;
uint8_t *ptr = tensor->data;

vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(max, 1);

size_t n = t->shape[0] * t->shape[1];
size_t n = tensor->shape[0] * tensor->shape[1];
while (n > 0) {
size_t vl = __riscv_vsetvl_e32m1(n);
vfloat32m1_t vec_data = __riscv_vle32_v_f32m1(t_data, vl);
vfloat32m1_t vec_data = __riscv_vlse32_v_f32m1((float *)ptr, tensor->strides[1], vl);
vec_max = __riscv_vfredmax_vs_f32m1_f32m1(vec_data, vec_max, vl);
t_data += vl;
ptr += vl * tensor->strides[1];
n -= vl;
}
max = __riscv_vfmv_f_s_f32m1_f32(vec_max);
Expand Down
32 changes: 22 additions & 10 deletions nn/src/min/nn_min.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,31 @@
#include "nn_min.h"


/**
 * Returns the minimum value of all elements in the input tensor.
 *
 * Walks the tensor via its strides (the data pointer is advanced as a
 * uint8_t *, so strides are treated as byte offsets), which makes the
 * reduction correct for non-contiguous views as well.
 *
 * @param tensor: input tensor; must be DTYPE_F32 and 1-D or 2-D
 * @return the smallest element; FLT_MAX if the tensor is empty or has
 *         an unsupported ndim
 */
float NN_min_F32(Tensor *tensor) {
  assert(tensor->dtype == DTYPE_F32);

  float min = FLT_MAX;
  uint8_t *ptr = tensor->data;

  switch (tensor->ndim) {
    case 1:
      for (size_t i = 0; i < tensor->shape[0]; i += 1) {
        float val = *((float *)ptr);
        min = val < min ? val : min;
        ptr += tensor->strides[0];
      }
      break;
    case 2:
      for (size_t i = 0; i < tensor->shape[0]; i += 1) {
        for (size_t j = 0; j < tensor->shape[1]; j += 1) {
          float val = *((float *)ptr);
          min = val < min ? val : min;
          ptr += tensor->strides[1];
        }
        // rewind to the start of the row, then step to the next row
        ptr -= tensor->strides[1] * tensor->shape[1];
        ptr += tensor->strides[0];
      }
      break;
    default:
      // ndim > 2 is not supported; fall through and return the sentinel
      break;
  }

  return min;
}
12 changes: 6 additions & 6 deletions nn/src/min/nn_min_rvv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
#include "nn_min.h"
#include "riscv_vector.h"

float NN_min_F32_RVV(Tensor *t) {
assert(t->dtype == DTYPE_F32);
float NN_min_F32_RVV(Tensor *tensor) {
assert(tensor->dtype == DTYPE_F32);

float min = FLT_MAX;
float *t_data = (float *)t->data;
uint8_t *ptr = tensor->data;

vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(min, 1);

size_t n = t->shape[0] * t->shape[1];
size_t n = tensor->shape[0] * tensor->shape[1];
while (n > 0) {
size_t vl = __riscv_vsetvl_e32m1(n);
vfloat32m1_t vec_data = __riscv_vle32_v_f32m1(t_data, vl);
vfloat32m1_t vec_data = __riscv_vlse32_v_f32m1((float *)ptr, tensor->strides[1], vl);
vec_min = __riscv_vfredmin_vs_f32m1_f32m1(vec_data, vec_min, vl);
t_data += vl;
ptr += vl * tensor->strides[1];
n -= vl;
}
min = __riscv_vfmv_f_s_f32m1_f32(vec_min);
Expand Down
21 changes: 18 additions & 3 deletions test/src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ int main() {

// matvec
{
// array gen
Tensor *H = NN_rand(2, (size_t[]){N, M}, DTYPE_F32);
Tensor *V = NN_rand(2, (size_t[]){M, 1}, DTYPE_F32);
Tensor *W = NN_rand(2, (size_t[]){1, M}, DTYPE_F32);
Expand All @@ -87,7 +86,7 @@ int main() {
cycles = READ_CSR("mcycle");
NN_matmul_F32_RVV(actual_vec, H, V);
cycles = READ_CSR("mcycle") - cycles;
printf("%s (%lu)\n", compare_2d(golden_vec, actual_vec, N, 1) ? "pass" : "fail", cycles);
printf("%s (%lu)\n", compare_2d(golden_vec->data, actual_vec->data, N, 1) ? "pass" : "fail", cycles);

printf("matvec_t:\t");
NN_transpose_F32(H, H);
Expand All @@ -96,7 +95,7 @@ int main() {
cycles = READ_CSR("mcycle");
NN_matmul_F32_RVV(actual_vec, W, H);
cycles = READ_CSR("mcycle") - cycles;
printf("%s (%lu)\n", compare_2d(golden_vec, actual_vec, N, 1) ? "pass" : "fail", cycles);
printf("%s (%lu)\n", compare_2d(golden_vec->data, actual_vec->data, N, 1) ? "pass" : "fail", cycles);

NN_freeTensorData(H);
NN_deleteTensor(H);
Expand Down Expand Up @@ -130,7 +129,23 @@ int main() {

// matmulf
{
Tensor *A = NN_rand(2, (size_t[]){M, N}, DTYPE_F32);
Tensor *C = NN_tensor(2, (size_t[]){M, N}, DTYPE_F32, NULL);
Tensor *D = NN_tensor(2, (size_t[]){M, N}, DTYPE_F32, NULL);

printf("mulf:\t\t");
NN_mul_F32(C, A, 10.0f);
cycles = READ_CSR("mcycle");
NN_mul_F32_RVV(D, A, 10.0f);
cycles = READ_CSR("mcycle") - cycles;
printf("%s (%lu)\n", compare_2d(C->data, D->data, M, N) ? "pass" : "fail", cycles);

NN_freeTensorData(A);
NN_deleteTensor(A);
NN_freeTensorData(C);
NN_deleteTensor(C);
NN_freeTensorData(D);
NN_deleteTensor(D);
}

// matsub
Expand Down

0 comments on commit 1f248f9

Please sign in to comment.