Skip to content

Commit

Permalink
ADD: update codebase
Browse files Browse the repository at this point in the history
  • Loading branch information
T-K-233 committed Jun 6, 2024
1 parent 6c37812 commit 86a4ff6
Show file tree
Hide file tree
Showing 19 changed files with 416 additions and 52 deletions.
14 changes: 12 additions & 2 deletions nn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,22 @@ set(INCLUDES
set(SOURCES
src/nn_tensor.c
src/nn_print.c

src/add/nn_add.c
src/add/nn_add_rvv.c

src/copy/nn_copy.c
src/matmul/nn_matmul.c

src/add/nn_add_rvv.c
src/matmul/nn_matmul.c
src/matmul/nn_matmul_eaglex.c
src/matmul/nn_matmul_rvv.c

src/max/nn_max.c
src/max/nn_max_rvv.c

src/min/nn_min.c
src/min/nn_min_rvv.c

)

add_library(nn ${SOURCES})
Expand Down
2 changes: 2 additions & 0 deletions nn/inc/nn.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "nn_add.h"
#include "nn_copy.h"
#include "nn_matmul.h"
#include "nn_max.h"
#include "nn_min.h"


// http://elm-chan.org/junk/32bit/binclude.html
Expand Down
4 changes: 0 additions & 4 deletions nn/inc/nn_linear.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,4 @@
*/
void NN_linear_F32(Tensor *y, Tensor *x, Tensor *w, Tensor *b);

void NN_linear_I32(Tensor *y, Tensor *x, Tensor *w, Tensor *b);

void NN_linear_I8_I8_I8_I32(Tensor *y, Tensor *x, Tensor *w, Tensor *b);

#endif // __NN_LINEAR_H
2 changes: 2 additions & 0 deletions nn/inc/nn_matmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,7 @@ void NN_matmul_I32(Tensor *out, Tensor *a, Tensor *b);

void NN_matmul_F32_RVV(Tensor *out, Tensor *a, Tensor *b);

void NN_matmul_I8_I8_I32_EAGLEX(Tensor *out, Tensor *a, Tensor *b);


#endif // __NN_MATMUL_H
22 changes: 22 additions & 0 deletions nn/inc/nn_max.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef __NN_MAX_H
#define __NN_MAX_H

#include <assert.h>
#include <float.h>

#include "nn_tensor.h"


/**
 * Returns the maximum value of all elements in the input tensor.
 *
 * @param t: input tensor
 */
float NN_max(Tensor *t);

/**
 * Portable scalar implementation of NN_max for F32 tensors.
 *
 * @param t: input tensor (dtype must be F32; checked with assert)
 */
float NN_max_F32(Tensor *t);

/**
 * RISC-V Vector (RVV) accelerated implementation of NN_max for F32 tensors.
 *
 * @param t: input tensor (dtype must be F32; checked with assert)
 */
float NN_max_F32_RVV(Tensor *t);


#endif // __NN_MAX_H
22 changes: 22 additions & 0 deletions nn/inc/nn_min.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef __NN_MIN_H
#define __NN_MIN_H

#include <assert.h>
#include <float.h>

#include "nn_tensor.h"


/**
 * Returns the minimum value of all elements in the input tensor.
 *
 * @param t: input tensor
 */
float NN_min(Tensor *t);

/**
 * Portable scalar implementation of NN_min for F32 tensors.
 *
 * @param t: input tensor (dtype must be F32; checked with assert)
 */
float NN_min_F32(Tensor *t);

/**
 * RISC-V Vector (RVV) accelerated implementation of NN_min for F32 tensors.
 *
 * @param t: input tensor (dtype must be F32; checked with assert)
 */
float NN_min_F32_RVV(Tensor *t);


#endif // __NN_MIN_H
53 changes: 49 additions & 4 deletions nn/inc/nn_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,35 +77,80 @@ static inline const char *NN_getDataTypeName(DataType dtype) {
}
}

/**
* Frees the memory allocated for the tensor data
*/
static inline void NN_freeTensorData(Tensor *t) {
free(t->data);
}

/**
* Frees the memory allocated for the tensor
*/
static inline void NN_deleteTensor(Tensor *t) {
free(t);
}


/**
* Initialize a tensor
* Initialize a given tensor
*
* @param ndim: number of dimensions
* @param shape: shape of tensor
* @param dtype: DataType
* @param dtype: data type
* @param data: pointer to data, if NULL, the data will be allocated
*/
void NN_initTensor(Tensor *t, size_t ndim, size_t *shape, DataType dtype, void *data);

/**
* Create a new tensor
*
* @param ndim: number of dimensions
* @param shape: shape of tensor
* @param dtype: data type
* @param data: pointer to data, if NULL, the data will be allocated
* @return Tensor
*/
Tensor *NN_tensor(size_t ndim, size_t *shape, DataType dtype, void *data);

/**
* Returns a tensor filled with the scalar value 0.
*
* @param ndim: number of dimensions
* @param shape: shape of tensor
* @param dtype: data type
* @return Tensor
*/
Tensor *NN_zeros(size_t ndim, size_t *shape, DataType dtype);

/**
* Returns a tensor filled with the scalar value 1.
*
* @param ndim: number of dimensions
* @param shape: shape of tensor
* @param dtype: data type
* @return Tensor
*/
Tensor *NN_ones(size_t ndim, size_t *shape, DataType dtype);

/**
* Returns a tensor filled with random numbers from a uniform distribution.
*
* The range of the random number is dependent on the data type:
* - For Float32, the range is [0, 1]
* - For Int8, the range is [0, 255]
* - For Int32, the range is [0, RAND_MAX]
*
* @param ndim: number of dimensions
* @param shape: shape of tensor
* @param dtype: data type
* @return Tensor
*/
Tensor *NN_rand(size_t ndim, size_t *shape, DataType dtype);

/**
* Convert tensor data type
* Returns this tensor cast to the type of the given tensor.
*
* This is a no-op if the tensor is already of the correct type.
*
* @param t: input tensor
* @param dtype: target data type
Expand Down
94 changes: 94 additions & 0 deletions nn/inc/rv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/**
 * @file rv.h
 * @brief RISC-V Definitions
 *
 * This header file provides common definitions and operations for RISC-V core programming.
 * It includes memory register attributes, bit operation definitions, RISC-V specific definitions,
 * and common enumerations for state and status values.
 *
 * The memory register attributes define volatile permissions for read-only, write-only, and read/write access.
 * The bit operation definitions provide macros for setting, clearing, reading, and writing specific bits in a register.
 * The RISC-V specific definitions include macros for reading and writing control and status registers (CSRs),
 * as well as operations to swap, set, and clear specific bits in a CSR.
 * The common definitions include enumerations for state values (such as RESET and SET), and status values (such as OK and ERROR).
 *
 * @note This file should be included to access RISC-V core-specific definitions and perform common operations.
 * @note The CSR macros use GNU statement expressions and inline asm; they require
 *       a GCC/Clang-compatible compiler targeting RISC-V.
 *
 * @author -T.K.-
 * @date 2023-05-20
 */

#ifndef __RV_H
#define __RV_H

#include <stdint.h>
#include <stddef.h>


/* ================ Memory register attributes ================ */
/* NOTE(review): under C++ the "read only" attribute drops const (CMSIS-style
   convention); confirm this is intentional for this project. */
#ifdef __cplusplus
  #define   __I     volatile             /** Defines "read only" permissions */
#else
  #define   __I     volatile const       /** Defines "read only" permissions */
#endif
#define     __O     volatile             /** Defines "write only" permissions */
#define     __IO    volatile             /** Defines "read / write" permissions */

/* following defines should be used for structure members */
#define     __IM    volatile const       /** Defines "read only" structure member permissions */
#define     __OM    volatile             /** Defines "write only" structure member permissions */
#define     __IOM   volatile             /** Defines "read / write" structure member permissions */


/* ================ Bit Operation definitions ================ */
#define SET_BITS(REG, BIT)                    ((REG) |= (BIT))
#define CLEAR_BITS(REG, BIT)                  ((REG) &= ~(BIT))
#define READ_BITS(REG, BIT)                   ((REG) & (BIT))
#define WRITE_BITS(REG, CLEARMASK, SETMASK)   ((REG) = (((REG) & (~(CLEARMASK))) | (SETMASK)))


/* ================ RISC-V specific definitions ================ */
/* REG must be a string literal naming the CSR (it is pasted into the asm template). */
#define READ_CSR(REG) ({                              \
  unsigned long __tmp;                                \
  asm volatile ("csrr %0, " REG : "=r"(__tmp));       \
  __tmp; })

#define WRITE_CSR(REG, VAL) ({                        \
  asm volatile ("csrw " REG ", %0" :: "rK"(VAL)); })

/* Atomically writes VAL to the CSR and returns its previous value. */
#define SWAP_CSR(REG, VAL) ({                         \
  unsigned long __tmp;                                \
  asm volatile ("csrrw %0, " REG ", %1" : "=r"(__tmp) : "rK"(VAL)); \
  __tmp; })

/* Atomically sets the bits in BIT and returns the CSR's previous value. */
#define SET_CSR_BITS(REG, BIT) ({                     \
  unsigned long __tmp;                                \
  asm volatile ("csrrs %0, " REG ", %1" : "=r"(__tmp) : "rK"(BIT)); \
  __tmp; })

/* Atomically clears the bits in BIT and returns the CSR's previous value. */
#define CLEAR_CSR_BITS(REG, BIT) ({                   \
  unsigned long __tmp;                                \
  asm volatile ("csrrc %0, " REG ", %1" : "=r"(__tmp) : "rK"(BIT)); \
  __tmp; })


/* ================ Common definitions ================ */
typedef enum {
  RESET = 0UL,
  SET = !RESET,

  DISABLE = RESET,
  ENABLE = SET,

  LOW = RESET,
  HIGH = SET,
} State;

typedef enum {
  OK = 0U,
  ERROR,
  BUSY,
  TIMEOUT
} Status;

#endif /* __RV_H */
14 changes: 8 additions & 6 deletions nn/src/add/nn_add_rvv.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@ void NN_add_F32_RVV(Tensor *out, Tensor *a, Tensor *b) {
float *a_data = (float *)a->data;
float *b_data = (float *)b->data;

int k = out->shape[0] * out->shape[1];
int l = 0;
for (size_t vl; k > 0; k -= vl, l += vl) {
// TODO: add broadcasting support

size_t i = 0;
size_t vl = 0;
for (size_t k = out->shape[0] * out->shape[1]; k > 0; k -= vl, i += vl) {
vl = __riscv_vsetvl_e32m1(k);
vfloat32m1_t vec_a = __riscv_vle32_v_f32m1(a_data + l, vl);
vfloat32m1_t vec_b = __riscv_vle32_v_f32m1(b_data + l, vl);
vfloat32m1_t vec_a = __riscv_vle32_v_f32m1(a_data + i, vl);
vfloat32m1_t vec_b = __riscv_vle32_v_f32m1(b_data + i, vl);
vfloat32m1_t vec_c = __riscv_vfadd_vv_f32m1(vec_a, vec_b, vl);
__riscv_vse32_v_f32m1(out_data + l, vec_c, vl);
__riscv_vse32_v_f32m1(out_data + i, vec_c, vl);
}
}

24 changes: 0 additions & 24 deletions nn/src/linear/nn_linear.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,3 @@ void NN_linear_F32(Tensor *y, Tensor *x, Tensor *w, Tensor *b) {
NN_matmul_F32(y, x, w);
NN_add_F32(y, y, b);
}

/**
 * Linear layer on int32 tensors: y = x @ w + b.
 *
 * @param y: output tensor; shape must be (x->shape[0], w->shape[1])
 * @param x: input tensor; x->shape[1] must equal w->shape[0]
 * @param w: weight tensor
 * @param b: bias tensor; b->shape[0] must equal w->shape[1]
 */
void NN_linear_I32(Tensor *y, Tensor *x, Tensor *w, Tensor *b) {
  // All operands must carry the int32 dtype.
  assert(x->dtype == DTYPE_I32);
  assert(w->dtype == DTYPE_I32);
  assert(b->dtype == DTYPE_I32);
  assert(y->dtype == DTYPE_I32);
  // Shape compatibility: (n, k) @ (k, m) -> (n, m), bias of length m.
  assert(x->shape[1] == w->shape[0]);
  assert(y->shape[0] == x->shape[0]);
  assert(y->shape[1] == w->shape[1]);
  assert(b->shape[0] == w->shape[1]);

  NN_matmul_I32(y, x, w);
  NN_add_I32(y, y, b);
}

/**
 * Linear layer with int8 input, weight, and bias, accumulating into an
 * int32 output: y = x @ w + b.
 *
 * @param y: output tensor (int32); shape must be (x->shape[0], w->shape[1])
 * @param x: input tensor (int8); x->shape[1] must equal w->shape[0]
 * @param w: weight tensor (int8)
 * @param b: bias tensor (int8); b->shape[0] must equal w->shape[1]
 */
void NN_linear_I8_I8_I8_I32(Tensor *y, Tensor *x, Tensor *w, Tensor *b) {
  // Shape compatibility checks, consistent with NN_linear_I32:
  // (n, k) @ (k, m) -> (n, m), bias of length m.
  assert(x->shape[1] == w->shape[0]);
  assert(y->shape[0] == x->shape[0]);
  assert(y->shape[1] == w->shape[1]);
  assert(b->shape[0] == w->shape[1]);
  assert(x->dtype == DTYPE_I8);
  assert(w->dtype == DTYPE_I8);
  assert(b->dtype == DTYPE_I8);
  assert(y->dtype == DTYPE_I32);

  NN_matmul_I8_I8_I32(y, x, w);
  NN_add_I32_I8_I32(y, y, b);
}
7 changes: 7 additions & 0 deletions nn/src/matmul/nn_matmul_eaglex.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

#include "nn_matmul.h"

/**
 * Matrix multiplication of int8 operands into an int32 result on the
 * EagleX accelerator.
 *
 * NOTE(review): not yet implemented — the accelerated kernel still needs to
 * be ported here. Until then this function leaves `out` untouched; callers
 * must not rely on its contents.
 *
 * @param out: output tensor (int32)
 * @param a: left operand (int8)
 * @param b: right operand (int8)
 */
void NN_matmul_I8_I8_I32_EAGLEX(Tensor *out, Tensor *a, Tensor *b) {
  // Silence unused-parameter warnings while the implementation is pending.
  (void)out;
  (void)a;
  (void)b;
  // TODO: port to here
}

1 change: 1 addition & 0 deletions nn/src/matmul/nn_matmul_rvv.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ void NN_matmul_F32_RVV(Tensor *out, Tensor *a, Tensor *b) {
float *ptr_a = (float *)a->data + i * a->shape[1];
float *ptr_b = (float *)b->data + j;
vfloat32m1_t vec_s = __riscv_vfmv_v_f_f32m1(0, vlmax);

size_t vl = 0;
for (int k = a->shape[1]; k > 0; k -= vl, ptr_a += vl, ptr_b += vl) {
vl = __riscv_vsetvl_e32m1(k);
Expand Down
20 changes: 20 additions & 0 deletions nn/src/max/nn_max.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

#include "nn_max.h"


/**
 * Returns the maximum element of a 2D F32 tensor.
 *
 * Scans the backing buffer in row-major order over shape[0] * shape[1]
 * elements. For a zero-element tensor the result is -FLT_MAX.
 *
 * @param t: input tensor (dtype must be F32; checked with assert)
 */
float NN_max_F32(Tensor *t) {
  assert(t->dtype == DTYPE_F32);

  const float *data = (const float *)t->data;
  size_t count = t->shape[0] * t->shape[1];
  float result = -FLT_MAX;

  // Flat scan is equivalent to the nested (row, col) traversal since
  // element (i, j) lives at index i * shape[1] + j.
  for (size_t idx = 0; idx < count; idx += 1) {
    if (data[idx] > result) {
      result = data[idx];
    }
  }

  return result;
}
22 changes: 22 additions & 0 deletions nn/src/max/nn_max_rvv.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

#include "nn_max.h"
#include "riscv_vector.h"

/**
 * Returns the maximum element of a 2D F32 tensor using RVV intrinsics.
 *
 * Strip-mines over all shape[0] * shape[1] elements and keeps a running
 * maximum in element 0 of a single-register accumulator via vfredmax.
 * For a zero-element tensor the result is -FLT_MAX.
 *
 * @param t: input tensor (dtype must be F32; checked with assert)
 */
float NN_max_F32_RVV(Tensor *t) {
  assert(t->dtype == DTYPE_F32);

  float max = -FLT_MAX;
  float *t_data = (float *)t->data;

  // Seed element 0 of the reduction accumulator with -FLT_MAX.
  vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(max, 1);
  size_t i = 0;
  size_t vl = 0;
  // Strip-mining loop: vsetvl chooses how many of the remaining k
  // elements (vl) this iteration processes.
  for (size_t k = t->shape[0] * t->shape[1]; k > 0; k -= vl, i += vl) {
    vl = __riscv_vsetvl_e32m1(k);
    vfloat32m1_t vec_t = __riscv_vle32_v_f32m1(t_data + i, vl);
    // vec_max[0] = max(vec_max[0], max over the vl loaded elements).
    vec_max = __riscv_vfredmax_vs_f32m1_f32m1(vec_t, vec_max, vl);
  }
  // Move the scalar result out of element 0 of the accumulator.
  max = __riscv_vfmv_f_s_f32m1_f32(vec_max);
  return max;
}

Loading

0 comments on commit 86a4ff6

Please sign in to comment.