ADD: update

ucb-bar · Jul 14, 2024 · 9980c95 · 9980c95
1 parent d5bd9f4
commit 9980c95
Show file tree

Hide file tree

Showing 9 changed files with 155 additions and 102 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -47,7 +47,7 @@ set(WRAP_SPECS_FILE     "htif_wrap.specs")
 set(SPECS_FILE          "htif_nano.specs")
 set(LIBGLOSS_DIR        "$ENV{RISCV}/riscv64-unknown-elf/lib/")
 
-set(MARCH               "rv64gcv_zfh_zvfh")
+set(MARCH               "rv64gcv_zfh_zvfh_zvfhmin")
 set(MABI                "lp64d")
 set(MCMODEL             "medany")
 
@@ -60,6 +60,8 @@ target_compile_options(target-riscv INTERFACE -march=${MARCH} -mabi=${MABI} -mcm
 target_compile_options(target-riscv INTERFACE -Wl,-Map=output.map -specs=${SPECS_FILE} -specs=${WRAP_SPECS_FILE})
 target_compile_options(target-riscv INTERFACE -T ${LINKER_SCRIPT})
 
+target_compile_definitions(target-riscv INTERFACE FLT16_MAX=65504.0f)
+
 target_link_options(target-riscv INTERFACE -static)
 target_link_options(target-riscv INTERFACE -march=${MARCH} -mabi=${MABI} -mcmodel=${MCMODEL})
 target_link_options(target-riscv INTERFACE -Wl,-Map=output.map -specs=${SPECS_FILE} -specs=${WRAP_SPECS_FILE})

diff --git a/nn/impl/rvv/abs.c b/nn/impl/rvv/abs.c
@@ -45,17 +45,17 @@ void NN__abs_i32(size_t n, int32_t *y, size_t incy, int32_t *x, size_t incx) {
   }
 }
 
-// void NN__abs_f16(size_t n, float16_t *y, size_t incy, float16_t *x, size_t incx) {
-//   while (n > 0) {
-//     size_t vl = __riscv_vsetvl_e16m1(n);
-//     vfloat16m1_t vec_x = __riscv_vlse16_v_f16m1(x, sizeof(float16_t) * incx, vl);
-//     vfloat16m1_t vec_y = __riscv_vfabs_v_f16m1(vec_x, vl);
-//     __riscv_vse16_v_f16m1(y, sizeof(float16_t) * incy, vec_y, vl);
-//     x += vl;
-//     y += vl;
-//     n -= vl;
-//   }
-// }
+void NN__abs_f16(size_t n, float16_t *y, size_t incy, float16_t *x, size_t incx) {
+  while (n > 0) {
+    size_t vl = __riscv_vsetvl_e16m1(n);
+    vfloat16m1_t vec_x = __riscv_vlse16_v_f16m1(x, sizeof(float16_t) * incx, vl);
+    vfloat16m1_t vec_y = __riscv_vfabs_v_f16m1(vec_x, vl);
+    __riscv_vsse16_v_f16m1(y, sizeof(float16_t) * incy, vec_y, vl);
+    x += vl;
+    y += vl;
+    n -= vl;
+  }
+}
 
 void NN__abs_f32(size_t n, float *y, size_t incy, float *x, size_t incx) {
   while (n > 0) {

diff --git a/nn/impl/rvv/add.c b/nn/impl/rvv/add.c
@@ -20,25 +20,11 @@ void NN__add_i8(size_t n, int8_t *z, size_t incz, int8_t *x, size_t incx, int8_t
 
 void NN__add_f16(size_t n, float16_t *z, size_t incz, float16_t *x, size_t incx, float16_t *y, size_t incy) {
   while (n > 0) {
-    size_t vl;
-
-    printf("hi\n");
-
-    // size_t vl = __riscv_vsetvl_e16m1(n);
-    asm volatile("vsetvli %0, %1, e16, m1, ta, ma" : "=r"(vl) : "r"(n));
-
-    // vfloat16m1_t vec_x = __riscv_vlse16_v_f16m1(x, sizeof(float16_t) * incx, vl);
-    asm volatile("vlse16.v v24, (%0), %1" : : "r"(x), "r"(sizeof(float16_t) * incx));
-
-    // vfloat16m1_t vec_y = __riscv_vlse16_v_f16m1(y, sizeof(float16_t) * incy, vl);
-    asm volatile("vlse16.v v25, (%0), %1" : : "r"(y), "r"(sizeof(float16_t) * incy));
-
-    // // vfloat16m1_t vec_z = __riscv_vfadd_vv_f16m1(vec_x, vec_y, vl);
-    asm volatile("vfadd.vv v24, v24, v25");
-
-    // __riscv_vsse16_v_f16m1(z, sizeof(float16_t) * incz, vec_z, vl);
-    asm volatile("vsse16.v v24, (%0), %1" : : "r"(z), "r"(sizeof(float16_t) * incz));
-
+    size_t vl = __riscv_vsetvl_e16m1(n);
+    vfloat16m1_t vec_x = __riscv_vlse16_v_f16m1(x, sizeof(float16_t) * incx, vl);
+    vfloat16m1_t vec_y = __riscv_vlse16_v_f16m1(y, sizeof(float16_t) * incy, vl);
+    vfloat16m1_t vec_z = __riscv_vfadd_vv_f16m1(vec_x, vec_y, vl);
+    __riscv_vsse16_v_f16m1(z, sizeof(float16_t) * incz, vec_z, vl);
     x += vl;
     y += vl;
     z += vl;

diff --git a/nn/impl/rvv/maximum1.c b/nn/impl/rvv/maximum1.c
@@ -6,22 +6,11 @@
 
 void NN__maximum1_f16(size_t n, float16_t *y, size_t incy, float16_t *x, size_t incx, float16_t scalar) {
   while (n > 0) {
-    size_t vl;
-    // size_t vl = __riscv_vsetvl_e16m1(n);
-    asm volatile("vsetvli %0, %1, e16, m1, ta, ma" : "=r"(vl) : "r"(n));
-
-    // vfloat16m1_t vec_x = __riscv_vlse16_v_f16m1(x, sizeof(float16_t) * incx, vl);
-    asm volatile("vlse16.v v26, (%0), %1" : : "r"(x), "r"(sizeof(float16_t) * incx));
-
-    // vfloat16m1_t vec_s = __riscv_vfmv_v_f_f16m1(scalar, vl);
-    asm volatile("vmv.v.x v25, %0" : : "r"(scalar));
-
-    // vfloat16m1_t vec_y = __riscv_vfmax_vv_f16m1(vec_x, vec_s, vl);
-    asm volatile("vfmax.vv v25, v26, v25");
-
-    // __riscv_vsse16_v_f16m1(y, sizeof(float16_t) * incy, vec_y, vl);
-    asm volatile("vsse16.v v25, (%0), %1" : : "r"(y), "r"(sizeof(float16_t) * incy));
-
+    size_t vl = __riscv_vsetvl_e16m1(n);
+    vfloat16m1_t vec_x = __riscv_vlse16_v_f16m1(x, sizeof(float16_t) * incx, vl);
+    vfloat16m1_t vec_s = __riscv_vfmv_v_f_f16m1(scalar, vl);
+    vfloat16m1_t vec_y = __riscv_vfmax_vv_f16m1(vec_x, vec_s, vl);
+    __riscv_vsse16_v_f16m1(y, sizeof(float16_t) * incy, vec_y, vl);
     x += vl;
     y += vl;
     n -= vl;

diff --git a/nn/impl/rvv/minimum1.c b/nn/impl/rvv/minimum1.c
@@ -6,22 +6,11 @@
 
 void NN__minimum1_f16(size_t n, float16_t *y, size_t incy, float16_t *x, size_t incx, float16_t scalar) {
   while (n > 0) {
-    size_t vl;
-    // size_t vl = __riscv_vsetvl_e16m1(n);
-    asm volatile("vsetvli %0, %1, e16, m1, ta, ma" : "=r"(vl) : "r"(n));
-
-    // vfloat16m1_t vec_x = __riscv_vlse16_v_f16m1(x, sizeof(float16_t) * incx, vl);
-    asm volatile("vlse16.v v26, (%0), %1" : : "r"(x), "r"(sizeof(float16_t) * incx));
-
-    // vfloat16m1_t vec_s = __riscv_vfmv_v_f_f16m1(scalar, vl);
-    asm volatile("vmv.v.x v25, %0" : : "r"(scalar));
-
-    // vfloat16m1_t vec_y = __riscv_vfmin_vv_f16m1(vec_x, vec_s, vl);
-    asm volatile("vfmin.vv v25, v26, v25");
-
-    // __riscv_vsse16_v_f16m1(y, sizeof(float16_t) * incy, vec_y, vl);
-    asm volatile("vsse16.v v25, (%0), %1" : : "r"(y), "r"(sizeof(float16_t) * incy));
-
+    size_t vl = __riscv_vsetvl_e16m1(n);
+    vfloat16m1_t vec_x = __riscv_vlse16_v_f16m1(x, sizeof(float16_t) * incx, vl);
+    vfloat16m1_t vec_s = __riscv_vfmv_v_f_f16m1(scalar, vl);
+    vfloat16m1_t vec_y = __riscv_vfmin_vv_f16m1(vec_x, vec_s, vl);
+    __riscv_vsse16_v_f16m1(y, sizeof(float16_t) * incy, vec_y, vl);
     x += vl;
     y += vl;
     n -= vl;

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -12,6 +12,15 @@ elseif (RISCV)
 endif()
 
 
-target_link_libraries(tests PUBLIC nn)
+# include_directories(
+#   ../nn
+#   ../nn/functional 
+#   ../nn/impl)
+
+find_library(LIB_TO_INCLUDE nn ./)
+
+target_link_libraries(tests PUBLIC ${LIB_TO_INCLUDE})
+
+# target_link_libraries(tests PUBLIC nn)
 target_link_libraries(tests PUBLIC m)
 
diff --git a/tests/src/generate_test.py b/tests/src/generate_test.py
@@ -41,9 +41,9 @@ def functional_rms_norm(x, w, eps):
 
 
 test_pattern = [
-    # ("abs",         lambda a: torch.abs(a),             [("a", rand((7, 7))),                                           ]),
-    # ("add",         lambda a, b: a + b,                 [("a", rand((6, 7))),         ("b", rand((6, 7)))               ]),
-    # ("add",         lambda a, b: a + b,                 [("a", rand((6, 7))),         ("b", rand((1, 7)))               ]),
+    ("abs",         lambda a: torch.abs(a),             [("a", rand((7, 7))),                                           ]),
+    ("add",         lambda a, b: a + b,                 [("a", rand((6, 7))),         ("b", rand((6, 7)))               ]),
+    ("add",         lambda a, b: a + b,                 [("a", rand((6, 7))),         ("b", rand((1, 7)))               ]),
     # ("add",         lambda a, b: a + b,                 [("a", rand((6, 7))),         ("b", rand((6, 1)))               ]),
     # ("add",         lambda a, b: a + b,                 [("a", rand((6, 7))),         ("b", rand((7, )))                ]),
     # ("add_inplace", lambda a, b: a + b,                 [("actual", torch.zeros((7, 7))),   ("b", rand((7, 7)))         ]),