Skip to content

Commit

Permalink
ADD: add more types to impls
Browse files Browse the repository at this point in the history
  • Loading branch information
T-K-233 committed Jul 17, 2024
1 parent 525738d commit 3723d15
Show file tree
Hide file tree
Showing 43 changed files with 877 additions and 10 deletions.
17 changes: 17 additions & 0 deletions nn/impl/acc.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,29 @@
#include <stddef.h>
#include <stdint.h>

#include "nn_float16.h"


/**
 * In-place strided accumulate for int8_t vectors.
 *
 * Presumably y[i * incy] += x[i * incx] for i in [0, n), like the other
 * NN__acc_* kernels -- confirm against the implementation.
 *
 * @param n    number of elements to process
 * @param y    accumulator vector (read and written)
 * @param incy element stride of y
 * @param x    addend vector (read only)
 * @param incx element stride of x
 */
void NN__acc_i8(size_t n,
int8_t *y, size_t incy,
const int8_t *x, size_t incx
);

/**
 * In-place strided accumulate for int16_t vectors:
 * y[i * incy] += x[i * incx] for i in [0, n).
 *
 * @param n    number of elements to process
 * @param y    accumulator vector (read and written)
 * @param incy element stride of y
 * @param x    addend vector (read only)
 * @param incx element stride of x
 */
void NN__acc_i16(size_t n,
int16_t *y, size_t incy,
const int16_t *x, size_t incx
);

/**
 * In-place strided accumulate for int32_t vectors:
 * y[i * incy] += x[i * incx] for i in [0, n).
 *
 * @param n    number of elements to process
 * @param y    accumulator vector (read and written)
 * @param incy element stride of y
 * @param x    addend vector (read only)
 * @param incx element stride of x
 */
void NN__acc_i32(size_t n,
int32_t *y, size_t incy,
const int32_t *x, size_t incx
);

/**
 * In-place strided accumulate for half-precision vectors (float16_t is
 * provided by nn_float16.h).
 *
 * The default CPU implementation adds in float and converts the sum back
 * with NN_float_to_half before storing it into y.
 *
 * @param n    number of elements to process
 * @param y    accumulator vector (read and written)
 * @param incy element stride of y
 * @param x    addend vector (read only)
 * @param incx element stride of x
 */
void NN__acc_f16(size_t n,
float16_t *y, size_t incy,
const float16_t *x, size_t incx
);

void NN__acc_f32(size_t n,
float *y, size_t incy,
const float *x, size_t incx
Expand Down
24 changes: 23 additions & 1 deletion nn/impl/acc1.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,31 @@

#include <stddef.h>

#include "nn_float16.h"


/**
 * Adds a scalar, in place, to n entries of an int8_t vector.
 *
 * @param n      number of elements to update
 * @param result vector updated in place
 * @param incr   presumably the element stride between updated entries of
 *               result, as in the other strided kernels -- TODO confirm
 * @param scalar value added to each selected element
 */
void NN__acc1_i8(size_t n,
int8_t *result, size_t incr,
int8_t scalar
);

/**
 * Adds a scalar, in place, to n entries of an int16_t vector.
 *
 * @param n      number of elements to update
 * @param result vector updated in place
 * @param incr   presumably the element stride between updated entries of
 *               result, as in the other strided kernels -- TODO confirm
 * @param scalar value added to each selected element
 */
void NN__acc1_i16(size_t n,
int16_t *result, size_t incr,
int16_t scalar
);

/**
 * Adds a scalar, in place, to n entries of an int32_t vector.
 *
 * @param n      number of elements to update
 * @param result vector updated in place
 * @param incr   presumably the element stride between updated entries of
 *               result, as in the other strided kernels -- TODO confirm
 * @param scalar value added to each selected element
 */
void NN__acc1_i32(size_t n,
int32_t *result, size_t incr,
int32_t scalar
);

/**
 * Adds a scalar, in place, to n entries of a half-precision vector
 * (float16_t is provided by nn_float16.h).
 *
 * @param n      number of elements to update
 * @param result vector updated in place
 * @param incr   presumably the element stride between updated entries of
 *               result, as in the other strided kernels -- TODO confirm
 * @param scalar half-precision value added to each selected element
 */
void NN__acc1_f16(size_t n,
float16_t *result, size_t incr,
float16_t scalar
);

void NN__acc1_f32(size_t n,
float *result, size_t incx,
float *result, size_t incr,
float scalar
);

Expand Down
12 changes: 12 additions & 0 deletions nn/impl/add.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ void NN__add_i8(size_t n,
const int8_t *y, size_t incy
);

/**
 * Strided element-wise addition of int16_t vectors:
 * z[i * incz] = x[i * incx] + y[i * incy] for i in [0, n).
 *
 * @param n    number of elements to process
 * @param z    output vector
 * @param incz element stride of z
 * @param x    first operand (read only)
 * @param incx element stride of x
 * @param y    second operand (read only)
 * @param incy element stride of y
 */
void NN__add_i16(size_t n,
int16_t *z, size_t incz,
const int16_t *x, size_t incx,
const int16_t *y, size_t incy
);

/**
 * Strided element-wise addition of int32_t vectors:
 * z[i * incz] = x[i * incx] + y[i * incy] for i in [0, n).
 *
 * @param n    number of elements to process
 * @param z    output vector
 * @param incz element stride of z
 * @param x    first operand (read only)
 * @param incx element stride of x
 * @param y    second operand (read only)
 * @param incy element stride of y
 */
void NN__add_i32(size_t n,
int32_t *z, size_t incz,
const int32_t *x, size_t incx,
const int32_t *y, size_t incy
);

void NN__add_f16(size_t n,
float16_t *z, size_t incz,
const float16_t *x, size_t incx,
Expand Down
25 changes: 25 additions & 0 deletions nn/impl/add1.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,31 @@

#include <stddef.h>

#include "nn_float16.h"

/**
 * Strided vector-plus-scalar for int8_t:
 * z[i * incz] = x[i * incx] + scalar for i in [0, n).
 *
 * @param n      number of elements to process
 * @param z      output vector
 * @param incz   element stride of z
 * @param x      input vector (read only)
 * @param incx   element stride of x
 * @param scalar value added to every input element
 */
void NN__add1_i8(size_t n,
int8_t *z, size_t incz,
const int8_t *x, size_t incx,
int8_t scalar
);

/**
 * Strided vector-plus-scalar for int16_t:
 * z[i * incz] = x[i * incx] + scalar for i in [0, n).
 *
 * @param n      number of elements to process
 * @param z      output vector
 * @param incz   element stride of z
 * @param x      input vector (read only)
 * @param incx   element stride of x
 * @param scalar value added to every input element
 */
void NN__add1_i16(size_t n,
int16_t *z, size_t incz,
const int16_t *x, size_t incx,
int16_t scalar
);

/**
 * Strided vector-plus-scalar for int32_t:
 * z[i * incz] = x[i * incx] + scalar for i in [0, n).
 *
 * @param n      number of elements to process
 * @param z      output vector
 * @param incz   element stride of z
 * @param x      input vector (read only)
 * @param incx   element stride of x
 * @param scalar value added to every input element
 */
void NN__add1_i32(size_t n,
int32_t *z, size_t incz,
const int32_t *x, size_t incx,
int32_t scalar
);

/**
 * Strided vector-plus-scalar for half precision (float16_t is provided by
 * nn_float16.h).
 *
 * The default CPU implementation adds in float and converts each sum back
 * with NN_float_to_half before storing it.
 *
 * @param n      number of elements to process
 * @param z      output vector
 * @param incz   element stride of z
 * @param x      input vector (read only)
 * @param incx   element stride of x
 * @param scalar half-precision value added to every input element
 */
void NN__add1_f16(size_t n,
float16_t *z, size_t incz,
const float16_t *x, size_t incx,
float16_t scalar
);

void NN__add1_f32(size_t n,
float *z, size_t incz,
Expand Down
18 changes: 18 additions & 0 deletions nn/impl/cpu/acc.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,24 @@ __attribute__((weak)) void NN__acc_i8(size_t n, int8_t *y, size_t incy, const in
}
}

/*
 * Default (weak) int16_t accumulate: y[i * incy] += x[i * incx] for
 * i in [0, n). incy and incx are element strides.
 */
__attribute__((weak)) void NN__acc_i16(size_t n, int16_t *y, size_t incy, const int16_t *x, size_t incx) {
  size_t dst = 0;
  size_t src = 0;
  for (size_t k = 0; k < n; ++k) {
    y[dst] += x[src];
    dst += incy;
    src += incx;
  }
}

/*
 * Default (weak) int32_t accumulate: y[i * incy] += x[i * incx] for
 * i in [0, n). incy and incx are element strides.
 */
__attribute__((weak)) void NN__acc_i32(size_t n, int32_t *y, size_t incy, const int32_t *x, size_t incx) {
  size_t dst = 0;
  size_t src = 0;
  while (n-- > 0) {
    y[dst] += x[src];
    dst += incy;
    src += incx;
  }
}

/*
 * Default (weak) half-precision accumulate:
 * y[i * incy] = half(float(y[i * incy]) + float(x[i * incx])) for i in [0, n).
 *
 * n is the element count; incy and incx are element strides. The addition
 * is done in float and converted back with NN_float_to_half.
 */
__attribute__((weak)) void NN__acc_f16(size_t n, float16_t *y, size_t incy, const float16_t *x, size_t incx) {
  /* Step the element counter by 1; the strides are applied in the index
   * expressions. Stepping by incx skipped elements for incx > 1 and looped
   * forever for incx == 0. */
  for (size_t i = 0; i < n; i += 1) {
    y[i * incy] = NN_float_to_half(NN_half_to_float(y[i * incy]) + NN_half_to_float(x[i * incx]));
  }
}

__attribute__((weak)) void NN__acc_f32(size_t n, float *y, size_t incy, const float *x, size_t incx) {
for (size_t i = 0; i < n; i += 1) {
y[i * incy] += x[i * incx];
Expand Down
30 changes: 27 additions & 3 deletions nn/impl/cpu/acc1.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,32 @@
#include "acc1.h"


__attribute__((weak)) void NN__acc1_f32(size_t n, float *result, size_t incx, float scalar) {
for (size_t i = 0; i < n; i += incx) {
result[i] += scalar;
/*
 * Default (weak) in-place scalar accumulate for int8_t:
 * result[i * incr] += scalar for i in [0, n).
 *
 * n is the element count and incr the element stride of result.
 */
__attribute__((weak)) void NN__acc1_i8(size_t n, int8_t *result, size_t incr, int8_t scalar) {
  for (size_t i = 0; i < n; i += 1) {
    /* The stride multiplies the index. Adding it (i + incr) skipped
     * element 0 and wrote past the end of the vector. */
    result[i * incr] += scalar;
  }
}

/*
 * Default (weak) in-place scalar accumulate for int16_t:
 * result[i * incr] += scalar for i in [0, n).
 *
 * n is the element count and incr the element stride of result.
 */
__attribute__((weak)) void NN__acc1_i16(size_t n, int16_t *result, size_t incr, int16_t scalar) {
  for (size_t i = 0; i < n; i += 1) {
    /* The stride multiplies the index. Adding it (i + incr) skipped
     * element 0 and wrote past the end of the vector. */
    result[i * incr] += scalar;
  }
}

/*
 * Default (weak) in-place scalar accumulate for int32_t:
 * result[i * incr] += scalar for i in [0, n).
 *
 * n is the element count and incr the element stride of result.
 */
__attribute__((weak)) void NN__acc1_i32(size_t n, int32_t *result, size_t incr, int32_t scalar) {
  for (size_t i = 0; i < n; i += 1) {
    /* The stride multiplies the index. Adding it (i + incr) skipped
     * element 0 and wrote past the end of the vector. */
    result[i * incr] += scalar;
  }
}

/*
 * Default (weak) in-place scalar accumulate for half precision:
 * result[i * incr] = half(float(result[i * incr]) + float(scalar))
 * for i in [0, n).
 *
 * n is the element count and incr the element stride of result. The
 * addition is done in float and converted back with NN_float_to_half.
 */
__attribute__((weak)) void NN__acc1_f16(size_t n, float16_t *result, size_t incr, float16_t scalar) {
  for (size_t i = 0; i < n; i += 1) {
    /* Read and write the same strided slot. The store used to go to
     * result[i + incr], a different element from the one that was read. */
    result[i * incr] = NN_float_to_half(NN_half_to_float(result[i * incr]) + NN_half_to_float(scalar));
  }
}

/*
 * Default (weak) in-place scalar accumulate for float:
 * result[i * incr] += scalar for i in [0, n).
 *
 * n is the element count and incr the element stride of result.
 */
__attribute__((weak)) void NN__acc1_f32(size_t n, float *result, size_t incr, float scalar) {
  for (size_t i = 0; i < n; i += 1) {
    /* The stride multiplies the index. Adding it (i + incr) skipped
     * element 0 and wrote past the end of the vector. */
    result[i * incr] += scalar;
  }
}
12 changes: 12 additions & 0 deletions nn/impl/cpu/add.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ __attribute__((weak)) void NN__add_i8(size_t n, int8_t *z, size_t incz, const in
}
}

/*
 * Default (weak) int16_t element-wise add:
 * z[i * incz] = x[i * incx] + y[i * incy] for i in [0, n).
 * incz, incx and incy are element strides.
 */
__attribute__((weak)) void NN__add_i16(size_t n, int16_t *z, size_t incz, const int16_t *x, size_t incx, const int16_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    z[zi] = x[xi] + y[yi];
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

/*
 * Default (weak) int32_t element-wise add:
 * z[i * incz] = x[i * incx] + y[i * incy] for i in [0, n).
 * incz, incx and incy are element strides.
 */
__attribute__((weak)) void NN__add_i32(size_t n, int32_t *z, size_t incz, const int32_t *x, size_t incx, const int32_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  while (n-- > 0) {
    z[zi] = x[xi] + y[yi];
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

__attribute__((weak)) void NN__add_f16(size_t n, float16_t *z, size_t incz, const float16_t *x, size_t incx, const float16_t *y, size_t incy) {
for (size_t i = 0; i < n; i += 1) {
z[i * incz] = NN_float_to_half(NN_half_to_float(x[i * incx]) + NN_half_to_float(y[i * incy]));
Expand Down
25 changes: 25 additions & 0 deletions nn/impl/cpu/add1.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,31 @@
#include "add1.h"



/*
 * Default (weak) int8_t vector-plus-scalar:
 * y[i * incy] = x[i * incx] + scalar for i in [0, n).
 * y is the output; incy and incx are element strides.
 */
__attribute__((weak)) void NN__add1_i8(size_t n, int8_t *y, size_t incy, const int8_t *x, size_t incx, int8_t scalar) {
  size_t out = 0;
  size_t in = 0;
  for (size_t k = 0; k < n; ++k) {
    y[out] = x[in] + scalar;
    out += incy;
    in += incx;
  }
}

/*
 * Default (weak) int16_t vector-plus-scalar:
 * y[i * incy] = x[i * incx] + scalar for i in [0, n).
 * y is the output; incy and incx are element strides.
 */
__attribute__((weak)) void NN__add1_i16(size_t n, int16_t *y, size_t incy, const int16_t *x, size_t incx, int16_t scalar) {
  size_t out = 0;
  size_t in = 0;
  while (n-- > 0) {
    y[out] = x[in] + scalar;
    out += incy;
    in += incx;
  }
}

/*
 * Default (weak) int32_t vector-plus-scalar:
 * y[i * incy] = x[i * incx] + scalar for i in [0, n).
 * y is the output; incy and incx are element strides.
 */
__attribute__((weak)) void NN__add1_i32(size_t n, int32_t *y, size_t incy, const int32_t *x, size_t incx, int32_t scalar) {
  size_t out = 0;
  size_t in = 0;
  for (size_t k = 0; k < n; ++k) {
    const int32_t sum = x[in] + scalar;
    y[out] = sum;
    out += incy;
    in += incx;
  }
}

/*
 * Default (weak) half-precision vector-plus-scalar:
 * y[i * incy] = half(float(x[i * incx]) + float(scalar)) for i in [0, n).
 *
 * y is the output; n is the element count; incy and incx are element
 * strides. The addition is done in float and converted back with
 * NN_float_to_half.
 */
__attribute__((weak)) void NN__add1_f16(size_t n, float16_t *y, size_t incy, const float16_t *x, size_t incx, float16_t scalar) {
  /* Step the element counter by 1; the strides are applied in the index
   * expressions. Stepping by incx skipped elements for incx > 1 and looped
   * forever for incx == 0. */
  for (size_t i = 0; i < n; i += 1) {
    y[i * incy] = NN_float_to_half(NN_half_to_float(x[i * incx]) + NN_half_to_float(scalar));
  }
}

__attribute__((weak)) void NN__add1_f32(size_t n, float *y, size_t incy, const float *x, size_t incx, float scalar) {
for (size_t i = 0; i < n; i += 1) {
y[i * incy] = x[i * incx] + scalar;
Expand Down
24 changes: 24 additions & 0 deletions nn/impl/cpu/div.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
#include "div.h"


/*
 * Default (weak) int8_t element-wise division:
 * z[i * incz] = x[i * incx] / y[i * incy] for i in [0, n).
 * No zero-divisor check is made; callers must supply non-zero y elements.
 */
__attribute__((weak)) void NN__div_i8(size_t n, int8_t *z, size_t incz, const int8_t *x, size_t incx, const int8_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    z[zi] = x[xi] / y[yi];
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

/*
 * Default (weak) int16_t element-wise division:
 * z[i * incz] = x[i * incx] / y[i * incy] for i in [0, n).
 * No zero-divisor check is made; callers must supply non-zero y elements.
 */
__attribute__((weak)) void NN__div_i16(size_t n, int16_t *z, size_t incz, const int16_t *x, size_t incx, const int16_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  while (n-- > 0) {
    z[zi] = x[xi] / y[yi];
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

/*
 * Default (weak) int32_t element-wise division:
 * z[i * incz] = x[i * incx] / y[i * incy] for i in [0, n).
 * No zero-divisor or INT32_MIN / -1 check is made; callers must avoid both.
 */
__attribute__((weak)) void NN__div_i32(size_t n, int32_t *z, size_t incz, const int32_t *x, size_t incx, const int32_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    const int32_t quotient = x[xi] / y[yi];
    z[zi] = quotient;
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

/*
 * Default (weak) half-precision element-wise division:
 * z[i * incz] = half(float(x[i * incx]) / float(y[i * incy])) for i in [0, n).
 * The division is done in float and converted back with NN_float_to_half.
 */
__attribute__((weak)) void NN__div_f16(size_t n, float16_t *z, size_t incz, const float16_t *x, size_t incx, const float16_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    z[zi] = NN_float_to_half(NN_half_to_float(x[xi]) / NN_half_to_float(y[yi]));
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

__attribute__((weak)) void NN__div_f32(size_t n, float *z, size_t incz, const float *x, size_t incx, const float *y, size_t incy) {
for (size_t i = 0; i < n; i += 1) {
z[i * incz] = x[i * incx] / y[i * incy];
Expand Down
24 changes: 24 additions & 0 deletions nn/impl/cpu/dot.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
#include "dot.h"


/*
 * Default (weak) dot product of two strided int8_t vectors, accumulated in
 * an int32_t: *result = sum over i in [0, n) of x[i * incx] * y[i * incy].
 * Writes 0 when n == 0. The accumulator is not guarded against overflow.
 */
__attribute__((weak)) void NN__dot_i8_to_i32(size_t n, int32_t *result, const int8_t *x, size_t incx, const int8_t *y, size_t incy) {
  int32_t acc = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    acc += x[xi] * y[yi];
    xi += incx;
    yi += incy;
  }
  *result = acc;
}

/*
 * Default (weak) dot product of two strided int16_t vectors, accumulated in
 * an int32_t: *result = sum over i in [0, n) of x[i * incx] * y[i * incy].
 * Writes 0 when n == 0. The accumulator is not guarded against overflow.
 */
__attribute__((weak)) void NN__dot_i16_to_i32(size_t n, int32_t *result, const int16_t *x, size_t incx, const int16_t *y, size_t incy) {
  int32_t acc = 0;
  size_t xi = 0;
  size_t yi = 0;
  while (n-- > 0) {
    acc += x[xi] * y[yi];
    xi += incx;
    yi += incy;
  }
  *result = acc;
}

/*
 * Default (weak) dot product of two strided int32_t vectors:
 * *result = sum over i in [0, n) of x[i * incx] * y[i * incy].
 * Writes 0 when n == 0. Products and the running sum are plain int32_t
 * arithmetic, so callers must keep values small enough to avoid overflow.
 */
__attribute__((weak)) void NN__dot_i32(size_t n, int32_t *result, const int32_t *x, size_t incx, const int32_t *y, size_t incy) {
  int32_t acc = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    acc += x[xi] * y[yi];
    xi += incx;
    yi += incy;
  }
  *result = acc;
}

__attribute__((weak)) void NN__dot_f16(size_t n, float16_t *result, const float16_t *x, size_t incx, const float16_t *y, size_t incy) {
float sum_f32 = 0;
for (size_t i = 0; i < n; i += 1) {
Expand Down
1 change: 1 addition & 0 deletions nn/impl/cpu/fill.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "fill.h"


__attribute__((weak)) void NN__fill_u8(size_t n, uint8_t *x, size_t incx, uint8_t scalar) {
for (size_t i = 0; i < n; i += 1) {
x[i * incx] = scalar;
Expand Down
36 changes: 36 additions & 0 deletions nn/impl/cpu/max.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,42 @@
#include "max.h"


/*
 * Default (weak) maximum of a strided int8_t vector: stores the largest of
 * x[i * incx], i in [0, n), into *result. Stores INT8_MIN when n == 0.
 */
__attribute__((weak)) void NN__max_i8(size_t n, int8_t *result, const int8_t *x, size_t incx) {
  int8_t best = INT8_MIN;
  size_t xi = 0;
  for (size_t k = 0; k < n; ++k) {
    const int8_t candidate = x[xi];
    if (candidate > best) {
      best = candidate;
    }
    xi += incx;
  }
  *result = best;
}

/*
 * Default (weak) maximum of a strided int16_t vector: stores the largest of
 * x[i * incx], i in [0, n), into *result. Stores INT16_MIN when n == 0.
 */
__attribute__((weak)) void NN__max_i16(size_t n, int16_t *result, const int16_t *x, size_t incx) {
  int16_t best = INT16_MIN;
  size_t xi = 0;
  while (n-- > 0) {
    const int16_t candidate = x[xi];
    if (candidate > best) {
      best = candidate;
    }
    xi += incx;
  }
  *result = best;
}

/*
 * Default (weak) maximum of a strided int32_t vector: stores the largest of
 * x[i * incx], i in [0, n), into *result. Stores INT32_MIN when n == 0.
 */
__attribute__((weak)) void NN__max_i32(size_t n, int32_t *result, const int32_t *x, size_t incx) {
  int32_t best = INT32_MIN;
  size_t xi = 0;
  for (size_t k = 0; k < n; ++k) {
    const int32_t candidate = x[xi];
    if (candidate > best) {
      best = candidate;
    }
    xi += incx;
  }
  *result = best;
}

/*
 * Default (weak) maximum of a strided half-precision vector: stores into
 * *result the element of x[i * incx], i in [0, n), that compares largest
 * after conversion to float. The running maximum starts at
 * NN_float_to_half(-FLT_MAX), which is also the value stored when n == 0.
 */
__attribute__((weak)) void NN__max_f16(size_t n, float16_t *result, const float16_t *x, size_t incx) {
  float16_t best = NN_float_to_half(-FLT_MAX);
  size_t xi = 0;
  for (size_t k = 0; k < n; ++k) {
    const float16_t candidate = x[xi];
    if (NN_half_to_float(candidate) > NN_half_to_float(best)) {
      best = candidate;
    }
    xi += incx;
  }
  *result = best;
}

__attribute__((weak)) void NN__max_f32(size_t n, float *result, const float *x, size_t incx) {
float max = -FLT_MAX;
for (size_t i = 0; i < n; i += 1) {
Expand Down
32 changes: 32 additions & 0 deletions nn/impl/cpu/maximum.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,38 @@
#include "maximum.h"


/*
 * Default (weak) element-wise maximum of two strided int8_t vectors:
 * z[i * incz] = max(x[i * incx], y[i * incy]) for i in [0, n).
 */
__attribute__((weak)) void NN__maximum_i8(size_t n, int8_t *z, size_t incz, const int8_t *x, size_t incx, const int8_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    const int8_t lhs = x[xi];
    const int8_t rhs = y[yi];
    if (lhs > rhs) {
      z[zi] = lhs;
    } else {
      z[zi] = rhs;
    }
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

/*
 * Default (weak) element-wise maximum of two strided int16_t vectors:
 * z[i * incz] = max(x[i * incx], y[i * incy]) for i in [0, n).
 */
__attribute__((weak)) void NN__maximum_i16(size_t n, int16_t *z, size_t incz, const int16_t *x, size_t incx, const int16_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  while (n-- > 0) {
    const int16_t lhs = x[xi];
    const int16_t rhs = y[yi];
    if (lhs > rhs) {
      z[zi] = lhs;
    } else {
      z[zi] = rhs;
    }
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

/*
 * Default (weak) element-wise maximum of two strided int32_t vectors:
 * z[i * incz] = max(x[i * incx], y[i * incy]) for i in [0, n).
 */
__attribute__((weak)) void NN__maximum_i32(size_t n, int32_t *z, size_t incz, const int32_t *x, size_t incx, const int32_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    const int32_t lhs = x[xi];
    const int32_t rhs = y[yi];
    if (lhs > rhs) {
      z[zi] = lhs;
    } else {
      z[zi] = rhs;
    }
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

/*
 * Default (weak) element-wise maximum of two strided half-precision vectors.
 * For each i in [0, n), z[i * incz] receives x[i * incx] when it compares
 * greater than y[i * incy] after conversion to float, otherwise y[i * incy].
 * The stored value is the original half, not a re-rounded float.
 */
__attribute__((weak)) void NN__maximum_f16(size_t n, float16_t *z, size_t incz, const float16_t *x, size_t incx, const float16_t *y, size_t incy) {
  size_t zi = 0;
  size_t xi = 0;
  size_t yi = 0;
  for (size_t k = 0; k < n; ++k) {
    const float16_t lhs = x[xi];
    const float16_t rhs = y[yi];
    if (NN_half_to_float(lhs) > NN_half_to_float(rhs)) {
      z[zi] = lhs;
    } else {
      z[zi] = rhs;
    }
    zi += incz;
    xi += incx;
    yi += incy;
  }
}

__attribute__((weak)) void NN__maximum_f32(size_t n, float *z, size_t incz, const float *x, size_t incx, const float *y, size_t incy) {
for (size_t i = 0; i < n; i += 1) {
float x_val = x[i * incx];
Expand Down
Loading

0 comments on commit 3723d15

Please sign in to comment.