Skip to content

Commit

Permalink
Fix code that uses the stdlib
Browse files Browse the repository at this point in the history
  • Loading branch information
xhuohai committed Sep 5, 2023
1 parent 4e6be01 commit 7d121bd
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 25 deletions.
9 changes: 5 additions & 4 deletions modules/cpu/src/runtime/cmodel/include/layernorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <riscv_vector.h>
#endif

using namespace nncase::runtime::cpu;
namespace kernels {

namespace {
Expand All @@ -26,8 +27,8 @@ void layernorm_naive_impl(const T *input, const T *sum, T *sum_sqr, T *output,
if (rms_norm) {
mean = 0;
}
auto sigma =
std::sqrt(sum_sqr[o_offset] / norm_size - mean * mean + eps);
auto sigma = nncase_mt.float_unary_sqrt(sum_sqr[o_offset] / norm_size -
mean * mean + eps);

auto input_offset = offset(input_stride, input_index);
auto in_offset = offset(gamma_strides, input_index.subspan(axis));
Expand Down Expand Up @@ -142,8 +143,8 @@ template <class T>
void layernorm(const T *input, T *sum, T *sum_sqr, T *output, T *gamma, T *beta,
dims_t input_dims, strides_t input_strides,
strides_t output_strides, strides_t sum_strides,
strides_t gamma_strides, T eps, int32_t axis,
int32_t norm_size, bool rms_norm = false) {
strides_t gamma_strides, T eps, int32_t axis, int32_t norm_size,
bool rms_norm = false) {
#ifdef __riscv_vector
return layernorm_rvv_impl(
input, sum, sum_sqr, gamma, beta,
Expand Down
6 changes: 3 additions & 3 deletions modules/cpu/src/runtime/cmodel/include/matmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ void contiguous_matmul_impl(const T *input_a, const T *input_b, T *output,
auto b_unit_size = new_b_shape[3] * new_b_shape[4];
auto out_unit_size = new_a_shape[3] * new_b_shape[4];

auto dim0 = std::max(new_a_shape[0], new_b_shape[0]);
auto dim1 = std::max(new_a_shape[1], new_b_shape[1]);
auto dim2 = std::max(new_a_shape[2], new_b_shape[2]);
auto dim0 = new_a_shape[0]> new_b_shape[0]? new_a_shape[0] : new_b_shape[0];
auto dim1 = new_a_shape[1]> new_b_shape[1]? new_a_shape[1] : new_b_shape[1];
auto dim2 = new_a_shape[2]> new_b_shape[2]? new_a_shape[2] : new_b_shape[2];
auto ah_size = a_unit_size * new_a_shape[2];
auto bh_size = b_unit_size * new_b_shape[2];
auto oh_size = out_unit_size * dim2;
Expand Down
15 changes: 9 additions & 6 deletions modules/cpu/src/runtime/cmodel/include/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ template <class TShape>
size_t get_reduce_block_size(const TShape &in_shape, const TShape &axis) {
size_t size = 1;
for (size_t i = 0; i < in_shape.size(); i++) {
if (std::find(axis.begin(), axis.end(), i) != axis.end()) {
size *= in_shape[i];
for (size_t j = 0; j < axis.size(); j++) {
if (i == axis[j]) {
size *= in_shape[i];
break;
}
}
}

Expand Down Expand Up @@ -143,14 +146,14 @@ void reduce(reduce_op_t op, const T *init_value, const T *input, T *output,
gsl::span<const size_t> out_strides, bool keep_dims) noexcept {
auto out_shape = get_reduced_shape(in_shape, axis, keep_dims);
switch (op) {
REDUCE_IMPL(reduce_op_t::mean, std::plus<T>(),
REDUCE_IMPL(reduce_op_t::mean, [](T a, T b) { return a + b; },
[block_size = (T)get_reduce_block_size(in_shape, axis)](
T v) { return v / block_size; });
REDUCE_IMPL_NO_POST(reduce_op_t::min,
[](T a, T b) { return std::min(a, b); });
[](T a, T b) { return a > b ? b : a; });
REDUCE_IMPL_NO_POST(reduce_op_t::max,
[](T a, T b) { return std::max(a, b); });
REDUCE_IMPL_NO_POST(reduce_op_t::sum, std::plus<T>());
[](T a, T b) { return a > b ? a : b; });
REDUCE_IMPL_NO_POST(reduce_op_t::sum, [](T a, T b) { return a + b; });
REDUCE_IMPL_NO_POST(reduce_op_t::sum_sqr,
[](T a, T b) { return a + (b * b); });
case reduce_op_t::prod:
Expand Down
18 changes: 16 additions & 2 deletions modules/cpu/src/runtime/cmodel/include/runtime_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,14 @@ inline dims_t get_reduced_offset(gsl::span<const size_t> in_offset,
dims_t off;
off.reserve(in_offset.size() - (keep_dims ? 0 : axis.size()));
for (size_t i = 0; i < in_offset.size(); i++) {
if (std::find(axis.begin(), axis.end(), i) == axis.end()) {
bool found = false;
for (size_t j = 0; j < axis.size(); j++) {
if (i == axis[j]) {
found = true;
break;
}
}
if (!found) {
off.push_back(in_offset[i]);
} else {
if (keep_dims)
Expand Down Expand Up @@ -199,7 +206,14 @@ inline dims_t get_reduced_shape(gsl::span<const size_t> in_shape,
dims_t shape;
shape.reserve(in_shape.size() - (keep_dims ? 0 : axis.size()));
for (size_t i = 0; i < in_shape.size(); i++) {
if (std::find(axis.begin(), axis.end(), i) == axis.end()) {
bool found = false;
for (size_t j = 0; j < axis.size(); j++) {
if (i == axis[j]) {
found = true;
break;
}
}
if (!found) {
shape.push_back(in_shape[i]);
} else {
if (keep_dims)
Expand Down
20 changes: 16 additions & 4 deletions modules/cpu/src/runtime/cmodel/include/softmax.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <cmath>
#include <runtime_utils.h>

using namespace nncase::runtime::cpu;

namespace kernels {

namespace {
Expand All @@ -30,7 +32,10 @@ void softmax_impl(const T *input, T *output, gsl::span<const size_t> in_shape,
auto reduced_shape = get_reduced_shape(in_shape, axes, true);
auto reduced_strides = get_default_strides(reduced_shape);
auto reduced_size = compute_size(reduced_shape);
std::vector<T> tmp(reduced_size, std::numeric_limits<T>::lowest());
auto tmp = (T *)runtime_util.malloc(reduced_size * sizeof(T));
for (size_t i = 0; i < reduced_size; i++) {
tmp[i] = std::numeric_limits<T>().lowest();
}

// reduce_max
(apply(in_shape, [&](gsl::span<const size_t> index) -> void {
Expand All @@ -41,7 +46,7 @@ void softmax_impl(const T *input, T *output, gsl::span<const size_t> in_shape,
auto out_idx = offset(reduced_strides, out_index);
auto &out = tmp[out_idx];

out = std::max(in, out);
out = in > out ? in : out;
}));

// x - reduce_max
Expand All @@ -57,7 +62,9 @@ void softmax_impl(const T *input, T *output, gsl::span<const size_t> in_shape,
}));

// exp(x - reduce_max) and sum
tmp.assign(tmp.size(), static_cast<T>(0));
for (size_t i = 0; i < reduced_size; i++) {
tmp[i] = static_cast<T>(0);
}
(apply(in_shape, [&](gsl::span<const size_t> index) -> void {
auto in_idx = offset(out_strides, index);
const auto in = output[in_idx];
Expand All @@ -78,7 +85,12 @@ void softmax_impl(const T *input, T *output, gsl::span<const size_t> in_shape,
auto &out = output[out_idx];
out /= in;
if (needLog) {
out = std::log(out);
if (std::is_same_v<T, float>) {
out = nncase_mt.float_unary_log(out);
} else {
runtime_util.rt_assert(
false, (char *)"Not supported Type in softmax!");
}
}
}));
}
Expand Down
9 changes: 5 additions & 4 deletions modules/cpu/src/runtime/cmodel/include/tdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ template <typename T, loc_t BLoc>
void concat(std::initializer_list<tensor<T, loc_t::local>> inits,
tensor<T, BLoc> &output, size_t axis) {
itlib::small_vector<const gsl::byte *const, 8> inputs(inits.size());
std::vector<strides_t> in_strides(inits.size());
itlib::small_vector<strides_t> in_strides(inits.size());
auto concat_dims = dims_t(inits.size(), 1);
for (size_t i = 0; i < inits.size(); ++i) {
if (inits[i].dimension().size() != 0) {
Expand Down Expand Up @@ -273,7 +273,8 @@ template <class T, loc_t Src, loc_t Dest>
void tdma_load_broadcast_async([[maybe_unused]] tensor<T, Dest> &dest,
[[maybe_unused]] tensor<T, Src> &src,
[[maybe_unused]] thread_context &ctx) {
throw std::system_error(std::make_error_code(std::errc::not_supported));
// throw std::system_error(std::make_error_code(std::errc::not_supported));
runtime_util.rt_assert(false, (char*)"not_supported");
}

template <class T>
Expand All @@ -287,7 +288,7 @@ void tdma_reduce_async(tensor<T, loc_t::local> &src,
new_dims.insert(new_dims.begin(), BLOCKS * CORES);
if (visited == 1) {
if (global_hardware_ctx.global_var != nullptr) {
throw std::runtime_error(" the global var has been used!");
runtime_util.rt_assert(false, (char*)"the global var has been used!");
}
gather_tensor = new tensor<T>(new_dims);
global_hardware_ctx.global_var = (void *)gather_tensor;
Expand Down Expand Up @@ -359,7 +360,7 @@ void tdma_all_reduce_async(tensor<T, ALoc> &src, tensor<T, BLoc> &dest,
new_dims.insert(new_dims.begin(), BLOCKS * CORES);
if (visited == 1) {
if (global_hardware_ctx.global_var != nullptr) {
throw std::runtime_error(" the global var has been used!");
runtime_util.rt_assert(false, (char*)"the global var has been used!");
}
gather_tensor = new tensor<T>(new_dims);
global_hardware_ctx.global_var = (void *)gather_tensor;
Expand Down
4 changes: 2 additions & 2 deletions modules/cpu/src/runtime/cmodel/include/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ template <typename T, loc_t Loc = loc_t::local> class tensor {
strides_(get_default_strides(dims_)),
size_(compute_size(dims_)) {
if (size_ != data_.size()) {
throw std::errc::invalid_argument;
runtime_util.rt_assert(false, (char*)"Invalid tensor size");
}
}

Expand All @@ -45,7 +45,7 @@ template <typename T, loc_t Loc = loc_t::local> class tensor {
strides_(strides),
size_(compute_size(dims_, strides_)) {
if (size_ != data_.size()) {
throw std::errc::invalid_argument;
runtime_util.rt_assert(false, (char*)"Invalid tensor size");
}
}

Expand Down

0 comments on commit 7d121bd

Please sign in to comment.