Skip to content

Commit

Permalink
pass demo2
Browse files Browse the repository at this point in the history
  • Loading branch information
zhen8838 committed Aug 21, 2023
1 parent f208204 commit 16ec3b9
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 35 deletions.
5 changes: 4 additions & 1 deletion modules/cpu/src/runtime/cmodel/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ add_library(cpu_cmodel STATIC

target_include_directories(cpu_cmodel PUBLIC include)
target_link_libraries(cpu_cmodel PUBLIC fmt::fmt spdlog::spdlog)
if(NOT APPLE AND NOT MSVC)
target_link_libraries(cpu_cmodel PRIVATE rt)
endif()
set_target_properties(cpu_cmodel PROPERTIES POSITION_INDEPENDENT_CODE ON)

add_executable(cpu_cmodel_cli src/cpu_cmodel.cpp ../shared_memory.cpp)
Expand All @@ -15,7 +18,7 @@ install(TARGETS cpu_cmodel_cli COMPONENT nncase-runtime)

# Build a standalone cmodel test executable from "<test_path>/main.cpp" and
# link it against the cmodel runtime plus spdlog.
# NOTE(review): this function shadows CTest's built-in add_test() command;
# consider renaming (e.g. add_cmodel_test) if CTest integration is ever needed.
function(add_test name test_path)
    add_executable(${name} "${test_path}/main.cpp")
    # Use the imported spdlog::spdlog target (carries include dirs and the
    # correct debug/release library), not a raw library name like "spdlogd".
    target_link_libraries(${name} cpu_cmodel spdlog::spdlog)
endfunction(add_test)

add_test(demo1 tests/demo1)
Expand Down
7 changes: 7 additions & 0 deletions modules/cpu/src/runtime/cmodel/include/io_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,11 @@ inline std::vector<uint8_t> read_file(const std::filesystem::path &filename) {
if (!infile.good())
throw std::runtime_error("Cannot open file: " + filename.string());
return read_stream(infile);
}

/// Write the raw bytes of `src` to `filename` (binary mode, truncating any
/// existing file).
///
/// Mirrors the error policy of read_file() above: throws std::runtime_error
/// if the file cannot be opened for writing or the write fails, instead of
/// silently dropping the data.
template <class T>
inline void to_file(gsl::span<T> src, const std::filesystem::path &filename) {
    std::ofstream ofile(filename.string(), std::ios::binary | std::ios::out);
    if (!ofile.good())
        throw std::runtime_error("Cannot open file: " + filename.string());
    // Reinterpret the span's storage as bytes; size_bytes() covers the whole
    // element range regardless of T.
    ofile.write(src.template as_span<const char>().data(), src.size_bytes());
    if (!ofile)
        throw std::runtime_error("Failed writing file: " + filename.string());
    // No explicit close(): std::ofstream closes via RAII on scope exit.
}
74 changes: 48 additions & 26 deletions modules/cpu/src/runtime/cmodel/include/layernorm.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "runtime_utils.h"
#include <apply.h>
#include <cmath>
#ifdef __riscv_vector
#include <riscv_vector.h>
Expand All @@ -10,29 +11,47 @@ namespace {
template <typename T>
void layernorm_naive_impl(T *input, const T *sum, T *sum_sqr, T *gamma, T *beta,
gsl::span<const size_t> input_shape,
[[maybe_unused]] gsl::span<const size_t> input_stride,
T eps, int32_t axis, int32_t norm_size) noexcept {
// only process continues tensor for now
size_t outer_size = 1;
for (auto i = 0; i < axis; i++) {
outer_size *= input_shape[i];
}

size_t inner_size = 1;
for (auto i = axis; i < input_shape.size(); i++) {
inner_size *= input_shape[i];
}

for (size_t o = 0; o < outer_size; o++) {
auto mean = sum[o] / norm_size;
auto sigma = std::sqrt(sum_sqr[o] / norm_size - mean * mean + eps);
for (size_t i = 0; i < inner_size; i++) {
auto x = input + o * inner_size + i;
*x = (*x - mean) / sigma *
(gamma == nullptr ? static_cast<T>(1) : gamma[i]) +
(beta == nullptr ? static_cast<T>(0) : beta[i]);
}
}
gsl::span<const size_t> input_stride,
[[maybe_unused]] gsl::span<const size_t> sum_strides,
gsl::span<const size_t> gamma_strides, T eps,
int32_t axis, int32_t norm_size) noexcept {
apply(input_shape, [&](gsl::span<const size_t> input_index) -> void {
// input_index
auto o_offset = offset(sum_strides, input_index.subspan(0, axis));
// auto o_offset = input_index[0];
auto mean = sum[o_offset] / norm_size;
auto sigma =
std::sqrt(sum_sqr[o_offset] / norm_size - mean * mean + eps);

auto input_offset = offset(input_stride, input_index);
auto in_offset = offset(gamma_strides, input_index.subspan(axis));
input[input_offset] =
(input[input_offset] - mean) / sigma *
(gamma == nullptr ? static_cast<T>(1) : gamma[in_offset]) +
(beta == nullptr ? static_cast<T>(0) : beta[in_offset]);
});

// // only process continues tensor for now
// size_t outer_size = 1;
// for (auto i = 0; i < axis; i++) {
// outer_size *= input_shape[i];
// }

// size_t inner_size = 1;
// for (auto i = axis; i < input_shape.size(); i++) {
// inner_size *= input_shape[i];
// }

// for (size_t o = 0; o < outer_size; o++) {
// auto mean = sum[o] / norm_size;
// auto sigma = std::sqrt(sum_sqr[o] / norm_size - mean * mean + eps);
// for (size_t i = 0; i < inner_size; i++) {
// auto x = input + o * inner_size + i;
// *x = (*x - mean) / sigma *
// (gamma == nullptr ? static_cast<T>(1) : gamma[i]) +
// (beta == nullptr ? static_cast<T>(0) : beta[i]);
// }
// }
}

#ifdef __riscv_vector
Expand Down Expand Up @@ -115,8 +134,9 @@ void layernorm_rvv_impl(const T *input, const T *sum, T *sum_sqr, T *gamma,

template <class T>
void layernorm(T *input, T *sum, T *sum_sqr, T *gamma, T *beta,
dims_t input_dims, strides_t input_strides, T eps, int32_t axis,
int32_t norm_size) {
dims_t input_dims, strides_t input_strides,
strides_t sum_strides, strides_t gamma_strides, T eps,
int32_t axis, int32_t norm_size) {
#ifdef __riscv_vector
return layernorm_rvv_impl(
input, sum, sum_sqr, gamma, beta,
Expand All @@ -127,7 +147,9 @@ void layernorm(T *input, T *sum, T *sum_sqr, T *gamma, T *beta,
return layernorm_naive_impl(
input, sum, sum_sqr, gamma, beta,
gsl::make_span(input_dims).template as_span<const size_t>(),
gsl::make_span(input_strides).template as_span<const size_t>(), eps,
gsl::make_span(input_strides).template as_span<const size_t>(),
gsl::make_span(sum_strides).template as_span<const size_t>(),
gsl::make_span(gamma_strides).template as_span<const size_t>(), eps,
axis, norm_size);
#endif
}
Expand Down
9 changes: 7 additions & 2 deletions modules/cpu/src/runtime/cmodel/include/tdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,14 @@ void tensor_layernorm_sync(tensor<T, Loc> &input, tensor<T, loc_t::local> &sum,
tensor<T, loc_t::local> &gamma,
tensor<T, loc_t::local> &beta, T eps, int32_t axis,
int32_t norm_size) {
assert(sum.strides() == sum_sqr.strides());
assert(is_contiguous(sum.dimension(), sum.strides()));
assert(gamma.strides() == beta.strides());
assert(is_contiguous(gamma.dimension(), gamma.strides()));
kernels::layernorm(input.data().data(), sum.data().data(),
sum_sqr.data().data(), gamma.data().data(),
beta.data().data(), input.dimension(), input.strides(),
eps, axis, norm_size);
sum.strides(), gamma.strides(), eps, axis, norm_size);
}

template <typename T, loc_t Loc>
Expand All @@ -126,7 +130,8 @@ void tensor_layernorm_sync(tensor<T, Loc> &input, tensor<T, loc_t::local> &sum,
kernels::layernorm(input.data().data(), sum.data().data(),
sum_sqr.data().data(), static_cast<T *>(nullptr),
static_cast<T *>(nullptr), input.dimension(),
input.strides(), static_cast<T>(1e-5), axis, norm_size);
input.strides(), sum.strides(), dims_t({}),
static_cast<T>(1e-5), axis, norm_size);
}

template <typename T, loc_t ALoc>
Expand Down
14 changes: 8 additions & 6 deletions modules/cpu/src/runtime/cmodel/tests/demo2/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,7 @@ int main([[maybe_unused]] int argc, char **argv) {
pthread_join(t_7_2, NULL);
pthread_join(t_7_3, NULL);

auto cos = cosine(Norm.data().begin(),
gsl::make_span(src_Norm).as_span<float>().begin(),
Norm.data().size());
printf("Norm cosine %f\n", cos);

cos = cosine(QKH.data().begin(),
auto cos = cosine(QKH.data().begin(),
gsl::make_span(src_QKH).as_span<float>().begin(),
QKH.data().size());
printf("QKH cosine %f\n", cos);
Expand All @@ -158,5 +153,12 @@ int main([[maybe_unused]] int argc, char **argv) {
gsl::make_span(src_YM).as_span<float>().begin(),
YM.data().size());
printf("YM cosine %f\n", cos);

to_file(Norm.data(),"ONorm.bin");

cos = cosine(Norm.data().begin(),
gsl::make_span(src_Norm).as_span<float>().begin(),
Norm.data().size());
printf("Norm cosine %f\n", cos);
return 0;
}

0 comments on commit 16ec3b9

Please sign in to comment.