Skip to content

Commit

Permalink
pass gather
Browse files Browse the repository at this point in the history
  • Loading branch information
xhuohai committed Sep 5, 2023
1 parent 7f8d964 commit 4becac1
Show file tree
Hide file tree
Showing 16 changed files with 993 additions and 541 deletions.
48 changes: 30 additions & 18 deletions modules/cpu/src/runtime/cmodel/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,29 +1,41 @@
project(cpu)
cmake_minimum_required(VERSION 3.13)

add_library(cpu_cmodel STATIC
src/hardware_context.cpp
src/dummy.cpp
)

set(CMAKE_CXX_STANDARD 20)

target_include_directories(cpu_cmodel PUBLIC include)
target_link_libraries(cpu_cmodel PUBLIC fmt::fmt spdlog::spdlog)
if(NOT APPLE AND NOT MSVC)
target_link_libraries(cpu_cmodel PRIVATE rt)
endif()
target_include_directories(cpu_cmodel PUBLIC /compiler/huochenghai/GNNE/rebuild-ir/nncase/src/Native/include/ /compiler/huochenghai/GNNE/rebuild-ir/nncase/modules/cpu/include/)
target_include_directories(cpu_cmodel PUBLIC /root/.conan/data/gsl-lite/0.37.0/_/_/package/5ab84d6acfe1f23c4fae0ab88f26e3a396351ac9/include/)
# target_link_libraries(cpu_cmodel PUBLIC gsl::gsl-lite)
# if(NOT APPLE AND NOT MSVC)
# target_link_libraries(cpu_cmodel PRIVATE rt)
# endif()
set_target_properties(cpu_cmodel PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_executable(cpu_cmodel_cli src/cpu_cmodel.cpp ../shared_memory.cpp)
target_link_libraries(cpu_cmodel_cli PUBLIC cpu_cmodel)
set_target_properties(cpu_cmodel_cli PROPERTIES POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME "nncase.simulator.cpu.c")
install(TARGETS cpu_cmodel_cli COMPONENT nncase-runtime)

# add_executable(cpu_cmodel_cli src/cpu_cmodel.cpp ../shared_memory.cpp)
# target_link_libraries(cpu_cmodel_cli PUBLIC cpu_cmodel)
# set_target_properties(cpu_cmodel_cli PROPERTIES POSITION_INDEPENDENT_CODE ON
# OUTPUT_NAME "nncase.simulator.cpu.c")
# install(TARGETS cpu_cmodel_cli COMPONENT nncase-runtime)


function(add_test name test_path)
if (CMAKE_BUILD_TYPE STREQUAL "Release")
add_compile_options(-O1)
endif()
add_link_options(-no-pie -nostartfiles -fPIC -fno-stack-protector -static -Wl,-e,_Z6_startP19hardware_context_mtP15runtime_util_mtPhS3_S3_)
add_executable(${name} "${test_path}/main.cpp")
target_link_libraries(${name} cpu_cmodel spdlog::spdlog)
target_link_libraries(${name} cpu_cmodel)
endfunction(add_test)

add_test(demo1 tests/demo1)
add_test(demo2 tests/demo2)
add_test(demo3 tests/demo3)
add_test(norm tests/norm)
add_test(embed tests/embed)
add_test(head tests/head)
# add_test(demo1 tests/demo1)
# add_test(demo2 tests/demo2)
# add_test(demo3 tests/demo3)
add_test(demo4 tests/demo4)
# add_test(norm tests/norm)
# add_test(embed tests/embed)
# add_test(head tests/head)
16 changes: 8 additions & 8 deletions modules/cpu/src/runtime/cmodel/include/gather.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,21 @@ void gather_impl(const T *input, T *output, gsl::span<const size_t> in_shape,
}

// which index to be used in indices
dims_t indices_index(out_index.begin() + axis,
out_index.begin() + axis + indices_shape.size());
auto indices_offset =
offset(get_default_strides(indices_shape), indices_index);
// dims_t indices_index(out_index.begin() + axis,
// out_index.begin() + axis + indices_shape.size());
// auto indices_offset =
// offset((indices_shape), indices_index);
// select sub block in dim axis
in_index[i_index] = indices[indices_offset];
++i_index;
// in_index[i_index] = indices[indices_offset];
// ++i_index;

// select position in sub block
for (auto o_index = axis + indices_shape.size();
o_index < out_index.size(); ++o_index, ++i_index) {
in_index[i_index] = out_index[o_index];
}
output[offset(out_strides, out_index)] =
input[offset(in_strides, in_index)];
// output[offset(out_strides, out_index)] =
// input[offset(in_strides, in_index)];
});
}
} // namespace
Expand Down
53 changes: 37 additions & 16 deletions modules/cpu/src/runtime/cmodel/include/hardware_context.h
Original file line number Diff line number Diff line change
@@ -1,30 +1,51 @@
#pragma once

#include <functional>
#include <iostream>
#include <memory>
// #include <memory>

struct hardware_context_impl;
struct hardware_context_mt {
void (*lock_block)(int bid);
int (*mark_block_visit)(int bid, int tid);
void (*unlock_block)(int bid);
void (*wait_block_sync)(int bid, int visited,
std::function<void()> callable);
void (*lock_all)();
int (*mark_all_visit)(int bid, int tid);
void (*unlock_all)();
void (*wait_all_sync)(int visited, std::function<void()> callable);
void (*init)();
};

class hardware_context {
public:
hardware_context();
void lock_block(int bid);
int mark_block_visit(int bid, int tid);
void unlock_block(int bid);
// hardware_context(hardware_context_mt *impl) : impl_(impl){};
void lock_block(int bid) { impl_->lock_block(bid); }
int mark_block_visit(int bid, int tid) {
return impl_->mark_block_visit(bid, tid);
}
void unlock_block(int bid) { impl_->unlock_block(bid); }
void wait_block_sync(
int bid, int visited, std::function<void()> callable = []() -> void {});
void lock_all();
int mark_all_visit(int bid, int tid);
void unlock_all();
int bid, int visited,
std::function<void()> callable = []() -> void {}) {
impl_->wait_block_sync(bid, visited, callable);
}
void lock_all() { impl_->lock_all(); }
int mark_all_visit(int bid, int tid) {
return impl_->mark_all_visit(bid, tid);
}
void unlock_all() { impl_->unlock_all(); }
void wait_all_sync(
int visited, std::function<void()> callable = []() -> void {});
int visited, std::function<void()> callable = []() -> void {}) {
impl_->wait_all_sync(visited, callable);
}
void *global_var = nullptr;

private:
std::unique_ptr<hardware_context_impl> impl_;
hardware_context_mt* impl_;
};

extern std::unique_ptr<hardware_context> global_hardware_ctx;
static hardware_context global_hardware_ctx;

void global_hardware_init();
void global_hardware_init(hardware_context_mt *impl) {
global_hardware_ctx.impl_ = impl;
impl->init();
}
28 changes: 0 additions & 28 deletions modules/cpu/src/runtime/cmodel/include/io_utils.h

This file was deleted.

81 changes: 50 additions & 31 deletions modules/cpu/src/runtime/cmodel/include/runtime_utils.h
Original file line number Diff line number Diff line change
@@ -1,24 +1,40 @@
#pragma once

#include <array>
#include <cmath>
// #include <array>
// #include <cmath>
#include <cstddef>
#include <gsl/gsl-lite.hpp>
#include <iostream>
#include <numeric>
// #include <iostream>
// #include <numeric>
#include <runtime_types.h>
#include <vector>
// #include <vector>



struct runtime_util_mt {
int (*printf)(const char *__restrict __format, ...);
void *(*malloc)(size_t size);
int (*free)(void *ptr);
float (*sqrt)(float x);
void (*create_thread)(pthread_t &pt, void *param_, void *(*call)(void *));
void (*join_thread)(pthread_t &pt);
};

static runtime_util_mt runtime_util;

void print_vec(itlib::small_vector<size_t, 8> vec) {
for (const size_t v : vec) {
std::cout << std::to_string(v) << ", ";
runtime_util.printf("%zu, ", v);
}
std::cout << std::endl;
runtime_util.printf("\n");
}

template <class TShape> inline size_t compute_size(const TShape &shape) {
return std::accumulate(shape.begin(), shape.end(), 1,
std::multiplies<size_t>());
size_t size = 1;
for (size_t i = 0; i < shape.size(); ++i) {
size *= shape[i];
}
return size;
}

template <class TShape>
Expand All @@ -35,16 +51,15 @@ inline size_t compute_size(const TShape &shape, const TShape &strides) {
}

template <class shape_type, class strides_type>
inline std::size_t compute_strides(const shape_type &shape,
inline size_t compute_strides(const shape_type &shape,
strides_type &strides) {
using strides_value_type = typename std::decay_t<strides_type>::value_type;
strides_value_type data_size = 1;
size_t data_size = 1;
for (std::size_t i = shape.size(); i != 0; --i) {
strides[i - 1] = data_size;
data_size =
strides[i - 1] * static_cast<strides_value_type>(shape[i - 1]);
strides[i - 1] * static_cast<size_t>(shape[i - 1]);
}
return static_cast<std::size_t>(data_size);
return static_cast<size_t>(data_size);
}

inline strides_t get_default_strides(dims_t shape) {
Expand All @@ -56,11 +71,13 @@ inline strides_t get_default_strides(dims_t shape) {
template <class offset_type, class S, class It>
inline offset_type element_offset(const S &strides, It first,
It last) noexcept {
using difference_type = typename std::iterator_traits<It>::difference_type;
auto size = static_cast<difference_type>((std::min)(
static_cast<size_t>(std::distance(first, last)), strides.size()));
return std::inner_product(last - size, last, strides.cend() - size,
offset_type(0));
// using difference_type = typename
// std::iterator_traits<It>::difference_type; auto size =
// static_cast<difference_type>((std::min)(
// static_cast<size_t>(std::distance(first, last)), strides.size()));
// return std::inner_product(last - size, last, strides.cend() - size,
// offset_type(0));
return 0;
}

inline size_t offset(gsl::span<const size_t> strides,
Expand All @@ -69,8 +86,10 @@ inline size_t offset(gsl::span<const size_t> strides,
if (strides.size() == 0 || index.size() == 0) {
return 0;
}
assert(strides.size() == index.size());
// elf loader 不支持
// assert(strides.size() == index.size());
return element_offset<size_t>(strides, index.begin(), index.end());
// return 0;
}

inline bool is_shape_equal(const dims_t &a, const dims_t &b) {
Expand Down Expand Up @@ -125,15 +144,15 @@ get_last_not_contiguous_index(gsl::span<const size_t> strides,
return -1;
}

template <typename T>
inline void span_copy(gsl::span<T> dest, gsl::span<T> src) {
std::copy(src.begin(), src.end(), dest.begin());
}
// template <typename T>
// inline void span_copy(gsl::span<T> dest, gsl::span<T> src) {
// std::copy(src.data(), src.data()+src.size(), dest.data());
// }

template <typename T>
inline void span_equal(gsl::span<T> dest, gsl::span<T> src) {
std::copy(src.begin(), src.end(), dest.begin());
}
// template <typename T>
// inline void span_equal(gsl::span<T> dest, gsl::span<T> src) {
// std::copy(src.begin(), src.end(), dest.begin());
// }

template <typename T> double dot(const T *v1, const T *v2, size_t size) {
double ret = 0.f;
Expand All @@ -146,10 +165,10 @@ template <typename T> double dot(const T *v1, const T *v2, size_t size) {

template <typename T> double cosine(const T *v1, const T *v2, size_t size) {
for (size_t i = 0; i < 10; i++) {
std::cout << v1[i] << " " << v2[i] << std::endl;
runtime_util.printf("%f, %f\n", (float)v1[i], (float)v2[i]);;
}
return dot(v1, v2, size) /
((sqrt(dot(v1, v1, size)) * sqrt(dot(v2, v2, size))));
((runtime_util.sqrt(dot(v1, v1, size)) * runtime_util.sqrt(dot(v2, v2, size))));
}

inline dims_t get_reduced_offset(gsl::span<const size_t> in_offset,
Expand Down Expand Up @@ -198,4 +217,4 @@ inline dims_t get_reduced_shape(gsl::span<const size_t> in_shape,
}
}
return shape;
}
}
Loading

0 comments on commit 4becac1

Please sign in to comment.