Skip to content

Commit

Permalink
ADD: update
Browse files Browse the repository at this point in the history
  • Loading branch information
T-K-233 committed Jul 12, 2024
1 parent b264649 commit afb203c
Show file tree
Hide file tree
Showing 46 changed files with 995 additions and 1,474 deletions.
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
.conda-env/
.vscode/

example/mnist/data/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand All @@ -30,5 +28,7 @@ share/python-wheels/
MANIFEST


example/llama2/checkpoints/stories15M.bin
examples/mnist/data/

examples/llama2/checkpoints/stories15M.bin

51 changes: 42 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,48 @@
########################################################################################################################
# file: CMakeLists.txt
#
# usage:
# Edit "VARIABLES"-section to suit project requirements.
# Build instructions:
# cmake . -D CMAKE_TOOLCHAIN_FILE=./riscv-gcc.cmake -S ./ -B ./build/ -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Debug -D RVV=ON
# cmake --build ./build/ --target all
# cmake --build ./build/ --target clean
########################################################################################################################
cmake_minimum_required(VERSION 3.10)

project(nn LANGUAGES C)

# General build options
option(BUILD_SHARED_LIBS "Build using shared libraries" OFF)
option(STATIC_LINKING "Link statically" TRUE)

# CMAKE_BUILD_TYPE is a string, not a boolean, so option() cannot give it a
# "Debug" default. Default to Debug only when the user has not chosen a build
# type and the generator is single-config; FORCE is needed because project()
# has already created an (empty) cache entry for it.
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type" FORCE)
endif ()

# Architecture options: select which target-<arch> interface library the
# executables link against.
option(X86 "Build for x86" OFF)
option(RISCV "Build for RISC-V" OFF)

# Accelerator options
option(AVX "Use AVX implementation" OFF)
option(RVV "Use RISCV vector extension implementation" OFF)

# Interface target bundling the RISC-V usage requirements: C11 and the RISCV
# preprocessor definition.
# NOTE(review): target-riscv is declared a second time further down in this
# diff view; the two declarations look like the old and new position of the
# same lines — confirm only one survives in the real file.
add_library(target-riscv INTERFACE)
target_compile_features(target-riscv INTERFACE c_std_11)
target_compile_definitions(target-riscv INTERFACE RISCV)

# Directory-scoped flags: applied to every target created in this directory
# and below after this point.
add_compile_options(-O1 -Wall -Wextra)

# Interface target bundling the x86 usage requirements: C11 and the X86
# preprocessor definition.
add_library(target-x86 INTERFACE)
target_compile_features(target-x86 INTERFACE c_std_11)
target_compile_definitions(target-x86 INTERFACE X86)

# Both architecture targets request static linking.
# NOTE(review): -static is added unconditionally; the STATIC_LINKING option is
# not consulted anywhere in the visible lines — confirm whether it should be.
target_link_options(target-x86 INTERFACE -static)
target_link_options(target-riscv INTERFACE -static)

# Tune x86 builds for the machine doing the build.
target_compile_options(target-x86 INTERFACE -march=native)
# target_compile_options(target-x86 INTERFACE -mavx512fp16)
# NOTE(review): -static is added to target-x86 a second time here, and
# -fno-common / -fno-builtin-printf are compile-style flags passed as link
# options — presumably old/new diff lines shown together; verify.
target_link_options(target-x86 INTERFACE -static)
target_link_options(target-x86 INTERFACE -march=native)
target_link_options(target-x86 INTERFACE -fno-common -fno-builtin-printf)


# Interface target bundling the RISC-V usage requirements: C11 and the RISCV
# preprocessor definition. Executables pick these up by linking target-riscv.
add_library(target-riscv INTERFACE)
target_compile_features(target-riscv INTERFACE c_std_11)
target_compile_definitions(target-riscv INTERFACE RISCV)

# GCC specs files passed via -specs= to the RISC-V compile and link options.
set(WRAP_SPECS_FILE "htif_wrap.specs")
set(SPECS_FILE "htif_nano.specs")
# Library directory inside the toolchain installation named by the RISCV
# environment variable (read at configure time).
# NOTE(review): LIBGLOSS_DIR is not referenced in the visible lines — confirm
# it is still used.
set(LIBGLOSS_DIR "$ENV{RISCV}/riscv64-unknown-elf/lib/")
Expand All @@ -31,9 +51,22 @@ set(MARCH "rv64gcv_zfh_zvfh")
# ABI and code model passed to the RISC-V compiler and linker as -mabi= / -mcmodel=.
set(MABI "lp64d")
set(MCMODEL "medany")

# Default linker script; can be overridden with -D LINKER_SCRIPT=<path>.
# PROJECT_SOURCE_DIR (the directory of this project's CMakeLists.txt) is used
# instead of CMAKE_SOURCE_DIR so the path stays correct when this project is
# included from another build tree via add_subdirectory().
if (NOT DEFINED LINKER_SCRIPT)
  set(LINKER_SCRIPT "${PROJECT_SOURCE_DIR}/toolchain/htif.ld")
endif ()

# Compile options propagated to everything that links target-riscv:
# -fno-common / -fno-builtin-printf, then the ISA string, ABI and code model.
target_compile_options(target-riscv INTERFACE -fno-common -fno-builtin-printf)
target_compile_options(target-riscv INTERFACE -march=${MARCH} -mabi=${MABI} -mcmodel=${MCMODEL})
# NOTE(review): -Wl,-Map and -T are linker flags but are passed as compile
# options here; the same flags are repeated as link options below — confirm the
# compile-side copies are needed.
target_compile_options(target-riscv INTERFACE -Wl,-Map=output.map -specs=${SPECS_FILE} -specs=${WRAP_SPECS_FILE} -T ${CMAKE_SOURCE_DIR}/htif.ld)
# NOTE(review): this line modifies target-x86 in the middle of the RISC-V
# section — presumably a removed diff line; verify.
target_link_options(target-x86 INTERFACE -fno-common -fno-builtin-printf)
target_compile_options(target-riscv INTERFACE -Wl,-Map=output.map -specs=${SPECS_FILE} -specs=${WRAP_SPECS_FILE})
target_compile_options(target-riscv INTERFACE -T ${LINKER_SCRIPT})

# Link options propagated to everything that links target-riscv: static
# linking, the same ISA/ABI/code model as the compile step, a link map written
# to output.map, the two specs files, and the linker script.
target_link_options(target-riscv INTERFACE -static)
target_link_options(target-riscv INTERFACE -march=${MARCH} -mabi=${MABI} -mcmodel=${MCMODEL})
# NOTE(review): the next line hard-codes ${CMAKE_SOURCE_DIR}/htif.ld while the
# two lines after it use ${LINKER_SCRIPT} — looks like the old and new form of
# the same statement shown together; confirm.
target_link_options(target-riscv INTERFACE -Wl,-Map=output.map -specs=${SPECS_FILE} -specs=${WRAP_SPECS_FILE} -T ${CMAKE_SOURCE_DIR}/htif.ld)
target_link_options(target-riscv INTERFACE -Wl,-Map=output.map -specs=${SPECS_FILE} -specs=${WRAP_SPECS_FILE})
target_link_options(target-riscv INTERFACE -T ${LINKER_SCRIPT})


# Descend into the library, its tests and the example programs, in that order.
foreach (nn_component_dir IN ITEMS nn tests examples)
  add_subdirectory(${nn_component_dir})
endforeach ()

30 changes: 30 additions & 0 deletions docs/Tensor-Creation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Tensor Creation

A set of factory functions is available for creating a tensor object. These factory functions configure the shape, data type, device, and other properties of the new tensor, and optionally populate it according to a specific algorithm.

## Factory Functions

A *factory function* is a function that produces a new tensor. There are many factory functions available, which differ in the way they initialize a new tensor before returning it. All factory functions adhere to the following general “schema”:

```c
Tensor *NN_<function-name>(<ndim>, <shape>, <datatype>, <tensor-options>)
```

### Available Factory Functions

The following factory functions are available at the time of this writing:

**tensor**: Returns a tensor with uninitialized values or a preallocated buffer.

**zeros**: Returns a tensor filled with all zeros.

**ones**: Returns a tensor filled with all ones.

**full**: Returns a tensor filled with a single value.

**rand**: Returns a tensor filled with values drawn from a uniform distribution on [0, 1).

**randint**: Returns a tensor with integers randomly drawn from an interval.

**arange**: Returns a tensor with a sequence of integers.

3 changes: 3 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Build every example program; each one lives in its own subdirectory.
foreach (nn_example_dir IN ITEMS fast-depth llama2 stereo-block-matching)
  add_subdirectory(${nn_example_dir})
endforeach ()
40 changes: 9 additions & 31 deletions examples/fast-depth/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,22 @@ cmake_minimum_required(VERSION 3.10)

project(fast-depth LANGUAGES C)


# The fast-depth example is a single-source executable.
add_executable(fast-depth main.c)
target_include_directories(fast-depth PUBLIC inc ../utility)

# Headers next to main.c and the shared example utilities.
target_include_directories(fast-depth PUBLIC ./)
target_include_directories(fast-depth PUBLIC ../utility)


# PRIVATE, not INTERFACE: INTERFACE requirements only reach consumers of a
# target, and an executable has none, so INTERFACE would not affect how
# fast-depth itself is compiled.
target_compile_features(fast-depth PRIVATE c_std_11)

# Attach the architecture-specific interface target selected by the X86 / RISCV
# options.
if (X86)
  message("fast-depth: building for x86")
  target_link_libraries(fast-depth PUBLIC target-x86)

  message(STATUS "fast-depth: Building for x86")
  target_link_libraries(fast-depth PUBLIC target-x86)
elseif (RISCV)
  message("fast-depth: building for RISC-V")
  # CMake toolchain definition for RISC-V GCC toolchain
  # NOTE(review): CMake expects compiler/toolchain variables to be set before
  # project(); these assignments come after it — confirm they are still wanted
  # rather than a CMAKE_TOOLCHAIN_FILE.
  set(CMAKE_SYSTEM_NAME "Generic" CACHE STRING "")
  set(CMAKE_SYSTEM_PROCESSOR "riscv" CACHE STRING "")

  set(TOOLCHAIN_PREFIX "riscv64-unknown-elf-")

  set(CMAKE_C_COMPILER "${TOOLCHAIN_PREFIX}gcc")
  set(CMAKE_ASM_COMPILER "${TOOLCHAIN_PREFIX}gcc")
  set(CMAKE_CXX_COMPILER "${TOOLCHAIN_PREFIX}g++")
  set(CMAKE_AR "${TOOLCHAIN_PREFIX}ar")
  # Was "{TOOLCHAIN_PREFIX}ld" (missing "$"), which set the literal text
  # instead of expanding the prefix.
  set(CMAKE_LINKER "${TOOLCHAIN_PREFIX}ld")
  set(CMAKE_OBJCOPY "${TOOLCHAIN_PREFIX}objcopy")
  set(CMAKE_SIZE "${TOOLCHAIN_PREFIX}size")
  # Was pointing at "ld"; the strip tool is "<prefix>strip".
  set(CMAKE_STRIP "${TOOLCHAIN_PREFIX}strip")

  target_link_libraries(fast-depth PUBLIC target-riscv)
  message(STATUS "fast-depth: Building for RISC-V")
  target_link_libraries(fast-depth PUBLIC target-riscv)
endif ()

# Optimisation level and warnings for this example.
add_compile_options(-O3 -Wall -Wextra)
target_compile_options(fast-depth PRIVATE -O3 -Wall -Wextra)

# "-u _printf_float" forces the linker to pull in floating-point printf
# support. It is a linker option, so it must be a link option: compile options
# are not passed to the link command. "SHELL:" keeps the two words together as
# one option group.
target_link_options(fast-depth PRIVATE "SHELL:-u _printf_float")

add_subdirectory(../../ ./build/)

# Build the nn library as part of this example and link against it.
add_subdirectory(../../nn/ ./build/nn)
target_link_libraries(fast-depth PUBLIC nn)

# libm (math library)
target_link_libraries(fast-depth PUBLIC m)

2 changes: 1 addition & 1 deletion examples/fast-depth/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "termimg.h"

// load the weight data block from the model.bin file
INCLUDE_FILE(".rodata", "../input.bin", model_input);
INCLUDE_FILE(".rodata", "./input.bin", model_input);
extern uint8_t model_input_data[];
extern size_t model_input_start[];
extern size_t model_input_end[];
Expand Down
2 changes: 1 addition & 1 deletion examples/fast-depth/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


// load the weight data block from the model.bin file
INCLUDE_FILE(".rodata", "../model.bin", model_weight);
INCLUDE_FILE(".rodata", "./model.bin", model_weight);
extern uint8_t model_weight_data[];
extern size_t model_weight_start[];
extern size_t model_weight_end[];
Expand Down
37 changes: 7 additions & 30 deletions examples/llama2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,21 @@ cmake_minimum_required(VERSION 3.10)

project(llama2 LANGUAGES C)


# The llama2 example is a single-source executable.
add_executable(llama2 main.c)
target_include_directories(llama2 PUBLIC inc)

# Headers next to main.c.
target_include_directories(llama2 PUBLIC ./)


# PRIVATE, not INTERFACE: INTERFACE requirements only reach consumers of a
# target, and an executable has none, so INTERFACE would not affect how
# llama2 itself is compiled.
target_compile_features(llama2 PRIVATE c_std_11)

# Attach the architecture-specific interface target selected by the X86 / RISCV
# options.
if (X86)
  message("llama2: building for x86")
  target_link_libraries(llama2 PUBLIC target-x86)

  message(STATUS "llama2: Building for x86")
elseif (RISCV)
  message("llama2: building for RISC-V")
  # CMake toolchain definition for RISC-V GCC toolchain
  # NOTE(review): CMake expects compiler/toolchain variables to be set before
  # project(); these assignments come after it — confirm they are still wanted
  # rather than a CMAKE_TOOLCHAIN_FILE.
  set(CMAKE_SYSTEM_NAME "Generic" CACHE STRING "")
  set(CMAKE_SYSTEM_PROCESSOR "riscv" CACHE STRING "")

  set(TOOLCHAIN_PREFIX "riscv64-unknown-elf-")

  set(CMAKE_C_COMPILER "${TOOLCHAIN_PREFIX}gcc")
  set(CMAKE_ASM_COMPILER "${TOOLCHAIN_PREFIX}gcc")
  set(CMAKE_CXX_COMPILER "${TOOLCHAIN_PREFIX}g++")
  set(CMAKE_AR "${TOOLCHAIN_PREFIX}ar")
  # Was "{TOOLCHAIN_PREFIX}ld" (missing "$"), which set the literal text
  # instead of expanding the prefix.
  set(CMAKE_LINKER "${TOOLCHAIN_PREFIX}ld")
  set(CMAKE_OBJCOPY "${TOOLCHAIN_PREFIX}objcopy")
  set(CMAKE_SIZE "${TOOLCHAIN_PREFIX}size")
  # Was pointing at "ld"; the strip tool is "<prefix>strip".
  set(CMAKE_STRIP "${TOOLCHAIN_PREFIX}strip")

  target_link_libraries(llama2 PUBLIC target-riscv)
  message(STATUS "llama2: Building for RISC-V")
  target_link_libraries(llama2 PUBLIC target-riscv)
endif ()

# Optimisation level and warnings for this example.
add_compile_options(-O3 -Wall -Wextra)
target_compile_options(llama2 PRIVATE -O3 -Wall -Wextra)

# "-u _printf_float" forces the linker to pull in floating-point printf
# support. It is a linker option, so it must be a link option: compile options
# are not passed to the link command. "SHELL:" keeps the two words together as
# one option group.
target_link_options(llama2 PRIVATE "SHELL:-u _printf_float")

add_subdirectory(../../ ./build/)

# Build the nn library as part of this example and link against it.
add_subdirectory(../../nn/ ./build/nn)
target_link_libraries(llama2 PUBLIC nn)

# libm (math library)
target_link_libraries(llama2 PUBLIC m)

35 changes: 32 additions & 3 deletions examples/llama2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ wget -P checkpoints/ https://huggingface.co/karpathy/tinyllamas/resolve/main/sto

# Performance Benchmark

Native impl
Native impl (150479)

```
```bash
[100%] Built target llama2
Llama 2: a small transformer model for text generation
forward taking 458394741 cycles
Expand All @@ -29,4 +29,33 @@ forward taking 458942021 cycles
there
forward taking 459044968 cycles
was
```
```

Replace matmul and softmax with JIT tensors

```bash
[100%] Built target llama2
Llama 2: a small transformer model for text generation
forward taking 11942828 cycles
Once
forward taking 12093307 cycles
upon
forward taking 12196991 cycles
a
forward taking 12292760 cycles
time
forward taking 12398778 cycles
,
forward taking 12490108 cycles
there
forward taking 12593055 cycles
was
```


Replacing float arrays with tensors
```
forward taking 11274729 cycles
```


Loading

0 comments on commit afb203c

Please sign in to comment.