merge latest changes from upstream #6

Merged: 48 commits, Mar 19, 2024

Commits (48)

0fd6c1f
embedding : print cosine similarity (#899)
ggerganov Mar 14, 2024
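
This commit prints the cosine similarity between the computed embeddings. As a quick illustration of the metric, here is a minimal sketch (a hypothetical helper, not code from the PR):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical helper: cosine similarity between two embedding vectors.
// Returns a value in [-1, 1]; 1 means the vectors point the same way.
static float cosine_similarity(const std::vector<float> & a, const std::vector<float> & b) {
    float dot = 0.0f, na = 0.0f, nb = 0.0f;
    for (std::size_t i = 0; i < a.size(); i++) {
        dot += a[i] * b[i];
        na  += a[i] * a[i];
        nb  += b[i] * b[i];
    }
    return dot / (std::sqrt(na) * std::sqrt(nb));
}
```
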
381da2d
metal : build metallib + fix embed path (#6015)
ggerganov Mar 14, 2024
68265eb
embedding : print all resulting embeddings (#899)
ggerganov Mar 14, 2024
3fe8d7a
ggml : designate enum vals for integer types (#6050)
ggerganov Mar 14, 2024
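
Designating explicit enum values pins the serialized ID of each type, so inserting new members later cannot silently renumber existing ones. An illustrative sketch (the names and numbers are made up, not the real ggml table):

```cpp
// Illustrative only: explicit values keep on-disk type IDs stable even if
// new members are added in the middle of the enum later.
enum example_type {
    EXAMPLE_TYPE_F32 = 0,
    EXAMPLE_TYPE_F16 = 1,
    EXAMPLE_TYPE_I8  = 16,
    EXAMPLE_TYPE_I16 = 17,
    EXAMPLE_TYPE_I32 = 18,
};
```
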
3ca2348
gguf-py : add support for I8, I16 and I32 (#6045)
certik Mar 14, 2024
2c4fb69
llama : optimize defrag moves + fix fragmentation calculation (#6037)
Xarbirus Mar 14, 2024
a44bc96
llama : fix typo
ggerganov Mar 14, 2024
43241ad
server: disable debug release type sanitizer, simplify trigger (#6047)
phymbert Mar 14, 2024
15a3332
readme : improve readme for Llava-1.6 example (#6044)
jianliao Mar 14, 2024
77178ee
gguf-py : fix dtype check (#6045)
ggerganov Mar 14, 2024
044ec4b
embedding : add EOS token if not present (#899)
ggerganov Mar 14, 2024
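
Appending a missing EOS matters because many embedding models pool on the final token. A rough sketch of the check (names assumed, not the actual patch):

```cpp
#include <cstdint>
#include <vector>

// Sketch: make sure the token sequence ends with EOS before embedding.
// In real code `eos` would come from the model's vocabulary.
static void ensure_eos(std::vector<int32_t> & tokens, int32_t eos) {
    if (tokens.empty() || tokens.back() != eos) {
        tokens.push_back(eos);
    }
}
```
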
69ff613
llama : support models without vocabulary (#5798)
Xarbirus Mar 14, 2024
7271077
gguf-py : bump version to 0.8.0 (#6060)
certik Mar 14, 2024
6e0438d
gguf : fix resource leaks (#6061)
stevegrubb Mar 14, 2024
4755afd
llama : fix integer overflow during quantization (#6063)
ggerganov Mar 14, 2024
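
Overflows of this kind typically come from accumulating 32-bit products across a large tensor. A generic sketch of the failure mode and the wide-accumulator fix (not the actual patch):

```cpp
#include <cstddef>
#include <cstdint>

// Sketch: summing int32 products into an int32 can wrap on large tensors;
// accumulating into int64_t avoids the overflow.
static int64_t dot_i8(const int8_t * a, const int8_t * b, std::size_t n) {
    int64_t acc = 0;
    for (std::size_t i = 0; i < n; i++) {
        acc += (int32_t) a[i] * (int32_t) b[i];
    }
    return acc;
}
```
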
b0bc9f4
llama-bench : use random tokens to improve accuracy with mixtral (#6069)
slaren Mar 15, 2024
aab606a
llama : add Orion chat template (#6066)
ngxson Mar 15, 2024
7ce2c77
gguf : add support for I64 and F64 arrays (#6062)
certik Mar 15, 2024
753e36f
[SYCL] Fix non-intel device selection (#6042)
AidanBeltonS Mar 15, 2024
131b058
make : ggml-metal.o depends on ggml.h
ggerganov Mar 15, 2024
46acb36
fix set main gpu error (#6073)
NeoZhangJianyu Mar 15, 2024
3020327
cuda : disable unused cudaLaunchHostFunc code (#6078)
slaren Mar 15, 2024
4e9a7f7
llava : change API to pure C style for Rust FFI bindgen (#6079)
tinglou Mar 15, 2024
12247f4
llama : add Command-R support (#6033)
acanis Mar 15, 2024
877b4d0
llama : add support for control vectors (#5970)
vgel Mar 15, 2024
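
Control vectors steer generation by adding a learned direction to the hidden state at selected layers. A simplified sketch of the idea (names assumed, not the PR's API):

```cpp
#include <cstddef>
#include <vector>

// Sketch: add a scaled control (steering) direction to a hidden state.
static void apply_control_vector(std::vector<float> & hidden,
                                 const std::vector<float> & direction,
                                 float strength) {
    for (std::size_t i = 0; i < hidden.size(); i++) {
        hidden[i] += strength * direction[i];
    }
}
```
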
d84c485
llama : fix Baichuan2 13B (#6092)
slaren Mar 15, 2024
a56d09a
ci : close inactive issue with workflow (#6053)
phymbert Mar 16, 2024
15961ec
common : refactor nested if causing error C1061 on MSVC (#6101)
dranger003 Mar 16, 2024
dfbfdd6
readme : add wllama as a wasm binding (#6100)
ngxson Mar 16, 2024
b5f4ae0
gritlm : add initial README.md (#6086)
danbev Mar 16, 2024
c47cf41
ggml : add AVX512F SIMD (#6088)
amiralimi Mar 16, 2024
dc0f612
ggml : fix finding transfer queue family index error (#6094)
GainLee Mar 17, 2024
cd776c3
ci : close all stale issues at once (#6115)
ggerganov Mar 17, 2024
d01b3c4
common: llama_load_model_from_url using --model-url (#6098)
phymbert Mar 17, 2024
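
The new --model-url flag fetches the model over HTTP before loading it. Assuming the download is plain libcurl usage, a minimal sketch could look like this (error handling trimmed, not the PR's implementation):

```cpp
#include <cstdio>
#include <curl/curl.h>

// Sketch: download a model file to disk with libcurl, then load it as usual.
static bool download_model(const char * url, const char * path) {
    CURL * curl = curl_easy_init();
    if (!curl) {
        return false;
    }
    FILE * f = std::fopen(path, "wb");
    if (!f) {
        curl_easy_cleanup(curl);
        return false;
    }
    curl_easy_setopt(curl, CURLOPT_URL, url);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // follow redirects
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, f);       // default callback writes to FILE*
    const CURLcode res = curl_easy_perform(curl);
    std::fclose(f);
    curl_easy_cleanup(curl);
    return res == CURLE_OK;
}
```
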
3a6efdd
convert : use f32 outtype for bf16 tensors (#6106)
Artefact2 Mar 18, 2024
9b03719
convert : add support for CamembertModel architecture (#6119)
Royalphax Mar 18, 2024
496bc79
common : tidy-up argument parsing (#6105)
dranger003 Mar 18, 2024
2bf8d0f
backend : offload large batches to GPU (#6083)
slaren Mar 18, 2024
4f6d133
ci : temporary disable sanitizer builds (#6128)
ggerganov Mar 18, 2024
ac9ee6a
ci : disable stale issue messages (#6126)
ggerganov Mar 18, 2024
5e1b7f9
backend : set max split inputs to GGML_MAX_SRC (#6137)
slaren Mar 18, 2024
104f5e0
clip : fix memory leak (#6138)
felrock Mar 18, 2024
d199ca7
mpt : implement backwards compatibility with duped output tensor (#6139)
cebtenzzre Mar 18, 2024
2d15886
flake.lock: Update
github-actions[bot] Mar 17, 2024
4c28b82
common : print usage on '-h' and '--help' (#6145)
dranger003 Mar 19, 2024
970a480
ci : exempt some labels from being tagged as stale (#6140)
slaren Mar 19, 2024
b80cf3b
common : disable repeat penalties by default (#6127)
ggerganov Mar 19, 2024
d0d5de4
gguf-split: split and merge gguf per batch of tensors (#6135)
phymbert Mar 19, 2024
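
The last commit above (d0d5de4) splits a GGUF file per batch of tensors; the core of such a split is a simple chunking loop, sketched here with hypothetical types (not the gguf-split code):

```cpp
#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

// Sketch: partition tensor names into shards of at most `batch` entries.
static std::vector<std::vector<std::string>> split_batches(
        const std::vector<std::string> & tensors, std::size_t batch) {
    std::vector<std::vector<std::string>> shards;
    for (std::size_t i = 0; i < tensors.size(); i += batch) {
        const std::size_t end = std::min(i + batch, tensors.size());
        shards.emplace_back(tensors.begin() + i, tensors.begin() + end);
    }
    return shards;
}
```
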
72 changes: 48 additions & 24 deletions .github/workflows/build.yml
@@ -48,8 +48,8 @@ jobs:
CC=gcc-8 make tests -j $(nproc)
make test -j $(nproc)

ubuntu-latest-cmake:
runs-on: ubuntu-latest
ubuntu-focal-make-curl:
runs-on: ubuntu-20.04

steps:
- name: Clone
@@ -60,32 +60,19 @@ jobs:
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential
sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev

- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DLLAMA_FATAL_WARNINGS=ON
cmake --build . --config Release -j $(nproc)

- name: Test
id: cmake_test
id: make_build
env:
LLAMA_FATAL_WARNINGS: 1
LLAMA_CURL: 1
run: |
cd build
ctest -L main --verbose --timeout 900
CC=gcc-8 make -j $(nproc)

ubuntu-latest-cmake-sanitizer:
ubuntu-latest-cmake:
runs-on: ubuntu-latest

continue-on-error: true

strategy:
matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED]
build_type: [Debug, Release]

steps:
- name: Clone
id: checkout
@@ -102,15 +89,50 @@ jobs:
run: |
mkdir build
cd build
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
cmake .. -DLLAMA_FATAL_WARNINGS=ON
cmake --build . --config Release -j $(nproc)

- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900

# ubuntu-latest-cmake-sanitizer:
# runs-on: ubuntu-latest
#
# continue-on-error: true
#
# strategy:
# matrix:
# sanitizer: [ADDRESS, THREAD, UNDEFINED]
# build_type: [Debug, Release]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v3
#
# - name: Dependencies
# id: depends
# run: |
# sudo apt-get update
# sudo apt-get install build-essential
#
# - name: Build
# id: cmake_build
# run: |
# mkdir build
# cd build
# cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
# cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
#
# - name: Test
# id: cmake_test
# run: |
# cd build
# ctest -L main --verbose --timeout 900

ubuntu-latest-cmake-mpi:
runs-on: ubuntu-latest

@@ -333,6 +355,7 @@ jobs:
mkdir build
cd build
cmake -G Xcode .. \
-DLLAMA_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
@@ -361,6 +384,7 @@ jobs:
mkdir build
cd build
cmake -G Xcode .. \
-DLLAMA_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
23 changes: 23 additions & 0 deletions .github/workflows/close-issue.yml
@@ -0,0 +1,23 @@
name: Close inactive issues
on:
schedule:
- cron: "42 0 * * *"

jobs:
close-issues:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- uses: actions/stale@v5
with:
exempt-issue-labels: "refactor,help wanted,good first issue,research"
days-before-issue-stale: 30
days-before-issue-close: 14
stale-issue-label: "stale"
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
days-before-pr-stale: -1
days-before-pr-close: -1
operations-per-run: 1000
repo-token: ${{ secrets.GITHUB_TOKEN }}
41 changes: 28 additions & 13 deletions .github/workflows/server.yml
@@ -24,18 +24,15 @@ jobs:

strategy:
matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED]
build_type: [Debug, Release]
# TODO: temporary disabled due to linux kernel issues
#sanitizer: [ADDRESS, THREAD, UNDEFINED]
sanitizer: [UNDEFINED]
build_type: [Debug]
include:
- build_type: Release
sanitizer: ""
exclude:
- build_type: Release
sanitizer: ADDRESS
- build_type: Release
sanitizer: THREAD
- build_type: Release
sanitizer: UNDEFINED
disabled_on_pr: true
fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

container:
image: ubuntu:latest
@@ -60,7 +57,8 @@ jobs:
cmake \
python3-pip \
wget \
language-pack-en
language-pack-en \
libcurl4-openssl-dev

- name: Build
id: cmake_build
@@ -70,6 +68,7 @@ jobs:
cmake .. \
-DLLAMA_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CURL=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
@@ -81,13 +80,14 @@

- name: Tests
id: server_integration_tests
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
run: |
cd examples/server/tests
PORT=8888 ./tests.sh

- name: Slow tests
id: server_integration_tests_slow
if: ${{ github.event.schedule != '' && matrix.build_type == 'Release' || github.event.inputs.slow_tests == 'true' }}
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd examples/server/tests
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
@@ -103,12 +103,21 @@ jobs:
with:
fetch-depth: 0

- name: libCURL
id: get_libcurl
env:
CURL_VERSION: 8.6.0_6
run: |
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
mkdir $env:RUNNER_TEMP/libcurl
tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl

- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

- name: Python setup
Expand All @@ -122,15 +131,21 @@ jobs:
run: |
pip install -r examples/server/tests/requirements.txt

- name: Copy Libcurl
id: prepare_libcurl
run: |
cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll

- name: Tests
id: server_integration_tests
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
run: |
cd examples/server/tests
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp

- name: Slow tests
id: server_integration_tests_slow
if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd examples/server/tests
behave.exe --stop --no-skipped --no-capture --tags slow
2 changes: 2 additions & 0 deletions .gitignore
@@ -25,6 +25,8 @@
.vscode/
.idea/

ggml-metal-embed.metal

lcov-report/
gcovr-report/

71 changes: 39 additions & 32 deletions CMakeLists.txt
@@ -99,6 +99,7 @@ option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
"llama: max. batch size for using peer access")
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
@@ -200,9 +201,6 @@ if (LLAMA_METAL)
add_compile_definitions(GGML_METAL_NDEBUG)
endif()

# get full path to the file
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")

# copy ggml-common.h and ggml-metal.metal to bin directory
configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
@@ -211,53 +209,62 @@ if (LLAMA_METAL)
enable_language(ASM)
add_compile_definitions(GGML_METAL_EMBED_LIBRARY)

set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")

file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s")

# merge ggml-common.h and ggml-metal.metal into a single file
set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")

add_custom_command(
OUTPUT ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".section __DATA,__ggml_metallib" > ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".globl _ggml_metallib_start" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".incbin \\\"${METALLIB_SOURCE}\\\"" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY}
COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY}
DEPENDS ${METALLIB_SOURCE}
OUTPUT ${METALLIB_EMBED_ASM}
COMMAND echo "Embedding Metal library"
COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM}
COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM}
COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM}
COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM}
COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM}
DEPENDS ggml-metal.metal ggml-common.h
COMMENT "Generate assembly for embedded Metal library"
)

set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY})
endif()

if (LLAMA_METAL_SHADER_DEBUG)
# custom command to do the following:
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
# xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
#
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
# disabling fast math is needed in order to pass tests/test-backend-ops
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
# note: unfortunately, we have to call it default.metallib instead of ggml.metallib
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720
set(XC_FLAGS -fno-fast-math -fno-inline -g)
if (LLAMA_QKK_64)
set(XC_FLAGS ${XC_FLAGS} -DQK_K=64)
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
else()
if (LLAMA_METAL_SHADER_DEBUG)
# custom command to do the following:
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
# xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
#
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
# disabling fast math is needed in order to pass tests/test-backend-ops
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
# note: unfortunately, we have to call it default.metallib instead of ggml.metallib
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720
set(XC_FLAGS -fno-fast-math -fno-inline -g)
else()
set(XC_FLAGS -O3)
endif()

add_custom_command(
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
DEPENDS ggml-metal.metal
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
DEPENDS ggml-metal.metal ggml-common.h
COMMENT "Compiling Metal kernels"
)
)

add_custom_target(
ggml-metal ALL
DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
)
endif()
)
endif() # LLAMA_METAL_EMBED_LIBRARY

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
${FOUNDATION_LIBRARY}
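
The LLAMA_METAL_EMBED_LIBRARY path above embeds the merged Metal source between two assembly labels. On the consuming side such a blob is usually read back through extern symbols, roughly as in this sketch (symbol names mirror the generated assembly, with the Mach-O leading underscore dropped on the C++ side; the surrounding code is assumed):

```cpp
#include <cstddef>

// Sketch: recover data embedded via .incbin between two asm labels.
extern const char ggml_metallib_start[];
extern const char ggml_metallib_end[];

static const char * embedded_metallib(std::size_t * size) {
    *size = (std::size_t) (ggml_metallib_end - ggml_metallib_start);
    return ggml_metallib_start;
}
```
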