diff --git a/.circleci/config.yml b/.circleci/config.yml index 56d8867120f7..2afc1d66399a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -12,9 +12,9 @@ workflows: config-path: .circleci/continue_config.yml mapping: | .circleci/.* run-all-workflows true + gpt4all-backend/.* run-all-workflows true gpt4all-bindings/python/.* run-python-workflow true gpt4all-bindings/typescript/.* run-ts-workflow true gpt4all-bindings/csharp/.* run-csharp-workflow true - gpt4all-backend/.* run-chat-workflow true gpt4all-chat/.* run-chat-workflow true .* run-default-workflow true diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 5f72c6137c79..d9be352a0acd 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -235,10 +235,8 @@ jobs: name: Build command: | export CMAKE_PREFIX_PATH=~/Qt/6.5.1/gcc_64/lib/cmake - mkdir build - cd build - ~/Qt/Tools/CMake/bin/cmake -DCMAKE_BUILD_TYPE=Release -S ../gpt4all-chat -B . - ~/Qt/Tools/CMake/bin/cmake --build . --target all + ~/Qt/Tools/CMake/bin/cmake -DCMAKE_BUILD_TYPE=Release -S gpt4all-chat -B build + ~/Qt/Tools/CMake/bin/cmake --build build --target all build-gpt4all-chat-windows: machine: @@ -291,17 +289,15 @@ jobs: $Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\include" $Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\ATLMFC\include" $Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1" - mkdir build - cd build & "C:\Qt\Tools\CMake_64\bin\cmake.exe" ` "-DCMAKE_GENERATOR:STRING=Ninja" ` "-DCMAKE_BUILD_TYPE=Release" ` "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_64" ` "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" ` "-DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON" ` - "-S ..\gpt4all-chat" ` - "-B ." - & "C:\Qt\Tools\Ninja\ninja.exe" + "-S gpt4all-chat" ` + "-B build" + & "C:\Qt\Tools\Ninja\ninja.exe" -C build build-gpt4all-chat-macos: macos: @@ -332,17 +328,15 @@ jobs: - run: name: Build command: | - mkdir build - cd build ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake \ -DCMAKE_GENERATOR:STRING=Ninja \ -DBUILD_UNIVERSAL=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_PREFIX_PATH:PATH=~/Qt/6.5.1/macos/lib/cmake/Qt6 \ -DCMAKE_MAKE_PROGRAM:FILEPATH=~/Qt/Tools/Ninja/ninja \ - -S ../gpt4all-chat \ - -B . - ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake --build . --target all + -S gpt4all-chat \ + -B build + ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake --build build --target all build-ts-docs: docker: - image: cimg/base:stable @@ -407,13 +401,10 @@ jobs: - run: name: Build C library command: | - git submodule init - git submodule update + git submodule update --init --recursive cd gpt4all-backend - mkdir build - cd build - cmake .. - cmake --build . --parallel + cmake -B build + cmake --build build --parallel - run: name: Build wheel command: | @@ -440,13 +431,10 @@ jobs: - run: name: Build C library command: | - git submodule init - git submodule update + git submodule update --init # don't use --recursive because macOS doesn't use Kompute cd gpt4all-backend - mkdir build - cd build - cmake .. -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" - cmake --build . 
--parallel + cmake -B build -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" + cmake --build build --parallel - run: name: Build wheel command: | @@ -482,16 +470,13 @@ jobs: - run: name: Build C library command: | - git submodule init - git submodule update + git submodule update --init --recursive cd gpt4all-backend - mkdir build - cd build $Env:Path += ";C:\ProgramData\mingw64\mingw64\bin" $Env:Path += ";C:\VulkanSDK\1.3.261.1\bin" $Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1" - cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF - cmake --build . --parallel + cmake -G "MinGW Makefiles" -B build -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF + cmake --build build --parallel - run: name: Build wheel # TODO: As part of this task, we need to move mingw64 binaries into package. @@ -679,7 +664,7 @@ jobs: build-csharp-linux: docker: - - image: mcr.microsoft.com/dotnet/sdk:7.0-jammy # Ubuntu 22.04 + - image: mcr.microsoft.com/dotnet/sdk:8.0 steps: - checkout - attach_workspace: @@ -735,6 +720,10 @@ jobs: - gpt4all-csharp-nuget-packages-win - attach_workspace: at: C:\Users\circleci\workspace + - run: + name: "Install .NET" + command: | + choco install -y dotnet-8.0-sdk - run: name: "Prepare Native Libs" command: | @@ -782,7 +771,8 @@ jobs: - run: name: Install dependencies command: | - brew install --cask dotnet-sdk + brew tap isen-ng/dotnet-sdk-versions + brew install --cask dotnet-sdk8-0-100 - attach_workspace: at: /tmp/workspace - run: @@ -824,7 +814,7 @@ jobs: store-and-upload-nupkgs: docker: - - image: mcr.microsoft.com/dotnet/sdk:6.0-jammy # Ubuntu 22.04 + - image: mcr.microsoft.com/dotnet/sdk:8.0 steps: - attach_workspace: at: /tmp/workspace @@ -840,9 +830,9 @@ jobs: cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/ mkdir -p runtimes/win-x64/native cp /tmp/workspace/runtimes/win-x64/*.dll runtimes/win-x64/native/ - mkdir -p runtimes/osx/native - cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/ - cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/ + #mkdir -p runtimes/osx/native + #cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/ + #cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/ dotnet pack ./Gpt4All/Gpt4All.csproj -p:IncludeSymbols=true -p:SymbolPackageFormat=snupkg -c Release dotnet nuget push ./Gpt4All/bin/Release/Gpt4All.*.nupkg -s $NUGET_URL -k $NUGET_TOKEN --skip-duplicate - store_artifacts: @@ -1209,4 +1199,4 @@ workflows: - nuget-hold - build-csharp-windows - build-csharp-linux - - build-csharp-macos + #- build-csharp-macos diff --git a/.github/ISSUE_TEMPLATE/bindings-bug.md b/.github/ISSUE_TEMPLATE/bindings-bug.md new file mode 100644 index 000000000000..cbf0d49dd51b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bindings-bug.md @@ -0,0 +1,35 @@ +--- +name: "\U0001F6E0 Bindings Bug Report" +about: A bug report for the GPT4All Bindings +labels: ["bindings", "bug-unconfirmed"] +--- + + + +### Bug Report + + + +### Example Code + + + +### Steps to Reproduce + + + +1. +2. +3. + +### Expected Behavior + + + +### Your Environment + +- Bindings version (e.g. 
"Version" from `pip show gpt4all`): +- Operating System: +- Chat model used (if applicable): + + diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml deleted file mode 100644 index 4e446ac58225..000000000000 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: "\U0001F41B Bug Report" -description: Submit a bug report to help us improve GPT4All -labels: ["02 Bug Report"] -body: - - type: markdown - attributes: - value: > - Thank you for taking the time to file a bug report. Before creating a new - issue, please make sure to take a few moments to check the issue tracker - for existing issues about the bug. - - - type: textarea - id: system-info - attributes: - label: System Info - description: Please share your system info with us. - placeholder: GPT4All version, platform, python version, etc... - validations: - required: true - - - type: checkboxes - id: information-scripts-examples - attributes: - label: Information - description: "The problem arises when using:" - options: - - label: "The official example notebooks/scripts" - - label: "My own modified scripts" - - - type: textarea - id: reproduction - validations: - required: true - attributes: - label: Reproduction - description: | - Please provide a [code sample](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the problem you ran into. It can be a Colab link or just a code snippet. - If you have code snippets, error messages, stack traces please provide them here as well. - Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting - Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. - - placeholder: | - Steps to reproduce the behavior: - - 1. - 2. - 3. - - - type: textarea - id: expected-behavior - validations: - required: true - attributes: - label: Expected behavior - description: "A clear and concise description of what you would expect to happen." diff --git a/.github/ISSUE_TEMPLATE/chat-bug.md b/.github/ISSUE_TEMPLATE/chat-bug.md new file mode 100644 index 000000000000..45f3b4099210 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/chat-bug.md @@ -0,0 +1,31 @@ +--- +name: "\U0001F4AC GPT4All Bug Report" +about: A bug report for GPT4All Chat +labels: ["chat", "bug-unconfirmed"] +--- + + + +### Bug Report + + + +### Steps to Reproduce + + + +1. +2. +3. + +### Expected Behavior + + + +### Your Environment + +- GPT4All version: +- Operating System: +- Chat model used (if applicable): + + diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md new file mode 100644 index 000000000000..062c37da3668 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,9 @@ +--- +name: "\U0001F4C4 Documentation" +about: An issue related to the GPT4All documentation +labels: ["documentation"] +--- + +### Documentation + + diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml deleted file mode 100644 index 0b8626f47d93..000000000000 --- a/.github/ISSUE_TEMPLATE/documentation.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Documentation -description: Report an issue related to the GPT4All documentation. 
-title: "DOC: " -labels: [03 - Documentation] - -body: -- type: textarea - attributes: - label: "Issue with current documentation:" - description: > - Please make sure to leave a reference to the document/code you're - referring to. - -- type: textarea - attributes: - label: "Idea or request for content:" - description: > - Please describe as clearly as possible what topics you think are missing - from the current documentation. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 000000000000..5d6f2ee84e22 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,10 @@ +--- +name: "\U0001F680 Feature Request" +about: Submit a proposal/request for a new GPT4All feature +title: "[Feature] Feature request title..." +labels: ["enhancement"] +--- + +### Feature Request + + diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml deleted file mode 100644 index 92282dbdb442..000000000000 --- a/.github/ISSUE_TEMPLATE/feature-request.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: "\U0001F680 Feature Request" -description: Submit a proposal/request for a new GPT4All feature -labels: ["02 Feature Request"] -body: - - type: textarea - id: feature-request - validations: - required: true - attributes: - label: Feature request - description: | - A clear and concise description of the feature proposal. Please provide links to any relevant GitHub repos, papers, or other resources if relevant. - - - type: textarea - id: motivation - validations: - required: true - attributes: - label: Motivation - description: | - Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. - - - type: textarea - id: contribution - validations: - required: true - attributes: - label: Your contribution - description: | - Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/nomic-ai/gpt4all/blob/main/CONTRIBUTING.md) \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/other-bug.md b/.github/ISSUE_TEMPLATE/other-bug.md new file mode 100644 index 000000000000..de161bd78bf8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/other-bug.md @@ -0,0 +1,32 @@ +--- +name: "\U0001F41B Other Bug Report" +about: A bug in another component of GPT4All +labels: ["bug-unconfirmed"] +--- + + + +### Bug Report + + + +### Steps to Reproduce + + + +1. +2. +3. + +### Expected Behavior + + + +### Your Environment + +- GPT4All version (if applicable): +- Operating System: +- Chat model used (if applicable): + + + diff --git a/.github/ISSUE_TEMPLATE/other.yml b/.github/ISSUE_TEMPLATE/other.yml deleted file mode 100644 index c0068f0f1e0a..000000000000 --- a/.github/ISSUE_TEMPLATE/other.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Other Issue -description: Raise an issue that wouldn't be covered by the other templates. -title: "Issue: " -labels: [04 - Other] - -body: - - type: textarea - attributes: - label: "Issue you'd like to raise." - description: > - Please describe the issue you'd like to raise as clearly as possible. - Make sure to include any relevant links or references. - - - type: textarea - attributes: - label: "Suggestion:" - description: > - Please outline a suggestion to improve the issue here. 
\ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 0ada233f238a..03751865c0a5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "llama.cpp-mainline"] path = gpt4all-backend/llama.cpp-mainline url = https://github.com/nomic-ai/llama.cpp.git - branch = gguf + branch = master diff --git a/gpt4all-api/README.md b/gpt4all-api/README.md index 577bbd3ec041..5025e0411d31 100644 --- a/gpt4all-api/README.md +++ b/gpt4all-api/README.md @@ -43,7 +43,7 @@ Run ```bash docker compose up --build ``` -and edit files in the `api` directory. The api will hot-reload on changes. +and edit files in the `app` directory. The api will hot-reload on changes. You can run the unit tests with diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt index 39152a2e0450..f20404e30cc1 100644 --- a/gpt4all-backend/CMakeLists.txt +++ b/gpt4all-backend/CMakeLists.txt @@ -39,10 +39,6 @@ else() message(STATUS "Interprocedural optimization support detected") endif() -if(NOT APPLE) - set(LLAMA_KOMPUTE YES) -endif() - include(llama.cpp.cmake) set(BUILD_VARIANTS default avxonly) diff --git a/gpt4all-backend/bert.cpp b/gpt4all-backend/bert.cpp index 01b348d0f5cd..342827e2da65 100644 --- a/gpt4all-backend/bert.cpp +++ b/gpt4all-backend/bert.cpp @@ -713,10 +713,16 @@ bool Bert::loadModel(const std::string &modelPath, int n_ctx, int ngl) { (void)n_ctx; (void)ngl; - d_ptr->ctx = bert_load_from_file(modelPath.c_str()); - d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); - d_ptr->modelLoaded = d_ptr->ctx != nullptr; + d_ptr->modelLoaded = false; + + auto * ctx = bert_load_from_file(modelPath.c_str()); fflush(stdout); + if (!ctx) + return false; + + d_ptr->ctx = ctx; + d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); + d_ptr->modelLoaded = true; return true; } diff --git a/gpt4all-backend/gptj.cpp b/gpt4all-backend/gptj.cpp index 40db378a4c6b..51a032f803f2 100644 --- a/gpt4all-backend/gptj.cpp +++ b/gpt4all-backend/gptj.cpp @@ -685,18 +685,21 @@ size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx, int ngl) { bool GPTJ::loadModel(const std::string &modelPath, int n_ctx, int ngl) { (void)n_ctx; (void)ngl; + d_ptr->modelLoaded = false; + std::mt19937 rng(time(NULL)); d_ptr->rng = rng; // load the model - if (!gptj_model_load(modelPath, *d_ptr->model, d_ptr->vocab)) { + bool ok = gptj_model_load(modelPath, *d_ptr->model, d_ptr->vocab); + fflush(stdout); + if (!ok) { std::cerr << "GPT-J ERROR: failed to load model from " << modelPath; return false; } d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); d_ptr->modelLoaded = true; - fflush(stdout); return true; } diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline index cd1b5a104b9d..822a9c894eb3 160000 --- a/gpt4all-backend/llama.cpp-mainline +++ b/gpt4all-backend/llama.cpp-mainline @@ -1 +1 @@ -Subproject commit cd1b5a104b9d3e211a50b9f6c261aced3bf09834 +Subproject commit 822a9c894eb3770c65f0b4a724aae34605c90029 diff --git a/gpt4all-backend/llama.cpp.cmake b/gpt4all-backend/llama.cpp.cmake index f8aa532f1760..0bb79313ae59 100644 --- a/gpt4all-backend/llama.cpp.cmake +++ b/gpt4all-backend/llama.cpp.cmake @@ -38,6 +38,12 @@ else() endif() endif() +if (APPLE) + set(LLAMA_KOMPUTE_DEFAULT OFF) +else() + set(LLAMA_KOMPUTE_DEFAULT ON) +endif() + # # Option list @@ -77,6 +83,7 @@ option(LLAMA_OPENBLAS "llama: use OpenBLAS" #option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) #option(LLAMA_CLBLAST "llama: use CLBlast" 
OFF) #option(LLAMA_METAL "llama: use Metal" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" ${LLAMA_KOMPUTE_DEFAULT}) set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels") @@ -153,6 +160,12 @@ if (LLAMA_OPENBLAS) endif() if (LLAMA_KOMPUTE) + set(LLAMA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-mainline) + if (NOT EXISTS "${LLAMA_DIR}/kompute/CMakeLists.txt") + message(FATAL_ERROR "Kompute not found") + endif() + message(STATUS "Kompute found") + add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) find_package(Vulkan COMPONENTS glslc REQUIRED) find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc) @@ -160,8 +173,6 @@ if (LLAMA_KOMPUTE) message(FATAL_ERROR "glslc not found") endif() - set(LLAMA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-mainline) - function(compile_shader) set(options) set(oneValueArgs) @@ -220,91 +231,86 @@ if (LLAMA_KOMPUTE) endforeach() endfunction() - if (EXISTS "${LLAMA_DIR}/kompute/CMakeLists.txt") - message(STATUS "Kompute found") - set(KOMPUTE_OPT_LOG_LEVEL Critical CACHE STRING "Kompute log level") - add_subdirectory(${LLAMA_DIR}/kompute) - - # Compile our shaders - compile_shader(SOURCES - kompute-shaders/op_scale.comp - kompute-shaders/op_scale_8.comp - kompute-shaders/op_add.comp - kompute-shaders/op_addrow.comp - kompute-shaders/op_mul.comp - kompute-shaders/op_silu.comp - kompute-shaders/op_relu.comp - kompute-shaders/op_gelu.comp - kompute-shaders/op_softmax.comp - kompute-shaders/op_norm.comp - kompute-shaders/op_rmsnorm.comp - kompute-shaders/op_diagmask.comp - kompute-shaders/op_mul_mat_mat_f32.comp - kompute-shaders/op_mul_mat_f16.comp - kompute-shaders/op_mul_mat_q8_0.comp - kompute-shaders/op_mul_mat_q4_0.comp - kompute-shaders/op_mul_mat_q4_1.comp - kompute-shaders/op_mul_mat_q6_k.comp - kompute-shaders/op_getrows_f16.comp - kompute-shaders/op_getrows_q4_0.comp - kompute-shaders/op_getrows_q4_1.comp - kompute-shaders/op_getrows_q6_k.comp - kompute-shaders/op_rope_f16.comp - kompute-shaders/op_rope_f32.comp - kompute-shaders/op_cpy_f16_f16.comp - kompute-shaders/op_cpy_f16_f32.comp - kompute-shaders/op_cpy_f32_f16.comp - kompute-shaders/op_cpy_f32_f32.comp - ) + set(KOMPUTE_OPT_LOG_LEVEL Critical CACHE STRING "Kompute log level") + add_subdirectory(${LLAMA_DIR}/kompute) + + # Compile our shaders + compile_shader(SOURCES + kompute-shaders/op_scale.comp + kompute-shaders/op_scale_8.comp + kompute-shaders/op_add.comp + kompute-shaders/op_addrow.comp + kompute-shaders/op_mul.comp + kompute-shaders/op_silu.comp + kompute-shaders/op_relu.comp + kompute-shaders/op_gelu.comp + kompute-shaders/op_softmax.comp + kompute-shaders/op_norm.comp + kompute-shaders/op_rmsnorm.comp + kompute-shaders/op_diagmask.comp + kompute-shaders/op_mul_mat_mat_f32.comp + kompute-shaders/op_mul_mat_f16.comp + kompute-shaders/op_mul_mat_q8_0.comp + kompute-shaders/op_mul_mat_q4_0.comp + kompute-shaders/op_mul_mat_q4_1.comp + kompute-shaders/op_mul_mat_q6_k.comp + kompute-shaders/op_getrows_f16.comp + kompute-shaders/op_getrows_q4_0.comp + kompute-shaders/op_getrows_q4_1.comp + kompute-shaders/op_getrows_q6_k.comp + kompute-shaders/op_rope_f16.comp + kompute-shaders/op_rope_f32.comp + kompute-shaders/op_cpy_f16_f16.comp + kompute-shaders/op_cpy_f16_f32.comp + kompute-shaders/op_cpy_f32_f16.comp + kompute-shaders/op_cpy_f32_f32.comp + ) - # Create a custom target for our generated shaders 
- add_custom_target(generated_shaders DEPENDS - shaderop_scale.h - shaderop_scale_8.h - shaderop_add.h - shaderop_addrow.h - shaderop_mul.h - shaderop_silu.h - shaderop_relu.h - shaderop_gelu.h - shaderop_softmax.h - shaderop_norm.h - shaderop_rmsnorm.h - shaderop_diagmask.h - shaderop_mul_mat_mat_f32.h - shaderop_mul_mat_f16.h - shaderop_mul_mat_q8_0.h - shaderop_mul_mat_q4_0.h - shaderop_mul_mat_q4_1.h - shaderop_mul_mat_q6_k.h - shaderop_getrows_f16.h - shaderop_getrows_q4_0.h - shaderop_getrows_q4_1.h - shaderop_getrows_q6_k.h - shaderop_rope_f16.h - shaderop_rope_f32.h - shaderop_cpy_f16_f16.h - shaderop_cpy_f16_f32.h - shaderop_cpy_f32_f16.h - shaderop_cpy_f32_f32.h - ) + # Create a custom target for our generated shaders + add_custom_target(generated_shaders DEPENDS + shaderop_scale.h + shaderop_scale_8.h + shaderop_add.h + shaderop_addrow.h + shaderop_mul.h + shaderop_silu.h + shaderop_relu.h + shaderop_gelu.h + shaderop_softmax.h + shaderop_norm.h + shaderop_rmsnorm.h + shaderop_diagmask.h + shaderop_mul_mat_mat_f32.h + shaderop_mul_mat_f16.h + shaderop_mul_mat_q8_0.h + shaderop_mul_mat_q4_0.h + shaderop_mul_mat_q4_1.h + shaderop_mul_mat_q6_k.h + shaderop_getrows_f16.h + shaderop_getrows_q4_0.h + shaderop_getrows_q4_1.h + shaderop_getrows_q6_k.h + shaderop_rope_f16.h + shaderop_rope_f32.h + shaderop_cpy_f16_f16.h + shaderop_cpy_f16_f32.h + shaderop_cpy_f32_f16.h + shaderop_cpy_f32_f32.h + ) - # Create a custom command that depends on the generated_shaders - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp - DEPENDS generated_shaders - COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp" - ) + # Create a custom command that depends on the generated_shaders + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp + DEPENDS generated_shaders + COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp" + ) - # Add the stamp to the main sources to ensure dependency tracking - set(GGML_SOURCES_KOMPUTE ${LLAMA_DIR}/ggml-kompute.cpp ${LLAMA_DIR}/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) - add_compile_definitions(GGML_USE_KOMPUTE) - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute) - set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) - else() - message(WARNING "Kompute not found") - endif() + # Add the stamp to the main sources to ensure dependency tracking + set(GGML_SOURCES_KOMPUTE ${LLAMA_DIR}/ggml-kompute.cpp ${LLAMA_DIR}/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) + add_compile_definitions(GGML_USE_KOMPUTE) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute) + set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) endif() if (LLAMA_ALL_WARNINGS) diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp index 0767accbde0e..5b9960fff1c1 100644 --- a/gpt4all-backend/llamamodel.cpp +++ b/gpt4all-backend/llamamodel.cpp @@ -150,6 +150,8 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl) bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) { + d_ptr->modelLoaded = false; + // clean up after previous loadModel() if (d_ptr->model) { llama_free_model(d_ptr->model); @@ -195,6 +197,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) d_ptr->model = 
llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params); if (!d_ptr->model) { + fflush(stdout); d_ptr->device = -1; std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl; return false; @@ -225,6 +228,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params); if (!d_ptr->ctx) { + fflush(stdout); std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl; llama_free_model(d_ptr->model); d_ptr->model = nullptr; @@ -240,8 +244,8 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) } #endif + fflush(stdout); d_ptr->modelLoaded = true; - fflush(stderr); return true; } @@ -428,6 +432,8 @@ std::vector LLamaModel::availableGPUDevices(size_t memoryReq free(vkDevices); return devices; } +#else + std::cerr << __func__ << ": built without Kompute\n"; #endif return {}; @@ -508,7 +514,14 @@ DLL_EXPORT bool magic_match(const char *fname) { auto * ctx = load_gguf(fname, arch); bool valid = true; - if (!(arch == "llama" || arch == "starcoder" || arch == "falcon" || arch == "mpt")) { + + static const std::vector known_arches { + "baichuan", "bloom", "codeshell", "falcon", "gpt2", "llama", "mpt", "orion", "persimmon", "phi2", "plamo", + "qwen", "qwen2", "refact", "stablelm", "starcoder" + }; + + if (std::find(known_arches.begin(), known_arches.end(), arch) == known_arches.end()) { + // not supported by this version of llama.cpp if (!(arch == "gptj" || arch == "bert")) { // we support these via other modules std::cerr << __func__ << ": unsupported model architecture: " << arch << "\n"; } diff --git a/gpt4all-bindings/csharp/Directory.Build.props b/gpt4all-bindings/csharp/Directory.Build.props index 75e32e34ae8c..8b307516ad5b 100644 --- a/gpt4all-bindings/csharp/Directory.Build.props +++ b/gpt4all-bindings/csharp/Directory.Build.props @@ -5,7 +5,7 @@ en-US - 0.6.3-alpha + 0.6.4-alpha $(VersionSuffix) $(Version)$(VersionSuffix) true diff --git a/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj b/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj index 39cc0da12cfb..8e6d325a2915 100644 --- a/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj +++ b/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj @@ -2,7 +2,7 @@ Exe - net7.0 + net8.0 enable enable true diff --git a/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj b/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj index 05995c904106..76f61f92ed59 100644 --- a/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj +++ b/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj @@ -1,7 +1,7 @@ - net7.0 + net8.0 enable false diff --git a/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj b/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj index d67b9b24ce62..af338f820804 100644 --- a/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj +++ b/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj @@ -1,10 +1,10 @@  - net6.0 enable enable true true + net8.0 diff --git a/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs b/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs index 8350a66ac1e2..938f44d8a19f 100644 --- a/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs +++ b/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs @@ -32,7 +32,7 @@ public Gpt4AllModelFactory(string? 
libraryPath = default, bool bypassLoading = t } } - private IGpt4AllModel CreateModel(string modelPath) + private Gpt4All CreateModel(string modelPath) { _logger.LogInformation("Creating model path={ModelPath}", modelPath); IntPtr error; diff --git a/gpt4all-bindings/csharp/README.md b/gpt4all-bindings/csharp/README.md index 9829a9768e99..af8d4e9a353f 100644 --- a/gpt4all-bindings/csharp/README.md +++ b/gpt4all-bindings/csharp/README.md @@ -6,7 +6,10 @@ This package contains a set of C# bindings around the `llmodel` C-API. TBD ## Installation -TBD NuGet + +Windows and Linux builds are available on NuGet: https://www.nuget.org/packages/Gpt4All + +macOS is WIP due to code signing issues, contributions are welcome. ## Project Structure ``` diff --git a/gpt4all-bindings/python/docs/gpt4all_chat.md b/gpt4all-bindings/python/docs/gpt4all_chat.md index 93dd41d0530b..96da44d7fdcd 100644 --- a/gpt4all-bindings/python/docs/gpt4all_chat.md +++ b/gpt4all-bindings/python/docs/gpt4all_chat.md @@ -61,17 +61,7 @@ The general technique this plugin uses is called [Retrieval Augmented Generation These document chunks help your LLM respond to queries with knowledge about the contents of your data. The number of chunks and the size of each chunk can be configured in the LocalDocs plugin settings tab. -LocalDocs supports the following file types: -```json -["txt", "doc", "docx", "pdf", "rtf", "odt", "html", "htm", "xls", "xlsx", "csv", "ods", "ppt", "pptx", "odp", "xml", "json", "log", "md", "org", "tex", "asc", "wks", -"wpd", "wps", "wri", "xhtml", "xht", "xslt", "yaml", "yml", "dtd", "sgml", "tsv", "strings", "resx", -"plist", "properties", "ini", "config", "bat", "sh", "ps1", "cmd", "awk", "sed", "vbs", "ics", "mht", -"mhtml", "epub", "djvu", "azw", "azw3", "mobi", "fb2", "prc", "lit", "lrf", "tcr", "pdb", "oxps", -"xps", "pages", "numbers", "key", "keynote", "abw", "zabw", "123", "wk1", "wk3", "wk4", "wk5", "wq1", -"wq2", "xlw", "xlr", "dif", "slk", "sylk", "wb1", "wb2", "wb3", "qpw", "wdb", "wks", "wku", "wr1", -"wrk", "xlk", "xlt", "xltm", "xltx", "xlsm", "xla", "xlam", "xll", "xld", "xlv", "xlw", "xlc", "xlm", -"xlt", "xln"] -``` +LocalDocs currently supports plain text files (`.txt`, `.md`, and `.rst`) and PDF files (`.pdf`). 
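To make the narrowed allow-list above concrete: a minimal Python sketch (not part of GPT4All itself; the real scan lives in `gpt4all-chat/database.cpp` and is updated later in this diff) of a folder scan restricted to the suffixes LocalDocs now accepts. The folder path used below is hypothetical.

```python
from pathlib import Path

# Mirrors the allow-list introduced in gpt4all-chat/database.cpp:
# { "txt", "pdf", "md", "rst" }. Adjust if the application's list changes.
SUPPORTED_SUFFIXES = {".txt", ".pdf", ".md", ".rst"}

def eligible_documents(folder: str) -> list[Path]:
    """Return files under `folder` that LocalDocs would consider for indexing."""
    root = Path(folder).expanduser()
    return [p for p in root.rglob("*")
            if p.is_file() and p.suffix.lower() in SUPPORTED_SUFFIXES]

if __name__ == "__main__":
    for doc in eligible_documents("~/Documents"):  # hypothetical collection folder
        print(doc)
```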
#### Troubleshooting and FAQ *My LocalDocs plugin isn't using my documents* diff --git a/gpt4all-bindings/python/gpt4all/__init__.py b/gpt4all-bindings/python/gpt4all/__init__.py index 391fab0298f8..01df38fc9347 100644 --- a/gpt4all-bindings/python/gpt4all/__init__.py +++ b/gpt4all-bindings/python/gpt4all/__init__.py @@ -1,2 +1 @@ from .gpt4all import Embed4All as Embed4All, GPT4All as GPT4All -from .pyllmodel import LLModel as LLModel diff --git a/gpt4all-bindings/python/gpt4all/pyllmodel.py b/gpt4all-bindings/python/gpt4all/_pyllmodel.py similarity index 75% rename from gpt4all-bindings/python/gpt4all/pyllmodel.py rename to gpt4all-bindings/python/gpt4all/_pyllmodel.py index f313e3054c5f..eb03a91443fe 100644 --- a/gpt4all-bindings/python/gpt4all/pyllmodel.py +++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py @@ -142,15 +142,6 @@ def empty_response_callback(token_id: int, response: str) -> bool: return True -def _create_model(model_path: bytes) -> ctypes.c_void_p: - err = ctypes.c_char_p() - model = llmodel.llmodel_model_create2(model_path, b"auto", ctypes.byref(err)) - if model is None: - s = err.value - raise ValueError("Unable to instantiate model: {'null' if s is None else s.decode()}") - return model - - # Symbol to terminate from generator class Sentinel(Enum): TERMINATING_SYMBOL = 0 @@ -161,116 +152,77 @@ class LLModel: Base class and universal wrapper for GPT4All language models built around llmodel C-API. - Attributes + Parameters ---------- - model: llmodel_model - Ctype pointer to underlying model - model_name: str - Model name + model_path : str + Path to the model. + n_ctx : int + Maximum size of context window + ngl : int + Number of GPU layers to use (Vulkan) """ - def __init__(self): - self.model = None - self.model_name = None - self.context = None - self.llmodel_lib = llmodel - + def __init__(self, model_path: str, n_ctx: int, ngl: int): + self.model_path = model_path.encode() + self.n_ctx = n_ctx + self.ngl = ngl + self.context: LLModelPromptContext | None = None self.buffer = bytearray() self.buff_expecting_cont_bytes: int = 0 - def __del__(self): - if self.model is not None: - self.llmodel_lib.llmodel_model_destroy(self.model) + # Construct a model implementation + err = ctypes.c_char_p() + model = llmodel.llmodel_model_create2(self.model_path, b"auto", ctypes.byref(err)) + if model is None: + s = err.value + raise ValueError(f"Unable to instantiate model: {'null' if s is None else s.decode()}") + self.model = model - def memory_needed(self, model_path: str, n_ctx: int, ngl: int) -> int: - self.model = None - return self._memory_needed(model_path, n_ctx, ngl) - - def _memory_needed(self, model_path: str, n_ctx: int, ngl: int) -> int: - if self.model is None: - self.model = _create_model(model_path.encode()) - return llmodel.llmodel_required_mem(self.model, model_path.encode(), n_ctx, ngl) - - def list_gpu(self, model_path: str, n_ctx: int, ngl: int) -> list[LLModelGPUDevice]: - """ - Lists available GPU devices that satisfy the model's memory requirements. - - Parameters - ---------- - model_path : str - Path to the model. - n_ctx : int - Maximum size of context window - ngl : int - Number of GPU layers to use (Vulkan) - - Returns - ------- - list - A list of LLModelGPUDevice structures representing available GPU devices. 
- """ - mem_required = self._memory_needed(model_path, n_ctx, ngl) - return self._list_gpu(mem_required) + def __del__(self): + if hasattr(self, 'model'): + llmodel.llmodel_model_destroy(self.model) def _list_gpu(self, mem_required: int) -> list[LLModelGPUDevice]: num_devices = ctypes.c_int32(0) - devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, mem_required, ctypes.byref(num_devices)) + devices_ptr = llmodel.llmodel_available_gpu_devices(self.model, mem_required, ctypes.byref(num_devices)) if not devices_ptr: raise ValueError("Unable to retrieve available GPU devices") return devices_ptr[:num_devices.value] - def init_gpu(self, model_path: str, device: str, n_ctx: int, ngl: int): - mem_required = self._memory_needed(model_path, n_ctx, ngl) + def init_gpu(self, device: str): + mem_required = llmodel.llmodel_required_mem(self.model, self.model_path, self.n_ctx, self.ngl) + + if llmodel.llmodel_gpu_init_gpu_device_by_string(self.model, mem_required, device.encode()): + return - success = self.llmodel_lib.llmodel_gpu_init_gpu_device_by_string(self.model, mem_required, device.encode()) - if not success: - # Retrieve all GPUs without considering memory requirements. - num_devices = ctypes.c_int32(0) - all_devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, 0, ctypes.byref(num_devices)) - if not all_devices_ptr: - raise ValueError("Unable to retrieve list of all GPU devices") - all_gpus = [d.name.decode() for d in all_devices_ptr[:num_devices.value]] + # Retrieve all GPUs without considering memory requirements. + num_devices = ctypes.c_int32(0) + all_devices_ptr = llmodel.llmodel_available_gpu_devices(self.model, 0, ctypes.byref(num_devices)) + if not all_devices_ptr: + raise ValueError("Unable to retrieve list of all GPU devices") + all_gpus = [d.name.decode() for d in all_devices_ptr[:num_devices.value]] - # Retrieve GPUs that meet the memory requirements using list_gpu - available_gpus = [device.name.decode() for device in self._list_gpu(mem_required)] + # Retrieve GPUs that meet the memory requirements using list_gpu + available_gpus = [device.name.decode() for device in self._list_gpu(mem_required)] - # Identify GPUs that are unavailable due to insufficient memory or features - unavailable_gpus = set(all_gpus).difference(available_gpus) + # Identify GPUs that are unavailable due to insufficient memory or features + unavailable_gpus = set(all_gpus).difference(available_gpus) - # Formulate the error message - error_msg = "Unable to initialize model on GPU: '{}'.".format(device) - error_msg += "\nAvailable GPUs: {}.".format(available_gpus) - error_msg += "\nUnavailable GPUs due to insufficient memory or features: {}.".format(unavailable_gpus) - raise ValueError(error_msg) + # Formulate the error message + error_msg = "Unable to initialize model on GPU: '{}'.".format(device) + error_msg += "\nAvailable GPUs: {}.".format(available_gpus) + error_msg += "\nUnavailable GPUs due to insufficient memory or features: {}.".format(unavailable_gpus) + raise ValueError(error_msg) - def load_model(self, model_path: str, n_ctx: int, ngl: int) -> bool: + def load_model(self) -> bool: """ Load model from a file. 
- Parameters - ---------- - model_path : str - Model filepath - n_ctx : int - Maximum size of context window - ngl : int - Number of GPU layers to use (Vulkan) - Returns ------- True if model loaded successfully, False otherwise """ - self.model = _create_model(model_path.encode()) - - llmodel.llmodel_loadModel(self.model, model_path.encode(), n_ctx, ngl) - - filename = os.path.basename(model_path) - self.model_name = os.path.splitext(filename)[0] - - if llmodel.llmodel_isModelLoaded(self.model): - return True - else: - return False + return llmodel.llmodel_loadModel(self.model, self.model_path, self.n_ctx, self.ngl) def set_thread_count(self, n_threads): if not llmodel.llmodel_isModelLoaded(self.model): @@ -295,7 +247,7 @@ def _set_context( reset_context: bool = False, ): if self.context is None: - self.context = LLModelPromptContext( + context = LLModelPromptContext( logits_size=0, tokens_size=0, n_past=0, @@ -309,8 +261,11 @@ def _set_context( repeat_last_n=repeat_last_n, context_erase=context_erase, ) - elif reset_context: - self.context.n_past = 0 + self.context = context + else: + context = self.context + if reset_context: + self.context.n_past = 0 self.context.n_predict = n_predict self.context.top_k = top_k diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py index fcd0a91d0bab..02fa1c806bb9 100644 --- a/gpt4all-bindings/python/gpt4all/gpt4all.py +++ b/gpt4all-bindings/python/gpt4all/gpt4all.py @@ -15,7 +15,7 @@ from tqdm import tqdm from urllib3.exceptions import IncompleteRead, ProtocolError -from . import pyllmodel +from . import _pyllmodel # TODO: move to config DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\") @@ -97,12 +97,12 @@ def __init__( verbose: If True, print debug messages. """ self.model_type = model_type - self.model = pyllmodel.LLModel() # Retrieve model and download if allowed self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose) + self.model = _pyllmodel.LLModel(self.config["path"], n_ctx, ngl) if device is not None and device != "cpu": - self.model.init_gpu(model_path=self.config["path"], device=device, n_ctx=n_ctx, ngl=ngl) - self.model.load_model(self.config["path"], n_ctx, ngl) + self.model.init_gpu(device) + self.model.load_model() # Set n_threads if n_threads is not None: self.model.set_thread_count(n_threads) @@ -292,7 +292,7 @@ def generate( n_batch: int = 8, n_predict: Optional[int] = None, streaming: bool = False, - callback: pyllmodel.ResponseCallbackType = pyllmodel.empty_response_callback, + callback: _pyllmodel.ResponseCallbackType = _pyllmodel.empty_response_callback, ) -> Union[str, Iterable[str]]: """ Generate outputs from any GPT4All model. 
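With `pyllmodel` renamed to `_pyllmodel` and the `LLModel` re-export dropped from `__init__.py`, the low-level wrapper is now private API; user code should go through the `GPT4All` class only. A minimal usage sketch of the public surface this refactor keeps stable (the model file name is taken from this repository's model list and is downloaded on first use if downloads are allowed):

```python
from gpt4all import GPT4All  # public entry point; gpt4all._pyllmodel is internal

# n_ctx and device map onto the new LLModel(model_path, n_ctx, ngl) constructor.
model = GPT4All("mistral-7b-openorca.Q4_0.gguf", device="cpu", n_ctx=2048)

# streaming=True yields tokens as they are produced; omit it to get one string.
for token in model.generate("Name three uses of a local LLM.", max_tokens=128, streaming=True):
    print(token, end="", flush=True)
print()
```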
@@ -350,9 +350,9 @@ def generate( output_collector = self.current_chat_session def _callback_wrapper( - callback: pyllmodel.ResponseCallbackType, + callback: _pyllmodel.ResponseCallbackType, output_collector: List[MessageType], - ) -> pyllmodel.ResponseCallbackType: + ) -> _pyllmodel.ResponseCallbackType: def _callback(token_id: int, response: str) -> bool: nonlocal callback, output_collector diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py index 7ff2a1c650dd..c76f1b49254b 100644 --- a/gpt4all-bindings/python/setup.py +++ b/gpt4all-bindings/python/setup.py @@ -1,5 +1,6 @@ from setuptools import setup, find_packages import os +import pathlib import platform import shutil @@ -59,13 +60,25 @@ def copy_prebuilt_C_lib(src_dir, dest_dir, dest_build_dir): DEST_CLIB_DIRECTORY, DEST_CLIB_BUILD_DIRECTORY) + +def get_long_description(): + with open(pathlib.Path(__file__).parent / "README.md", encoding="utf-8") as fp: + return fp.read() + + setup( name=package_name, - version="2.1.0", + version="2.2.1.post1", description="Python bindings for GPT4All", + long_description=get_long_description(), + long_description_content_type="text/markdown", author="Nomic and the Open Source Community", author_email="support@nomic.ai", - url="https://pypi.org/project/gpt4all/", + url="https://gpt4all.io/", + project_urls={ + "Documentation": "https://docs.gpt4all.io/gpt4all_python.html", + "Source code": "https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python", + }, classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index 301f6f3c110c..ee72f8463e42 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -17,8 +17,8 @@ if(APPLE) endif() set(APP_VERSION_MAJOR 2) -set(APP_VERSION_MINOR 6) -set(APP_VERSION_PATCH 3) +set(APP_VERSION_MINOR 7) +set(APP_VERSION_PATCH 1) set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}") # Include the binary directory for the generated header file diff --git a/gpt4all-chat/chat.h b/gpt4all-chat/chat.h index 5d72222dd32b..ae6910bf8f2a 100644 --- a/gpt4all-chat/chat.h +++ b/gpt4all-chat/chat.h @@ -8,6 +8,7 @@ #include "chatllm.h" #include "chatmodel.h" #include "database.h" +#include "localdocsmodel.h" class Chat : public QObject { diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index bb399cbb17dc..844942e44399 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -1,6 +1,7 @@ #include "chatllm.h" #include "chat.h" #include "chatgpt.h" +#include "localdocs.h" #include "modellist.h" #include "network.h" #include "mysettings.h" diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h index 367915f6c334..d6af4cb0c427 100644 --- a/gpt4all-chat/chatllm.h +++ b/gpt4all-chat/chatllm.h @@ -5,7 +5,7 @@ #include #include -#include "localdocs.h" +#include "database.h" #include "modellist.h" #include "../gpt4all-backend/llmodel.h" diff --git a/gpt4all-chat/database.cpp b/gpt4all-chat/database.cpp index 3fdde3acd9c5..f572e0480a9f 100644 --- a/gpt4all-chat/database.cpp +++ b/gpt4all-chat/database.cpp @@ -890,15 +890,7 @@ void Database::scanDocuments(int folder_id, const QString &folder_path) qDebug() << "scanning folder for documents" << folder_path; #endif - static const QList extensions { "txt", "doc", "docx", "pdf", "rtf", "odt", "html", "htm", - "xls", "xlsx", "csv", "ods", "ppt", "pptx", "odp", "xml", "json", "log", "md", "org", "tex", "asc", "wks", - 
"wpd", "wps", "wri", "xhtml", "xht", "xslt", "yaml", "yml", "dtd", "sgml", "tsv", "strings", "resx", - "plist", "properties", "ini", "config", "bat", "sh", "ps1", "cmd", "awk", "sed", "vbs", "ics", "mht", - "mhtml", "epub", "djvu", "azw", "azw3", "mobi", "fb2", "prc", "lit", "lrf", "tcr", "pdb", "oxps", - "xps", "pages", "numbers", "key", "keynote", "abw", "zabw", "123", "wk1", "wk3", "wk4", "wk5", "wq1", - "wq2", "xlw", "xlr", "dif", "slk", "sylk", "wb1", "wb2", "wb3", "qpw", "wdb", "wks", "wku", "wr1", - "wrk", "xlk", "xlt", "xltm", "xltx", "xlsm", "xla", "xlam", "xll", "xld", "xlv", "xlw", "xlc", "xlm", - "xlt", "xln" }; + static const QList extensions { "txt", "pdf", "md", "rst" }; QDir dir(folder_path); Q_ASSERT(dir.exists()); diff --git a/gpt4all-chat/llm.cpp b/gpt4all-chat/llm.cpp index e5797c1b6d8f..0f454908c6ad 100644 --- a/gpt4all-chat/llm.cpp +++ b/gpt4all-chat/llm.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #ifndef GPT4ALL_OFFLINE_INSTALLER @@ -39,6 +40,10 @@ LLM::LLM() #endif m_compatHardware = minimal; + + QNetworkInformation::loadDefaultBackend(); + connect(QNetworkInformation::instance(), &QNetworkInformation::reachabilityChanged, + this, &LLM::isNetworkOnlineChanged); } bool LLM::hasSettingsAccess() const @@ -100,3 +105,11 @@ QString LLM::systemTotalRAMInGBString() const { return QString::fromStdString(getSystemTotalRAMInGBString()); } + +bool LLM::isNetworkOnline() const +{ + if (!QNetworkInformation::instance()) + return false; + + return QNetworkInformation::instance()->reachability() == QNetworkInformation::Reachability::Online; +} diff --git a/gpt4all-chat/llm.h b/gpt4all-chat/llm.h index 067ee671b41d..55367742202a 100644 --- a/gpt4all-chat/llm.h +++ b/gpt4all-chat/llm.h @@ -6,6 +6,8 @@ class LLM : public QObject { Q_OBJECT + Q_PROPERTY(bool isNetworkOnline READ isNetworkOnline NOTIFY isNetworkOnlineChanged) + public: static LLM *globalInstance(); @@ -17,10 +19,10 @@ class LLM : public QObject Q_INVOKABLE static bool fileExists(const QString &path); Q_INVOKABLE qint64 systemTotalRAMInGB() const; Q_INVOKABLE QString systemTotalRAMInGBString() const; + Q_INVOKABLE bool isNetworkOnline() const; Q_SIGNALS: - void chatListModelChanged(); - void modelListChanged(); + void isNetworkOnlineChanged(); private: bool m_compatHardware; diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index 3aafa1a4adf5..72fbc3b8e19a 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -16,8 +16,8 @@ Window { id: window width: 1920 height: 1080 - minimumWidth: 1280 - minimumHeight: 720 + minimumWidth: 720 + minimumHeight: 480 visible: true title: qsTr("GPT4All v") + Qt.application.version @@ -369,7 +369,7 @@ Window { highlighted: comboBox.highlightedIndex === index } Accessible.role: Accessible.ComboBox - Accessible.name: qsTr("List of available models") + Accessible.name: comboBox.currentModelName Accessible.description: qsTr("The top item is the current model") onActivated: function (index) { currentChat.stopGenerating() @@ -869,6 +869,7 @@ Window { MyButton { id: downloadButton + visible: LLM.isNetworkOnline Layout.alignment: Qt.AlignHCenter Layout.topMargin: 40 text: qsTr("Download models") @@ -904,10 +905,7 @@ Window { model: chatModel ScrollBar.vertical: ScrollBar { - parent: listView.parent - anchors.top: listView.top - anchors.left: listView.right - anchors.bottom: listView.bottom + policy: ScrollBar.AsNeeded } Accessible.role: Accessible.List @@ -960,7 +958,7 @@ Window { } Accessible.role: Accessible.Paragraph - Accessible.name: name + 
Accessible.name: text Accessible.description: name === qsTr("Response: ") ? "The response by the model" : "The prompt by the user" topPadding: 20 diff --git a/gpt4all-chat/metadata/models2.json b/gpt4all-chat/metadata/models2.json index 98bc4440ffa4..95ef5ad84377 100644 --- a/gpt4all-chat/metadata/models2.json +++ b/gpt4all-chat/metadata/models2.json @@ -10,9 +10,10 @@ "parameters": "7 billion", "quant": "q4_0", "type": "Mistral", - "systemPrompt": " ", "description": "Best overall fast chat model
  • Fast responses
  • Chat based model
  • Trained by Mistral AI
  • Finetuned on OpenOrca dataset curated via Nomic Atlas
  • Licensed for commercial use
", - "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf" + "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf", + "promptTemplate": "<|im_start|>user\n%1<|im_end|><|im_start|>assistant\n", + "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>" }, { "order": "b", diff --git a/gpt4all-chat/metadata/release.json b/gpt4all-chat/metadata/release.json index 7c1a5bf40ffe..bd5b9b6836db 100644 --- a/gpt4all-chat/metadata/release.json +++ b/gpt4all-chat/metadata/release.json @@ -657,6 +657,32 @@ * Adam Treat (Nomic AI) * Karthik Nair * Community (beta testers, bug reporters, bindings authors) +" + }, + { + "version": "2.7.0", + "notes": +" +* Add support for twelve new model architectures +* Including Baichuan, BLOOM, CodeShell, GPT-2, Orion, Persimmon, Phi and Phi-2, Plamo, Qwen, Qwen2, Refact, and StableLM +* Fix for progress bar colors on legacy theme +* Fix sizing for model download dialog elements +* Fix dialog sizes to use more screen realestate where available +* Fix for vram leak when model loading fails +* Fix for making the collection dialog progress bar more readable +* Fix for smaller minimum size for main screen +* Fix for mistral crash +* Fix for mistral openorca prompt template to ChatLM +* Fix for excluding non-text documents from localdoc scanning +* Fix for scrollbar missing on main conversation +* Fix accessibility issues for screen readers +* Fix for not showing the download button when not online +", + "contributors": +" +* Jared Van Bortel (Nomic AI) +* Adam Treat (Nomic AI) +* Community (beta testers, bug reporters, bindings authors) " } ] diff --git a/gpt4all-chat/qml/ChatDrawer.qml b/gpt4all-chat/qml/ChatDrawer.qml index fbc604426903..2c4350b0104b 100644 --- a/gpt4all-chat/qml/ChatDrawer.qml +++ b/gpt4all-chat/qml/ChatDrawer.qml @@ -131,7 +131,7 @@ Drawer { } } Accessible.role: Accessible.Button - Accessible.name: qsTr("Select the current chat") + Accessible.name: text Accessible.description: qsTr("Select the current chat or edit the chat when in edit mode") } Row { diff --git a/gpt4all-chat/qml/CollectionsDialog.qml b/gpt4all-chat/qml/CollectionsDialog.qml index 2374fa3de701..c2ad7d20c6e8 100644 --- a/gpt4all-chat/qml/CollectionsDialog.qml +++ b/gpt4all-chat/qml/CollectionsDialog.qml @@ -121,7 +121,7 @@ MyDialog { } Label { id: speedLabel - color: theme.textColor + color: theme.progressText visible: model.indexing || model.currentEmbeddingsToIndex !== model.totalEmbeddingsToIndex anchors.verticalCenter: itemProgressBar.verticalCenter anchors.left: itemProgressBar.left diff --git a/gpt4all-chat/qml/MyDialog.qml b/gpt4all-chat/qml/MyDialog.qml index 4174bad12a80..37d07bda1f09 100644 --- a/gpt4all-chat/qml/MyDialog.qml +++ b/gpt4all-chat/qml/MyDialog.qml @@ -19,6 +19,7 @@ Dialog { Rectangle { id: closeBackground + visible: myCloseButton.visible z: 299 anchors.centerIn: myCloseButton width: myCloseButton.width + 10 diff --git a/gpt4all-chat/qml/Theme.qml b/gpt4all-chat/qml/Theme.qml index 042d4ad39894..49f8343cbc82 100644 --- a/gpt4all-chat/qml/Theme.qml +++ b/gpt4all-chat/qml/Theme.qml @@ -222,6 +222,17 @@ QtObject { } } + property color progressText: { + switch (MySettings.chatTheme) { + case "LegacyDark": + return "#ffffff"; + case "Dark": + return "#000000"; + default: + return "#000000"; + } + } + property color checkboxBorder: { switch (MySettings.chatTheme) { case "LegacyDark":