diff --git a/.circleci/config.yml b/.circleci/config.yml index 56d8867120f7..2afc1d66399a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -12,9 +12,9 @@ workflows: config-path: .circleci/continue_config.yml mapping: | .circleci/.* run-all-workflows true + gpt4all-backend/.* run-all-workflows true gpt4all-bindings/python/.* run-python-workflow true gpt4all-bindings/typescript/.* run-ts-workflow true gpt4all-bindings/csharp/.* run-csharp-workflow true - gpt4all-backend/.* run-chat-workflow true gpt4all-chat/.* run-chat-workflow true .* run-default-workflow true diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index 5f72c6137c79..d9be352a0acd 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -235,10 +235,8 @@ jobs: name: Build command: | export CMAKE_PREFIX_PATH=~/Qt/6.5.1/gcc_64/lib/cmake - mkdir build - cd build - ~/Qt/Tools/CMake/bin/cmake -DCMAKE_BUILD_TYPE=Release -S ../gpt4all-chat -B . - ~/Qt/Tools/CMake/bin/cmake --build . --target all + ~/Qt/Tools/CMake/bin/cmake -DCMAKE_BUILD_TYPE=Release -S gpt4all-chat -B build + ~/Qt/Tools/CMake/bin/cmake --build build --target all build-gpt4all-chat-windows: machine: @@ -291,17 +289,15 @@ jobs: $Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\include" $Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\ATLMFC\include" $Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1" - mkdir build - cd build & "C:\Qt\Tools\CMake_64\bin\cmake.exe" ` "-DCMAKE_GENERATOR:STRING=Ninja" ` "-DCMAKE_BUILD_TYPE=Release" ` "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_64" ` "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" ` "-DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON" ` - "-S ..\gpt4all-chat" ` - "-B ." - & "C:\Qt\Tools\Ninja\ninja.exe" + "-S gpt4all-chat" ` + "-B build" + & "C:\Qt\Tools\Ninja\ninja.exe" -C build build-gpt4all-chat-macos: macos: @@ -332,17 +328,15 @@ jobs: - run: name: Build command: | - mkdir build - cd build ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake \ -DCMAKE_GENERATOR:STRING=Ninja \ -DBUILD_UNIVERSAL=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_PREFIX_PATH:PATH=~/Qt/6.5.1/macos/lib/cmake/Qt6 \ -DCMAKE_MAKE_PROGRAM:FILEPATH=~/Qt/Tools/Ninja/ninja \ - -S ../gpt4all-chat \ - -B . - ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake --build . --target all + -S gpt4all-chat \ + -B build + ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake --build build --target all build-ts-docs: docker: - image: cimg/base:stable @@ -407,13 +401,10 @@ jobs: - run: name: Build C library command: | - git submodule init - git submodule update + git submodule update --init --recursive cd gpt4all-backend - mkdir build - cd build - cmake .. - cmake --build . --parallel + cmake -B build + cmake --build build --parallel - run: name: Build wheel command: | @@ -440,13 +431,10 @@ jobs: - run: name: Build C library command: | - git submodule init - git submodule update + git submodule update --init # don't use --recursive because macOS doesn't use Kompute cd gpt4all-backend - mkdir build - cd build - cmake .. -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" - cmake --build . 
--parallel + cmake -B build -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64" + cmake --build build --parallel - run: name: Build wheel command: | @@ -482,16 +470,13 @@ jobs: - run: name: Build C library command: | - git submodule init - git submodule update + git submodule update --init --recursive cd gpt4all-backend - mkdir build - cd build $Env:Path += ";C:\ProgramData\mingw64\mingw64\bin" $Env:Path += ";C:\VulkanSDK\1.3.261.1\bin" $Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1" - cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF - cmake --build . --parallel + cmake -G "MinGW Makefiles" -B build -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF + cmake --build build --parallel - run: name: Build wheel # TODO: As part of this task, we need to move mingw64 binaries into package. @@ -679,7 +664,7 @@ jobs: build-csharp-linux: docker: - - image: mcr.microsoft.com/dotnet/sdk:7.0-jammy # Ubuntu 22.04 + - image: mcr.microsoft.com/dotnet/sdk:8.0 steps: - checkout - attach_workspace: @@ -735,6 +720,10 @@ jobs: - gpt4all-csharp-nuget-packages-win - attach_workspace: at: C:\Users\circleci\workspace + - run: + name: "Install .NET" + command: | + choco install -y dotnet-8.0-sdk - run: name: "Prepare Native Libs" command: | @@ -782,7 +771,8 @@ jobs: - run: name: Install dependencies command: | - brew install --cask dotnet-sdk + brew tap isen-ng/dotnet-sdk-versions + brew install --cask dotnet-sdk8-0-100 - attach_workspace: at: /tmp/workspace - run: @@ -824,7 +814,7 @@ jobs: store-and-upload-nupkgs: docker: - - image: mcr.microsoft.com/dotnet/sdk:6.0-jammy # Ubuntu 22.04 + - image: mcr.microsoft.com/dotnet/sdk:8.0 steps: - attach_workspace: at: /tmp/workspace @@ -840,9 +830,9 @@ jobs: cp /tmp/workspace/runtimes/linux-x64/*.so runtimes/linux-x64/native/ mkdir -p runtimes/win-x64/native cp /tmp/workspace/runtimes/win-x64/*.dll runtimes/win-x64/native/ - mkdir -p runtimes/osx/native - cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/ - cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/ + #mkdir -p runtimes/osx/native + #cp /tmp/workspace/runtimes/osx-x64/*.dylib runtimes/osx/native/ + #cp /tmp/workspace/runtimes/osx-x64/*.metal runtimes/osx/native/ dotnet pack ./Gpt4All/Gpt4All.csproj -p:IncludeSymbols=true -p:SymbolPackageFormat=snupkg -c Release dotnet nuget push ./Gpt4All/bin/Release/Gpt4All.*.nupkg -s $NUGET_URL -k $NUGET_TOKEN --skip-duplicate - store_artifacts: @@ -1209,4 +1199,4 @@ workflows: - nuget-hold - build-csharp-windows - build-csharp-linux - - build-csharp-macos + #- build-csharp-macos diff --git a/.github/ISSUE_TEMPLATE/bindings-bug.md b/.github/ISSUE_TEMPLATE/bindings-bug.md new file mode 100644 index 000000000000..cbf0d49dd51b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bindings-bug.md @@ -0,0 +1,35 @@ +--- +name: "\U0001F6E0 Bindings Bug Report" +about: A bug report for the GPT4All Bindings +labels: ["bindings", "bug-unconfirmed"] +--- + + + +### Bug Report + + + +### Example Code + + + +### Steps to Reproduce + + + +1. +2. +3. + +### Expected Behavior + + + +### Your Environment + +- Bindings version (e.g. 
"Version" from `pip show gpt4all`): +- Operating System: +- Chat model used (if applicable): + + diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml deleted file mode 100644 index 4e446ac58225..000000000000 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: "\U0001F41B Bug Report" -description: Submit a bug report to help us improve GPT4All -labels: ["02 Bug Report"] -body: - - type: markdown - attributes: - value: > - Thank you for taking the time to file a bug report. Before creating a new - issue, please make sure to take a few moments to check the issue tracker - for existing issues about the bug. - - - type: textarea - id: system-info - attributes: - label: System Info - description: Please share your system info with us. - placeholder: GPT4All version, platform, python version, etc... - validations: - required: true - - - type: checkboxes - id: information-scripts-examples - attributes: - label: Information - description: "The problem arises when using:" - options: - - label: "The official example notebooks/scripts" - - label: "My own modified scripts" - - - type: textarea - id: reproduction - validations: - required: true - attributes: - label: Reproduction - description: | - Please provide a [code sample](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the problem you ran into. It can be a Colab link or just a code snippet. - If you have code snippets, error messages, stack traces please provide them here as well. - Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting - Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. - - placeholder: | - Steps to reproduce the behavior: - - 1. - 2. - 3. - - - type: textarea - id: expected-behavior - validations: - required: true - attributes: - label: Expected behavior - description: "A clear and concise description of what you would expect to happen." diff --git a/.github/ISSUE_TEMPLATE/chat-bug.md b/.github/ISSUE_TEMPLATE/chat-bug.md new file mode 100644 index 000000000000..45f3b4099210 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/chat-bug.md @@ -0,0 +1,31 @@ +--- +name: "\U0001F4AC GPT4All Bug Report" +about: A bug report for GPT4All Chat +labels: ["chat", "bug-unconfirmed"] +--- + + + +### Bug Report + + + +### Steps to Reproduce + + + +1. +2. +3. + +### Expected Behavior + + + +### Your Environment + +- GPT4All version: +- Operating System: +- Chat model used (if applicable): + + diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md new file mode 100644 index 000000000000..062c37da3668 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,9 @@ +--- +name: "\U0001F4C4 Documentation" +about: An issue related to the GPT4All documentation +labels: ["documentation"] +--- + +### Documentation + + diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml deleted file mode 100644 index 0b8626f47d93..000000000000 --- a/.github/ISSUE_TEMPLATE/documentation.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Documentation -description: Report an issue related to the GPT4All documentation. 
-title: "DOC: " -labels: [03 - Documentation] - -body: -- type: textarea - attributes: - label: "Issue with current documentation:" - description: > - Please make sure to leave a reference to the document/code you're - referring to. - -- type: textarea - attributes: - label: "Idea or request for content:" - description: > - Please describe as clearly as possible what topics you think are missing - from the current documentation. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 000000000000..5d6f2ee84e22 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,10 @@ +--- +name: "\U0001F680 Feature Request" +about: Submit a proposal/request for a new GPT4All feature +title: "[Feature] Feature request title..." +labels: ["enhancement"] +--- + +### Feature Request + + diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml deleted file mode 100644 index 92282dbdb442..000000000000 --- a/.github/ISSUE_TEMPLATE/feature-request.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: "\U0001F680 Feature Request" -description: Submit a proposal/request for a new GPT4All feature -labels: ["02 Feature Request"] -body: - - type: textarea - id: feature-request - validations: - required: true - attributes: - label: Feature request - description: | - A clear and concise description of the feature proposal. Please provide links to any relevant GitHub repos, papers, or other resources if relevant. - - - type: textarea - id: motivation - validations: - required: true - attributes: - label: Motivation - description: | - Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. - - - type: textarea - id: contribution - validations: - required: true - attributes: - label: Your contribution - description: | - Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/nomic-ai/gpt4all/blob/main/CONTRIBUTING.md) \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/other-bug.md b/.github/ISSUE_TEMPLATE/other-bug.md new file mode 100644 index 000000000000..de161bd78bf8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/other-bug.md @@ -0,0 +1,32 @@ +--- +name: "\U0001F41B Other Bug Report" +about: A bug in another component of GPT4All +labels: ["bug-unconfirmed"] +--- + + + +### Bug Report + + + +### Steps to Reproduce + + + +1. +2. +3. + +### Expected Behavior + + + +### Your Environment + +- GPT4All version (if applicable): +- Operating System: +- Chat model used (if applicable): + + + diff --git a/.github/ISSUE_TEMPLATE/other.yml b/.github/ISSUE_TEMPLATE/other.yml deleted file mode 100644 index c0068f0f1e0a..000000000000 --- a/.github/ISSUE_TEMPLATE/other.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Other Issue -description: Raise an issue that wouldn't be covered by the other templates. -title: "Issue: " -labels: [04 - Other] - -body: - - type: textarea - attributes: - label: "Issue you'd like to raise." - description: > - Please describe the issue you'd like to raise as clearly as possible. - Make sure to include any relevant links or references. - - - type: textarea - attributes: - label: "Suggestion:" - description: > - Please outline a suggestion to improve the issue here. 
\ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 0ada233f238a..03751865c0a5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "llama.cpp-mainline"] path = gpt4all-backend/llama.cpp-mainline url = https://github.com/nomic-ai/llama.cpp.git - branch = gguf + branch = master diff --git a/gpt4all-api/README.md b/gpt4all-api/README.md index 577bbd3ec041..5025e0411d31 100644 --- a/gpt4all-api/README.md +++ b/gpt4all-api/README.md @@ -43,7 +43,7 @@ Run ```bash docker compose up --build ``` -and edit files in the `api` directory. The api will hot-reload on changes. +and edit files in the `app` directory. The api will hot-reload on changes. You can run the unit tests with diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt index 39152a2e0450..f20404e30cc1 100644 --- a/gpt4all-backend/CMakeLists.txt +++ b/gpt4all-backend/CMakeLists.txt @@ -39,10 +39,6 @@ else() message(STATUS "Interprocedural optimization support detected") endif() -if(NOT APPLE) - set(LLAMA_KOMPUTE YES) -endif() - include(llama.cpp.cmake) set(BUILD_VARIANTS default avxonly) diff --git a/gpt4all-backend/bert.cpp b/gpt4all-backend/bert.cpp index 01b348d0f5cd..342827e2da65 100644 --- a/gpt4all-backend/bert.cpp +++ b/gpt4all-backend/bert.cpp @@ -713,10 +713,16 @@ bool Bert::loadModel(const std::string &modelPath, int n_ctx, int ngl) { (void)n_ctx; (void)ngl; - d_ptr->ctx = bert_load_from_file(modelPath.c_str()); - d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); - d_ptr->modelLoaded = d_ptr->ctx != nullptr; + d_ptr->modelLoaded = false; + + auto * ctx = bert_load_from_file(modelPath.c_str()); fflush(stdout); + if (!ctx) + return false; + + d_ptr->ctx = ctx; + d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); + d_ptr->modelLoaded = true; return true; } diff --git a/gpt4all-backend/gptj.cpp b/gpt4all-backend/gptj.cpp index 40db378a4c6b..51a032f803f2 100644 --- a/gpt4all-backend/gptj.cpp +++ b/gpt4all-backend/gptj.cpp @@ -685,18 +685,21 @@ size_t GPTJ::requiredMem(const std::string &modelPath, int n_ctx, int ngl) { bool GPTJ::loadModel(const std::string &modelPath, int n_ctx, int ngl) { (void)n_ctx; (void)ngl; + d_ptr->modelLoaded = false; + std::mt19937 rng(time(NULL)); d_ptr->rng = rng; // load the model - if (!gptj_model_load(modelPath, *d_ptr->model, d_ptr->vocab)) { + bool ok = gptj_model_load(modelPath, *d_ptr->model, d_ptr->vocab); + fflush(stdout); + if (!ok) { std::cerr << "GPT-J ERROR: failed to load model from " << modelPath; return false; } d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); d_ptr->modelLoaded = true; - fflush(stdout); return true; } diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline index cd1b5a104b9d..822a9c894eb3 160000 --- a/gpt4all-backend/llama.cpp-mainline +++ b/gpt4all-backend/llama.cpp-mainline @@ -1 +1 @@ -Subproject commit cd1b5a104b9d3e211a50b9f6c261aced3bf09834 +Subproject commit 822a9c894eb3770c65f0b4a724aae34605c90029 diff --git a/gpt4all-backend/llama.cpp.cmake b/gpt4all-backend/llama.cpp.cmake index f8aa532f1760..0bb79313ae59 100644 --- a/gpt4all-backend/llama.cpp.cmake +++ b/gpt4all-backend/llama.cpp.cmake @@ -38,6 +38,12 @@ else() endif() endif() +if (APPLE) + set(LLAMA_KOMPUTE_DEFAULT OFF) +else() + set(LLAMA_KOMPUTE_DEFAULT ON) +endif() + # # Option list @@ -77,6 +83,7 @@ option(LLAMA_OPENBLAS "llama: use OpenBLAS" #option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) #option(LLAMA_CLBLAST "llama: use CLBlast" 
OFF) #option(LLAMA_METAL "llama: use Metal" OFF) +option(LLAMA_KOMPUTE "llama: use Kompute" ${LLAMA_KOMPUTE_DEFAULT}) set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels") @@ -153,6 +160,12 @@ if (LLAMA_OPENBLAS) endif() if (LLAMA_KOMPUTE) + set(LLAMA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-mainline) + if (NOT EXISTS "${LLAMA_DIR}/kompute/CMakeLists.txt") + message(FATAL_ERROR "Kompute not found") + endif() + message(STATUS "Kompute found") + add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) find_package(Vulkan COMPONENTS glslc REQUIRED) find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc) @@ -160,8 +173,6 @@ if (LLAMA_KOMPUTE) message(FATAL_ERROR "glslc not found") endif() - set(LLAMA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp-mainline) - function(compile_shader) set(options) set(oneValueArgs) @@ -220,91 +231,86 @@ if (LLAMA_KOMPUTE) endforeach() endfunction() - if (EXISTS "${LLAMA_DIR}/kompute/CMakeLists.txt") - message(STATUS "Kompute found") - set(KOMPUTE_OPT_LOG_LEVEL Critical CACHE STRING "Kompute log level") - add_subdirectory(${LLAMA_DIR}/kompute) - - # Compile our shaders - compile_shader(SOURCES - kompute-shaders/op_scale.comp - kompute-shaders/op_scale_8.comp - kompute-shaders/op_add.comp - kompute-shaders/op_addrow.comp - kompute-shaders/op_mul.comp - kompute-shaders/op_silu.comp - kompute-shaders/op_relu.comp - kompute-shaders/op_gelu.comp - kompute-shaders/op_softmax.comp - kompute-shaders/op_norm.comp - kompute-shaders/op_rmsnorm.comp - kompute-shaders/op_diagmask.comp - kompute-shaders/op_mul_mat_mat_f32.comp - kompute-shaders/op_mul_mat_f16.comp - kompute-shaders/op_mul_mat_q8_0.comp - kompute-shaders/op_mul_mat_q4_0.comp - kompute-shaders/op_mul_mat_q4_1.comp - kompute-shaders/op_mul_mat_q6_k.comp - kompute-shaders/op_getrows_f16.comp - kompute-shaders/op_getrows_q4_0.comp - kompute-shaders/op_getrows_q4_1.comp - kompute-shaders/op_getrows_q6_k.comp - kompute-shaders/op_rope_f16.comp - kompute-shaders/op_rope_f32.comp - kompute-shaders/op_cpy_f16_f16.comp - kompute-shaders/op_cpy_f16_f32.comp - kompute-shaders/op_cpy_f32_f16.comp - kompute-shaders/op_cpy_f32_f32.comp - ) + set(KOMPUTE_OPT_LOG_LEVEL Critical CACHE STRING "Kompute log level") + add_subdirectory(${LLAMA_DIR}/kompute) + + # Compile our shaders + compile_shader(SOURCES + kompute-shaders/op_scale.comp + kompute-shaders/op_scale_8.comp + kompute-shaders/op_add.comp + kompute-shaders/op_addrow.comp + kompute-shaders/op_mul.comp + kompute-shaders/op_silu.comp + kompute-shaders/op_relu.comp + kompute-shaders/op_gelu.comp + kompute-shaders/op_softmax.comp + kompute-shaders/op_norm.comp + kompute-shaders/op_rmsnorm.comp + kompute-shaders/op_diagmask.comp + kompute-shaders/op_mul_mat_mat_f32.comp + kompute-shaders/op_mul_mat_f16.comp + kompute-shaders/op_mul_mat_q8_0.comp + kompute-shaders/op_mul_mat_q4_0.comp + kompute-shaders/op_mul_mat_q4_1.comp + kompute-shaders/op_mul_mat_q6_k.comp + kompute-shaders/op_getrows_f16.comp + kompute-shaders/op_getrows_q4_0.comp + kompute-shaders/op_getrows_q4_1.comp + kompute-shaders/op_getrows_q6_k.comp + kompute-shaders/op_rope_f16.comp + kompute-shaders/op_rope_f32.comp + kompute-shaders/op_cpy_f16_f16.comp + kompute-shaders/op_cpy_f16_f32.comp + kompute-shaders/op_cpy_f32_f16.comp + kompute-shaders/op_cpy_f32_f32.comp + ) - # Create a custom target for our generated shaders 
- add_custom_target(generated_shaders DEPENDS - shaderop_scale.h - shaderop_scale_8.h - shaderop_add.h - shaderop_addrow.h - shaderop_mul.h - shaderop_silu.h - shaderop_relu.h - shaderop_gelu.h - shaderop_softmax.h - shaderop_norm.h - shaderop_rmsnorm.h - shaderop_diagmask.h - shaderop_mul_mat_mat_f32.h - shaderop_mul_mat_f16.h - shaderop_mul_mat_q8_0.h - shaderop_mul_mat_q4_0.h - shaderop_mul_mat_q4_1.h - shaderop_mul_mat_q6_k.h - shaderop_getrows_f16.h - shaderop_getrows_q4_0.h - shaderop_getrows_q4_1.h - shaderop_getrows_q6_k.h - shaderop_rope_f16.h - shaderop_rope_f32.h - shaderop_cpy_f16_f16.h - shaderop_cpy_f16_f32.h - shaderop_cpy_f32_f16.h - shaderop_cpy_f32_f32.h - ) + # Create a custom target for our generated shaders + add_custom_target(generated_shaders DEPENDS + shaderop_scale.h + shaderop_scale_8.h + shaderop_add.h + shaderop_addrow.h + shaderop_mul.h + shaderop_silu.h + shaderop_relu.h + shaderop_gelu.h + shaderop_softmax.h + shaderop_norm.h + shaderop_rmsnorm.h + shaderop_diagmask.h + shaderop_mul_mat_mat_f32.h + shaderop_mul_mat_f16.h + shaderop_mul_mat_q8_0.h + shaderop_mul_mat_q4_0.h + shaderop_mul_mat_q4_1.h + shaderop_mul_mat_q6_k.h + shaderop_getrows_f16.h + shaderop_getrows_q4_0.h + shaderop_getrows_q4_1.h + shaderop_getrows_q6_k.h + shaderop_rope_f16.h + shaderop_rope_f32.h + shaderop_cpy_f16_f16.h + shaderop_cpy_f16_f32.h + shaderop_cpy_f32_f16.h + shaderop_cpy_f32_f32.h + ) - # Create a custom command that depends on the generated_shaders - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp - DEPENDS generated_shaders - COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp" - ) + # Create a custom command that depends on the generated_shaders + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp + DEPENDS generated_shaders + COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp" + ) - # Add the stamp to the main sources to ensure dependency tracking - set(GGML_SOURCES_KOMPUTE ${LLAMA_DIR}/ggml-kompute.cpp ${LLAMA_DIR}/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) - add_compile_definitions(GGML_USE_KOMPUTE) - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute) - set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) - else() - message(WARNING "Kompute not found") - endif() + # Add the stamp to the main sources to ensure dependency tracking + set(GGML_SOURCES_KOMPUTE ${LLAMA_DIR}/ggml-kompute.cpp ${LLAMA_DIR}/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) + add_compile_definitions(GGML_USE_KOMPUTE) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute) + set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) endif() if (LLAMA_ALL_WARNINGS) diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp index 0767accbde0e..5b9960fff1c1 100644 --- a/gpt4all-backend/llamamodel.cpp +++ b/gpt4all-backend/llamamodel.cpp @@ -150,6 +150,8 @@ size_t LLamaModel::requiredMem(const std::string &modelPath, int n_ctx, int ngl) bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) { + d_ptr->modelLoaded = false; + // clean up after previous loadModel() if (d_ptr->model) { llama_free_model(d_ptr->model); @@ -195,6 +197,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) d_ptr->model = 
llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params); if (!d_ptr->model) { + fflush(stdout); d_ptr->device = -1; std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl; return false; @@ -225,6 +228,7 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) d_ptr->ctx = llama_new_context_with_model(d_ptr->model, d_ptr->ctx_params); if (!d_ptr->ctx) { + fflush(stdout); std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl; llama_free_model(d_ptr->model); d_ptr->model = nullptr; @@ -240,8 +244,8 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl) } #endif + fflush(stdout); d_ptr->modelLoaded = true; - fflush(stderr); return true; } @@ -428,6 +432,8 @@ std::vector LLamaModel::availableGPUDevices(size_t memoryReq free(vkDevices); return devices; } +#else + std::cerr << __func__ << ": built without Kompute\n"; #endif return {}; @@ -508,7 +514,14 @@ DLL_EXPORT bool magic_match(const char *fname) { auto * ctx = load_gguf(fname, arch); bool valid = true; - if (!(arch == "llama" || arch == "starcoder" || arch == "falcon" || arch == "mpt")) { + + static const std::vector known_arches { + "baichuan", "bloom", "codeshell", "falcon", "gpt2", "llama", "mpt", "orion", "persimmon", "phi2", "plamo", + "qwen", "qwen2", "refact", "stablelm", "starcoder" + }; + + if (std::find(known_arches.begin(), known_arches.end(), arch) == known_arches.end()) { + // not supported by this version of llama.cpp if (!(arch == "gptj" || arch == "bert")) { // we support these via other modules std::cerr << __func__ << ": unsupported model architecture: " << arch << "\n"; } diff --git a/gpt4all-bindings/csharp/Directory.Build.props b/gpt4all-bindings/csharp/Directory.Build.props index 75e32e34ae8c..8b307516ad5b 100644 --- a/gpt4all-bindings/csharp/Directory.Build.props +++ b/gpt4all-bindings/csharp/Directory.Build.props @@ -5,7 +5,7 @@ en-US - 0.6.3-alpha + 0.6.4-alpha $(VersionSuffix) $(Version)$(VersionSuffix) true diff --git a/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj b/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj index 39cc0da12cfb..8e6d325a2915 100644 --- a/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj +++ b/gpt4all-bindings/csharp/Gpt4All.Samples/Gpt4All.Samples.csproj @@ -2,7 +2,7 @@ Exe - net7.0 + net8.0 enable enable true diff --git a/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj b/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj index 05995c904106..76f61f92ed59 100644 --- a/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj +++ b/gpt4all-bindings/csharp/Gpt4All.Tests/Gpt4All.Tests.csproj @@ -1,7 +1,7 @@ - net7.0 + net8.0 enable false diff --git a/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj b/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj index d67b9b24ce62..af338f820804 100644 --- a/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj +++ b/gpt4all-bindings/csharp/Gpt4All/Gpt4All.csproj @@ -1,10 +1,10 @@  - net6.0 enable enable true true + net8.0 diff --git a/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs b/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs index 8350a66ac1e2..938f44d8a19f 100644 --- a/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs +++ b/gpt4all-bindings/csharp/Gpt4All/Model/Gpt4AllModelFactory.cs @@ -32,7 +32,7 @@ public Gpt4AllModelFactory(string? 
libraryPath = default, bool bypassLoading = t } } - private IGpt4AllModel CreateModel(string modelPath) + private Gpt4All CreateModel(string modelPath) { _logger.LogInformation("Creating model path={ModelPath}", modelPath); IntPtr error; diff --git a/gpt4all-bindings/csharp/README.md b/gpt4all-bindings/csharp/README.md index 9829a9768e99..af8d4e9a353f 100644 --- a/gpt4all-bindings/csharp/README.md +++ b/gpt4all-bindings/csharp/README.md @@ -6,7 +6,10 @@ This package contains a set of C# bindings around the `llmodel` C-API. TBD ## Installation -TBD NuGet + +Windows and Linux builds are available on NuGet: https://www.nuget.org/packages/Gpt4All + +macOS is WIP due to code signing issues, contributions are welcome. ## Project Structure ``` diff --git a/gpt4all-bindings/python/docs/gpt4all_chat.md b/gpt4all-bindings/python/docs/gpt4all_chat.md index 93dd41d0530b..96da44d7fdcd 100644 --- a/gpt4all-bindings/python/docs/gpt4all_chat.md +++ b/gpt4all-bindings/python/docs/gpt4all_chat.md @@ -61,17 +61,7 @@ The general technique this plugin uses is called [Retrieval Augmented Generation These document chunks help your LLM respond to queries with knowledge about the contents of your data. The number of chunks and the size of each chunk can be configured in the LocalDocs plugin settings tab. -LocalDocs supports the following file types: -```json -["txt", "doc", "docx", "pdf", "rtf", "odt", "html", "htm", "xls", "xlsx", "csv", "ods", "ppt", "pptx", "odp", "xml", "json", "log", "md", "org", "tex", "asc", "wks", -"wpd", "wps", "wri", "xhtml", "xht", "xslt", "yaml", "yml", "dtd", "sgml", "tsv", "strings", "resx", -"plist", "properties", "ini", "config", "bat", "sh", "ps1", "cmd", "awk", "sed", "vbs", "ics", "mht", -"mhtml", "epub", "djvu", "azw", "azw3", "mobi", "fb2", "prc", "lit", "lrf", "tcr", "pdb", "oxps", -"xps", "pages", "numbers", "key", "keynote", "abw", "zabw", "123", "wk1", "wk3", "wk4", "wk5", "wq1", -"wq2", "xlw", "xlr", "dif", "slk", "sylk", "wb1", "wb2", "wb3", "qpw", "wdb", "wks", "wku", "wr1", -"wrk", "xlk", "xlt", "xltm", "xltx", "xlsm", "xla", "xlam", "xll", "xld", "xlv", "xlw", "xlc", "xlm", -"xlt", "xln"] -``` +LocalDocs currently supports plain text files (`.txt`, `.md`, and `.rst`) and PDF files (`.pdf`). 
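To make the narrowed allow-list above concrete: a minimal Python sketch (not part of GPT4All itself; the real scan lives in `gpt4all-chat/database.cpp` and is updated later in this diff) of a folder scan restricted to the suffixes LocalDocs now accepts. The folder path used below is hypothetical.

```python
from pathlib import Path

# Mirrors the allow-list introduced in gpt4all-chat/database.cpp:
# { "txt", "pdf", "md", "rst" }. Adjust if the application's list changes.
SUPPORTED_SUFFIXES = {".txt", ".pdf", ".md", ".rst"}

def eligible_documents(folder: str) -> list[Path]:
    """Return files under `folder` that LocalDocs would consider for indexing."""
    root = Path(folder).expanduser()
    return [p for p in root.rglob("*")
            if p.is_file() and p.suffix.lower() in SUPPORTED_SUFFIXES]

if __name__ == "__main__":
    for doc in eligible_documents("~/Documents"):  # hypothetical collection folder
        print(doc)
```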
#### Troubleshooting and FAQ *My LocalDocs plugin isn't using my documents* diff --git a/gpt4all-bindings/python/gpt4all/__init__.py b/gpt4all-bindings/python/gpt4all/__init__.py index 391fab0298f8..01df38fc9347 100644 --- a/gpt4all-bindings/python/gpt4all/__init__.py +++ b/gpt4all-bindings/python/gpt4all/__init__.py @@ -1,2 +1 @@ from .gpt4all import Embed4All as Embed4All, GPT4All as GPT4All -from .pyllmodel import LLModel as LLModel diff --git a/gpt4all-bindings/python/gpt4all/pyllmodel.py b/gpt4all-bindings/python/gpt4all/_pyllmodel.py similarity index 75% rename from gpt4all-bindings/python/gpt4all/pyllmodel.py rename to gpt4all-bindings/python/gpt4all/_pyllmodel.py index f313e3054c5f..eb03a91443fe 100644 --- a/gpt4all-bindings/python/gpt4all/pyllmodel.py +++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py @@ -142,15 +142,6 @@ def empty_response_callback(token_id: int, response: str) -> bool: return True -def _create_model(model_path: bytes) -> ctypes.c_void_p: - err = ctypes.c_char_p() - model = llmodel.llmodel_model_create2(model_path, b"auto", ctypes.byref(err)) - if model is None: - s = err.value - raise ValueError("Unable to instantiate model: {'null' if s is None else s.decode()}") - return model - - # Symbol to terminate from generator class Sentinel(Enum): TERMINATING_SYMBOL = 0 @@ -161,116 +152,77 @@ class LLModel: Base class and universal wrapper for GPT4All language models built around llmodel C-API. - Attributes + Parameters ---------- - model: llmodel_model - Ctype pointer to underlying model - model_name: str - Model name + model_path : str + Path to the model. + n_ctx : int + Maximum size of context window + ngl : int + Number of GPU layers to use (Vulkan) """ - def __init__(self): - self.model = None - self.model_name = None - self.context = None - self.llmodel_lib = llmodel - + def __init__(self, model_path: str, n_ctx: int, ngl: int): + self.model_path = model_path.encode() + self.n_ctx = n_ctx + self.ngl = ngl + self.context: LLModelPromptContext | None = None self.buffer = bytearray() self.buff_expecting_cont_bytes: int = 0 - def __del__(self): - if self.model is not None: - self.llmodel_lib.llmodel_model_destroy(self.model) + # Construct a model implementation + err = ctypes.c_char_p() + model = llmodel.llmodel_model_create2(self.model_path, b"auto", ctypes.byref(err)) + if model is None: + s = err.value + raise ValueError(f"Unable to instantiate model: {'null' if s is None else s.decode()}") + self.model = model - def memory_needed(self, model_path: str, n_ctx: int, ngl: int) -> int: - self.model = None - return self._memory_needed(model_path, n_ctx, ngl) - - def _memory_needed(self, model_path: str, n_ctx: int, ngl: int) -> int: - if self.model is None: - self.model = _create_model(model_path.encode()) - return llmodel.llmodel_required_mem(self.model, model_path.encode(), n_ctx, ngl) - - def list_gpu(self, model_path: str, n_ctx: int, ngl: int) -> list[LLModelGPUDevice]: - """ - Lists available GPU devices that satisfy the model's memory requirements. - - Parameters - ---------- - model_path : str - Path to the model. - n_ctx : int - Maximum size of context window - ngl : int - Number of GPU layers to use (Vulkan) - - Returns - ------- - list - A list of LLModelGPUDevice structures representing available GPU devices. 
- """ - mem_required = self._memory_needed(model_path, n_ctx, ngl) - return self._list_gpu(mem_required) + def __del__(self): + if hasattr(self, 'model'): + llmodel.llmodel_model_destroy(self.model) def _list_gpu(self, mem_required: int) -> list[LLModelGPUDevice]: num_devices = ctypes.c_int32(0) - devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, mem_required, ctypes.byref(num_devices)) + devices_ptr = llmodel.llmodel_available_gpu_devices(self.model, mem_required, ctypes.byref(num_devices)) if not devices_ptr: raise ValueError("Unable to retrieve available GPU devices") return devices_ptr[:num_devices.value] - def init_gpu(self, model_path: str, device: str, n_ctx: int, ngl: int): - mem_required = self._memory_needed(model_path, n_ctx, ngl) + def init_gpu(self, device: str): + mem_required = llmodel.llmodel_required_mem(self.model, self.model_path, self.n_ctx, self.ngl) + + if llmodel.llmodel_gpu_init_gpu_device_by_string(self.model, mem_required, device.encode()): + return - success = self.llmodel_lib.llmodel_gpu_init_gpu_device_by_string(self.model, mem_required, device.encode()) - if not success: - # Retrieve all GPUs without considering memory requirements. - num_devices = ctypes.c_int32(0) - all_devices_ptr = self.llmodel_lib.llmodel_available_gpu_devices(self.model, 0, ctypes.byref(num_devices)) - if not all_devices_ptr: - raise ValueError("Unable to retrieve list of all GPU devices") - all_gpus = [d.name.decode() for d in all_devices_ptr[:num_devices.value]] + # Retrieve all GPUs without considering memory requirements. + num_devices = ctypes.c_int32(0) + all_devices_ptr = llmodel.llmodel_available_gpu_devices(self.model, 0, ctypes.byref(num_devices)) + if not all_devices_ptr: + raise ValueError("Unable to retrieve list of all GPU devices") + all_gpus = [d.name.decode() for d in all_devices_ptr[:num_devices.value]] - # Retrieve GPUs that meet the memory requirements using list_gpu - available_gpus = [device.name.decode() for device in self._list_gpu(mem_required)] + # Retrieve GPUs that meet the memory requirements using list_gpu + available_gpus = [device.name.decode() for device in self._list_gpu(mem_required)] - # Identify GPUs that are unavailable due to insufficient memory or features - unavailable_gpus = set(all_gpus).difference(available_gpus) + # Identify GPUs that are unavailable due to insufficient memory or features + unavailable_gpus = set(all_gpus).difference(available_gpus) - # Formulate the error message - error_msg = "Unable to initialize model on GPU: '{}'.".format(device) - error_msg += "\nAvailable GPUs: {}.".format(available_gpus) - error_msg += "\nUnavailable GPUs due to insufficient memory or features: {}.".format(unavailable_gpus) - raise ValueError(error_msg) + # Formulate the error message + error_msg = "Unable to initialize model on GPU: '{}'.".format(device) + error_msg += "\nAvailable GPUs: {}.".format(available_gpus) + error_msg += "\nUnavailable GPUs due to insufficient memory or features: {}.".format(unavailable_gpus) + raise ValueError(error_msg) - def load_model(self, model_path: str, n_ctx: int, ngl: int) -> bool: + def load_model(self) -> bool: """ Load model from a file. 
- Parameters - ---------- - model_path : str - Model filepath - n_ctx : int - Maximum size of context window - ngl : int - Number of GPU layers to use (Vulkan) - Returns ------- True if model loaded successfully, False otherwise """ - self.model = _create_model(model_path.encode()) - - llmodel.llmodel_loadModel(self.model, model_path.encode(), n_ctx, ngl) - - filename = os.path.basename(model_path) - self.model_name = os.path.splitext(filename)[0] - - if llmodel.llmodel_isModelLoaded(self.model): - return True - else: - return False + return llmodel.llmodel_loadModel(self.model, self.model_path, self.n_ctx, self.ngl) def set_thread_count(self, n_threads): if not llmodel.llmodel_isModelLoaded(self.model): @@ -295,7 +247,7 @@ def _set_context( reset_context: bool = False, ): if self.context is None: - self.context = LLModelPromptContext( + context = LLModelPromptContext( logits_size=0, tokens_size=0, n_past=0, @@ -309,8 +261,11 @@ def _set_context( repeat_last_n=repeat_last_n, context_erase=context_erase, ) - elif reset_context: - self.context.n_past = 0 + self.context = context + else: + context = self.context + if reset_context: + self.context.n_past = 0 self.context.n_predict = n_predict self.context.top_k = top_k diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py index fcd0a91d0bab..02fa1c806bb9 100644 --- a/gpt4all-bindings/python/gpt4all/gpt4all.py +++ b/gpt4all-bindings/python/gpt4all/gpt4all.py @@ -15,7 +15,7 @@ from tqdm import tqdm from urllib3.exceptions import IncompleteRead, ProtocolError -from . import pyllmodel +from . import _pyllmodel # TODO: move to config DEFAULT_MODEL_DIRECTORY = os.path.join(str(Path.home()), ".cache", "gpt4all").replace("\\", "\\\\") @@ -97,12 +97,12 @@ def __init__( verbose: If True, print debug messages. """ self.model_type = model_type - self.model = pyllmodel.LLModel() # Retrieve model and download if allowed self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose) + self.model = _pyllmodel.LLModel(self.config["path"], n_ctx, ngl) if device is not None and device != "cpu": - self.model.init_gpu(model_path=self.config["path"], device=device, n_ctx=n_ctx, ngl=ngl) - self.model.load_model(self.config["path"], n_ctx, ngl) + self.model.init_gpu(device) + self.model.load_model() # Set n_threads if n_threads is not None: self.model.set_thread_count(n_threads) @@ -292,7 +292,7 @@ def generate( n_batch: int = 8, n_predict: Optional[int] = None, streaming: bool = False, - callback: pyllmodel.ResponseCallbackType = pyllmodel.empty_response_callback, + callback: _pyllmodel.ResponseCallbackType = _pyllmodel.empty_response_callback, ) -> Union[str, Iterable[str]]: """ Generate outputs from any GPT4All model. 
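With `pyllmodel` renamed to `_pyllmodel` and the `LLModel` re-export dropped from `__init__.py`, the low-level wrapper is now private API; user code should go through the `GPT4All` class only. A minimal usage sketch of the public surface this refactor keeps stable (the model file name is taken from this repository's model list and is downloaded on first use if downloads are allowed):

```python
from gpt4all import GPT4All  # public entry point; gpt4all._pyllmodel is internal

# n_ctx and device map onto the new LLModel(model_path, n_ctx, ngl) constructor.
model = GPT4All("mistral-7b-openorca.Q4_0.gguf", device="cpu", n_ctx=2048)

# streaming=True yields tokens as they are produced; omit it to get one string.
for token in model.generate("Name three uses of a local LLM.", max_tokens=128, streaming=True):
    print(token, end="", flush=True)
print()
```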
@@ -350,9 +350,9 @@ def generate( output_collector = self.current_chat_session def _callback_wrapper( - callback: pyllmodel.ResponseCallbackType, + callback: _pyllmodel.ResponseCallbackType, output_collector: List[MessageType], - ) -> pyllmodel.ResponseCallbackType: + ) -> _pyllmodel.ResponseCallbackType: def _callback(token_id: int, response: str) -> bool: nonlocal callback, output_collector diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py index 7ff2a1c650dd..c76f1b49254b 100644 --- a/gpt4all-bindings/python/setup.py +++ b/gpt4all-bindings/python/setup.py @@ -1,5 +1,6 @@ from setuptools import setup, find_packages import os +import pathlib import platform import shutil @@ -59,13 +60,25 @@ def copy_prebuilt_C_lib(src_dir, dest_dir, dest_build_dir): DEST_CLIB_DIRECTORY, DEST_CLIB_BUILD_DIRECTORY) + +def get_long_description(): + with open(pathlib.Path(__file__).parent / "README.md", encoding="utf-8") as fp: + return fp.read() + + setup( name=package_name, - version="2.1.0", + version="2.2.1.post1", description="Python bindings for GPT4All", + long_description=get_long_description(), + long_description_content_type="text/markdown", author="Nomic and the Open Source Community", author_email="support@nomic.ai", - url="https://pypi.org/project/gpt4all/", + url="https://gpt4all.io/", + project_urls={ + "Documentation": "https://docs.gpt4all.io/gpt4all_python.html", + "Source code": "https://github.com/nomic-ai/gpt4all/tree/main/gpt4all-bindings/python", + }, classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt index 301f6f3c110c..ee72f8463e42 100644 --- a/gpt4all-chat/CMakeLists.txt +++ b/gpt4all-chat/CMakeLists.txt @@ -17,8 +17,8 @@ if(APPLE) endif() set(APP_VERSION_MAJOR 2) -set(APP_VERSION_MINOR 6) -set(APP_VERSION_PATCH 3) +set(APP_VERSION_MINOR 7) +set(APP_VERSION_PATCH 1) set(APP_VERSION "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}") # Include the binary directory for the generated header file diff --git a/gpt4all-chat/chat.h b/gpt4all-chat/chat.h index 5d72222dd32b..ae6910bf8f2a 100644 --- a/gpt4all-chat/chat.h +++ b/gpt4all-chat/chat.h @@ -8,6 +8,7 @@ #include "chatllm.h" #include "chatmodel.h" #include "database.h" +#include "localdocsmodel.h" class Chat : public QObject { diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp index bb399cbb17dc..844942e44399 100644 --- a/gpt4all-chat/chatllm.cpp +++ b/gpt4all-chat/chatllm.cpp @@ -1,6 +1,7 @@ #include "chatllm.h" #include "chat.h" #include "chatgpt.h" +#include "localdocs.h" #include "modellist.h" #include "network.h" #include "mysettings.h" diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h index 367915f6c334..d6af4cb0c427 100644 --- a/gpt4all-chat/chatllm.h +++ b/gpt4all-chat/chatllm.h @@ -5,7 +5,7 @@ #include #include -#include "localdocs.h" +#include "database.h" #include "modellist.h" #include "../gpt4all-backend/llmodel.h" diff --git a/gpt4all-chat/database.cpp b/gpt4all-chat/database.cpp index 3fdde3acd9c5..f572e0480a9f 100644 --- a/gpt4all-chat/database.cpp +++ b/gpt4all-chat/database.cpp @@ -890,15 +890,7 @@ void Database::scanDocuments(int folder_id, const QString &folder_path) qDebug() << "scanning folder for documents" << folder_path; #endif - static const QList extensions { "txt", "doc", "docx", "pdf", "rtf", "odt", "html", "htm", - "xls", "xlsx", "csv", "ods", "ppt", "pptx", "odp", "xml", "json", "log", "md", "org", "tex", "asc", "wks", - 
"wpd", "wps", "wri", "xhtml", "xht", "xslt", "yaml", "yml", "dtd", "sgml", "tsv", "strings", "resx", - "plist", "properties", "ini", "config", "bat", "sh", "ps1", "cmd", "awk", "sed", "vbs", "ics", "mht", - "mhtml", "epub", "djvu", "azw", "azw3", "mobi", "fb2", "prc", "lit", "lrf", "tcr", "pdb", "oxps", - "xps", "pages", "numbers", "key", "keynote", "abw", "zabw", "123", "wk1", "wk3", "wk4", "wk5", "wq1", - "wq2", "xlw", "xlr", "dif", "slk", "sylk", "wb1", "wb2", "wb3", "qpw", "wdb", "wks", "wku", "wr1", - "wrk", "xlk", "xlt", "xltm", "xltx", "xlsm", "xla", "xlam", "xll", "xld", "xlv", "xlw", "xlc", "xlm", - "xlt", "xln" }; + static const QList extensions { "txt", "pdf", "md", "rst" }; QDir dir(folder_path); Q_ASSERT(dir.exists()); diff --git a/gpt4all-chat/llm.cpp b/gpt4all-chat/llm.cpp index e5797c1b6d8f..0f454908c6ad 100644 --- a/gpt4all-chat/llm.cpp +++ b/gpt4all-chat/llm.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #ifndef GPT4ALL_OFFLINE_INSTALLER @@ -39,6 +40,10 @@ LLM::LLM() #endif m_compatHardware = minimal; + + QNetworkInformation::loadDefaultBackend(); + connect(QNetworkInformation::instance(), &QNetworkInformation::reachabilityChanged, + this, &LLM::isNetworkOnlineChanged); } bool LLM::hasSettingsAccess() const @@ -100,3 +105,11 @@ QString LLM::systemTotalRAMInGBString() const { return QString::fromStdString(getSystemTotalRAMInGBString()); } + +bool LLM::isNetworkOnline() const +{ + if (!QNetworkInformation::instance()) + return false; + + return QNetworkInformation::instance()->reachability() == QNetworkInformation::Reachability::Online; +} diff --git a/gpt4all-chat/llm.h b/gpt4all-chat/llm.h index 067ee671b41d..55367742202a 100644 --- a/gpt4all-chat/llm.h +++ b/gpt4all-chat/llm.h @@ -6,6 +6,8 @@ class LLM : public QObject { Q_OBJECT + Q_PROPERTY(bool isNetworkOnline READ isNetworkOnline NOTIFY isNetworkOnlineChanged) + public: static LLM *globalInstance(); @@ -17,10 +19,10 @@ class LLM : public QObject Q_INVOKABLE static bool fileExists(const QString &path); Q_INVOKABLE qint64 systemTotalRAMInGB() const; Q_INVOKABLE QString systemTotalRAMInGBString() const; + Q_INVOKABLE bool isNetworkOnline() const; Q_SIGNALS: - void chatListModelChanged(); - void modelListChanged(); + void isNetworkOnlineChanged(); private: bool m_compatHardware; diff --git a/gpt4all-chat/main.qml b/gpt4all-chat/main.qml index 3aafa1a4adf5..72fbc3b8e19a 100644 --- a/gpt4all-chat/main.qml +++ b/gpt4all-chat/main.qml @@ -16,8 +16,8 @@ Window { id: window width: 1920 height: 1080 - minimumWidth: 1280 - minimumHeight: 720 + minimumWidth: 720 + minimumHeight: 480 visible: true title: qsTr("GPT4All v") + Qt.application.version @@ -369,7 +369,7 @@ Window { highlighted: comboBox.highlightedIndex === index } Accessible.role: Accessible.ComboBox - Accessible.name: qsTr("List of available models") + Accessible.name: comboBox.currentModelName Accessible.description: qsTr("The top item is the current model") onActivated: function (index) { currentChat.stopGenerating() @@ -869,6 +869,7 @@ Window { MyButton { id: downloadButton + visible: LLM.isNetworkOnline Layout.alignment: Qt.AlignHCenter Layout.topMargin: 40 text: qsTr("Download models") @@ -904,10 +905,7 @@ Window { model: chatModel ScrollBar.vertical: ScrollBar { - parent: listView.parent - anchors.top: listView.top - anchors.left: listView.right - anchors.bottom: listView.bottom + policy: ScrollBar.AsNeeded } Accessible.role: Accessible.List @@ -960,7 +958,7 @@ Window { } Accessible.role: Accessible.Paragraph - Accessible.name: name + 
Accessible.name: text Accessible.description: name === qsTr("Response: ") ? "The response by the model" : "The prompt by the user" topPadding: 20 diff --git a/gpt4all-chat/metadata/models2.json b/gpt4all-chat/metadata/models2.json index 98bc4440ffa4..95ef5ad84377 100644 --- a/gpt4all-chat/metadata/models2.json +++ b/gpt4all-chat/metadata/models2.json @@ -10,9 +10,10 @@ "parameters": "7 billion", "quant": "q4_0", "type": "Mistral", - "systemPrompt": " ", "description": "Best overall fast chat model
  • Fast responses
  • Chat based model
  • Trained by Mistral AI
  • Finetuned on OpenOrca dataset curated via Nomic Atlas
  • Licensed for commercial use
", - "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf" + "url": "https://gpt4all.io/models/gguf/mistral-7b-openorca.Q4_0.gguf", + "promptTemplate": "<|im_start|>user\n%1<|im_end|><|im_start|>assistant\n", + "systemPrompt": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>" }, { "order": "b", diff --git a/gpt4all-chat/metadata/release.json b/gpt4all-chat/metadata/release.json index 7c1a5bf40ffe..bd5b9b6836db 100644 --- a/gpt4all-chat/metadata/release.json +++ b/gpt4all-chat/metadata/release.json @@ -657,6 +657,32 @@ * Adam Treat (Nomic AI) * Karthik Nair * Community (beta testers, bug reporters, bindings authors) +" + }, + { + "version": "2.7.0", + "notes": +" +* Add support for twelve new model architectures +* Including Baichuan, BLOOM, CodeShell, GPT-2, Orion, Persimmon, Phi and Phi-2, Plamo, Qwen, Qwen2, Refact, and StableLM +* Fix for progress bar colors on legacy theme +* Fix sizing for model download dialog elements +* Fix dialog sizes to use more screen realestate where available +* Fix for vram leak when model loading fails +* Fix for making the collection dialog progress bar more readable +* Fix for smaller minimum size for main screen +* Fix for mistral crash +* Fix for mistral openorca prompt template to ChatLM +* Fix for excluding non-text documents from localdoc scanning +* Fix for scrollbar missing on main conversation +* Fix accessibility issues for screen readers +* Fix for not showing the download button when not online +", + "contributors": +" +* Jared Van Bortel (Nomic AI) +* Adam Treat (Nomic AI) +* Community (beta testers, bug reporters, bindings authors) " } ] diff --git a/gpt4all-chat/qml/ChatDrawer.qml b/gpt4all-chat/qml/ChatDrawer.qml index fbc604426903..2c4350b0104b 100644 --- a/gpt4all-chat/qml/ChatDrawer.qml +++ b/gpt4all-chat/qml/ChatDrawer.qml @@ -131,7 +131,7 @@ Drawer { } } Accessible.role: Accessible.Button - Accessible.name: qsTr("Select the current chat") + Accessible.name: text Accessible.description: qsTr("Select the current chat or edit the chat when in edit mode") } Row { diff --git a/gpt4all-chat/qml/CollectionsDialog.qml b/gpt4all-chat/qml/CollectionsDialog.qml index 2374fa3de701..c2ad7d20c6e8 100644 --- a/gpt4all-chat/qml/CollectionsDialog.qml +++ b/gpt4all-chat/qml/CollectionsDialog.qml @@ -121,7 +121,7 @@ MyDialog { } Label { id: speedLabel - color: theme.textColor + color: theme.progressText visible: model.indexing || model.currentEmbeddingsToIndex !== model.totalEmbeddingsToIndex anchors.verticalCenter: itemProgressBar.verticalCenter anchors.left: itemProgressBar.left diff --git a/gpt4all-chat/qml/MyDialog.qml b/gpt4all-chat/qml/MyDialog.qml index 4174bad12a80..37d07bda1f09 100644 --- a/gpt4all-chat/qml/MyDialog.qml +++ b/gpt4all-chat/qml/MyDialog.qml @@ -19,6 +19,7 @@ Dialog { Rectangle { id: closeBackground + visible: myCloseButton.visible z: 299 anchors.centerIn: myCloseButton width: myCloseButton.width + 10 diff --git a/gpt4all-chat/qml/Theme.qml b/gpt4all-chat/qml/Theme.qml index 042d4ad39894..49f8343cbc82 100644 --- a/gpt4all-chat/qml/Theme.qml +++ b/gpt4all-chat/qml/Theme.qml @@ -222,6 +222,17 @@ QtObject { } } + property color progressText: { + switch (MySettings.chatTheme) { + case "LegacyDark": + return "#ffffff"; + case "Dark": + return "#000000"; + default: + return "#000000"; + } + } + property color checkboxBorder: { switch (MySettings.chatTheme) { case "LegacyDark":