Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

localdocs: implement .docx support #2986

Merged
merged 14 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@
[submodule "gpt4all-chat/deps/fmt"]
path = gpt4all-chat/deps/fmt
url = https://github.com/fmtlib/fmt.git
[submodule "gpt4all-chat/deps/DuckX"]
path = gpt4all-chat/deps/DuckX
url = https://github.com/nomic-ai/DuckX.git
1 change: 1 addition & 0 deletions gpt4all-chat/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

### Added
- Add bm25 hybrid search to localdocs ([#2969](https://github.com/nomic-ai/gpt4all/pull/2969))
- LocalDocs support for .docx files ([#2986](https://github.com/nomic-ai/gpt4all/pull/2986))

### Changed
- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
Expand Down
14 changes: 3 additions & 11 deletions gpt4all-chat/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,9 @@ get_filename_component(Qt6_ROOT_DIR "${Qt6_ROOT_DIR}/.." ABSOLUTE)
message(STATUS "qmake binary: ${QMAKE_EXECUTABLE}")
message(STATUS "Qt 6 root directory: ${Qt6_ROOT_DIR}")

set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

set(FMT_INSTALL OFF)
set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}")
set(BUILD_SHARED_LIBS OFF)
add_subdirectory(deps/fmt)
set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

add_subdirectory(deps)
add_subdirectory(../gpt4all-backend llmodel)

set(CHAT_EXE_RESOURCES)
Expand Down Expand Up @@ -133,9 +128,6 @@ if (APPLE)
list(APPEND CHAT_EXE_RESOURCES "${LOCAL_EMBEDDING_MODEL_PATH}")
endif()

set(QAPPLICATION_CLASS QGuiApplication)
add_subdirectory(deps/SingleApplication)

if (DEFINED GGML_METALLIB)
set_source_files_properties("${GGML_METALLIB}" PROPERTIES GENERATED ON)
endif()
Expand Down Expand Up @@ -335,7 +327,7 @@ target_include_directories(chat PRIVATE deps/usearch/include
target_link_libraries(chat
PRIVATE Qt6::Core Qt6::HttpServer Qt6::Pdf Qt6::Quick Qt6::Sql Qt6::Svg)
target_link_libraries(chat
PRIVATE llmodel SingleApplication fmt::fmt)
PRIVATE llmodel SingleApplication fmt::fmt duckx::duckx)


# -- install --
Expand Down
10 changes: 10 additions & 0 deletions gpt4all-chat/deps/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
set(BUILD_SHARED_LIBS OFF)

set(FMT_INSTALL OFF)
add_subdirectory(fmt)

set(QAPPLICATION_CLASS QGuiApplication)
add_subdirectory(SingleApplication)

set(DUCKX_INSTALL OFF)
add_subdirectory(DuckX)
1 change: 1 addition & 0 deletions gpt4all-chat/deps/DuckX
Submodule DuckX added at 6e31df
2 changes: 1 addition & 1 deletion gpt4all-chat/deps/usearch
2 changes: 1 addition & 1 deletion gpt4all-chat/qml/LocalDocsSettings.qml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ MySettingsTab {
/* Blacklist common unsupported file extensions. We only support plain text and PDFs, and although we
* reject binary data, we don't want to waste time trying to index files that we don't support. */
exts = exts.filter(e => ![
/* Microsoft documents */ "rtf", "docx", "ppt", "pptx", "xls", "xlsx",
/* Microsoft documents */ "rtf", "ppt", "pptx", "xls", "xlsx",
/* OpenOffice */ "odt", "ods", "odp", "odg",
/* photos */ "jpg", "jpeg", "png", "gif", "bmp", "tif", "tiff", "webp",
/* audio */ "mp3", "wma", "m4a", "wav", "flac",
Expand Down
Loading