From 2fb6f614dd3b33da144f854c66cf40cb3ee361a0 Mon Sep 17 00:00:00 2001 From: Andrew Hyatt Date: Thu, 28 Sep 2023 19:58:43 -0400 Subject: [PATCH 1/2] Fixing completion issues Default emacs in-buffer completion was not working correctly due to starting completion with a whitespace character sometimes. It was also possible to request completion in the start or middle of a field specifier, which is read-only. We now no longer offer completion there. --- doc/ekg.org | 2 ++ doc/ekg.texi | 12 +++++++++++- ekg.el | 19 +++++++++++++------ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/doc/ekg.org b/doc/ekg.org index 18737468..c1a0e062 100644 --- a/doc/ekg.org +++ b/doc/ekg.org @@ -57,6 +57,8 @@ Clone the ekg library, from whatever branch you would like to use (=main= corres (require 'ekg) #+end_src * Changelog +** Version 0.4.1 +- Fix issues using default emacs in-buffer completion, and allowing completion in places we shouldn't. ** Version 0.4 - Added ability to save in-progress notes. - Added "magic tags", tags that cause elisp to be executed. See the [[#magic-tags][magic tags]] section for more detail. diff --git a/doc/ekg.texi b/doc/ekg.texi index a7bcb3e6..f11c04c9 100644 --- a/doc/ekg.texi +++ b/doc/ekg.texi @@ -66,6 +66,7 @@ Installation Changelog +* Version 0.4.1: Version 041. * Version 0.4: Version 04. * Version 0.3.3: Version 033. * Version 0.3.2: Version 032. @@ -175,6 +176,7 @@ Clone the ekg library, from whatever branch you would like to use (@samp{main} c @chapter Changelog @menu +* Version 0.4.1: Version 041. * Version 0.4: Version 04. * Version 0.3.3: Version 033. * Version 0.3.2: Version 032. @@ -184,6 +186,14 @@ Clone the ekg library, from whatever branch you would like to use (@samp{main} c * Version 0.2: Version 02. @end menu +@node Version 041 +@section Version 0.4.1 + +@itemize +@item +Fix issues using default emacs in-buffer completion, and allowing completion in places we shouldn't. 
+@end itemize + @node Version 04 @section Version 0.4 @@ -449,7 +459,7 @@ Capturing URLs is a bit clunky as is, if you can wrap it in a function to suppl URL can also point to local files which will be browsed using @samp{find-file} by default. The idea is that you can tag files and folders to make them easier to find. Here is an example note similar to web address URL: @quotation -Resource: @uref{file:///Users/andrewhyatt/notes/20230510T162600__emacs_init-file.org} +Resource: @uref{file:///Users/ahyatt/notes/20230510T162600__emacs_init-file.org} Tags: doc/emacs config, date/2023-05-13, emacs/init Title: Emacs Config diff --git a/ekg.el b/ekg.el index 7ec45715..63a5562f 100644 --- a/ekg.el +++ b/ekg.el @@ -1144,14 +1144,19 @@ The function is expected to behave as normal for a function in "Completion function for all metadata at `completion-at-point-functions'. If no completion function is found for the field type, don't attempt the completion." - (if-let (field (ekg--metadata-current-field)) + ;; Only do something when we aren't in a read-only space. + (when + (or (null (ekg--metadata-current-field)) + ;; + 2 for the colon and space + (>= (current-column) (+ 2 (length (car (ekg--metadata-current-field)))))) + (if-let (field (ekg--metadata-current-field)) (when-let (completion-func (assoc (car field) ekg-capf-field-complete-funcs - #'equal)) + #'equal)) (funcall (cdr completion-func))) - ;; There's no current field, but we're in the metadata, so let's complete - ;; the possible fields. - (when (ekg--in-metadata-p) - (ekg--field-name-complete)))) + ;; There's no current field, but we're in the metadata, so let's complete + ;; the possible fields. + (when (ekg--in-metadata-p) + (ekg--field-name-complete))))) (defun ekg--field-name-complete () "Completion function for metadata field names." @@ -1183,6 +1188,8 @@ Argument FINISHED is non-nil if the user has chosen a completion." 
(point))) (start (save-excursion (skip-chars-backward "^,\t\n:") + ;; We are at the right boundary, but now ignore whitespace. + (skip-chars-forward " \t") (point)))) (list start end (completion-table-dynamic (lambda (_) (ekg-tags))) From 21c5f8fa60a1c4dee4dedaa4bec969e6f2726983 Mon Sep 17 00:00:00 2001 From: Andrew Hyatt Date: Sat, 30 Sep 2023 10:04:22 -0400 Subject: [PATCH 2/2] Enable ekg-embedding on command, not load Create ekg-embedding-generate-on-save and ekg-embedding-disable-generate-on-save to turn on and off creating embeddings when notes are created or modified. This fixes issues https://github.com/ahyatt/ekg/issues/102 --- README.org | 1 + doc/ekg.org | 10 ++++++++-- doc/ekg.texi | 13 ++++++++++--- ekg-embedding.el | 21 +++++++++++++++++---- 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/README.org b/README.org index a0cab448..48464d96 100644 --- a/README.org +++ b/README.org @@ -124,6 +124,7 @@ If you are using embedding and llm functionality, an example in which you use Op :bind (([f11] . ekg-capture)) :init (require 'ekg-embedding) + (ekg-embedding-generate-on-save) (require 'ekg-llm) (let ((my-provider (make-llm-openai :key "my-openai-api-key"))) (setq ekg-llm-provider my-provider diff --git a/doc/ekg.org b/doc/ekg.org index c1a0e062..1b97c373 100644 --- a/doc/ekg.org +++ b/doc/ekg.org @@ -59,6 +59,7 @@ Clone the ekg library, from whatever branch you would like to use (=main= corres * Changelog ** Version 0.4.1 - Fix issues using default emacs in-buffer completion, and allowing completion in places we shouldn't. +- Add =ekg-embedding-generate-on-save= and =ekg-embedding-disable-generate-on-save= to turn on and off generating embeddings when notes are saved. ** Version 0.4 - Added ability to save in-progress notes. - Added "magic tags", tags that cause elisp to be executed. See the [[#magic-tags][magic tags]] section for more detail. 
@@ -414,10 +415,11 @@ Because inline commands exist, the complete text of a note should be retrieved w * Extras The ekg module can have any number of functionality additions. These may appear as other packages with other maintainers, but some are included as part of this package. ** Embeddings -The embeddings functionality can be turned on by requiring the embeddings file, such as: +The embeddings functionality can be turned on by requiring the embeddings file and enabling it, such as: #+begin_src emacs-lisp (require 'ekg-embedding) +(ekg-embedding-generate-on-save) #+end_src This module contains functionality to explore similar notes and search using techniques associated with large language models. Embeddings let you do searches at a semantic level, based on an understood meaning that is separate from the words used. For example, if I have a note with a recipe for linguini, embeddings will let me see that it is similar to notes about spaghetti, and not similar to notes about cold fusion. Because the search is not based on words, but meaning derived from those words, notes that describe the same thing in two different languages should be very similar. In ekg these let you find notes similar to a current note, or in fact any buffer. You can also do a query via embeddings. @@ -429,6 +431,8 @@ The embedding interfaces with your preferred LLM provider via the =llm= package. #+begin_src emacs-lisp (use-package ekg :init + (require 'ekg-embedding) + (ekg-embedding-generate-on-save) (let ((my-provider (make-llm-openai :key "my-openai-api-key"))) (setq ekg-llm-provider my-provider ekg-embedding-provider my-provider))) @@ -436,10 +440,12 @@ The embedding interfaces with your preferred LLM provider via the =llm= package. 
The embedding provider should be kept the same as you continue using ekg, however if you do change it, you will need to call =ekg-embedding-generate-all= with a prefix argument (=C-u M-x ekg-embedding-generate-all=), which will regenerate all embeddings asynchronously. The embedding provider does not have to be the same as the LLM provider (if you also use the [[#llm][LLM]] add-on.) Also note that the provider will get the text of all your notes, so if that bothers you, do not use any provider on a server. -Once you have this set up, and you have already called ~(require 'ekg-embedding)~ you can call =M-x ekg-embedding-generate-all=. This may take a long time as each +Once you have this set up, and you have already called ~(require 'ekg-embedding)~ and ~(ekg-embedding-generate-on-save)~ you can call =M-x ekg-embedding-generate-all=. This may take a long time as each embedding has to be generated separately with its own API call. Once you've done this, you can call, in =ekg-notes-mode=, =ekg-embedding-show-similar= to get a list of similar notes. You can also call =ekg-embedding-search= to perform a search over your notes using embeddings. In any buffer, you can call =ekg-embedding-show-similar-to-current-buffer= to similar notes to whatever the text is in the curent buffer. The variable =ekg-embedding-text-selector= has a value that is a function that will pre-process all text that is sent for embeddings. The default value is =ekg-embedding-text-selector-initial=, which will estimate the size of the tokens sent and limit the text to the first 8k tokens. Right now the function is tuned to the limits of Open AI's embedding framework, and a different function may be needed for other embedding APIs. + +If you would like to stop generating embeddings for notes in a session, you can call ~(ekg-embedding-disable-generate-on-save)~. 
** Logseq :PROPERTIES: :CUSTOM_ID: logseq diff --git a/doc/ekg.texi b/doc/ekg.texi index f11c04c9..a0bc41c8 100644 --- a/doc/ekg.texi +++ b/doc/ekg.texi @@ -192,6 +192,8 @@ Clone the ekg library, from whatever branch you would like to use (@samp{main} c @itemize @item Fix issues using default emacs in-buffer completion, and allowing completion in places we shouldn't. +@item +Add @samp{ekg-embedding-generate-on-save} and @samp{ekg-embedding-disable-generate-on-save} to turn on and off generating embeddings when notes are saved. @end itemize @node Version 04 @@ -459,7 +461,7 @@ Capturing URLs is a bit clunky as is, if you can wrap it in a function to suppl URL can also point to local files which will be browsed using @samp{find-file} by default. The idea is that you can tag files and folders to make them easier to find. Here is an example note similar to web address URL: @quotation -Resource: @uref{file:///Users/ahyatt/notes/20230510T162600__emacs_init-file.org} +Resource: @uref{file:///Users/andrewhyatt/notes/20230510T162600__emacs_init-file.org} Tags: doc/emacs config, date/2023-05-13, emacs/init Title: Emacs Config @@ -738,10 +740,11 @@ The ekg module can have any number of functionality additions. These may appear @node Embeddings @section Embeddings -The embeddings functionality can be turned on by requiring the embeddings file, such as: +The embeddings functionality can be turned on by requiring the embeddings file and enabling it, such as: @lisp (require 'ekg-embedding) +(ekg-embedding-generate-on-save) @end lisp This module contains functionality to explore similar notes and search using techniques associated with large language models. Embeddings let you do searches at a semantic level, based on an understood meaning that is separate from the words used. For example, if I have a note with a recipe for linguini, embeddings will let me see that it is similar to notes about spaghetti, and not similar to notes about cold fusion. 
Because the search is not based on words, but meaning derived from those words, notes that describe the same thing in two different languages should be very similar. In ekg these let you find notes similar to a current note, or in fact any buffer. You can also do a query via embeddings. @@ -753,6 +756,8 @@ The embedding interfaces with your preferred LLM provider via the @samp{llm} pac @lisp (use-package ekg :init + (require 'ekg-embedding) + (ekg-embedding-generate-on-save) (let ((my-provider (make-llm-openai :key "my-openai-api-key"))) (setq ekg-llm-provider my-provider ekg-embedding-provider my-provider))) @@ -760,11 +765,13 @@ The embedding interfaces with your preferred LLM provider via the @samp{llm} pac The embedding provider should be kept the same as you continue using ekg, however if you do change it, you will need to call @samp{ekg-embedding-generate-all} with a prefix argument (@samp{C-u M-x ekg-embedding-generate-all}), which will regenerate all embeddings asynchronously. The embedding provider does not have to be the same as the LLM provider (if you also use the @ref{LLM} add-on.) Also note that the provider will get the text of all your notes, so if that bothers you, do not use any provider on a server. -Once you have this set up, and you have already called @code{(require 'ekg-embedding)} you can call @samp{M-x ekg-embedding-generate-all}. This may take a long time as each +Once you have this set up, and you have already called @code{(require 'ekg-embedding)} and @code{(ekg-embedding-generate-on-save)} you can call @samp{M-x ekg-embedding-generate-all}. This may take a long time as each embedding has to be generated separately with its own API call. Once you've done this, you can call, in @samp{ekg-notes-mode}, @samp{ekg-embedding-show-similar} to get a list of similar notes. You can also call @samp{ekg-embedding-search} to perform a search over your notes using embeddings. 
In any buffer, you can call @samp{ekg-embedding-show-similar-to-current-buffer} to similar notes to whatever the text is in the curent buffer. The variable @samp{ekg-embedding-text-selector} has a value that is a function that will pre-process all text that is sent for embeddings. The default value is @samp{ekg-embedding-text-selector-initial}, which will estimate the size of the tokens sent and limit the text to the first 8k tokens. Right now the function is tuned to the limits of Open AI's embedding framework, and a different function may be needed for other embedding APIs. +If you would like to stop generating embeddings for notes in a session, you can call @code{(ekg-embedding-disable-generate-on-save)}. + @node Logseq @section Logseq diff --git a/ekg-embedding.el b/ekg-embedding.el index d03a5672..1cf045da 100644 --- a/ekg-embedding.el +++ b/ekg-embedding.el @@ -300,10 +300,23 @@ The results are in order of most similar to least similar." ekg-notes-size))) nil)) -(add-hook 'ekg-note-pre-save-hook #'ekg-embedding-generate-for-note-async) -;; Generating embeddings from a note's tags has to be post-save, since it works -;; by loading saved embeddings. -(add-hook 'ekg-note-save-hook #'ekg-embedding-generate-for-note-tags-delayed) +(defun ekg-embedding-generate-on-save () + "Enable embedding generation for new notes. +If you have created notes without embeddings enabled, you should +run `ekg-embedding-generate-all' to generate embeddings for all +notes." + (add-hook 'ekg-note-pre-save-hook #'ekg-embedding-generate-for-note-async) + ;; Generating embeddings from a note's tags has to be post-save, since it works + ;; by loading saved embeddings. + (add-hook 'ekg-note-save-hook #'ekg-embedding-generate-for-note-tags-delayed)) + +(defun ekg-embedding-disable-generate-on-save () + "Disable embedding generation on note save for this Emacs session." 
+ (remove-hook 'ekg-note-pre-save-hook #'ekg-embedding-generate-for-note-async) + (remove-hook 'ekg-note-save-hook #'ekg-embedding-generate-for-note-tags-delayed)) + +;; Regardless of whether embeddings are generated on save, when notes are deleted we +;; need to clean up the embeddings. (add-hook 'ekg-note-delete-hook #'ekg-embedding-delete) (provide 'ekg-embedding)