From 0a3142e08e8a006c7a64486144c9d01c47f28004 Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Fri, 2 Aug 2024 11:26:56 +0200 Subject: [PATCH] shards: only trigger rescan on .zoekt files changing Any write to the index dir triggered a scan. This means on busy instances we are constantly rescanning, leading to an over-representation in CPU profiles around watch. The events are normally writes to our temporary files. By only considering events for .zoekt files (which is what scan reads) and .meta files, we can avoid the constant scan calls. As a safety net, we also introduce a re-scan every minute in case we miss an event. There is error handling around this, but I think it is simply more reliable to call scan every once in a while. Note: this doesn't represent significant CPU use, but it does muddy the CPU profiler output. So this makes it easier to understand trends in our continuous CPU profiling. Test Plan: CI --- shards/watcher.go | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/shards/watcher.go b/shards/watcher.go index e8cde2e5..16613f70 100644 --- a/shards/watcher.go +++ b/shards/watcher.go @@ -117,6 +117,8 @@ func versionFromPath(path string) (string, int) { } func (s *DirectoryWatcher) scan() error { + // NOTE: if you change which file extensions are read, please update the + // watch implementation. fs, err := filepath.Glob(filepath.Join(s.dir, "*.zoekt")) if err != nil { return err @@ -216,21 +218,38 @@ func (s *DirectoryWatcher) watch() error { signal := make(chan struct{}, 1) go func() { + notify := func() { + select { + case signal <- struct{}{}: + default: + } + } + + ticker := time.NewTicker(time.Minute) + for { select { - case <-watcher.Events: - select { - case signal <- struct{}{}: - default: + case event := <-watcher.Events: + // Only notify if a file we read in has changed. This is important to + // avoid all the events writing to temporary files. 
+ if strings.HasSuffix(event.Name, ".zoekt") || strings.HasSuffix(event.Name, ".meta") { + notify() } + + case <-ticker.C: + // Periodically just double check the disk + notify() + case err := <-watcher.Errors: // Ignore ErrEventOverflow since we rely on the presence of events so // safe to ignore. if err != nil && err != fsnotify.ErrEventOverflow { log.Println("watcher error:", err) } + case <-s.quit: watcher.Close() + ticker.Stop() close(signal) return }