From 0a42e6d3563f0a5d6be792fe43fac8d50f386df1 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Thu, 21 Jul 2022 16:54:43 -0300 Subject: [PATCH] Make HybridDirectory MMAP Extensions Configurable (#3837) (#3970) * Make HybridDirectory MMAP Extensions Configurable Make the HybridDirectory's list of mmap extensions configurable via index settings instead of being hard-coded to a specfic set. Set defaults to the list of currently hard-coded values for backwards compatibility. Signed-off-by: Matt Weber * Add javadoc to INDEX_STORE_HYBRID_MMAP_EXTENSIONS. Signed-off-by: Matt Weber (cherry picked from commit b08a2b87ac5ec54b9a6bca4db8f0c57dc3de4431) Co-authored-by: Matt Weber --- .../common/settings/IndexScopedSettings.java | 1 + .../org/opensearch/index/IndexModule.java | 12 +++++ .../index/store/FsDirectoryFactory.java | 44 +++++-------------- .../index/store/FsDirectoryFactoryTests.java | 38 ++++++++++++++-- 4 files changed, 58 insertions(+), 37 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index ba2666b53d7a8..8841794e0e492 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -179,6 +179,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING, IndexModule.INDEX_STORE_TYPE_SETTING, IndexModule.INDEX_STORE_PRE_LOAD_SETTING, + IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS, IndexModule.INDEX_RECOVERY_TYPE_SETTING, IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING, FsDirectoryFactory.INDEX_LOCK_FACTOR_SETTING, diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 49daf8293656c..362afb5836e90 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -147,6 +147,18 @@ public final class IndexModule { Property.NodeScope ); + /** Which lucene file extensions to load with the mmap directory when using hybridfs store. + * This is an expert setting. + * @see Lucene File Extensions. + */ + public static final Setting> INDEX_STORE_HYBRID_MMAP_EXTENSIONS = Setting.listSetting( + "index.store.hybrid.mmap.extensions", + List.of("nvd", "dvd", "tim", "tip", "dim", "kdd", "kdi", "cfs", "doc"), + Function.identity(), + Property.IndexScope, + Property.NodeScope + ); + public static final String SIMILARITY_SETTINGS_PREFIX = "index.similarity"; // whether to use the query cache diff --git a/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java index 57cc9f09ac37d..88c7e12632863 100644 --- a/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java @@ -97,9 +97,10 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index case HYBRIDFS: // Use Lucene defaults final FSDirectory primaryDirectory = FSDirectory.open(location, lockFactory); + final Set mmapExtensions = new HashSet<>(indexSettings.getValue(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS)); if (primaryDirectory instanceof MMapDirectory) { MMapDirectory mMapDirectory = (MMapDirectory) primaryDirectory; - return new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions)); + return new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions), mmapExtensions); } else { return primaryDirectory; } @@ -142,10 +143,12 @@ public static boolean isHybridFs(Directory directory) { */ static final class HybridDirectory extends NIOFSDirectory { private final MMapDirectory delegate; + private final Set mmapExtensions; - HybridDirectory(LockFactory lockFactory, MMapDirectory delegate) throws IOException { + HybridDirectory(LockFactory lockFactory, MMapDirectory delegate, Set mmapExtensions) throws IOException { super(delegate.getDirectory(), lockFactory); this.delegate = delegate; + this.mmapExtensions = mmapExtensions; } @Override @@ -164,43 +167,16 @@ public IndexInput openInput(String name, IOContext context) throws IOException { } } + boolean useDelegate(String name) { + final String extension = FileSwitchDirectory.getExtension(name); + return mmapExtensions.contains(extension); + } + @Override public void close() throws IOException { IOUtils.close(super::close, delegate); } - boolean useDelegate(String name) { - String extension = FileSwitchDirectory.getExtension(name); - switch (extension) { - // Norms, doc values and term dictionaries are typically performance-sensitive and hot in the page - // cache, so we use mmap, which provides better performance. - case "nvd": - case "dvd": - case "tim": - // We want to open the terms index and KD-tree index off-heap to save memory, but this only performs - // well if using mmap. - case "tip": - // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 - case "dim": - case "kdd": - case "kdi": - // Compound files are tricky because they store all the information for the segment. Benchmarks - // suggested that not mapping them hurts performance. - case "cfs": - // MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed - // up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we - // are not mmaping them as queries that leverage positions are more costly and the decoding of postings - // tends to be less a bottleneck. - case "doc": - return true; - // Other files are either less performance-sensitive (e.g. stored field index, norms metadata) - // or are large and have a random access pattern and mmap leads to page cache trashing - // (e.g. stored fields and term vectors). - default: - return false; - } - } - MMapDirectory getDelegate() { return delegate; } diff --git a/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java b/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java index 70d4ae790f146..cf8d6677b4227 100644 --- a/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java @@ -70,20 +70,52 @@ public void testPreload() throws IOException { try (Directory directory = newDirectory(build)) { assertTrue(FsDirectoryFactory.isHybridFs(directory)); FsDirectoryFactory.HybridDirectory hybridDirectory = (FsDirectoryFactory.HybridDirectory) directory; - assertTrue(hybridDirectory.useDelegate("foo.dvd")); + // test default hybrid mmap extensions assertTrue(hybridDirectory.useDelegate("foo.nvd")); + assertTrue(hybridDirectory.useDelegate("foo.dvd")); assertTrue(hybridDirectory.useDelegate("foo.tim")); assertTrue(hybridDirectory.useDelegate("foo.tip")); - assertTrue(hybridDirectory.useDelegate("foo.cfs")); assertTrue(hybridDirectory.useDelegate("foo.dim")); assertTrue(hybridDirectory.useDelegate("foo.kdd")); assertTrue(hybridDirectory.useDelegate("foo.kdi")); - assertFalse(hybridDirectory.useDelegate("foo.bar")); + assertTrue(hybridDirectory.useDelegate("foo.cfs")); + assertTrue(hybridDirectory.useDelegate("foo.doc")); + assertFalse(hybridDirectory.useDelegate("foo.pos")); + assertFalse(hybridDirectory.useDelegate("foo.pay")); MMapDirectory delegate = hybridDirectory.getDelegate(); assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class)); FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate; assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd")); assertTrue(preLoadMMapDirectory.useDelegate("foo.bar")); + assertFalse(preLoadMMapDirectory.useDelegate("foo.cfs")); + } + build = Settings.builder() + .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.HYBRIDFS.name().toLowerCase(Locale.ROOT)) + .putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), "nvd", "dvd", "cfs") + .putList(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS.getKey(), "nvd", "dvd", "tim", "pos", "pay") + .build(); + try (Directory directory = newDirectory(build)) { + assertTrue(FsDirectoryFactory.isHybridFs(directory)); + FsDirectoryFactory.HybridDirectory hybridDirectory = (FsDirectoryFactory.HybridDirectory) directory; + // test custom hybrid mmap extensions + assertTrue(hybridDirectory.useDelegate("foo.nvd")); + assertTrue(hybridDirectory.useDelegate("foo.dvd")); + assertTrue(hybridDirectory.useDelegate("foo.tim")); + assertFalse(hybridDirectory.useDelegate("foo.tip")); + assertFalse(hybridDirectory.useDelegate("foo.dim")); + assertFalse(hybridDirectory.useDelegate("foo.kdd")); + assertFalse(hybridDirectory.useDelegate("foo.kdi")); + assertFalse(hybridDirectory.useDelegate("foo.cfs")); + assertFalse(hybridDirectory.useDelegate("foo.doc")); + assertTrue(hybridDirectory.useDelegate("foo.pos")); + assertTrue(hybridDirectory.useDelegate("foo.pay")); + MMapDirectory delegate = hybridDirectory.getDelegate(); + assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class)); + FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate; + assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd")); + assertFalse(preLoadMMapDirectory.useDelegate("foo.bar")); + assertTrue(preLoadMMapDirectory.useDelegate("foo.cfs")); + assertTrue(preLoadMMapDirectory.useDelegate("foo.nvd")); } }