From 0953cf68ce9f16046abfff82d59f3f4cf36d9b88 Mon Sep 17 00:00:00 2001 From: Tejas Shah Date: Thu, 3 Oct 2024 17:13:25 -0700 Subject: [PATCH] Preloads .vec and .vex files LuceneFlatVectorReader uses IOContext.Random to open the read. IOContext.Random tells the kernel not to read ahead pages into physical memory. This causes an increase in merge time due to an increase in read ops at runtime. The preload setting signals the kernel to preload the files when the reader is opened. Signed-off-by: Tejas Shah --- CHANGELOG.md | 1 + .../knn/index/engine/KNNLibrary.java | 3 +- .../knn/index/engine/lucene/Lucene.java | 6 ---- .../org/opensearch/knn/plugin/KNNPlugin.java | 25 +++++++++++++ .../knn/index/engine/faiss/FaissTests.java | 9 +++++ .../knn/index/engine/nmslib/NMSLibTests.java | 21 +++++++++++ .../opensearch/knn/plugin/KNNPluginTests.java | 35 +++++++++++++++++++ 7 files changed, 92 insertions(+), 8 deletions(-) create mode 100644 src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java create mode 100644 src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index fa86cbe3f..0708814eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * Introducing a loading layer in FAISS [#2033](https://github.com/opensearch-project/k-NN/issues/2033) ### Bug Fixes * Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946) +* Preloads vec and vex files to address regression in force merge latencies [#2186](https://github.com/opensearch-project/k-NN/pull/2186) ### Infrastructure * Removed JDK 11 and 17 version from CI runs [#1921](https://github.com/opensearch-project/k-NN/pull/1921) ### Documentation diff --git a/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java b/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java index
cf7c4ad82..066665e5d 100644 --- a/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java +++ b/src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java @@ -8,7 +8,6 @@ import org.opensearch.common.ValidationException; import org.opensearch.knn.index.SpaceType; -import java.util.Collections; import java.util.List; /** @@ -137,6 +136,6 @@ KNNLibraryIndexingContext getKNNLibraryIndexingContext( * @return list of file extensions that will be read/write with mmap */ default List mmapFileExtensions() { - return Collections.emptyList(); + return List.of("vec", "vex"); } } diff --git a/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java b/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java index db516d309..294f9eb66 100644 --- a/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java +++ b/src/main/java/org/opensearch/knn/index/engine/lucene/Lucene.java @@ -15,7 +15,6 @@ import org.opensearch.knn.index.engine.MethodResolver; import org.opensearch.knn.index.engine.ResolvedMethodContext; -import java.util.List; import java.util.Map; import java.util.function.Function; @@ -89,11 +88,6 @@ public Float scoreToRadialThreshold(Float score, SpaceType spaceType) { return score; } - @Override - public List mmapFileExtensions() { - return List.of("vec", "vex"); - } - @Override public ResolvedMethodContext resolveMethod( KNNMethodContext knnMethodContext, diff --git a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java index ff079031f..ad05b6eba 100644 --- a/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java +++ b/src/main/java/org/opensearch/knn/plugin/KNNPlugin.java @@ -11,8 +11,10 @@ import org.opensearch.core.action.ActionResponse; import org.opensearch.index.codec.CodecServiceFactory; import org.opensearch.index.engine.EngineFactory; +import org.opensearch.index.shard.IndexSettingProvider; import org.opensearch.indices.SystemIndexDescriptor; import 
org.opensearch.knn.index.KNNCircuitBreaker; +import org.opensearch.knn.index.engine.KNNEngine; import org.opensearch.knn.plugin.search.KNNConcurrentSearchRequestDecider; import org.opensearch.knn.index.util.KNNClusterUtil; import org.opensearch.knn.index.query.KNNQueryBuilder; @@ -110,6 +112,7 @@ import java.util.Map; import java.util.Optional; import java.util.function.Supplier; +import java.util.stream.Collectors; import static java.util.Collections.singletonList; import static org.opensearch.knn.common.KNNConstants.KNN_THREAD_POOL_PREFIX; @@ -352,6 +355,28 @@ public Collection getSystemIndexDescriptors(Settings sett return ImmutableList.of(new SystemIndexDescriptor(MODEL_INDEX_NAME, "Index for storing models used for k-NN indices")); } + @Override + public Collection getAdditionalIndexSettingProviders() { + IndexSettingProvider preloadMmapFiles = new IndexSettingProvider() { + @Override + public Settings getAdditionalIndexSettings(String indexName, boolean isDataStreamIndex, Settings templateAndRequestSettings) { + + if (templateAndRequestSettings.getAsBoolean(KNNSettings.KNN_INDEX, Boolean.FALSE)) { + final List mmapFileExtensions = Arrays.stream(KNNEngine.values()) + .flatMap(engine -> engine.mmapFileExtensions().stream()) + .distinct() + .collect(Collectors.toList()); + + return Settings.builder().putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), mmapFileExtensions).build(); + } + + return Settings.EMPTY; + } + }; + + return List.of(preloadMmapFiles); + } + @Override public Optional getConcurrentSearchRequestDeciderFactory() { return Optional.of(new KNNConcurrentSearchRequestDecider.Factory()); diff --git a/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java b/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java index 75da6811e..34aac6232 100644 --- a/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java +++ b/src/test/java/org/opensearch/knn/index/engine/faiss/FaissTests.java @@ -23,6 +23,7 @@ import 
java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; @@ -367,4 +368,12 @@ public void testMethodAsMapBuilder() throws IOException { assertEquals(expectedKNNMethodContext.getVectorValidator(), actualKNNLibraryIndexingContext.getVectorValidator()); } + public void testMmapFileExtensions() { + final List mMapExtensions = Faiss.INSTANCE.mmapFileExtensions(); + assertNotNull(mMapExtensions); + final List expectedSettings = List.of("vex", "vec"); + assertTrue(expectedSettings.containsAll(mMapExtensions)); + assertTrue(mMapExtensions.containsAll(expectedSettings)); + } + } diff --git a/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java b/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java new file mode 100644 index 000000000..c76147d01 --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/engine/nmslib/NMSLibTests.java @@ -0,0 +1,21 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.engine.nmslib; + +import org.opensearch.knn.KNNTestCase; + +import java.util.List; + +public class NMSLibTests extends KNNTestCase { + + public void testMmapFileExtensions() { + final List mmapExtensions = Nmslib.INSTANCE.mmapFileExtensions(); + assertNotNull(mmapExtensions); + final List expectedSettings = List.of("vex", "vec"); + assertTrue(expectedSettings.containsAll(mmapExtensions)); + assertTrue(mmapExtensions.containsAll(expectedSettings)); + } +} diff --git a/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java b/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java new file mode 100644 index 000000000..1bbc22477 --- /dev/null +++ b/src/test/java/org/opensearch/knn/plugin/KNNPluginTests.java @@ -0,0 +1,35 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.plugin; + +import org.opensearch.common.settings.Settings; +import 
org.opensearch.knn.index.KNNSettings; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.List; + +public class KNNPluginTests extends OpenSearchTestCase { + + public void testKNNPlugin_additionalIndexProviderSettings() throws IOException { + try (KNNPlugin knnPlugin = new KNNPlugin()) { + Settings additionalSettings = knnPlugin.getAdditionalIndexSettingProviders() + .iterator() + .next() + .getAdditionalIndexSettings("index", false, Settings.builder().put(KNNSettings.KNN_INDEX, Boolean.TRUE).build()); + + Settings settings = Settings.builder().putList("index.store.preload", List.of("vec", "vex")).build(); + assertEquals(settings, additionalSettings); + + additionalSettings = knnPlugin.getAdditionalIndexSettingProviders() + .iterator() + .next() + .getAdditionalIndexSettings("index", false, Settings.builder().build()); + + assertEquals(Settings.EMPTY, additionalSettings); + } + } +}