From 8b5c0b5a51ade898fcaef8839615981a25873956 Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Thu, 20 Jun 2024 12:49:13 -0700 Subject: [PATCH] Add allowlist setting for ingest-common processors Add a new static setting that lets an operator choose specific ingest processors to enable by name. The behavior is as follows: - If the allowlist setting is not defined, all installed processors are enabled. This is the status quo. - If the allowlist setting is defined as the empty set, then all processors are disabled. - If the allowlist setting contains the names of valid processors, only those processors are enabled. - If the allowlist setting contains a name of a processor that does not exist, then the server will fail to start with an IllegalStateException listing which processors were defined in the allowlist but are not installed. - If the allowlist setting is changed between server restarts then any ingest pipeline using a now-disabled processor will fail. This is the same experience if a pipeline used a processor defined by a plugin but then that plugin were to be uninstalled across restarts. Related to #14439 Signed-off-by: Andrew Ross --- CHANGELOG.md | 1 + .../common/IngestCommonModulePlugin.java | 39 ++++++- .../common/IngestCommonModulePluginTests.java | 109 ++++++++++++++++++ 3 files changed, 146 insertions(+), 3 deletions(-) create mode 100644 modules/ingest-common/src/test/java/org/opensearch/ingest/common/IngestCommonModulePluginTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index f71ba46745ef1..b31784d5ac31c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Apply the date histogram rewrite optimization to range aggregation ([#13865](https://github.com/opensearch-project/OpenSearch/pull/13865)) - [Writable Warm] Add composite directory implementation and integrate it with FileCache ([12782](https://github.com/opensearch-project/OpenSearch/pull/12782)) - Fix race condition while parsing derived fields from search definition ([14445](https://github.com/opensearch-project/OpenSearch/pull/14445)) +- Add allowlist setting for ingest-common processors ([#14439](https://github.com/opensearch-project/OpenSearch/issues/14439)) ### Dependencies - Bump `org.gradle.test-retry` from 1.5.8 to 1.5.9 ([#13442](https://github.com/opensearch-project/OpenSearch/pull/13442)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java index 162934efa6778..0e599e028368c 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/IngestCommonModulePlugin.java @@ -58,10 +58,20 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.function.Function; import java.util.function.Supplier; +import java.util.stream.Collectors; public class IngestCommonModulePlugin extends Plugin implements ActionPlugin, IngestPlugin { + static final Setting> PROCESSORS_ALLOWLIST_SETTING = Setting.listSetting( + "ingest.common.processors.allowed", + List.of(), + Function.identity(), + Setting.Property.NodeScope + ); + static final Setting WATCHDOG_INTERVAL = Setting.timeSetting( "ingest.grok.watchdog.interval", TimeValue.timeValueSeconds(1), @@ -77,7 +87,7 @@ public IngestCommonModulePlugin() {} @Override public Map getProcessors(Processor.Parameters parameters) { - Map processors = new HashMap<>(); + final Map processors = new HashMap<>(); processors.put(DateProcessor.TYPE, new DateProcessor.Factory(parameters.scriptService)); processors.put(SetProcessor.TYPE, new SetProcessor.Factory(parameters.scriptService)); processors.put(AppendProcessor.TYPE, new AppendProcessor.Factory(parameters.scriptService)); @@ -110,7 +120,7 @@ public Map getProcessors(Processor.Parameters paramet processors.put(RemoveByPatternProcessor.TYPE, new RemoveByPatternProcessor.Factory()); processors.put(CommunityIdProcessor.TYPE, new CommunityIdProcessor.Factory()); processors.put(FingerprintProcessor.TYPE, new FingerprintProcessor.Factory()); - return Collections.unmodifiableMap(processors); + return filterForAllowlistSetting(parameters.env.settings(), processors); } @Override @@ -133,7 +143,7 @@ public List getRestHandlers( @Override public List> getSettings() { - return Arrays.asList(WATCHDOG_INTERVAL, WATCHDOG_MAX_EXECUTION_TIME); + return Arrays.asList(WATCHDOG_INTERVAL, WATCHDOG_MAX_EXECUTION_TIME, PROCESSORS_ALLOWLIST_SETTING); } private static MatcherWatchdog createGrokThreadWatchdog(Processor.Parameters parameters) { @@ -147,4 +157,27 @@ private static MatcherWatchdog createGrokThreadWatchdog(Processor.Parameters par ); } + private Map filterForAllowlistSetting(Settings settings, Map map) { + if (PROCESSORS_ALLOWLIST_SETTING.exists(settings) == false) { + return Map.copyOf(map); + } + final Set allowlist = Set.copyOf(PROCESSORS_ALLOWLIST_SETTING.get(settings)); + // Assert that no unknown processors are defined in the allowlist + final Set unknownAllowlistProcessors = allowlist.stream() + .filter(p -> map.containsKey(p) == false) + .collect(Collectors.toSet()); + if (unknownAllowlistProcessors.isEmpty() == false) { + throw new IllegalArgumentException( + "Processor(s) [" + + unknownAllowlistProcessors + + "] were defined in [" + + PROCESSORS_ALLOWLIST_SETTING.getKey() + + "] but do not exist" + ); + } + return map.entrySet() + .stream() + .filter(e -> allowlist.contains(e.getKey())) + .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); + } } diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/IngestCommonModulePluginTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/IngestCommonModulePluginTests.java new file mode 100644 index 0000000000000..b0c1e0fdbaa63 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/IngestCommonModulePluginTests.java @@ -0,0 +1,109 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.ingest.common; + +import org.opensearch.common.settings.Settings; +import org.opensearch.env.TestEnvironment; +import org.opensearch.ingest.Processor; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.List; +import java.util.Set; + +public class IngestCommonModulePluginTests extends OpenSearchTestCase { + + public void testAllowlist() throws IOException { + runAllowlistTest(List.of()); + runAllowlistTest(List.of("date")); + runAllowlistTest(List.of("set")); + runAllowlistTest(List.of("copy", "date")); + runAllowlistTest(List.of("date", "set", "copy")); + } + + private void runAllowlistTest(List allowlist) throws IOException { + final Settings settings = Settings.builder() + .putList(IngestCommonModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey(), allowlist) + .build(); + try (IngestCommonModulePlugin plugin = new IngestCommonModulePlugin()) { + assertEquals(Set.copyOf(allowlist), plugin.getProcessors(createParameters(settings)).keySet()); + } + } + + public void testAllowlistNotSpecified() throws IOException { + final Settings.Builder builder = Settings.builder(); + builder.remove(IngestCommonModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey()); + final Settings settings = builder.build(); + try (IngestCommonModulePlugin plugin = new IngestCommonModulePlugin()) { + final Set expected = Set.of( + "append", + "urldecode", + "sort", + "fail", + "trim", + "set", + "fingerprint", + "pipeline", + "json", + "join", + "kv", + "bytes", + "date", + "drop", + "community_id", + "lowercase", + "convert", + "copy", + "gsub", + "dot_expander", + "rename", + "remove_by_pattern", + "html_strip", + "remove", + "csv", + "grok", + "date_index_name", + "foreach", + "script", + "dissect", + "uppercase", + "split" + ); + assertEquals(expected, plugin.getProcessors(createParameters(settings)).keySet()); + } + } + + public void testAllowlistHasNonexistentProcessors() throws IOException { + final Settings settings = Settings.builder() + .putList(IngestCommonModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey(), List.of("threeve")) + .build(); + try (IngestCommonModulePlugin plugin = new IngestCommonModulePlugin()) { + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> plugin.getProcessors(createParameters(settings)) + ); + assertTrue(e.getMessage(), e.getMessage().contains("threeve")); + } + } + + private static Processor.Parameters createParameters(Settings settings) { + return new Processor.Parameters( + TestEnvironment.newEnvironment(Settings.builder().put(settings).put("path.home", "").build()), + null, + null, + null, + () -> 0L, + (a, b) -> null, + null, + null, + $ -> {}, + null + ); + } +}