diff --git a/CHANGELOG.md b/CHANGELOG.md index d6e76d946866b..f8b70e46d3d27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,103 +5,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added -- Add support for async deletion in S3BlobContainer ([#15621](https://github.com/opensearch-project/OpenSearch/pull/15621)) -- MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637)) -- [Workload Management] Add Integration Tests for Workload Management CRUD APIs ([#15955](https://github.com/opensearch-project/OpenSearch/pull/15955)) -- [Workload Management] QueryGroup resource cancellation framework changes ([#15651](https://github.com/opensearch-project/OpenSearch/pull/15651)) -- [Workload Management] Add orchestrator for wlm resiliency (QueryGroupService) ([#15925](https://github.com/opensearch-project/OpenSearch/pull/15925)) -- [Workload Management] Add QueryGroup Stats API Logic ([15777](https://github.com/opensearch-project/OpenSearch/pull/15777)) -- Fallback to Remote cluster-state on Term-Version check mismatch - ([#15424](https://github.com/opensearch-project/OpenSearch/pull/15424)) -- Implement WithFieldName interface in ValuesSourceAggregationBuilder & FieldSortBuilder ([#15916](https://github.com/opensearch-project/OpenSearch/pull/15916)) -- Add successfulSearchShardIndices in searchRequestContext ([#15967](https://github.com/opensearch-project/OpenSearch/pull/15967), [#16110](https://github.com/opensearch-project/OpenSearch/pull/16110)) -- [Tiered Caching] Segmented cache changes ([#16047](https://github.com/opensearch-project/OpenSearch/pull/16047)) -- Add support for msearch API to pass search pipeline name - ([#15923](https://github.com/opensearch-project/OpenSearch/pull/15923)) -- Add _list/indices API as paginated alternate to _cat/indices ([#14718](https://github.com/opensearch-project/OpenSearch/pull/14718)) -- Add success and failure metrics for async shard fetch ([#15976](https://github.com/opensearch-project/OpenSearch/pull/15976)) -- Add new metric REMOTE_STORE to NodeStats API response ([#15611](https://github.com/opensearch-project/OpenSearch/pull/15611)) -- Add support to dynamically resize threadpools size. ([#16236](https://github.com/opensearch-project/OpenSearch/pull/16236)) -- [S3 Repository] Change default retry mechanism of s3 clients to Standard Mode ([#15978](https://github.com/opensearch-project/OpenSearch/pull/15978)) -- Add changes to block calls in cat shards, indices and segments based on dynamic limit settings ([#15986](https://github.com/opensearch-project/OpenSearch/pull/15986)) -- New `phone` & `phone-search` analyzer + tokenizer ([#15915](https://github.com/opensearch-project/OpenSearch/pull/15915)) -- Add _list/shards API as paginated alternate to _cat/shards ([#14641](https://github.com/opensearch-project/OpenSearch/pull/14641)) - Latency and Memory allocation improvements to Multi Term Aggregation queries ([#14993](https://github.com/opensearch-project/OpenSearch/pull/14993)) -- Flat object field use IndexOrDocValuesQuery to optimize query ([#14383](https://github.com/opensearch-project/OpenSearch/issues/14383)) -- Add support for renaming aliases during snapshot restore ([#16292](https://github.com/opensearch-project/OpenSearch/pull/16292)) -- Add method to return dynamic SecureTransportParameters from SecureTransportSettingsProvider interface ([#16387](https://github.com/opensearch-project/OpenSearch/pull/16387)) -- URI path filtering support in cluster stats API ([#15938](https://github.com/opensearch-project/OpenSearch/pull/15938)) -- [Star Tree - Search] Add support for metric aggregations with/without term query ([15289](https://github.com/opensearch-project/OpenSearch/pull/15289)) - Add support for restoring from snapshot with search replicas ([#16111](https://github.com/opensearch-project/OpenSearch/pull/16111)) +- Add logic in master service to optimize performance and retain detailed logging for critical cluster operations. ([#14795](https://github.com/opensearch-project/OpenSearch/pull/14795)) - Add Setting to adjust the primary constraint weights ([#16471](https://github.com/opensearch-project/OpenSearch/pull/16471)) +- Switch from `buildSrc/version.properties` to Gradle version catalog (`gradle/libs.versions.toml`) to enable dependabot to perform automated upgrades on common libs ([#16284](https://github.com/opensearch-project/OpenSearch/pull/16284)) ### Dependencies -- Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578)) -- Bump `protobuf` from 3.22.3 to 3.25.4 ([#15684](https://github.com/opensearch-project/OpenSearch/pull/15684)) -- Bump `org.apache.logging.log4j:log4j-core` from 2.23.1 to 2.24.1 ([#15858](https://github.com/opensearch-project/OpenSearch/pull/15858), [#16134](https://github.com/opensearch-project/OpenSearch/pull/16134)) -- Bump `peter-evans/create-pull-request` from 6 to 7 ([#15863](https://github.com/opensearch-project/OpenSearch/pull/15863)) -- Bump `com.nimbusds:oauth2-oidc-sdk` from 11.9.1 to 11.19.1 ([#15862](https://github.com/opensearch-project/OpenSearch/pull/15862)) -- Bump `com.microsoft.azure:msal4j` from 1.17.0 to 1.17.2 ([#15945](https://github.com/opensearch-project/OpenSearch/pull/15945), [#16406](https://github.com/opensearch-project/OpenSearch/pull/16406)) -- Bump `ch.qos.logback:logback-core` from 1.5.6 to 1.5.10 ([#15946](https://github.com/opensearch-project/OpenSearch/pull/15946), [#16307](https://github.com/opensearch-project/OpenSearch/pull/16307)) -- Update protobuf from 3.25.4 to 3.25.5 ([#16011](https://github.com/opensearch-project/OpenSearch/pull/16011)) -- Bump `org.roaringbitmap:RoaringBitmap` from 1.2.1 to 1.3.0 ([#16040](https://github.com/opensearch-project/OpenSearch/pull/16040)) -- Bump `com.nimbusds:nimbus-jose-jwt` from 9.40 to 9.41.1 ([#16038](https://github.com/opensearch-project/OpenSearch/pull/16038)) -- Bump `actions/github-script` from 5 to 7 ([#16039](https://github.com/opensearch-project/OpenSearch/pull/16039)) -- Bump `dnsjava:dnsjava` from 3.6.1 to 3.6.2 ([#16041](https://github.com/opensearch-project/OpenSearch/pull/16041)) -- Bump `com.maxmind.geoip2:geoip2` from 4.2.0 to 4.2.1 ([#16042](https://github.com/opensearch-project/OpenSearch/pull/16042)) -- Bump `com.maxmind.db:maxmind-db` from 3.1.0 to 3.1.1 ([#16137](https://github.com/opensearch-project/OpenSearch/pull/16137)) -- Bump Apache lucene from 9.11.1 to 9.12.0 ([#15333](https://github.com/opensearch-project/OpenSearch/pull/15333)) -- Bump `com.azure:azure-core-http-netty` from 1.15.3 to 1.15.5 ([#16133](https://github.com/opensearch-project/OpenSearch/pull/16133), [#16311](https://github.com/opensearch-project/OpenSearch/pull/16311)) -- Bump `org.jline:jline` from 3.26.3 to 3.27.0 ([#16135](https://github.com/opensearch-project/OpenSearch/pull/16135)) -- Bump `netty` from 4.1.112.Final to 4.1.114.Final ([#16182](https://github.com/opensearch-project/OpenSearch/pull/16182)) -- Bump `com.google.api-client:google-api-client` from 2.2.0 to 2.7.0 ([#16216](https://github.com/opensearch-project/OpenSearch/pull/16216)) -- Bump `com.azure:azure-json` from 1.1.0 to 1.3.0 ([#16217](https://github.com/opensearch-project/OpenSearch/pull/16217)) -- Bump `io.grpc:grpc-api` from 1.57.2 to 1.68.0 ([#16213](https://github.com/opensearch-project/OpenSearch/pull/16213)) -- Bump `com.squareup.okio:okio` from 3.9.0 to 3.9.1 ([#16212](https://github.com/opensearch-project/OpenSearch/pull/16212)) -- Bump `me.champeau.gradle.japicmp` from 0.4.3 to 0.4.4 ([#16309](https://github.com/opensearch-project/OpenSearch/pull/16309)) -- Bump `com.google.oauth-client:google-oauth-client` from 1.35.0 to 1.36.0 ([#16306](https://github.com/opensearch-project/OpenSearch/pull/16306)) -- Bump `lycheeverse/lychee-action` from 1.10.0 to 2.0.2 ([#16310](https://github.com/opensearch-project/OpenSearch/pull/16310)) -- Bump `com.google.code.gson:gson` from 2.10.1 to 2.11.0 ([#16308](https://github.com/opensearch-project/OpenSearch/pull/16308)) ### Changed -- Add support for docker compose v2 in TestFixturesPlugin ([#16049](https://github.com/opensearch-project/OpenSearch/pull/16049)) -- Remove identity-related feature flagged code from the RestController ([#15430](https://github.com/opensearch-project/OpenSearch/pull/15430)) -- Remove Identity FeatureFlag ([#16024](https://github.com/opensearch-project/OpenSearch/pull/16024)) -- Ensure RestHandler.Wrapper delegates all implementations to the wrapped handler ([#16154](https://github.com/opensearch-project/OpenSearch/pull/16154)) -- Enable coordinator search.request_stats_enabled by default ([#16290](https://github.com/opensearch-project/OpenSearch/pull/16290)) -- Code cleanup: Remove ApproximateIndexOrDocValuesQuery ([#16273](https://github.com/opensearch-project/OpenSearch/pull/16273)) -- Optimise clone operation for incremental full cluster snapshots ([#16296](https://github.com/opensearch-project/OpenSearch/pull/16296)) -- Update last seen cluster state in the commit phase ([#16215](https://github.com/opensearch-project/OpenSearch/pull/16215)) -- Make multiple settings dynamic for tuning on larger clusters([#16347](https://github.com/opensearch-project/OpenSearch/pull/16347)) ### Deprecated ### Removed ### Fixed -- Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737)) -- Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882)) -- Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501)) -- Fix search_as_you_type not supporting multi-fields ([#15988](https://github.com/opensearch-project/OpenSearch/pull/15988)) -- Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985)) -- Fix infinite loop in nested agg ([#15931](https://github.com/opensearch-project/OpenSearch/pull/15931)) -- Fix update settings with null replica not honoring cluster setting bug ([#14948](https://github.com/opensearch-project/OpenSearch/pull/14948)) -- Fix race condition in node-join and node-left ([#15521](https://github.com/opensearch-project/OpenSearch/pull/15521)) -- Streaming bulk request hangs ([#16158](https://github.com/opensearch-project/OpenSearch/pull/16158)) -- Fix warnings from SLF4J on startup when repository-s3 is installed ([#16194](https://github.com/opensearch-project/OpenSearch/pull/16194)) -- Fix protobuf-java leak through client library dependencies ([#16254](https://github.com/opensearch-project/OpenSearch/pull/16254)) - Fix get index settings API doesn't show `number_of_routing_shards` setting when it was explicitly set ([#16294](https://github.com/opensearch-project/OpenSearch/pull/16294)) -- Fix multi-search with template doesn't return status code ([#16265](https://github.com/opensearch-project/OpenSearch/pull/16265)) -- [Streaming Indexing] Fix intermittent 'The bulk request must be terminated by a newline [\n]' failures [#16337](https://github.com/opensearch-project/OpenSearch/pull/16337)) -- Fix wrong default value when setting `index.number_of_routing_shards` to null on index creation ([#16331](https://github.com/opensearch-project/OpenSearch/pull/16331)) -- [Workload Management] Make query groups persistent across process restarts ([#16370](https://github.com/opensearch-project/OpenSearch/pull/16370)) -- [Workload Management] Enhance rejection mechanism in workload management ([#16417](https://github.com/opensearch-project/OpenSearch/pull/16417)) -- Fix inefficient Stream API call chains ending with count() ([#15386](https://github.com/opensearch-project/OpenSearch/pull/15386)) -- Fix array hashCode calculation in ResyncReplicationRequest ([#16378](https://github.com/opensearch-project/OpenSearch/pull/16378)) -- Fix missing fields in task index mapping to ensure proper task result storage ([#16201](https://github.com/opensearch-project/OpenSearch/pull/16201)) -- Fix typo super->sb in method toString() of RemoteStoreNodeAttribute ([#15362](https://github.com/opensearch-project/OpenSearch/pull/15362)) -- [Workload Management] Fixing Create/Update QueryGroup TransportActions to execute from non-cluster manager nodes ([16422](https://github.com/opensearch-project/OpenSearch/pull/16422)) -- Fix flaky test in `testApproximateRangeWithSizeOverDefault` by adjusting totalHits assertion logic ([#16434](https://github.com/opensearch-project/OpenSearch/pull/16434#pullrequestreview-2386999409)) - Revert changes to upload remote state manifest using minimum codec version([#16403](https://github.com/opensearch-project/OpenSearch/pull/16403)) ### Security -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.17...2.x +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.18...2.x diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index aaa1d8bdef85c..c62f20e106e8c 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -57,7 +57,7 @@ if (project == rootProject) { // we update the version property to reflect if we are building a snapshot or a release build // we write this back out below to load it in the Build.java which will be shown in rest main action // to indicate this being a snapshot build or a release build. -Properties props = VersionPropertiesLoader.loadBuildSrcVersion(project.file('version.properties')) +Properties props = VersionPropertiesLoader.loadBuildSrcVersion(project) version = props.getProperty("opensearch") def generateVersionProperties = tasks.register("generateVersionProperties", WriteProperties) { @@ -287,15 +287,19 @@ if (project != rootProject) { } } -// Define this here because we need it early. +// Define this here because we need it early. It uses VersionCatalogsExtension to extract all versions +// and converts to a Java Properties object class VersionPropertiesLoader { - static Properties loadBuildSrcVersion(File input) throws IOException { + static Properties loadBuildSrcVersion(Project project) throws IOException { Properties props = new Properties(); - InputStream is = new FileInputStream(input) - try { - props.load(is) - } finally { - is.close() + + var catalogs = project.extensions.getByType(VersionCatalogsExtension) + var libs = catalogs.named("libs") + libs.getVersionAliases().forEach { + libs.findVersion(it).ifPresent { v -> + // Gradle replaces '_' with '.' so 'google_http_client' becomes 'google.http.client' instead + props.setProperty(it.replaceAll("[.]", "_"), v.requiredVersion) + } } loadBuildSrcVersion(props, System.getProperties()) return props diff --git a/buildSrc/settings.gradle b/buildSrc/settings.gradle index 963177afaa2de..661137fbadae2 100644 --- a/buildSrc/settings.gradle +++ b/buildSrc/settings.gradle @@ -10,3 +10,11 @@ */ include 'reaper' + +dependencyResolutionManagement { + versionCatalogs { + libs { + from(files("../gradle/libs.versions.toml")) + } + } +} diff --git a/buildSrc/version.properties b/buildSrc/version.properties index f9a8bee5783b1..61f4fbbf10b1d 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -1,79 +1,2 @@ +# Please use ../gradle/libs.versions.toml for dependency management opensearch = 3.0.0 -lucene = 9.12.0 - -bundled_jdk_vendor = adoptium -bundled_jdk = 23.0.1+11 - -# optional dependencies -spatial4j = 0.7 -jts = 1.15.0 -jackson = 2.17.2 -jackson_databind = 2.17.2 -snakeyaml = 2.1 -icu4j = 75.1 -supercsv = 2.4.0 -log4j = 2.21.0 -slf4j = 1.7.36 -asm = 9.7 -jettison = 1.5.4 -woodstox = 6.4.0 -kotlin = 1.7.10 -antlr4 = 4.13.1 -guava = 32.1.1-jre -protobuf = 3.25.5 -jakarta_annotation = 1.3.5 -google_http_client = 1.44.1 -tdigest = 3.3 -hdrhistogram = 2.2.2 -grpc = 1.68.0 - -# when updating the JNA version, also update the version in buildSrc/build.gradle -jna = 5.13.0 - -netty = 4.1.114.Final -joda = 2.12.7 - -# project reactor -reactor_netty = 1.1.23 -reactor = 3.5.20 - -# client dependencies -httpclient5 = 5.3.1 -httpcore5 = 5.2.5 -httpclient = 4.5.14 -httpcore = 4.4.16 -httpasyncclient = 4.1.5 -commonslogging = 1.2 -commonscodec = 1.16.1 -commonslang = 3.14.0 -commonscompress = 1.26.1 -commonsio = 2.16.0 -# plugin dependencies -aws = 2.20.86 -reactivestreams = 1.0.4 - -# when updating this version, you need to ensure compatibility with: -# - plugins/ingest-attachment (transitive dependency, check the upstream POM) -# - distribution/tools/plugin-cli -bouncycastle=1.78 -# test dependencies -randomizedrunner = 2.7.1 -junit = 4.13.2 -hamcrest = 2.1 -mockito = 5.14.1 -objenesis = 3.2 -bytebuddy = 1.15.4 - -# benchmark dependencies -jmh = 1.35 - -# compression -zstd = 1.5.5-5 - -jzlib = 1.1.3 - -resteasy = 6.2.4.Final - -# opentelemetry dependencies -opentelemetry = 1.41.0 -opentelemetrysemconv = 1.27.0-alpha diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml new file mode 100644 index 0000000000000..8ff3f6e45397d --- /dev/null +++ b/gradle/libs.versions.toml @@ -0,0 +1,80 @@ +[versions] +opensearch = "3.0.0" +lucene = "9.12.0" + +bundled_jdk_vendor = "adoptium" +bundled_jdk = "23.0.1+11" + +# optional dependencies +spatial4j = "0.7" +jts = "1.15.0" +jackson = "2.17.2" +jackson_databind = "2.17.2" +snakeyaml = "2.1" +icu4j = "75.1" +supercsv = "2.4.0" +log4j = "2.21.0" +slf4j = "1.7.36" +asm = "9.7" +jettison = "1.5.4" +woodstox = "6.4.0" +kotlin = "1.7.10" +antlr4 = "4.13.1" +guava = "32.1.1-jre" +protobuf = "3.25.5" +jakarta_annotation = "1.3.5" +google_http_client = "1.44.1" +tdigest = "3.3" +hdrhistogram = "2.2.2" +grpc = "1.68.0" + +# when updating the JNA version, also update the version in buildSrc/build.gradle +jna = "5.13.0" + +netty = "4.1.114.Final" +joda = "2.12.7" + +# project reactor +reactor_netty = "1.1.23" +reactor = "3.5.20" + +# client dependencies +httpclient5 = "5.3.1" +httpcore5 = "5.2.5" +httpclient = "4.5.14" +httpcore = "4.4.16" +httpasyncclient = "4.1.5" +commonslogging = "1.2" +commonscodec = "1.16.1" +commonslang = "3.14.0" +commonscompress = "1.26.1" +commonsio = "2.16.0" +# plugin dependencies +aws = "2.20.86" +reactivestreams = "1.0.4" + +# when updating this version, you need to ensure compatibility with: +# - plugins/ingest-attachment (transitive dependency, check the upstream POM) +# - distribution/tools/plugin-cli +bouncycastle="1.78" +# test dependencies +randomizedrunner = "2.7.1" +junit = "4.13.2" +hamcrest = "2.1" +mockito = "5.14.1" +objenesis = "3.2" +bytebuddy = "1.15.4" + +# benchmark dependencies +jmh = "1.35" + +# compression +zstd = "1.5.5-5" + +jzlib = "1.1.3" + +resteasy = "6.2.4.Final" + +# opentelemetry dependencies +opentelemetry = "1.41.0" +opentelemetrysemconv = "1.27.0-alpha" diff --git a/release-notes/opensearch.release-notes-2.18.0.md b/release-notes/opensearch.release-notes-2.18.0.md new file mode 100644 index 0000000000000..f0dc1d06acdcb --- /dev/null +++ b/release-notes/opensearch.release-notes-2.18.0.md @@ -0,0 +1,92 @@ +## 2024-10-28 Version 2.18.0 Release Notes + +## [2.18.0] +### Added +- [Workload Management] Add orchestrator for wlm resiliency (QueryGroupService) ([#15925](https://github.com/opensearch-project/OpenSearch/pull/15925)) +- [Offline Nodes] Adds offline-tasks library containing various interfaces to be used for Offline Background Tasks. ([#13574](https://github.com/opensearch-project/OpenSearch/pull/13574)) +- Add support for async deletion in S3BlobContainer ([#15621](https://github.com/opensearch-project/OpenSearch/pull/15621)) +- [Workload Management] QueryGroup resource cancellation framework changes ([#15651](https://github.com/opensearch-project/OpenSearch/pull/15651)) +- [Workload Management] Add QueryGroup Stats API Logic ([15777](https://github.com/opensearch-project/OpenSearch/pull/15777)) +- Implement WithFieldName interface in ValuesSourceAggregationBuilder & FieldSortBuilder ([#15916](https://github.com/opensearch-project/OpenSearch/pull/15916)) +- Add successfulSearchShardIndices in searchRequestContext ([#15967](https://github.com/opensearch-project/OpenSearch/pull/15967), [#16110](https://github.com/opensearch-project/OpenSearch/pull/16110)) +- Fallback to Remote cluster-state on Term-Version check mismatch - ([#15424](https://github.com/opensearch-project/OpenSearch/pull/15424)) +- [Tiered Caching] Segmented cache changes ([#16047](https://github.com/opensearch-project/OpenSearch/pull/16047)) +- Add support for msearch API to pass search pipeline name - ([#15923](https://github.com/opensearch-project/OpenSearch/pull/15923)) +- Add success and failure metrics for async shard fetch ([#15976](https://github.com/opensearch-project/OpenSearch/pull/15976)) +- Add support to dynamically resize threadpools size. ([#16236](https://github.com/opensearch-project/OpenSearch/pull/16236)) +- [S3 Repository] Change default retry mechanism of s3 clients to Standard Mode ([#15978](https://github.com/opensearch-project/OpenSearch/pull/15978)) +- [Workload Management] Add Integration Tests for Workload Management CRUD APIs ([#15955](https://github.com/opensearch-project/OpenSearch/pull/15955)) +- Add new metric REMOTE_STORE to NodeStats API response ([#15611](https://github.com/opensearch-project/OpenSearch/pull/15611)) +- New `phone` & `phone-search` analyzer + tokenizer ([#15915](https://github.com/opensearch-project/OpenSearch/pull/15915)) +- Add _list/indices API as paginated alternate to _cat/indices ([#14718](https://github.com/opensearch-project/OpenSearch/pull/14718)) +- Add changes to block calls in cat shards, indices and segments based on dynamic limit settings ([#15986](https://github.com/opensearch-project/OpenSearch/pull/15986)) +- Flat object field use IndexOrDocValuesQuery to optimize query ([#14383](https://github.com/opensearch-project/OpenSearch/issues/14383)) +- Add method to return dynamic SecureTransportParameters from SecureTransportSettingsProvider interface ([#16387](https://github.com/opensearch-project/OpenSearch/pull/16387) +- Add _list/shards API as paginated alternate to _cat/shards ([#14641](https://github.com/opensearch-project/OpenSearch/pull/14641)) +- [Star Tree - Search] Add support for metric aggregations with/without term query ([15289](https://github.com/opensearch-project/OpenSearch/pull/15289)) +- URI path filtering support in cluster stats API ([#15938](https://github.com/opensearch-project/OpenSearch/pull/15938)) +- Add support for renaming aliases during snapshot restore ([#16292](https://github.com/opensearch-project/OpenSearch/pull/16292)) + +### Dependencies +- Bump `org.apache.logging.log4j:log4j-core` from 2.23.1 to 2.24.0 ([#15858](https://github.com/opensearch-project/OpenSearch/pull/15858)) +- Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578)) +- Bump `protobuf` from 3.22.3 to 3.25.4 ([#15684](https://github.com/opensearch-project/OpenSearch/pull/15684)) +- Bump `peter-evans/create-pull-request` from 6 to 7 ([#15863](https://github.com/opensearch-project/OpenSearch/pull/15863)) +- Bump `com.nimbusds:oauth2-oidc-sdk` from 11.9.1 to 11.19.1 ([#15862](https://github.com/opensearch-project/OpenSearch/pull/15862)) +- Bump `com.microsoft.azure:msal4j` from 1.17.0 to 1.17.2 ([#15945](https://github.com/opensearch-project/OpenSearch/pull/15945), [#16406](https://github.com/opensearch-project/OpenSearch/pull/16406)) +- Bump `ch.qos.logback:logback-core` from 1.5.6 to 1.5.10 ([#15946](https://github.com/opensearch-project/OpenSearch/pull/15946), [#16307](https://github.com/opensearch-project/OpenSearch/pull/16307)) +- Update protobuf from 3.25.4 to 3.25.5 ([#16011](https://github.com/opensearch-project/OpenSearch/pull/16011)) +- Bump `org.roaringbitmap:RoaringBitmap` from 1.2.1 to 1.3.0 ([#16040](https://github.com/opensearch-project/OpenSearch/pull/16040)) +- Bump `com.nimbusds:nimbus-jose-jwt` from 9.40 to 9.41.1 ([#16038](https://github.com/opensearch-project/OpenSearch/pull/16038)) +- Bump `actions/github-script` from 5 to 7 ([#16039](https://github.com/opensearch-project/OpenSearch/pull/16039)) +- Bump `dnsjava:dnsjava` from 3.6.1 to 3.6.2 ([#16041](https://github.com/opensearch-project/OpenSearch/pull/16041)) +- Bump `com.maxmind.geoip2:geoip2` from 4.2.0 to 4.2.1 ([#16042](https://github.com/opensearch-project/OpenSearch/pull/16042)) +- Bump `com.maxmind.db:maxmind-db` from 3.1.0 to 3.1.1 ([#16137](https://github.com/opensearch-project/OpenSearch/pull/16137)) +- Bump Apache lucene from 9.11.1 to 9.12.0 ([#15333](https://github.com/opensearch-project/OpenSearch/pull/15333)) +- Bump `com.azure:azure-core-http-netty` from 1.15.3 to 1.15.5 ([#16133](https://github.com/opensearch-project/OpenSearch/pull/16133), [#16311](https://github.com/opensearch-project/OpenSearch/pull/16311)) +- Bump `netty` from 4.1.112.Final to 4.1.114.Final ([#16182](https://github.com/opensearch-project/OpenSearch/pull/16182)) +- Bump `com.google.api-client:google-api-client` from 2.2.0 to 2.7.0 ([#16216](https://github.com/opensearch-project/OpenSearch/pull/16216)) +- Bump `com.azure:azure-json` from 1.1.0 to 1.3.0 ([#16217](https://github.com/opensearch-project/OpenSearch/pull/16217)) +- Bump `io.grpc:grpc-api` from 1.57.2 to 1.68.0 ([#16213](https://github.com/opensearch-project/OpenSearch/pull/16213)) +- Bump `org.jline:jline` from 3.26.3 to 3.27.0 ([#16135](https://github.com/opensearch-project/OpenSearch/pull/16135)) +- Bump `com.squareup.okio:okio` from 3.9.0 to 3.9.1 ([#16212](https://github.com/opensearch-project/OpenSearch/pull/16212)) +- Bump `lycheeverse/lychee-action` from 1.10.0 to 2.0.2 ([#16310](https://github.com/opensearch-project/OpenSearch/pull/16310)) +- Bump `com.google.code.gson:gson` from 2.10.1 to 2.11.0 ([#16308](https://github.com/opensearch-project/OpenSearch/pull/16308)) +- Bump `io.grpc:grpc-api` from 1.57.2 to 1.68.0 ([#16213](https://github.com/opensearch-project/OpenSearch/pull/16213)) +- Bump `me.champeau.gradle.japicmp` from 0.4.3 to 0.4.4 ([#16309](https://github.com/opensearch-project/OpenSearch/pull/16309)) +- Bump `com.google.oauth-client:google-oauth-client` from 1.35.0 to 1.36.0 ([#16306](https://github.com/opensearch-project/OpenSearch/pull/16306)) + +### Changed +- Add support for docker compose v2 in TestFixturesPlugin ([#16049](https://github.com/opensearch-project/OpenSearch/pull/16049)) +- Remove identity-related feature flagged code from the RestController ([#15430](https://github.com/opensearch-project/OpenSearch/pull/15430)) +- Remove Identity FeatureFlag ([#16024](https://github.com/opensearch-project/OpenSearch/pull/16024)) +- Ensure RestHandler.Wrapper delegates all implementations to the wrapped handler ([#16154](https://github.com/opensearch-project/OpenSearch/pull/16154)) +- Enable coordinator search.request_stats_enabled by default ([#16290](https://github.com/opensearch-project/OpenSearch/pull/16290)) +- Optimise clone operation for incremental full cluster snapshots ([#16296](https://github.com/opensearch-project/OpenSearch/pull/16296)) +- Code cleanup: Remove ApproximateIndexOrDocValuesQuery ([#16273](https://github.com/opensearch-project/OpenSearch/pull/16273)) +- Update last seen cluster state in the commit phase ([#16215](https://github.com/opensearch-project/OpenSearch/pull/16215)) +- Make multiple settings dynamic for tuning on larger clusters([#16347](https://github.com/opensearch-project/OpenSearch/pull/16347)) + +### Fixed +- Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737)) +- Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882)) +- Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501)) +- Fix search_as_you_type not supporting multi-fields ([#15988](https://github.com/opensearch-project/OpenSearch/pull/15988)) +- Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985)) +- Fix infinite loop in nested agg ([#15931](https://github.com/opensearch-project/OpenSearch/pull/15931)) +- Fix update settings with null replica not honoring cluster setting bug ([#14948](https://github.com/opensearch-project/OpenSearch/pull/14948)) +- Fix race condition in node-join and node-left ([#15521](https://github.com/opensearch-project/OpenSearch/pull/15521)) +- Streaming bulk request hangs ([#16158](https://github.com/opensearch-project/OpenSearch/pull/16158)) +- Fix warnings from SLF4J on startup when repository-s3 is installed ([#16194](https://github.com/opensearch-project/OpenSearch/pull/16194)) +- Fix protobuf-java leak through client library dependencies ([#16254](https://github.com/opensearch-project/OpenSearch/pull/16254)) +- Fix multi-search with template doesn't return status code ([#16265](https://github.com/opensearch-project/OpenSearch/pull/16265)) +- [Streaming Indexing] Fix intermittent 'The bulk request must be terminated by a newline [\n]' failures [#16337](https://github.com/opensearch-project/OpenSearch/pull/16337)) +- Fix wrong default value when setting `index.number_of_routing_shards` to null on index creation ([#16331](https://github.com/opensearch-project/OpenSearch/pull/16331)) +- Fix inefficient Stream API call chains ending with count() ([#15386](https://github.com/opensearch-project/OpenSearch/pull/15386)) +- [Workload Management] Make query groups persistent across process restarts [#16370](https://github.com/opensearch-project/OpenSearch/pull/16370) +- Fix missing fields in task index mapping to ensure proper task result storage ([#16201](https://github.com/opensearch-project/OpenSearch/pull/16201)) +- Fix typo super->sb in method toString() of RemoteStoreNodeAttribute ([#15362](https://github.com/opensearch-project/OpenSearch/pull/15362)) +- Fix array hashCode calculation in ResyncReplicationRequest ([#16378](https://github.com/opensearch-project/OpenSearch/pull/16378)) +- [Workload Management] Enhance rejection mechanism in workload management ([#16417](https://github.com/opensearch-project/OpenSearch/pull/16417)) +- [Workload Management] Fixing Create/Update QueryGroup TransportActions to execute from non-cluster manager nodes ([16422](https://github.com/opensearch-project/OpenSearch/pull/16422)) +- Fix flaky test in `testApproximateRangeWithSizeOverDefault` by adjusting totalHits assertion logic ([#16434](https://github.com/opensearch-project/OpenSearch/pull/16434#pullrequestreview-2386999409)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.get_settings/40_number_of_routing_shards.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.get_settings/40_number_of_routing_shards.yml index 3fb392d6db134..3c86678a690b8 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.get_settings/40_number_of_routing_shards.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.get_settings/40_number_of_routing_shards.yml @@ -22,8 +22,8 @@ setup: --- Test retrieval of number_routing_shards settings: - skip: - version: " - 2.99.99" - reason: "introduced in 3.0.0" # TODO: change it to 2.18.0 after backport to 2.x branch + version: " - 2.18.99" + reason: "introduced in 2.19.0" - do: indices.get_settings: flat_settings: true diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/RareClusterStateIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/RareClusterStateIT.java index b3cb15d028090..cc0264f375103 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/RareClusterStateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/RareClusterStateIT.java @@ -40,6 +40,7 @@ import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.action.shard.ShardStateAction; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.MappingMetadata; @@ -48,29 +49,39 @@ import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionResponse; import org.opensearch.core.index.Index; +import org.opensearch.core.transport.TransportResponse; import org.opensearch.discovery.Discovery; import org.opensearch.index.IndexService; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.MapperService; import org.opensearch.indices.IndicesService; +import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.disruption.BlockClusterStateProcessing; +import org.opensearch.test.transport.MockTransportService; +import org.opensearch.transport.TransportService; import org.opensearch.transport.TransportSettings; +import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; import static org.opensearch.action.DocWriteResponse.Result.CREATED; +import static org.opensearch.cluster.action.shard.ShardStateAction.SHARD_STARTED_ACTION_NAME; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.equalTo; @@ -409,4 +420,172 @@ public void testDelayedMappingPropagationOnReplica() throws Exception { assertThat(dynamicMappingsFut.get(10, TimeUnit.SECONDS).getResult(), equalTo(CREATED)); } + public void testDisassociateNodesWhileShardInit() throws InterruptedException { + final String clusterManagerName = internalCluster().startClusterManagerOnlyNode( + Settings.builder() + .put(TransportSettings.CONNECT_TIMEOUT.getKey(), "1s") + .put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true) + .build() + ); + internalCluster().startDataOnlyNode(Settings.builder().put(TransportSettings.CONNECT_TIMEOUT.getKey(), "1s").build()); + internalCluster().startDataOnlyNode(Settings.builder().put(TransportSettings.CONNECT_TIMEOUT.getKey(), "1s").build()); + String node2 = internalCluster().startDataOnlyNode( + Settings.builder().put(TransportSettings.CONNECT_TIMEOUT.getKey(), "1s").build() + ); + + final ClusterService clusterService = internalCluster().clusterService(clusterManagerName); + blockShardStartedResponse(clusterManagerName, clusterService); + + final String index = "index"; + + // create index with 3 primary and 1 replica each + prepareCreate(index).setSettings( + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 3).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + // .put(INDEX_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), "primaries") + ).get(); + ensureGreen(index); + + // close to have some unassigned started shards shards.. + client().admin().indices().prepareClose(index).get(); + + // block so that replicas are always in init and not started + blockReplicaStart.set(true); + final AllocationService allocationService = internalCluster().getInstance(AllocationService.class, clusterManagerName); + clusterService.submitStateUpdateTask("test-delete-node-and-reroute", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + ClusterState.Builder builder = ClusterState.builder(currentState); + // open index + final IndexMetadata indexMetadata = IndexMetadata.builder(currentState.metadata().index(index)) + .state(IndexMetadata.State.OPEN) + .build(); + + builder.metadata(Metadata.builder(currentState.metadata()).put(indexMetadata, true)); + builder.blocks(ClusterBlocks.builder().blocks(currentState.blocks()).removeIndexBlocks(index)); + ClusterState updatedState = builder.build(); + RoutingTable.Builder routingTable = RoutingTable.builder(updatedState.routingTable()); + routingTable.addAsRecovery(updatedState.metadata().index(index)); + updatedState = ClusterState.builder(updatedState).routingTable(routingTable.build()).build(); + ClusterState state = allocationService.reroute(updatedState, "reroute"); + return state; + } + + @Override + public void onFailure(String source, Exception e) { + logger.error(e.getMessage(), e); + } + }); + + ensureYellow(index); + assertTrue(waitUntil(() -> { + ClusterState state = clusterService.state(); + return state.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size() == 3; + + })); + + logger.info("Initializing shards"); + logger.info(clusterService.state().getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING)); + + // trigger 2nd reroute after shard in initialized + clusterService.submitStateUpdateTask("test-delete-node-and-reroute", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + return allocationService.reroute(currentState, "reroute"); + } + + @Override + public void onFailure(String source, Exception e) {} + }); + + ensureYellow(index); + assertTrue(waitUntil(() -> clusterService.state().getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size() == 3)); + clusterService.submitStateUpdateTask("test-remove-injected-node", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) throws Exception { + // remove the primary node of replica shard which is in init + ShardRouting next = currentState.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0); + ShardRouting primaryShard = currentState.getRoutingNodes().activePrimary(next.shardId()); + + ClusterState.Builder builder = ClusterState.builder(currentState); + builder.nodes(DiscoveryNodes.builder(currentState.nodes()).remove(primaryShard.currentNodeId())); + currentState = builder.build(); + logger.info("removed the node {}", primaryShard.currentNodeId()); + logger.info("shard {}", next); + ClusterState state = allocationService.disassociateDeadNodes(currentState, true, "reroute"); + return state; + } + + @Override + public void onFailure(String source, Exception e) {} + }); + assertTrue(waitUntil(() -> { + ClusterState state = clusterService.state(); + logger.info("current state {} ", state); + return clusterService.state().nodes().getSize() == 3; + + })); + + logger.info(clusterService.state().getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING)); + blockReplicaStart.set(false); + + clusterService.submitStateUpdateTask("test-inject-node-and-reroute", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + ClusterState.Builder builder = ClusterState.builder(currentState); + final IndexMetadata indexMetadata = IndexMetadata.builder(currentState.metadata().index(index)) + .state(IndexMetadata.State.OPEN) + .build(); + builder.metadata(Metadata.builder(currentState.metadata()).put(indexMetadata, true)); + builder.blocks(ClusterBlocks.builder().blocks(currentState.blocks()).removeIndexBlocks(index)); + ClusterState updatedState = builder.build(); + RoutingTable.Builder routingTable = RoutingTable.builder(updatedState.routingTable()); + routingTable.addAsRecovery(updatedState.metadata().index(index)); + updatedState = ClusterState.builder(updatedState).routingTable(routingTable.build()).build(); + + return allocationService.reroute(updatedState, "reroute"); + } + + @Override + public void onFailure(String source, Exception e) {} + }); + + ensureGreen(index); + } + + AtomicBoolean blockReplicaStart = new AtomicBoolean(false); + + private void blockShardStartedResponse(String master, ClusterService service) { + MockTransportService primaryService = (MockTransportService) internalCluster().getInstance(TransportService.class, master); + primaryService.addRequestHandlingBehavior(SHARD_STARTED_ACTION_NAME, (handler, request, channel, task) -> { + + if (blockReplicaStart.get()) { + ShardStateAction.StartedShardEntry req = (ShardStateAction.StartedShardEntry) request; + String stringRep = req.toString(); + logger.info("ShardStateAction.StartedShardEntry {}", stringRep); + + String incomingRequest = req.toString(); + Optional matchReplica = service.state() + .routingTable() + .allShardsSatisfyingPredicate(r -> !r.primary()) + .getShardRoutings() + .stream() + .filter(r -> r.allocationId() != null) + .filter(r -> incomingRequest.contains(r.allocationId().getId())) + .findAny(); + + if (matchReplica.isPresent()) { + channel.sendResponse(TransportResponse.Empty.INSTANCE); + } else { + handler.messageReceived(request, channel, task); + } + } else { + handler.messageReceived(request, channel, task); + } + }); + } + + @Override + protected Collection> nodePlugins() { + return List.of(MockTransportService.TestPlugin.class); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemotePublicationConfigurationIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemotePublicationConfigurationIT.java index 98859f517aad4..57bf9eccbf5b4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemotePublicationConfigurationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemotePublicationConfigurationIT.java @@ -14,6 +14,7 @@ import org.opensearch.remotemigration.MigrationBaseTestCase; import org.opensearch.remotestore.multipart.mocks.MockFsRepositoryPlugin; import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.fs.FsRepository; import org.opensearch.repositories.fs.ReloadableFsRepository; import org.opensearch.test.InternalSettingsPlugin; import org.opensearch.test.OpenSearchIntegTestCase; @@ -97,23 +98,26 @@ public Settings.Builder remotePublishConfiguredNodeSetting() { .put(stateRepoSettingsAttributeKeyPrefix + prefixModeVerificationSuffix, true) .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, ROUTING_TABLE_REPO_NAME) - .put(routingTableRepoTypeAttributeKey, ReloadableFsRepository.TYPE) + .put(routingTableRepoTypeAttributeKey, FsRepository.TYPE) .put(routingTableRepoSettingsAttributeKeyPrefix + "location", segmentRepoPath); return builder; } public Settings.Builder remoteWithRoutingTableNodeSetting() { // Remote Cluster with Routing table + return Settings.builder() .put( - buildRemoteStoreNodeAttributes( + remoteStoreClusterSettings( REPOSITORY_NAME, segmentRepoPath, + ReloadableFsRepository.TYPE, REPOSITORY_2_NAME, translogRepoPath, + ReloadableFsRepository.TYPE, REPOSITORY_NAME, segmentRepoPath, - false + ReloadableFsRepository.TYPE ) ) .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 13033b670d44b..d597b51c32ccd 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -42,6 +42,7 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.RepositoriesMetadata; +import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.RerouteService; @@ -57,6 +58,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -185,11 +187,30 @@ public ClusterTasksResult execute(ClusterState currentState, List jo // for every set of node join task which we can optimize to not compute if cluster state already has // repository information. Optional remoteDN = currentNodes.getNodes().values().stream().filter(DiscoveryNode::isRemoteStoreNode).findFirst(); - DiscoveryNode dn = remoteDN.orElseGet(() -> (currentNodes.getNodes().values()).stream().findFirst().get()); - RepositoriesMetadata repositoriesMetadata = remoteStoreNodeService.updateRepositoriesMetadata( - dn, - currentState.getMetadata().custom(RepositoriesMetadata.TYPE) - ); + Optional remotePublicationDN = currentNodes.getNodes() + .values() + .stream() + .filter(DiscoveryNode::isRemoteStatePublicationEnabled) + .findFirst(); + RepositoriesMetadata existingRepositoriesMetadata = currentState.getMetadata().custom(RepositoriesMetadata.TYPE); + Map repositories = new LinkedHashMap<>(); + if (existingRepositoriesMetadata != null) { + existingRepositoriesMetadata.repositories().forEach(r -> repositories.putIfAbsent(r.name(), r)); + } + if (remoteDN.isPresent()) { + RepositoriesMetadata repositoriesMetadata = remoteStoreNodeService.updateRepositoriesMetadata( + remoteDN.get(), + existingRepositoriesMetadata + ); + repositoriesMetadata.repositories().forEach(r -> repositories.putIfAbsent(r.name(), r)); + } + if (remotePublicationDN.isPresent()) { + RepositoriesMetadata repositoriesMetadata = remoteStoreNodeService.updateRepositoriesMetadata( + remotePublicationDN.get(), + existingRepositoriesMetadata + ); + repositoriesMetadata.repositories().forEach(r -> repositories.putIfAbsent(r.name(), r)); + } assert nodesBuilder.isLocalNodeElectedClusterManager(); @@ -219,15 +240,16 @@ public ClusterTasksResult execute(ClusterState currentState, List jo ensureNodeCommissioned(node, currentState.metadata()); nodesBuilder.add(node); - if (remoteDN.isEmpty() && node.isRemoteStoreNode()) { + if ((remoteDN.isEmpty() && node.isRemoteStoreNode()) + || (remotePublicationDN.isEmpty() && node.isRemoteStatePublicationEnabled())) { // This is hit only on cases where we encounter first remote node logger.info("Updating system repository now for remote store"); - repositoriesMetadata = remoteStoreNodeService.updateRepositoriesMetadata( + RepositoriesMetadata repositoriesMetadata = remoteStoreNodeService.updateRepositoriesMetadata( node, - currentState.getMetadata().custom(RepositoriesMetadata.TYPE) + existingRepositoriesMetadata ); + repositoriesMetadata.repositories().forEach(r -> repositories.putIfAbsent(r.name(), r)); } - nodesChanged = true; minClusterNodeVersion = Version.min(minClusterNodeVersion, node.getVersion()); maxClusterNodeVersion = Version.max(maxClusterNodeVersion, node.getVersion()); @@ -241,7 +263,7 @@ public ClusterTasksResult execute(ClusterState currentState, List jo } results.success(joinTask); } - + RepositoriesMetadata repositoriesMetadata = new RepositoriesMetadata(new ArrayList<>(repositories.values())); if (nodesChanged) { rerouteService.reroute( "post-join reroute", diff --git a/server/src/main/java/org/opensearch/cluster/service/MasterService.java b/server/src/main/java/org/opensearch/cluster/service/MasterService.java index 713de8cdd0fda..455e7301a490d 100644 --- a/server/src/main/java/org/opensearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/MasterService.java @@ -299,33 +299,37 @@ public static boolean assertNotMasterUpdateThread(String reason) { } private void runTasks(TaskInputs taskInputs) { - final String longSummary = logger.isTraceEnabled() ? taskInputs.taskSummaryGenerator.apply(true) : ""; - final String shortSummary = taskInputs.taskSummaryGenerator.apply(false); + final String summary; + if (logger.isTraceEnabled()) { + summary = taskInputs.taskSummaryGenerator.apply(true); + } else { + summary = taskInputs.taskSummaryGenerator.apply(false); + } if (!lifecycle.started()) { - logger.debug("processing [{}]: ignoring, cluster-manager service not started", shortSummary); + logger.debug("processing [{}]: ignoring, cluster-manager service not started", summary); return; } if (logger.isTraceEnabled()) { - logger.trace("executing cluster state update for [{}]", longSummary); + logger.trace("executing cluster state update for [{}]", summary); } else { - logger.debug("executing cluster state update for [{}]", shortSummary); + logger.debug("executing cluster state update for [{}]", summary); } final ClusterState previousClusterState = state(); if (!previousClusterState.nodes().isLocalNodeElectedClusterManager() && taskInputs.runOnlyWhenClusterManager()) { - logger.debug("failing [{}]: local node is no longer cluster-manager", shortSummary); + logger.debug("failing [{}]: local node is no longer cluster-manager", summary); taskInputs.onNoLongerClusterManager(); return; } final long computationStartTime = threadPool.preciseRelativeTimeInNanos(); - final TaskOutputs taskOutputs = calculateTaskOutputs(taskInputs, previousClusterState, shortSummary); + final TaskOutputs taskOutputs = calculateTaskOutputs(taskInputs, previousClusterState, summary); taskOutputs.notifyFailedTasks(); final TimeValue computationTime = getTimeSince(computationStartTime); - logExecutionTime(computationTime, "compute cluster state update", shortSummary); + logExecutionTime(computationTime, "compute cluster state update", summary); clusterManagerMetrics.recordLatency( clusterManagerMetrics.clusterStateComputeHistogram, @@ -337,17 +341,17 @@ private void runTasks(TaskInputs taskInputs) { final long notificationStartTime = threadPool.preciseRelativeTimeInNanos(); taskOutputs.notifySuccessfulTasksOnUnchangedClusterState(); final TimeValue executionTime = getTimeSince(notificationStartTime); - logExecutionTime(executionTime, "notify listeners on unchanged cluster state", shortSummary); + logExecutionTime(executionTime, "notify listeners on unchanged cluster state", summary); } else { final ClusterState newClusterState = taskOutputs.newClusterState; if (logger.isTraceEnabled()) { - logger.trace("cluster state updated, source [{}]\n{}", longSummary, newClusterState); + logger.trace("cluster state updated, source [{}]\n{}", summary, newClusterState); } else { - logger.debug("cluster state updated, version [{}], source [{}]", newClusterState.version(), shortSummary); + logger.debug("cluster state updated, version [{}], source [{}]", newClusterState.version(), summary); } final long publicationStartTime = threadPool.preciseRelativeTimeInNanos(); try { - ClusterChangedEvent clusterChangedEvent = new ClusterChangedEvent(shortSummary, newClusterState, previousClusterState); + ClusterChangedEvent clusterChangedEvent = new ClusterChangedEvent(summary, newClusterState, previousClusterState); // new cluster state, notify all listeners final DiscoveryNodes.Delta nodesDelta = clusterChangedEvent.nodesDelta(); if (nodesDelta.hasChanges() && logger.isInfoEnabled()) { @@ -355,7 +359,7 @@ private void runTasks(TaskInputs taskInputs) { if (nodesDeltaSummary.length() > 0) { logger.info( "{}, term: {}, version: {}, delta: {}", - shortSummary, + summary, newClusterState.term(), newClusterState.version(), nodesDeltaSummary @@ -366,7 +370,7 @@ private void runTasks(TaskInputs taskInputs) { logger.debug("publishing cluster state version [{}]", newClusterState.version()); publish(clusterChangedEvent, taskOutputs, publicationStartTime); } catch (Exception e) { - handleException(shortSummary, publicationStartTime, newClusterState, e); + handleException(summary, publicationStartTime, newClusterState, e); } } } diff --git a/server/src/main/java/org/opensearch/cluster/service/TaskBatcher.java b/server/src/main/java/org/opensearch/cluster/service/TaskBatcher.java index 3513bfffb7157..ac54693b8ad1e 100644 --- a/server/src/main/java/org/opensearch/cluster/service/TaskBatcher.java +++ b/server/src/main/java/org/opensearch/cluster/service/TaskBatcher.java @@ -195,16 +195,12 @@ void runIfNotProcessed(BatchedTask updateTask) { if (toExecute.isEmpty() == false) { Function taskSummaryGenerator = (longSummaryRequired) -> { if (longSummaryRequired == null || !longSummaryRequired) { - return buildShortSummary(updateTask.batchingKey, toExecute.size()); + final List sampleTasks = toExecute.stream() + .limit(Math.min(1000, toExecute.size())) + .collect(Collectors.toList()); + return buildShortSummary(updateTask.batchingKey, toExecute.size(), getSummary(updateTask, sampleTasks)); } - final Map> processTasksBySource = new HashMap<>(); - for (final BatchedTask task : toExecute) { - processTasksBySource.computeIfAbsent(task.source, s -> new ArrayList<>()).add(task); - } - return processTasksBySource.entrySet().stream().map(entry -> { - String tasks = updateTask.describeTasks(entry.getValue()); - return tasks.isEmpty() ? entry.getKey() : entry.getKey() + "[" + tasks + "]"; - }).reduce((s1, s2) -> s1 + ", " + s2).orElse(""); + return getSummary(updateTask, toExecute); }; taskBatcherListener.onBeginProcessing(toExecute); run(updateTask.batchingKey, toExecute, taskSummaryGenerator); @@ -212,8 +208,24 @@ void runIfNotProcessed(BatchedTask updateTask) { } } - private String buildShortSummary(final Object batchingKey, final int taskCount) { - return "Tasks batched with key: " + batchingKey.toString().split("\\$")[0] + " and count: " + taskCount; + private String getSummary(final BatchedTask updateTask, final List toExecute) { + final Map> processTasksBySource = new HashMap<>(); + for (final BatchedTask task : toExecute) { + processTasksBySource.computeIfAbsent(task.source, s -> new ArrayList<>()).add(task); + } + return processTasksBySource.entrySet().stream().map(entry -> { + String tasks = updateTask.describeTasks(entry.getValue()); + return tasks.isEmpty() ? entry.getKey() : entry.getKey() + "[" + tasks + "]"; + }).reduce((s1, s2) -> s1 + ", " + s2).orElse(""); + } + + private String buildShortSummary(final Object batchingKey, final int taskCount, final String sampleTasks) { + return "Tasks batched with key: " + + batchingKey.toString().split("\\$")[0] + + ", count:" + + taskCount + + " and sample tasks: " + + sampleTasks; } /** diff --git a/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java b/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java index d86d41bb1a359..095730bd84f8d 100644 --- a/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java +++ b/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java @@ -241,11 +241,13 @@ private void fillShardData(Map shardDataFromNode, Map shardData : shardDataFromNode.entrySet()) { if (shardData.getValue() != null) { ShardId shardId = shardData.getKey(); - if (emptyShardResponsePredicate.test(shardData.getValue())) { - this.emptyShardResponse[shardIdKey.get(shardId)] = true; - this.shardData[shardIdKey.get(shardId)] = null; - } else { - this.shardData[shardIdKey.get(shardId)] = shardData.getValue(); + if (shardIdKey.get(shardId) != null) {// the response might be for shard which is no longer present in cache + if (emptyShardResponsePredicate.test(shardData.getValue())) { + this.emptyShardResponse[shardIdKey.get(shardId)] = true; + this.shardData[shardIdKey.get(shardId)] = null; + } else { + this.shardData[shardIdKey.get(shardId)] = shardData.getValue(); + } } } } diff --git a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java index 020a543ac5fc5..2b6fdfa8dd411 100644 --- a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java @@ -12,6 +12,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.NodeAllocationResult; @@ -51,6 +52,12 @@ public abstract class ReplicaShardBatchAllocator extends ReplicaShardAllocator { */ public void processExistingRecoveries(RoutingAllocation allocation, List> shardBatches) { List shardCancellationActions = new ArrayList<>(); + Map> initReplicasFromRouting = new HashMap<>(); + allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).stream().filter(r -> !r.primary()).forEach(r -> { + initReplicasFromRouting.putIfAbsent(r.shardId(), new ArrayList<>()); + initReplicasFromRouting.get(r.shardId()).add(r); + }); + // iterate through the batches, each batch needs to be processed together as fetch call should be made for shards from same batch for (List shardBatch : shardBatches) { List eligibleShards = new ArrayList<>(); @@ -58,6 +65,12 @@ public void processExistingRecoveries(RoutingAllocation allocation, List nodeShardStores = convertToNodeStoreFilesMetadataMap(shard, shardState); - - Runnable cancellationAction = cancelExistingRecoveryForBetterMatch(shard, allocation, nodeShardStores); - if (cancellationAction != null) { - shardCancellationActions.add(cancellationAction); + for (ShardRouting initShardsFromAllocation : initReplicasFromRouting.get(shard.shardId())) { + Map nodeShardStores = convertToNodeStoreFilesMetadataMap( + initShardsFromAllocation, + shardState + ); + Runnable cancellationAction = cancelExistingRecoveryForBetterMatch( + initShardsFromAllocation, + allocation, + nodeShardStores + ); + if (cancellationAction != null) { + shardCancellationActions.add(cancellationAction); + } } } } diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java index cc5d8b0e62e90..c1c041ce01198 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java @@ -168,7 +168,7 @@ public void createAndVerifyRepositories(DiscoveryNode localNode) { * node repository metadata an exception will be thrown and the node will not be allowed to join the cluster. */ public RepositoriesMetadata updateRepositoriesMetadata(DiscoveryNode joiningNode, RepositoriesMetadata existingRepositories) { - if (joiningNode.isRemoteStoreNode()) { + if (joiningNode.isRemoteStoreNode() || joiningNode.isRemoteStatePublicationEnabled()) { List updatedRepositoryMetadataList = new ArrayList<>(); List newRepositoryMetadataList = new RemoteStoreNodeAttribute(joiningNode).getRepositoriesMetadata() .repositories(); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index d192a2556c36b..f6fb203bfe1a9 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -959,6 +959,198 @@ public void testUpdatesClusterStateWithMultiNodeClusterAndSameRepository() throw validateRepositoryMetadata(result.resultingState, clusterManagerNode, 2); } + public void testUpdatesRepoRemoteNodeJoinPublicationCluster() throws Exception { + final AllocationService allocationService = mock(AllocationService.class); + when(allocationService.adaptAutoExpandReplicas(any())).then(invocationOnMock -> invocationOnMock.getArguments()[0]); + final RerouteService rerouteService = (reason, priority, listener) -> listener.onResponse(null); + final RemoteStoreNodeService remoteStoreNodeService = new RemoteStoreNodeService( + new SetOnce<>(mock(RepositoriesService.class))::get, + null + ); + + final JoinTaskExecutor joinTaskExecutor = new JoinTaskExecutor( + Settings.EMPTY, + allocationService, + logger, + rerouteService, + remoteStoreNodeService + ); + + final DiscoveryNode clusterManagerNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes( + DiscoveryNodes.builder() + .add(clusterManagerNode) + .localNodeId(clusterManagerNode.getId()) + .clusterManagerNodeId(clusterManagerNode.getId()) + ) + .build(); + + final ClusterStateTaskExecutor.ClusterTasksResult result = joinTaskExecutor.execute( + clusterState, + List.of(new JoinTaskExecutor.Task(clusterManagerNode, "elect leader")) + ); + assertThat(result.executionResults.entrySet(), hasSize(1)); + final ClusterStateTaskExecutor.TaskResult taskResult = result.executionResults.values().iterator().next(); + assertTrue(taskResult.isSuccess()); + validatePublicationRepositoryMetadata(result.resultingState, clusterManagerNode); + + final Settings settings = Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE) + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + .build(); + final Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + Metadata metadata = Metadata.builder().persistentSettings(settings).build(); + + ClusterState currentState = ClusterState.builder(result.resultingState).metadata(metadata).build(); + + final DiscoveryNode remoteStoreNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final ClusterStateTaskExecutor.ClusterTasksResult resultAfterRemoteNodeJoin = joinTaskExecutor.execute( + currentState, + List.of(new JoinTaskExecutor.Task(remoteStoreNode, "test")) + ); + assertThat(resultAfterRemoteNodeJoin.executionResults.entrySet(), hasSize(1)); + final ClusterStateTaskExecutor.TaskResult taskResult1 = resultAfterRemoteNodeJoin.executionResults.values().iterator().next(); + assertTrue(taskResult1.isSuccess()); + validateRepositoriesMetadata(resultAfterRemoteNodeJoin.resultingState, remoteStoreNode, clusterManagerNode); + } + + public void testUpdatesRepoPublicationNodeJoinRemoteCluster() throws Exception { + final AllocationService allocationService = mock(AllocationService.class); + when(allocationService.adaptAutoExpandReplicas(any())).then(invocationOnMock -> invocationOnMock.getArguments()[0]); + final RerouteService rerouteService = (reason, priority, listener) -> listener.onResponse(null); + final RemoteStoreNodeService remoteStoreNodeService = new RemoteStoreNodeService( + new SetOnce<>(mock(RepositoriesService.class))::get, + null + ); + + final JoinTaskExecutor joinTaskExecutor = new JoinTaskExecutor( + Settings.EMPTY, + allocationService, + logger, + rerouteService, + remoteStoreNodeService + ); + + final DiscoveryNode clusterManagerNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes( + DiscoveryNodes.builder() + .add(clusterManagerNode) + .localNodeId(clusterManagerNode.getId()) + .clusterManagerNodeId(clusterManagerNode.getId()) + ) + .build(); + + final ClusterStateTaskExecutor.ClusterTasksResult result = joinTaskExecutor.execute( + clusterState, + List.of(new JoinTaskExecutor.Task(clusterManagerNode, "elect leader")) + ); + final Settings settings = Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE) + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + .build(); + final Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + Metadata metadata = Metadata.builder().persistentSettings(settings).build(); + + ClusterState currentState = ClusterState.builder(result.resultingState).metadata(metadata).build(); + + final DiscoveryNode remotePublicationNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final ClusterStateTaskExecutor.ClusterTasksResult resultAfterRemoteNodeJoin = joinTaskExecutor.execute( + currentState, + List.of(new JoinTaskExecutor.Task(remotePublicationNode, "test")) + ); + assertThat(resultAfterRemoteNodeJoin.executionResults.entrySet(), hasSize(1)); + final ClusterStateTaskExecutor.TaskResult taskResult1 = resultAfterRemoteNodeJoin.executionResults.values().iterator().next(); + assertTrue(taskResult1.isSuccess()); + validateRepositoriesMetadata(resultAfterRemoteNodeJoin.resultingState, clusterManagerNode, remotePublicationNode); + } + + public void testUpdatesClusterStateWithMultiplePublicationNodeJoin() throws Exception { + Map remoteStoreNodeAttributes = remotePublicationNodeAttributes(); + final AllocationService allocationService = mock(AllocationService.class); + when(allocationService.adaptAutoExpandReplicas(any())).then(invocationOnMock -> invocationOnMock.getArguments()[0]); + final RerouteService rerouteService = (reason, priority, listener) -> listener.onResponse(null); + final RemoteStoreNodeService remoteStoreNodeService = new RemoteStoreNodeService( + new SetOnce<>(mock(RepositoriesService.class))::get, + null + ); + + final JoinTaskExecutor joinTaskExecutor = new JoinTaskExecutor( + Settings.EMPTY, + allocationService, + logger, + rerouteService, + remoteStoreNodeService + ); + + final DiscoveryNode clusterManagerNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + List repositoriesMetadata = new ArrayList<>(); + + final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes( + DiscoveryNodes.builder() + .add(clusterManagerNode) + .localNodeId(clusterManagerNode.getId()) + .clusterManagerNodeId(clusterManagerNode.getId()) + ) + .metadata(Metadata.builder().putCustom(RepositoriesMetadata.TYPE, new RepositoriesMetadata(repositoriesMetadata))) + .build(); + + final DiscoveryNode joiningNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final ClusterStateTaskExecutor.ClusterTasksResult result = joinTaskExecutor.execute( + clusterState, + List.of(new JoinTaskExecutor.Task(joiningNode, "test")) + ); + assertThat(result.executionResults.entrySet(), hasSize(1)); + final ClusterStateTaskExecutor.TaskResult taskResult = result.executionResults.values().iterator().next(); + assertTrue(taskResult.isSuccess()); + validatePublicationRepositoryMetadata(result.resultingState, clusterManagerNode); + } + public void testNodeJoinInMixedMode() { List versions = allOpenSearchVersions(); assert versions.size() >= 2 : "test requires at least two open search versions"; @@ -1191,7 +1383,9 @@ private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode final RepositoriesMetadata repositoriesMetadata = updatedState.metadata().custom(RepositoriesMetadata.TYPE); assertTrue(repositoriesMetadata.repositories().size() == expectedRepositories); - if (repositoriesMetadata.repositories().size() == 2 || repositoriesMetadata.repositories().size() == 3) { + if (repositoriesMetadata.repositories().size() == 2 + || repositoriesMetadata.repositories().size() == 3 + || repositoriesMetadata.repositories().size() == 4) { final RepositoryMetadata segmentRepositoryMetadata = buildRepositoryMetadata(existingNode, SEGMENT_REPO); final RepositoryMetadata translogRepositoryMetadata = buildRepositoryMetadata(existingNode, TRANSLOG_REPO); for (RepositoryMetadata repositoryMetadata : repositoriesMetadata.repositories()) { @@ -1212,6 +1406,43 @@ private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode } } + private void validatePublicationRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode) throws Exception { + final RepositoriesMetadata repositoriesMetadata = updatedState.metadata().custom(RepositoriesMetadata.TYPE); + assertTrue(repositoriesMetadata.repositories().size() == 2); + final RepositoryMetadata clusterStateRepoMetadata = buildRepositoryMetadata(existingNode, CLUSTER_STATE_REPO); + final RepositoryMetadata routingTableRepoMetadata = buildRepositoryMetadata(existingNode, ROUTING_TABLE_REPO); + for (RepositoryMetadata repositoryMetadata : repositoriesMetadata.repositories()) { + if (repositoryMetadata.name().equals(clusterStateRepoMetadata.name())) { + assertTrue(clusterStateRepoMetadata.equalsIgnoreGenerations(repositoryMetadata)); + } else if (repositoryMetadata.name().equals(routingTableRepoMetadata.name())) { + assertTrue(routingTableRepoMetadata.equalsIgnoreGenerations(repositoryMetadata)); + } + } + } + + private void validateRepositoriesMetadata(ClusterState updatedState, DiscoveryNode remoteNode, DiscoveryNode publicationNode) + throws Exception { + + final RepositoriesMetadata repositoriesMetadata = updatedState.metadata().custom(RepositoriesMetadata.TYPE); + assertEquals(4, repositoriesMetadata.repositories().size()); + final RepositoryMetadata segmentRepositoryMetadata = buildRepositoryMetadata(remoteNode, SEGMENT_REPO); + final RepositoryMetadata translogRepositoryMetadata = buildRepositoryMetadata(remoteNode, TRANSLOG_REPO); + final RepositoryMetadata clusterStateRepositoryMetadata = buildRepositoryMetadata(remoteNode, CLUSTER_STATE_REPO); + final RepositoryMetadata routingTableRepositoryMetadata = buildRepositoryMetadata(publicationNode, ROUTING_TABLE_REPO); + + for (RepositoryMetadata repositoryMetadata : repositoriesMetadata.repositories()) { + if (repositoryMetadata.name().equals(segmentRepositoryMetadata.name())) { + assertTrue(segmentRepositoryMetadata.equalsIgnoreGenerations(repositoryMetadata)); + } else if (repositoryMetadata.name().equals(translogRepositoryMetadata.name())) { + assertTrue(translogRepositoryMetadata.equalsIgnoreGenerations(repositoryMetadata)); + } else if (repositoryMetadata.name().equals(clusterStateRepositoryMetadata.name())) { + assertTrue(clusterStateRepositoryMetadata.equalsIgnoreGenerations(repositoryMetadata)); + } else if (repositoryMetadata.name().equals(routingTableRepositoryMetadata.name())) { + assertTrue(repositoryMetadata.equalsIgnoreGenerations(routingTableRepositoryMetadata)); + } + } + } + private DiscoveryNode newDiscoveryNode(Map attributes) { return new DiscoveryNode( randomAlphaOfLength(10), diff --git a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java index db9abe0310e40..bb9e34d93431f 100644 --- a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java @@ -391,7 +391,7 @@ public void testClusterStateUpdateLoggingWithTraceEnabled() throws Exception { "test1 computation", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [1s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [1s] to compute cluster state update for [test1]" ) ); mockAppender.addExpectation( @@ -399,7 +399,7 @@ public void testClusterStateUpdateLoggingWithTraceEnabled() throws Exception { "test1 notification", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [0s] to notify listeners on unchanged cluster state for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [0s] to notify listeners on unchanged cluster state for [test1]" ) ); @@ -416,7 +416,7 @@ public void testClusterStateUpdateLoggingWithTraceEnabled() throws Exception { "test2 failure", MasterService.class.getCanonicalName(), Level.TRACE, - "failed to execute cluster state update (on version: [*], uuid: [*]) for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]*" + "failed to execute cluster state update (on version: [*], uuid: [*]) for [test2]*" ) ); mockAppender.addExpectation( @@ -424,7 +424,7 @@ public void testClusterStateUpdateLoggingWithTraceEnabled() throws Exception { "test2 computation", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [2s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [2s] to compute cluster state update for [test2]" ) ); mockAppender.addExpectation( @@ -432,7 +432,7 @@ public void testClusterStateUpdateLoggingWithTraceEnabled() throws Exception { "test2 notification", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [0s] to notify listeners on unchanged cluster state for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [0s] to notify listeners on unchanged cluster state for [test2]" ) ); @@ -449,7 +449,7 @@ public void testClusterStateUpdateLoggingWithTraceEnabled() throws Exception { "test3 computation", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [3s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [3s] to compute cluster state update for [test3]" ) ); mockAppender.addExpectation( @@ -457,7 +457,7 @@ public void testClusterStateUpdateLoggingWithTraceEnabled() throws Exception { "test3 notification", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [4s] to notify listeners on successful publication of cluster state (version: *, uuid: *) for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [4s] to notify listeners on successful publication of cluster state (version: *, uuid: *) for [test3]" ) ); @@ -548,7 +548,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test1 start", MasterService.class.getCanonicalName(), Level.DEBUG, - "executing cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "executing cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test1]" ) ); mockAppender.addExpectation( @@ -556,7 +556,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test1 computation", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [1s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [1s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test1]" ) ); mockAppender.addExpectation( @@ -564,7 +564,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test1 notification", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [0s] to notify listeners on unchanged cluster state for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [0s] to notify listeners on unchanged cluster state for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test1]" ) ); @@ -573,7 +573,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test2 start", MasterService.class.getCanonicalName(), Level.DEBUG, - "executing cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "executing cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test2]" ) ); mockAppender.addExpectation( @@ -581,7 +581,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test2 failure", MasterService.class.getCanonicalName(), Level.DEBUG, - "failed to execute cluster state update (on version: [*], uuid: [*]) for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]*" + "failed to execute cluster state update (on version: [*], uuid: [*]) for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test2]*" ) ); mockAppender.addExpectation( @@ -589,7 +589,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test2 computation", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [2s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [2s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test2]" ) ); mockAppender.addExpectation( @@ -597,7 +597,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test2 notification", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [0s] to notify listeners on unchanged cluster state for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [0s] to notify listeners on unchanged cluster state for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test2]" ) ); @@ -606,7 +606,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test3 start", MasterService.class.getCanonicalName(), Level.DEBUG, - "executing cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "executing cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test3]" ) ); mockAppender.addExpectation( @@ -614,7 +614,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test3 computation", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [3s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [3s] to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test3]" ) ); mockAppender.addExpectation( @@ -622,7 +622,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test3 notification", MasterService.class.getCanonicalName(), Level.DEBUG, - "took [4s] to notify listeners on successful publication of cluster state (version: *, uuid: *) for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "took [4s] to notify listeners on successful publication of cluster state (version: *, uuid: *) for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test3]" ) ); @@ -631,7 +631,7 @@ public void testClusterStateUpdateLoggingWithDebugEnabled() throws Exception { "test4", MasterService.class.getCanonicalName(), Level.DEBUG, - "executing cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "executing cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test4]" ) ); @@ -1238,7 +1238,7 @@ public void testLongClusterStateUpdateLogging() throws Exception { "test2", MasterService.class.getCanonicalName(), Level.WARN, - "*took [*], which is over [10s], to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "*took [*], which is over [10s], to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test2]" ) ); mockAppender.addExpectation( @@ -1246,7 +1246,7 @@ public void testLongClusterStateUpdateLogging() throws Exception { "test3", MasterService.class.getCanonicalName(), Level.WARN, - "*took [*], which is over [10s], to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "*took [*], which is over [10s], to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test3]" ) ); mockAppender.addExpectation( @@ -1254,7 +1254,7 @@ public void testLongClusterStateUpdateLogging() throws Exception { "test4", MasterService.class.getCanonicalName(), Level.WARN, - "*took [*], which is over [10s], to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]" + "*took [*], which is over [10s], to compute cluster state update for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test4]" ) ); mockAppender.addExpectation( @@ -1432,7 +1432,7 @@ public void testLongClusterStateUpdateLoggingForFailedPublication() throws Excep "test1 should log due to slow and failing publication", MasterService.class.getCanonicalName(), Level.WARN, - "took [*] and then failed to publish updated cluster state (version: *, uuid: *) for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests and count: 1]:*" + "took [*] and then failed to publish updated cluster state (version: *, uuid: *) for [Tasks batched with key: org.opensearch.cluster.service.MasterServiceTests, count:1 and sample tasks: test1]:*" ) ); diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle index 2bd0268ca136b..d27273f357758 100644 --- a/test/fixtures/hdfs-fixture/build.gradle +++ b/test/fixtures/hdfs-fixture/build.gradle @@ -74,7 +74,7 @@ dependencies { api 'org.apache.zookeeper:zookeeper:3.9.2' api "org.apache.commons:commons-text:1.12.0" api "commons-net:commons-net:3.11.1" - api "ch.qos.logback:logback-core:1.5.10" + api "ch.qos.logback:logback-core:1.5.12" api "ch.qos.logback:logback-classic:1.2.13" api "org.jboss.xnio:xnio-nio:3.8.16.Final" api 'org.jline:jline:3.27.1'