diff --git a/docs/changelog/83221.yaml b/docs/changelog/83221.yaml new file mode 100644 index 000000000000..957bf0020499 --- /dev/null +++ b/docs/changelog/83221.yaml @@ -0,0 +1,5 @@ +pr: 83221 +summary: Add detail to slow cluster state warning message +area: Cluster Coordination +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java index b075135e82d4..61ee1b170611 100644 --- a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java +++ b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java @@ -84,6 +84,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; @@ -770,18 +771,13 @@ public void writeFullStateAndCommit(long currentTerm, ClusterState clusterState) final TimeValue finalSlowWriteLoggingThreshold = slowWriteLoggingThresholdSupplier.get(); if (durationMillis >= finalSlowWriteLoggingThreshold.getMillis()) { logger.warn( - "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " - + "wrote full state with [{}] indices", + "writing full cluster state took [{}ms] which is above the warn threshold of [{}]; {}", durationMillis, finalSlowWriteLoggingThreshold, - stats.numIndicesUpdated + stats ); } else { - logger.debug( - "writing cluster state took [{}ms]; " + "wrote full state with [{}] indices", - durationMillis, - stats.numIndicesUpdated - ); + logger.debug("writing full cluster state took [{}ms]; {}", durationMillis, stats); } } finally { closeIfAnyIndexWriterHasTragedyOrIsClosed(); @@ -809,23 +805,13 @@ void writeIncrementalStateAndCommit(long currentTerm, ClusterState previousClust final TimeValue finalSlowWriteLoggingThreshold = slowWriteLoggingThresholdSupplier.get(); if (durationMillis >= finalSlowWriteLoggingThreshold.getMillis()) { logger.warn( - "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " - + "wrote global metadata [{}] and metadata for [{}] indices and skipped [{}] unchanged indices", + "writing cluster state took [{}ms] which is above the warn threshold of [{}]; {}", durationMillis, finalSlowWriteLoggingThreshold, - stats.globalMetaUpdated, - stats.numIndicesUpdated, - stats.numIndicesUnchanged + stats ); } else { - logger.debug( - "writing cluster state took [{}ms]; " - + "wrote global metadata [{}] and metadata for [{}] indices and skipped [{}] unchanged indices", - durationMillis, - stats.globalMetaUpdated, - stats.numIndicesUpdated, - stats.numIndicesUnchanged - ); + logger.debug("writing cluster state took [{}ms]; {}", durationMillis, stats); } } finally { closeIfAnyIndexWriterHasTragedyOrIsClosed(); @@ -864,7 +850,9 @@ private WriterStats updateMetadata(Metadata previouslyWrittenMetadata, Metadata assert previousValue == null : indexMetadata.getIndexUUID() + " already mapped to " + previousValue; } + int numIndicesAdded = 0; int numIndicesUpdated = 0; + int numIndicesRemoved = 0; int numIndicesUnchanged = 0; for (IndexMetadata indexMetadata : metadata.indices().values()) { final Long previousVersion = indexMetadataVersionByUUID.get(indexMetadata.getIndexUUID()); @@ -875,7 +863,11 @@ private WriterStats updateMetadata(Metadata previouslyWrittenMetadata, Metadata previousVersion, indexMetadata.getVersion() ); - numIndicesUpdated++; + if (previousVersion == null) { + numIndicesAdded++; + } else { + numIndicesUpdated++; + } for (MetadataIndexWriter metadataIndexWriter : metadataIndexWriters) { metadataIndexWriter.deleteIndexMetadata(indexMetadata.getIndexUUID()); @@ -891,6 +883,7 @@ private WriterStats updateMetadata(Metadata previouslyWrittenMetadata, Metadata for (String removedIndexUUID : indexMetadataVersionByUUID.keySet()) { for (MetadataIndexWriter metadataIndexWriter : metadataIndexWriters) { + numIndicesRemoved++; metadataIndexWriter.deleteIndexMetadata(removedIndexUUID); } } @@ -901,7 +894,7 @@ private WriterStats updateMetadata(Metadata previouslyWrittenMetadata, Metadata metadataIndexWriter.flush(); } - return new WriterStats(updateGlobalMeta, numIndicesUpdated, numIndicesUnchanged); + return new WriterStats(false, updateGlobalMeta, numIndicesUnchanged, numIndicesAdded, numIndicesUpdated, numIndicesRemoved); } private static int lastPageValue(boolean isLastPage) { @@ -977,7 +970,7 @@ private WriterStats addMetadata(Metadata metadata) throws IOException { metadataIndexWriter.flush(); } - return new WriterStats(true, metadata.indices().size(), 0); + return new WriterStats(true, true, 0, 0, metadata.indices().size(), 0); } public void writeIncrementalTermUpdateAndCommit(long currentTerm, long lastAcceptedVersion, Version oldestIndexVersion) @@ -1056,15 +1049,31 @@ public void close() throws IOException { } } - static class WriterStats { - final boolean globalMetaUpdated; - final long numIndicesUpdated; - final long numIndicesUnchanged; - - WriterStats(boolean globalMetaUpdated, long numIndicesUpdated, long numIndicesUnchanged) { - this.globalMetaUpdated = globalMetaUpdated; - this.numIndicesUpdated = numIndicesUpdated; - this.numIndicesUnchanged = numIndicesUnchanged; + private record WriterStats( + boolean isFullWrite, + boolean globalMetaUpdated, + int numIndicesUnchanged, + int numIndicesAdded, + int numIndicesUpdated, + int numIndicesRemoved + ) { + @Override + public String toString() { + if (isFullWrite) { + return String.format(Locale.ROOT, "wrote global metadata and metadata for [%d] indices", numIndicesUpdated); + } else { + return String.format( + Locale.ROOT, + """ + [%s] global metadata, wrote metadata for [%d] new indices and [%d] existing indices, \ + removed metadata for [%d] indices and skipped [%d] unchanged indices""", + globalMetaUpdated ? "wrote" : "skipped writing", + numIndicesAdded, + numIndicesUpdated, + numIndicesRemoved, + numIndicesUnchanged + ); + } } } } diff --git a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java index 75e4ac19af15..8c967aec47df 100644 --- a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java @@ -1192,7 +1192,9 @@ public void testSlowLogging() throws IOException, IllegalAccessException { "should see warning at threshold", PersistedClusterStateService.class.getCanonicalName(), Level.WARN, - "writing cluster state took [*] which is above the warn threshold of [*]; " + "wrote full state with [0] indices" + """ + writing full cluster state took [*] which is above the warn threshold of [*]; \ + wrote global metadata and metadata for [0] indices""" ) ); @@ -1206,7 +1208,9 @@ public void testSlowLogging() throws IOException, IllegalAccessException { "should see warning above threshold", PersistedClusterStateService.class.getCanonicalName(), Level.WARN, - "writing cluster state took [*] which is above the warn threshold of [*]; " + "wrote full state with [0] indices" + """ + writing full cluster state took [*] which is above the warn threshold of [*]; \ + wrote global metadata and metadata for [0] indices""" ) ); @@ -1238,7 +1242,9 @@ public void testSlowLogging() throws IOException, IllegalAccessException { "should see warning at reduced threshold", PersistedClusterStateService.class.getCanonicalName(), Level.WARN, - "writing cluster state took [*] which is above the warn threshold of [*]; " + "wrote full state with [0] indices" + """ + writing full cluster state took [*] which is above the warn threshold of [*]; \ + wrote global metadata and metadata for [0] indices""" ) ); @@ -1269,8 +1275,10 @@ public void testSlowLogging() throws IOException, IllegalAccessException { "should see warning at threshold", PersistedClusterStateService.class.getCanonicalName(), Level.WARN, - "writing cluster state took [*] which is above the warn threshold of [*]; " - + "wrote global metadata [false] and metadata for [1] indices and skipped [0] unchanged indices" + """ + writing cluster state took [*] which is above the warn threshold of [*]; [skipped writing] global metadata, \ + wrote metadata for [1] new indices and [0] existing indices, removed metadata for [0] indices and \ + skipped [0] unchanged indices""" ) );