Skip to content

Commit

Permalink
Inactive replica of new primary shard is green (elastic#99995)
Browse files Browse the repository at this point in the history
When we have a new initialisation of a primary shard we consider the
this indicator `green` based on the idea that this are unexceptional
events in the cluster's lifecycle. However, when we have a replica of
this shard that is inactive we turn to `yellow`. 

With this PR, we change this behaviour to signal that if the primary is
inactive and that is `green`, then an inactive replica of this shard is
also `green`.

Fixes elastic#99951
  • Loading branch information
gmarouli authored Sep 28, 2023
1 parent 2499f6c commit c6f4cb4
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 7 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/99995.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 99995
summary: When a primary is inactive but this is considered expected, the same applies for the replica of this shard.
area: Health
type: enhancement
issues:
- 99951
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ private class ShardAllocationCounts {
private final Map<Diagnosis.Definition, Set<String>> diagnosisDefinitions = new HashMap<>();

public void increment(ShardRouting routing, ClusterState state, NodesShutdownMetadata shutdowns, boolean verbose) {
boolean isNew = isUnassignedDueToNewInitialization(routing);
boolean isNew = isUnassignedDueToNewInitialization(routing, state);
boolean isRestarting = isUnassignedDueToTimelyRestart(routing, shutdowns);
available &= routing.active() || isRestarting || isNew;
if ((routing.active() || isRestarting || isNew) == false) {
Expand Down Expand Up @@ -454,8 +454,14 @@ private static boolean isUnassignedDueToTimelyRestart(ShardRouting routing, Node
return now - restartingAllocationDelayExpiration <= 0;
}

private static boolean isUnassignedDueToNewInitialization(ShardRouting routing) {
return routing.primary() && routing.active() == false && getInactivePrimaryHealth(routing) == ClusterHealthStatus.YELLOW;
private static boolean isUnassignedDueToNewInitialization(ShardRouting routing, ClusterState state) {
if (routing.active()) {
return false;
}
// If the primary is inactive for unexceptional events in the cluster lifecycle, both the primary and the
// replica are considered new initializations.
ShardRouting primary = routing.primary() ? routing : state.routingTable().shardRoutingTable(routing.shardId()).primaryShard();
return primary.active() == false && getInactivePrimaryHealth(primary) == ClusterHealthStatus.YELLOW;
}

/**
Expand Down Expand Up @@ -815,13 +821,15 @@ public String getSymptom() {
|| primaries.unassigned_new > 0
|| primaries.unassigned_restarting > 0
|| replicas.unassigned > 0
|| replicas.unassigned_new > 0
|| replicas.unassigned_restarting > 0
|| primaries.initializing > 0
|| replicas.initializing > 0) {
builder.append(
Stream.of(
createMessage(primaries.unassigned, "unavailable primary shard", "unavailable primary shards"),
createMessage(primaries.unassigned_new, "creating primary shard", "creating primary shards"),
createMessage(replicas.unassigned_new, "creating replica shard", "creating replica shards"),
createMessage(primaries.unassigned_restarting, "restarting primary shard", "restarting primary shards"),
createMessage(replicas.unassigned, "unavailable replica shard", "unavailable replica shards"),
createMessage(primaries.initializing, "initializing primary shard", "initializing primary shards"),
Expand Down Expand Up @@ -861,6 +869,8 @@ public HealthIndicatorDetails getDetails(boolean verbose) {
replicas.unassigned,
"initializing_replicas",
replicas.initializing,
"creating_replicas",
replicas.unassigned_new,
"restarting_replicas",
replicas.unassigned_restarting,
"started_replicas",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -533,17 +533,20 @@ public void testShouldBeYellowWhenRestartingReplicasReachedAllocationDelay() {
);
}

public void testShouldBeGreenWhenThereAreInitializingPrimaries() {
var clusterState = createClusterStateWith(List.of(index("restarting-index", new ShardAllocation("node-0", CREATING))), List.of());
public void testShouldBeGreenWhenThereAreInitializingPrimariesAndReplicas() {
var clusterState = createClusterStateWith(
List.of(index("restarting-index", new ShardAllocation("node-0", CREATING), new ShardAllocation("node-1", CREATING))),
List.of()
);
var service = createShardsAvailabilityIndicatorService(clusterState);

assertThat(
service.calculate(true, HealthInfo.EMPTY_HEALTH_INFO),
equalTo(
createExpectedResult(
GREEN,
"This cluster has 1 creating primary shard.",
Map.of("creating_primaries", 1),
"This cluster has 1 creating primary shard, 1 creating replica shard.",
Map.of("creating_primaries", 1, "creating_replicas", 1),
emptyList(),
emptyList()
)
Expand Down Expand Up @@ -1765,6 +1768,8 @@ private static Map<String, Object> addDefaults(Map<String, Object> override) {
override.getOrDefault("started_primaries", 0),
"unassigned_replicas",
override.getOrDefault("unassigned_replicas", 0),
"creating_replicas",
override.getOrDefault("creating_replicas", 0),
"initializing_replicas",
override.getOrDefault("initializing_replicas", 0),
"restarting_replicas",
Expand Down

0 comments on commit c6f4cb4

Please sign in to comment.