Skip to content

Commit

Permalink
Fix SegRep bug where primary shards do not update tracked replica sta…
Browse files Browse the repository at this point in the history
…te post failover/relocation (opensearch-project#11017)

Signed-off-by: Marc Handalian <[email protected]>
  • Loading branch information
mch2 authored Oct 31, 2023
1 parent 63aff16 commit a2febe9
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.index.StandardDirectoryReader;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.opensearch.action.admin.cluster.stats.ClusterStatsResponse;
import org.opensearch.action.admin.indices.alias.Alias;
import org.opensearch.action.admin.indices.flush.FlushRequest;
import org.opensearch.action.admin.indices.stats.IndicesStatsRequest;
Expand Down Expand Up @@ -62,6 +63,7 @@
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.IndexModule;
import org.opensearch.index.ReplicationStats;
import org.opensearch.index.SegmentReplicationPerGroupStats;
import org.opensearch.index.SegmentReplicationPressureService;
import org.opensearch.index.SegmentReplicationShardStats;
Expand Down Expand Up @@ -94,6 +96,7 @@
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

import static java.util.Arrays.asList;
Expand Down Expand Up @@ -1776,4 +1779,58 @@ public void testRealtimeTermVectorRequestsUnSuccessful() throws IOException {
assertThat(response.getIndex(), equalTo(INDEX_NAME));

}

public void testReplicaAlreadyAtCheckpoint() throws Exception {
final List<String> nodes = new ArrayList<>();
final String primaryNode = internalCluster().startDataOnlyNode();
nodes.add(primaryNode);
final Settings settings = Settings.builder().put(indexSettings()).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build();
createIndex(INDEX_NAME, settings);
ensureGreen(INDEX_NAME);
// start a replica node, initially will be empty with no shard assignment.
final String replicaNode = internalCluster().startDataOnlyNode();
nodes.add(replicaNode);
final String replicaNode2 = internalCluster().startDataOnlyNode();
assertAcked(
client().admin()
.indices()
.prepareUpdateSettings(INDEX_NAME)
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2))
);
ensureGreen(INDEX_NAME);

// index a doc.
client().prepareIndex(INDEX_NAME).setId("1").setSource("foo", randomInt()).get();
refresh(INDEX_NAME);

internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNode));
ensureYellowAndNoInitializingShards(INDEX_NAME);
IndexShard replica_1 = getIndexShard(replicaNode, INDEX_NAME);
IndexShard replica_2 = getIndexShard(replicaNode2, INDEX_NAME);
// wait until a replica is promoted & finishes engine flip, we don't care which one
AtomicReference<IndexShard> primary = new AtomicReference<>();
assertBusy(() -> {
assertTrue("replica should be promoted as a primary", replica_1.routingEntry().primary() || replica_2.routingEntry().primary());
primary.set(replica_1.routingEntry().primary() ? replica_1 : replica_2);
});

FlushRequest request = new FlushRequest(INDEX_NAME);
request.force(true);
primary.get().flush(request);

assertBusy(() -> {
assertEquals(
replica_1.getLatestReplicationCheckpoint().getSegmentInfosVersion(),
replica_2.getLatestReplicationCheckpoint().getSegmentInfosVersion()
);
});

assertBusy(() -> {
ClusterStatsResponse clusterStatsResponse = client().admin().cluster().prepareClusterStats().get();
ReplicationStats replicationStats = clusterStatsResponse.getIndicesStats().getSegments().getReplicationStats();
assertEquals(0L, replicationStats.maxBytesBehind);
assertEquals(0L, replicationStats.maxReplicationLag);
assertEquals(0L, replicationStats.totalBytesBehind);
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;

import static org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED;
import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.UPDATE_VISIBLE_CHECKPOINT;

/**
Expand Down Expand Up @@ -282,6 +283,12 @@ public void onReplicationFailure(
}
}
});
} else if (replicaShard.isSegmentReplicationAllowed()) {
// if we didn't process the checkpoint because we are up to date,
// send our latest checkpoint to the primary to update tracking.
// replicationId is not used by the primary set to a default value.
final long replicationId = NO_OPS_PERFORMED;
updateVisibleCheckpoint(replicationId, replicaShard);
}
} else {
logger.trace(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
import java.util.concurrent.TimeUnit;
import java.util.function.BiConsumer;

import static org.junit.Assert.assertEquals;
import static org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.Mockito.atLeastOnce;
Expand Down Expand Up @@ -247,6 +247,7 @@ public void testAlreadyOnNewCheckpoint() {
SegmentReplicationTargetService spy = spy(sut);
spy.onNewCheckpoint(replicaShard.getLatestReplicationCheckpoint(), replicaShard);
verify(spy, times(0)).startReplication(any(), any(), any());
verify(spy, times(1)).updateVisibleCheckpoint(NO_OPS_PERFORMED, replicaShard);
}

@TestLogging(reason = "Getting trace logs from replication package", value = "org.opensearch.indices.replication:TRACE")
Expand Down

0 comments on commit a2febe9

Please sign in to comment.