-
Notifications
You must be signed in to change notification settings - Fork 114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Detect rollover transient failure and continues execution #866
Conversation
src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt
Outdated
Show resolved
Hide resolved
src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt
Outdated
Show resolved
Hide resolved
src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt
Outdated
Show resolved
Hide resolved
Codecov Report
@@ Coverage Diff @@
## 2.x #866 +/- ##
============================================
- Coverage 76.06% 75.95% -0.11%
- Complexity 2908 2909 +1
============================================
Files 366 366
Lines 16558 16589 +31
Branches 2397 2404 +7
============================================
+ Hits 12595 12601 +6
- Misses 2582 2594 +12
- Partials 1381 1394 +13
|
src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt
Outdated
Show resolved
Hide resolved
Signed-off-by: Ronnak Saxena <[email protected]>
Signed-off-by: Ronnak Saxena <[email protected]>
Signed-off-by: Ronnak Saxena <[email protected]>
src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt
Outdated
Show resolved
Hide resolved
src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt
Outdated
Show resolved
Hide resolved
Signed-off-by: Ronnak Saxena <[email protected]>
Signed-off-by: Ronnak Saxena <[email protected]>
src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt
Show resolved
Hide resolved
...opensearch/indexmanagement/indexstatemanagement/step/forcemerge/AttemptCallForceMergeStep.kt
Show resolved
Hide resolved
...opensearch/indexmanagement/indexstatemanagement/step/forcemerge/AttemptCallForceMergeStep.kt
Show resolved
Hide resolved
...opensearch/indexmanagement/indexstatemanagement/step/notification/AttemptNotificationStep.kt
Show resolved
Hide resolved
...opensearch/indexmanagement/indexstatemanagement/step/notification/AttemptNotificationStep.kt
Show resolved
Hide resolved
.../kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt
Show resolved
Hide resolved
.../kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt
Show resolved
Hide resolved
...lin/org/opensearch/indexmanagement/indexstatemanagement/step/snapshot/AttemptSnapshotStep.kt
Show resolved
Hide resolved
...lin/org/opensearch/indexmanagement/indexstatemanagement/step/snapshot/AttemptSnapshotStep.kt
Show resolved
Hide resolved
src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/RolloverActionIT.kt
Outdated
Show resolved
Hide resolved
val response: ClusterStateResponse = client.suspendUntil { | ||
client.admin().cluster().state(ClusterStateRequest(), it) | ||
} | ||
// If the index was rolled over, this is a transient failure |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if index was rolled over that's the best case scenario.
what if the rollover failed beacuse of a lack of disk space.
even that is a transient failure because user might delete data to create space or add more nodes etc.
What if there was a circuit breaker exception which was later resolved. even in that case rollover would fail but in the upcoming executions of policy it might not encounter that error.
Signed-off-by: Ronnak Saxena <[email protected]>
Signed-off-by: Ronnak Saxena <[email protected]>
@@ -104,6 +106,10 @@ class AttemptCallForceMergeStep(private val action: ForceMergeAction) : Step(nam | |||
} | |||
|
|||
override fun isIdempotent() = false | |||
override suspend fun isTransientFailure(client: Client, stepContext: StepContext, managedIndexMetaData: ManagedIndexMetaData): Boolean { | |||
// TODO implement logic for detecting transient failure in attempt call force merge step | |||
return false |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@@ -66,6 +68,10 @@ class AttemptNotificationStep(private val action: NotificationAction) : Step(nam | |||
} | |||
|
|||
override fun isIdempotent(): Boolean = false | |||
override suspend fun isTransientFailure(client: Client, stepContext: StepContext, managedIndexMetaData: ManagedIndexMetaData): Boolean { | |||
// TODO implement logic for detecting transient failure in attempt notification step | |||
return false |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
} | ||
if (aliasName == null) { | ||
logger.error("Index $indexName has no alias attached to it. Not a Transient Failure in step attemptRolloverStep") | ||
isTransientFailure = false |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@@ -135,6 +136,10 @@ class AttemptShrinkStep(private val action: ShrinkAction) : ShrinkStep(name, tru | |||
} | |||
|
|||
override fun isIdempotent() = false | |||
override suspend fun isTransientFailure(client: Client, stepContext: StepContext, managedIndexMetaData: ManagedIndexMetaData): Boolean { | |||
// TODO implement logic for detecting transient failure in attempt shrink step | |||
return false |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@@ -155,6 +157,10 @@ class AttemptSnapshotStep(private val action: SnapshotAction) : Step(name) { | |||
} | |||
|
|||
override fun isIdempotent(): Boolean = false | |||
override suspend fun isTransientFailure(client: Client, stepContext: StepContext, managedIndexMetaData: ManagedIndexMetaData): Boolean { | |||
// TODO implement logic for detecting transient failure in attempt snapshot step | |||
return false |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Issue 33
Description of changes:
•Abstracted logic to handle nonidempotent failures that could cause inapropriate error log "Previous action was not able to update IndexMetaData."
•Detects transient failure for rollover step with testing
CheckList:
[ x] Commits are signed per the DCO using --signoff
By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
For more information on following Developer Certificate of Origin and signing off your commits, please check here.