From 17b820f4982a406ef3fab9546fd5d1539d67702e Mon Sep 17 00:00:00 2001
From: khushboo-rancher
Date: Mon, 29 Jan 2024 23:25:39 +0000
Subject: [PATCH] Update testing docs

Signed-off-by: khushboo-rancher
---
 integration/test_ha.html | 87 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/integration/test_ha.html b/integration/test_ha.html
index 8984d85c6b..033009a516 100644
--- a/integration/test_ha.html
+++ b/integration/test_ha.html
@@ -3334,6 +3334,34 @@

Module tests.test_ha

assert test_data == to_be_verified_data
+@pytest.mark.skip(reason="TODO")  # NOQA
+def test_retain_potentially_useful_replicas_in_autosalvage_loop():
+    """
+    Related issue:
+    https://github.com/longhorn/longhorn/issues/7425
+
+    Related manual test steps:
+    https://github.com/longhorn/longhorn-manager/pull/2432#issuecomment-1894675916
+
+    Steps:
+    1. Create a volume with numberOfReplicas=2 and staleReplicaTimeout=1.
+       Consider its two replicas ReplicaA and ReplicaB.
+    2. Attach the volume to a node.
+    3. Write data to the volume.
+    4. Exec into the instance-manager for ReplicaB and delete all .img.meta
+       files. This makes it impossible to restart ReplicaB successfully.
+    5. Cordon the node for ReplicaA. This makes it unavailable for
+       autosalvage.
+    6. Crash the instance-managers for both ReplicaA and ReplicaB.
+    7. Wait one minute and fifteen seconds. This is longer than
+       staleReplicaTimeout.
+    8. Confirm the volume is not healthy.
+    9. Confirm ReplicaA was not deleted.
+    10. Delete ReplicaB.
+    11. Wait for the volume to become healthy.
+    12. Verify the data.
+    """
+
def restore_with_replica_failure(client, core_api, volume_name, csi_pv,  # NOQA
                                 pvc, pod_make,  # NOQA
                                 allow_degraded_availability,
@@ -7074,6 +7102,64 @@

Functions

assert v.name != res_name
+
+def test_retain_potentially_useful_replicas_in_autosalvage_loop()
+
+Related issue:
+https://github.com/longhorn/longhorn/issues/7425
+
+Related manual test steps:
+https://github.com/longhorn/longhorn-manager/pull/2432#issuecomment-1894675916
+
+Steps:
+1. Create a volume with numberOfReplicas=2 and staleReplicaTimeout=1.
+Consider its two replicas ReplicaA and ReplicaB.
+2. Attach the volume to a node.
+3. Write data to the volume.
+4. Exec into the instance-manager for ReplicaB and delete all .img.meta
+files. This makes it impossible to restart ReplicaB successfully.
+5. Cordon the node for ReplicaA. This makes it unavailable for
+autosalvage.
+6. Crash the instance-managers for both ReplicaA and ReplicaB.
+7. Wait one minute and fifteen seconds. This is longer than
+staleReplicaTimeout.
+8. Confirm the volume is not healthy.
+9. Confirm ReplicaA was not deleted.
+10. Delete ReplicaB.
+11. Wait for the volume to become healthy.
+12. Verify the data.
+
+Expand source code
+
+@pytest.mark.skip(reason="TODO")  # NOQA
+def test_retain_potentially_useful_replicas_in_autosalvage_loop():
+    """
+    Related issue:
+    https://github.com/longhorn/longhorn/issues/7425
+
+    Related manual test steps:
+    https://github.com/longhorn/longhorn-manager/pull/2432#issuecomment-1894675916
+
+    Steps:
+    1. Create a volume with numberOfReplicas=2 and staleReplicaTimeout=1.
+       Consider its two replicas ReplicaA and ReplicaB.
+    2. Attach the volume to a node.
+    3. Write data to the volume.
+    4. Exec into the instance-manager for ReplicaB and delete all .img.meta
+       files. This makes it impossible to restart ReplicaB successfully.
+    5. Cordon the node for ReplicaA. This makes it unavailable for
+       autosalvage.
+    6. Crash the instance-managers for both ReplicaA and ReplicaB.
+    7. Wait one minute and fifteen seconds. This is longer than
+       staleReplicaTimeout.
+    8. Confirm the volume is not healthy.
+    9. Confirm ReplicaA was not deleted.
+    10. Delete ReplicaB.
+    11. Wait for the volume to become healthy.
+    12. Verify the data.
+    """
+
+
def test_reuse_failed_replica(client, core_api, volume_name)
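
Steps 4-6 in the docstring above are the fault-injection half of the scenario. The following is a minimal sketch, assuming the suite's core_api fixture is a kubernetes.client.CoreV1Api, of how they might be driven: exec into ReplicaB's instance-manager pod to delete the .img.meta files, cordon ReplicaA's node, and "crash" the instance-managers by force-deleting their pods. The pod names passed in, the /host/var/lib/longhorn path, and pod deletion as the crash mechanism are assumptions, not part of the recorded manual procedure.

# Sketch of steps 4-6 only; pod names, paths, and the crash mechanism
# are assumptions to be confirmed against the manual test steps.
from kubernetes import client as k8s_client
from kubernetes.stream import stream

LONGHORN_NAMESPACE = "longhorn-system"


def corrupt_replica_meta(core_api, im_pod_name):
    # Step 4: exec into the instance-manager pod hosting ReplicaB and delete
    # every .img.meta file so the replica can never restart successfully.
    # /host/var/lib/longhorn is an assumed hostPath mount; adjust to the
    # actual replica data directory in the target cluster.
    cmd = ["/bin/sh", "-c",
           "find /host/var/lib/longhorn -name '*.img.meta' -delete"]
    stream(core_api.connect_get_namespaced_pod_exec, im_pod_name,
           LONGHORN_NAMESPACE, command=cmd,
           stderr=True, stdin=False, stdout=True, tty=False)


def cordon_node(core_api, node_name):
    # Step 5: cordon ReplicaA's node so it cannot be used for autosalvage.
    core_api.patch_node(node_name, {"spec": {"unschedulable": True}})


def crash_instance_managers(core_api, im_pod_names):
    # Step 6: "crash" both instance-managers by force-deleting their pods.
    for name in im_pod_names:
        core_api.delete_namespaced_pod(
            name, LONGHORN_NAMESPACE,
            body=k8s_client.V1DeleteOptions(grace_period_seconds=0))
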
@@ -7894,6 +7980,7 @@

Index

  • test_recovery_from_im_deletion
  • test_replica_failure_during_attaching
  • test_restore_volume_with_invalid_backupstore
+  • test_retain_potentially_useful_replicas_in_autosalvage_loop
  • test_reuse_failed_replica
  • test_reuse_failed_replica_with_scheduling_check
  • test_salvage_auto_crash_all_replicas
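
For when the TODO above is picked up, here is a rough end-to-end sketch of the twelve steps. It leans on the suite's usual client, core_api, and volume_name fixtures, on common.py helpers (SIZE, get_self_host_id, wait_for_volume_detached, wait_for_volume_healthy, write_volume_random_data, check_volume_data) whose exact signatures should be re-checked against the current module, and on the corrupt_replica_meta, cordon_node, and crash_instance_managers helpers sketched earlier; the instanceManagerName field on replica objects is likewise an assumption.

import time

from common import (SIZE, check_volume_data, get_self_host_id,
                    wait_for_volume_detached, wait_for_volume_healthy,
                    write_volume_random_data)


def _sketch_retain_replicas_in_autosalvage_loop(client, core_api, volume_name):
    # Step 1: two replicas and an aggressive one-minute staleReplicaTimeout.
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2, staleReplicaTimeout=1)
    volume = wait_for_volume_detached(client, volume_name)

    # Steps 2-3: attach to this node and write data we can verify later.
    volume = volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, volume_name)
    data = write_volume_random_data(volume)

    replica_a, replica_b = volume.replicas[0], volume.replicas[1]

    # Step 4: corrupt ReplicaB's metadata (corrupt_replica_meta above);
    # instanceManagerName is an assumed field name on the replica object.
    corrupt_replica_meta(core_api, replica_b.instanceManagerName)

    # Step 5: make ReplicaA's node unschedulable so it cannot be used for
    # autosalvage (cordon_node above).
    cordon_node(core_api, replica_a.hostId)

    # Step 6: crash the instance-managers for both replicas
    # (crash_instance_managers above).
    crash_instance_managers(core_api, [replica_a.instanceManagerName,
                                       replica_b.instanceManagerName])

    # Step 7: wait longer than the one-minute staleReplicaTimeout.
    time.sleep(75)

    # Steps 8-9: the volume must not be healthy and ReplicaA must survive.
    volume = client.by_id_volume(volume_name)
    assert volume.robustness != "healthy"
    assert replica_a.name in [r.name for r in volume.replicas]

    # Steps 10-12: drop the unusable replica, then expect full recovery.
    volume.replicaRemove(name=replica_b.name)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data)

The 75-second sleep mirrors step 7: it has to exceed the one-minute staleReplicaTimeout so that the behavior under test, keeping the last potentially useful replica instead of deleting it during the autosalvage loop, is actually exercised.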