feat(backup): fix test cases for multiple backup stores #2202

Merged · 1 commit · Dec 19, 2024
manager/integration/tests/backupstore.py (54 changes: 48 additions & 6 deletions)
```diff
@@ -14,6 +14,8 @@
 from common import SETTING_BACKUP_TARGET_CREDENTIAL_SECRET
 from common import SETTING_BACKUPSTORE_POLL_INTERVAL
 from common import LONGHORN_NAMESPACE
+from common import NODE_UPDATE_RETRY_COUNT
+from common import NODE_UPDATE_RETRY_INTERVAL
 from common import cleanup_all_volumes
 from common import is_backupTarget_s3
 from common import is_backupTarget_nfs
@@ -31,6 +33,9 @@
 
 BACKUPSTORE_BV_PREFIX = "/backupstore/volumes/"
 BACKUPSTORE_LOCK_DURATION = 150
+DEFAULT_BACKUPTARGET = "default"
+object_has_been_modified = "the object has been modified; " + \
+    "please apply your changes to the latest version and try again"
 
 TEMP_FILE_PATH = "/tmp/temp_file"
 
@@ -91,6 +96,13 @@ def reset_backupstore_setting(client):
         SETTING_BACKUPSTORE_POLL_INTERVAL)
     client.update(backup_store_poll_interval, value="300")
 
+    for _ in range(NODE_UPDATE_RETRY_COUNT):
+        bt = client.by_id_backupTarget(DEFAULT_BACKUPTARGET)
+        if bt.backupTargetURL == "" and \
+                bt.credentialSecret == "" and bt.pollInterval == "5m0s":
+            break
+        time.sleep(NODE_UPDATE_RETRY_INTERVAL)
+
 
 def set_backupstore_invalid(client):
     poll_interval = get_backupstore_poll_interval()
@@ -148,24 +160,54 @@ def set_backupstore_azurite(client):
 
 def set_backupstore_url(client, url):
     backup_target_setting = client.by_id_setting(SETTING_BACKUP_TARGET)
-    backup_target_setting = client.update(backup_target_setting,
-                                          value=url)
+    for _ in range(NODE_UPDATE_RETRY_COUNT):
+        try:
+            backup_target_setting = client.update(backup_target_setting,
+                                                  value=url)
+        except Exception as e:
+            if object_has_been_modified in str(e.error.message):
+                time.sleep(NODE_UPDATE_RETRY_INTERVAL)
+                continue
+            print(e)
+            raise
+        else:
+            break
     assert backup_target_setting.value == url
 
 
 def set_backupstore_credential_secret(client, credential_secret):
     backup_target_credential_setting = client.by_id_setting(
         SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
-    backup_target_credential_setting = client.update(
-        backup_target_credential_setting, value=credential_secret)
+    for _ in range(NODE_UPDATE_RETRY_COUNT):
+        try:
+            backup_target_credential_setting = client.update(
+                backup_target_credential_setting, value=credential_secret)
+        except Exception as e:
+            if object_has_been_modified in str(e.error.message):
+                time.sleep(NODE_UPDATE_RETRY_INTERVAL)
+                continue
+            print(e)
+            raise
+        else:
+            break
```
Comment on lines +181 to +192
🛠️ Refactor suggestion

Consider extracting common retry logic.

The retry mechanism is duplicated across set_backupstore_url(), set_backupstore_credential_secret() and set_backupstore_poll_interval().

Consider extracting the retry logic into a decorator or helper function:

```python
def retry_on_conflict(func):
    def wrapper(*args, **kwargs):
        for _ in range(NODE_UPDATE_RETRY_COUNT):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if object_has_been_modified in str(e.error.message):
                    time.sleep(NODE_UPDATE_RETRY_INTERVAL)
                    continue
                print(e)
                raise
    return wrapper


@retry_on_conflict
def set_backupstore_url(client, url):
    backup_target_setting = client.by_id_setting(SETTING_BACKUP_TARGET)
    backup_target_setting = client.update(backup_target_setting, value=url)
    assert backup_target_setting.value == url
```

This would:

  1. Eliminate code duplication
  2. Make the retry logic more maintainable
  3. Make it easier to add retry to other functions

Also applies to: 199-210
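
One caveat if the decorator sketch above is adopted: when every attempt hits the conflict error, `wrapper` falls out of the loop and silently returns `None`. A possible hardening of the same idea, not part of this PR, is sketched below; it assumes the constants and the `object_has_been_modified` string from backupstore.py are in scope, and that the client raises the same exception type handled in the PR's retry loops.

```python
import functools
import time


def retry_on_conflict(func):
    @functools.wraps(func)  # keep the wrapped function's name in tracebacks
    def wrapper(*args, **kwargs):
        last_err = None
        for _ in range(NODE_UPDATE_RETRY_COUNT):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if object_has_been_modified in str(e.error.message):
                    # conflict with a concurrent update; back off and retry
                    last_err = e
                    time.sleep(NODE_UPDATE_RETRY_INTERVAL)
                    continue
                print(e)
                raise
        # every attempt conflicted; surface the last error instead of
        # silently returning None
        raise last_err
    return wrapper
```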

```diff
     assert backup_target_credential_setting.value == credential_secret
 
 
 def set_backupstore_poll_interval(client, poll_interval):
     backup_store_poll_interval_setting = client.by_id_setting(
         SETTING_BACKUPSTORE_POLL_INTERVAL)
-    backup_target_poll_interal_setting = client.update(
-        backup_store_poll_interval_setting, value=poll_interval)
+    for _ in range(NODE_UPDATE_RETRY_COUNT):
+        try:
+            backup_target_poll_interal_setting = client.update(
+                backup_store_poll_interval_setting, value=poll_interval)
+        except Exception as e:
+            if object_has_been_modified in str(e.error.message):
+                time.sleep(NODE_UPDATE_RETRY_INTERVAL)
+                continue
+            print(e)
+            raise
+        else:
+            break
```
Comment on lines +199 to +210
🛠️ Refactor suggestion

Check poll interval usage & clarity.
set_backupstore_poll_interval is now wrapped in the same retry logic. Since it silently retries updates that fail with the "object_has_been_modified" error, consider logging each retry to keep debugging clear when the tests run in CI.
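
For illustration only, one way to read that suggestion is the PR's loop with a log line added before backing off; the `print` wording and the `attempt` counter are assumptions, mirroring the existing error handling rather than an agreed change.

```python
def set_backupstore_poll_interval(client, poll_interval):
    backup_store_poll_interval_setting = client.by_id_setting(
        SETTING_BACKUPSTORE_POLL_INTERVAL)
    for attempt in range(NODE_UPDATE_RETRY_COUNT):
        try:
            backup_target_poll_interal_setting = client.update(
                backup_store_poll_interval_setting, value=poll_interval)
        except Exception as e:
            if object_has_been_modified in str(e.error.message):
                # surface the conflict in the test log before backing off
                print(f"update of {SETTING_BACKUPSTORE_POLL_INTERVAL} "
                      f"conflicted, retry {attempt + 1}/{NODE_UPDATE_RETRY_COUNT}")
                time.sleep(NODE_UPDATE_RETRY_INTERVAL)
                continue
            print(e)
            raise
        else:
            break
    assert backup_target_poll_interal_setting.value == poll_interval
```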

```diff
     assert backup_target_poll_interal_setting.value == poll_interval
```
manager/integration/tests/common.py (33 changes: 17 additions & 16 deletions)
```diff
@@ -516,16 +516,15 @@ def wait_for_backup_count(backup_volume, number, retry_counts=120):
     assert ok
 
 
-def delete_backup(client, volume_name, backup_name):
-    backup_volume = client.by_id_backupVolume(volume_name)
-    backup_volume.backupDelete(name=backup_name)
-    wait_for_backup_delete(client, volume_name, backup_name)
+def delete_backup(client, bv, backup_name):
+    bv.backupDelete(name=backup_name)
+    wait_for_backup_delete(client, bv.volumeName, backup_name)
 
 
-def delete_backup_volume(client, volume_name):
-    bv = client.by_id_backupVolume(volume_name)
+def delete_backup_volume(client, backup_volume_name):
+    bv = client.by_id_backupVolume(backup_volume_name)
     client.delete(bv)
-    wait_for_backup_volume_delete(client, volume_name)
+    wait_for_backup_volume_delete(client, backup_volume_name)
 
 
 def delete_backup_backing_image(client, backing_image_name):
@@ -3219,11 +3218,13 @@ def check_volume_endpoint(v):
     return endpoint
 
 
-def find_backup_volume(client, volume_name):
-    bvs = client.list_backupVolume()
-    for bv in bvs:
-        if bv.name == volume_name and bv.created != "":
-            return bv
+def find_backup_volume(client, volume_name, retry=1):
+    for _ in range(retry):
+        bvs = client.list_backupVolume()
+        for bv in bvs:
+            if bv.volumeName == volume_name and bv.created != "":
+                return bv
+        time.sleep(RETRY_BACKUP_INTERVAL)
```
Comment on lines +3221 to +3227
⚠️ Potential issue

Consider returning partial result or raising error.
When find_backup_volume cannot find the backup volume, it silently returns None after exhausting its retries. Consider logging a warning or raising a custom exception to make test failures easier to debug.

```diff
+raise Exception(f"No backup volume found for volume {volume_name} after {retry} retries")
```

Committable suggestion skipped: line range outside the PR's diff.
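
A minimal sketch of the raising variant, assuming the plain `Exception` type and message wording above (they are illustrative, not part of the PR); callers that currently rely on a `None` return would need to catch it instead.

```python
def find_backup_volume(client, volume_name, retry=1):
    for _ in range(retry):
        bvs = client.list_backupVolume()
        for bv in bvs:
            if bv.volumeName == volume_name and bv.created != "":
                return bv
        time.sleep(RETRY_BACKUP_INTERVAL)
    # fail loudly so the calling test points at the real problem
    raise Exception(f"No backup volume found for volume {volume_name} "
                    f"after {retry} retries")
```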

```diff
     return None
 
 
@@ -3952,17 +3953,17 @@ def is_backupTarget_azurite(s):
     return s.startswith("azblob://")
 
 
-def wait_for_backup_volume(client, vol_name, backing_image=""):
+def wait_for_backup_volume(client, bv_name, backing_image=""):
     for _ in range(RETRY_BACKUP_COUNTS):
-        bv = client.by_id_backupVolume(vol_name)
+        bv = client.by_id_backupVolume(bv_name)
         if bv is not None:
             if backing_image == "":
                 break
             if bv.backingImageName == backing_image \
                     and bv.backingImageChecksum != "":
                 break
         time.sleep(RETRY_BACKUP_INTERVAL)
-    assert bv is not None, "failed to find backup volume " + vol_name
+    assert bv is not None, "failed to find backup volume " + bv_name
 
 
 def wait_for_backup_target_available(client, available):
@@ -4877,7 +4878,7 @@ def wait_for_volume_restoration_start(client, volume_name, backup_name,
             started = True
         if started:
             break
-        time.sleep(RETRY_INTERVAL)
+        time.sleep(RETRY_INTERVAL_SHORT)
     assert started
     return status.replica
```