From eeabdcb5b7179384187c1eeae189152795b57eaf Mon Sep 17 00:00:00 2001
From: Chin-Ya Huang
Date: Mon, 16 Sep 2024 08:06:41 +0800
Subject: [PATCH 1/5] test(robot): v2 volume should block trim when volume is degraded

longhorn/longhorn-8430

Signed-off-by: Chin-Ya Huang
---
 e2e/keywords/workload.resource         | 11 +++++++++++
 e2e/libs/keywords/workload_keywords.py |  4 ++++
 e2e/libs/volume/crd.py                 |  3 +++
 e2e/libs/volume/rest.py                | 17 +++++++++++++++++
 e2e/libs/volume/volume.py              |  3 +++
 e2e/tests/regression/test_v2.robot     | 22 ++++++++++++++++++++++
 6 files changed, 60 insertions(+)

diff --git a/e2e/keywords/workload.resource b/e2e/keywords/workload.resource
index 6b71fdbca5..e3dce4a006 100644
--- a/e2e/keywords/workload.resource
+++ b/e2e/keywords/workload.resource
@@ -189,3 +189,14 @@ Check ${workload_kind} ${workload_id} pod is ${expect_state} on another node
 Delete Longhorn ${workload_kind} ${workload_name} pod on node ${node_id}
     ${node_name} =    get_node_by_index    ${node_id}
     delete_workload_pod_on_node    ${workload_name}    ${node_name}    longhorn-system
+
+Trim ${workload_kind} ${workload_id} volume should ${condition}
+    ${workload_name} =    generate_name_with_suffix    ${workload_kind}    ${workload_id}
+
+    IF    $condition == "fail"
+        trim_workload_volume_filesystem    ${workload_name}    is_expect_fail=True
+    ELSE IF    $condition == "pass"
+        trim_workload_volume_filesystem    ${workload_name}    is_expect_fail=False
+    ELSE
+        Fail    "Invalid condition value: ${condition}"
+    END
diff --git a/e2e/libs/keywords/workload_keywords.py b/e2e/libs/keywords/workload_keywords.py
index 6f9175bd7b..d27845d91a 100644
--- a/e2e/libs/keywords/workload_keywords.py
+++ b/e2e/libs/keywords/workload_keywords.py
@@ -192,3 +192,7 @@ def is_workloads_pods_has_annotations(self, workload_names, annotation_key, name
             if not is_workload_pods_has_annotations(workload_name, annotation_key, namespace=namespace, label_selector=label_selector):
                 return False
         return True
+
+    def trim_workload_volume_filesystem(self, workload_name, is_expect_fail=False):
+        volume_name = get_workload_volume_name(workload_name)
+        self.volume.trim_filesystem(volume_name, is_expect_fail=is_expect_fail)
diff --git a/e2e/libs/volume/crd.py b/e2e/libs/volume/crd.py
index eea996d79b..b8ff66f586 100644
--- a/e2e/libs/volume/crd.py
+++ b/e2e/libs/volume/crd.py
@@ -511,3 +511,6 @@ def validate_volume_setting(self, volume_name, setting_name, value):
         volume = self.get(volume_name)
         assert str(volume["spec"][setting_name]) == value, \
             f"Expected volume {volume_name} setting {setting_name} is {value}, but it's {str(volume['spec'][setting_name])}"
+
+    def trim_filesystem(self, volume_name, is_expect_fail=False):
+        return Rest(self).trim_filesystem(volume_name, is_expect_fail=is_expect_fail)
diff --git a/e2e/libs/volume/rest.py b/e2e/libs/volume/rest.py
index 502d8f64e7..9cce306a46 100644
--- a/e2e/libs/volume/rest.py
+++ b/e2e/libs/volume/rest.py
@@ -370,3 +370,20 @@ def wait_for_replica_ready_to_rw(self, volume_name):
                 break
             time.sleep(self.retry_interval)
         assert ready, f"Failed to get volume {volume_name} replicas ready: {replicas}"
+
+    def trim_filesystem(self, volume_name, is_expect_fail=False):
+        is_unexpected_pass = False
+        try:
+            self.get(volume_name).trimFilesystem(name=volume_name)
+
+            if is_expect_fail:
+                is_unexpected_pass = True
+
+        except Exception as e:
+            if is_expect_fail:
+                logging(f"Failed to trim filesystem: {e}")
+            else:
+                raise e
+
+        if is_unexpected_pass:
+            raise Exception(f"Expected volume {volume_name} trim filesystem to fail")
diff --git a/e2e/libs/volume/volume.py b/e2e/libs/volume/volume.py
index bbfb2832bf..a6f5da7a85 100644
--- a/e2e/libs/volume/volume.py
+++ b/e2e/libs/volume/volume.py
@@ -154,3 +154,6 @@ def wait_for_engine_image_upgrade_completed(self, volume_name, engine_image_name

     def validate_volume_setting(self, volume_name, setting_name, value):
         return self.volume.validate_volume_setting(volume_name, setting_name, value)
+
+    def trim_filesystem(self, volume_name, is_expect_fail=False):
+        return self.volume.trim_filesystem(volume_name, is_expect_fail=is_expect_fail)
diff --git a/e2e/tests/regression/test_v2.robot b/e2e/tests/regression/test_v2.robot
index 27c3831665..137d7eb7c3 100644
--- a/e2e/tests/regression/test_v2.robot
+++ b/e2e/tests/regression/test_v2.robot
@@ -11,6 +11,8 @@ Resource    ../keywords/workload.resource
 Resource    ../keywords/volume.resource
 Resource    ../keywords/setting.resource
 Resource    ../keywords/node.resource
+Resource    ../keywords/host.resource
+Resource    ../keywords/longhorn.resource

 Test Setup    Set test environment
 Test Teardown    Cleanup test resources
@@ -50,3 +52,23 @@ Degraded Volume Replica Rebuilding
         And Wait for deployment 0 pods stable
         Then Check deployment 0 data in file data.txt is intact
     END
+
+V2 Volume Should Block Trim When Volume Is Degraded
+    Given Set setting auto-salvage to true
+    And Create storageclass longhorn-test with    dataEngine=v2
+    And Create persistentvolumeclaim 0    using RWO volume with longhorn-test storageclass
+    And Create deployment 0 with persistentvolumeclaim 0
+
+    FOR    ${i}    IN RANGE    ${LOOP_COUNT}
+        And Keep writing data to pod of deployment 0
+
+        When Restart cluster
+        And Wait for longhorn ready
+        And Wait for volume of deployment 0 attached and degraded
+        Then Trim deployment 0 volume should fail
+
+        When Wait for workloads pods stable
+        ...    deployment 0
+        And Check deployment 0 works
+        Then Trim deployment 0 volume should pass
+    END

From 07472cfa8a1fe86108d4db2933da4f98dc5b5e89 Mon Sep 17 00:00:00 2001
From: Chin-Ya Huang
Date: Tue, 29 Oct 2024 15:00:03 +0800
Subject: [PATCH 2/5] test(integration/system-backup): check outdated backup in if-not-present volume backup policy

longhorn/longhorn-6027

Signed-off-by: Chin-Ya Huang
---
 .../tests/test_system_backup_restore.py | 41 +++++++++++--------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/manager/integration/tests/test_system_backup_restore.py b/manager/integration/tests/test_system_backup_restore.py
index c77092d461..9703ed4b59 100644
--- a/manager/integration/tests/test_system_backup_restore.py
+++ b/manager/integration/tests/test_system_backup_restore.py
@@ -31,6 +31,7 @@ from common import check_pv_existence
 from common import check_backing_image_disk_map_status
 from common import wait_for_backup_restore_completed
+from common import write_volume_random_data

 from common import SETTING_BACKUPSTORE_POLL_INTERVAL
 from common import SIZE

@@ -207,10 +208,11 @@ def test_system_backup_and_restore_volume_with_backingimage(client, core_api, vo

 def test_system_backup_with_volume_backup_policy_if_not_present(client, volume_name, set_random_backupstore):  # NOQA
     """
     Scenario: system backup with volume backup policy (if-not-present) should
-              only create volume backup when there is no existing backup in
-              the volume.
+              create volume backup when no backup exists for the volume or when
+              the last backup is outdated.

     Issue: https://github.com/longhorn/longhorn/issues/5011
+           https://github.com/longhorn/longhorn/issues/6027

     Given a volume is created.
@@ -225,6 +227,13 @@ def test_system_backup_with_volume_backup_policy_if_not_present(client, volume_n
     And system backup (system-backup-2) created.
     Then system backup is in state (Ready).
     And volume has backup count (1).
+
+    When system backup (system-backup-3) has volume backup policy
+         (if-not-present).
+    And write data to volume.
+    And system backup (system-backup-3) created.
+    Then system backup is in state (Ready).
+    And volume has backup count (2).
     """

     host_id = get_self_host_id()

     volume = create_and_check_volume(client, volume_name)
     volume.attach(hostId=host_id)
     volume = wait_for_volume_healthy(client, volume_name)

-    system_backup_name_1 = system_backup_random_name()
-    client.create_system_backup(Name=system_backup_name_1)
+    def create_system_backup_and_assert_volume_backup_count(count):
+        system_backup_name = system_backup_random_name()
+        client.create_system_backup(Name=system_backup_name,
+                                    VolumeBackupPolicy=IF_NOT_PRESENT)

-    system_backup = client.by_id_system_backup(system_backup_name_1)
-    assert system_backup.volumeBackupPolicy == IF_NOT_PRESENT
+        system_backup = client.by_id_system_backup(system_backup_name)
+        assert system_backup.volumeBackupPolicy == IF_NOT_PRESENT

-    system_backup_wait_for_state("Ready", system_backup_name_1, client)
+        system_backup_wait_for_state("Ready", system_backup_name, client)

-    backup_volume = client.by_id_backupVolume(volume_name)
-    wait_for_backup_count(backup_volume, 1)
-
-    system_backup_name_2 = system_backup_random_name()
-    client.create_system_backup(Name=system_backup_name_2,
-                                VolumeBackupPolicy=IF_NOT_PRESENT)
+        backup_volume = client.by_id_backupVolume(volume_name)
+        wait_for_backup_count(backup_volume, count)

-    system_backup_wait_for_state("Ready", system_backup_name_2, client)
-
-    backup_volume = client.by_id_backupVolume(volume_name)
-    wait_for_backup_count(backup_volume, 1)
+    create_system_backup_and_assert_volume_backup_count(1)
+    create_system_backup_and_assert_volume_backup_count(1)
+    write_volume_random_data(volume)
+    create_system_backup_and_assert_volume_backup_count(2)


 @pytest.mark.system_backup_restore  # NOQA

From f0a878d7df7619fb5453e72a81697182b880784b Mon Sep 17 00:00:00 2001
From: Roger Yao
Date: Fri, 18 Oct 2024 10:12:39 +0800
Subject: [PATCH 3/5] Add case test_metric_longhorn_backup

longhorn/longhorn#9430

Signed-off-by: Roger Yao
---
 manager/integration/tests/test_metric.py | 167 ++++++++++++++++++++++-
 1 file changed, 162 insertions(+), 5 deletions(-)

diff --git a/manager/integration/tests/test_metric.py b/manager/integration/tests/test_metric.py
index e35a72ebb8..164043e010 100644
--- a/manager/integration/tests/test_metric.py
+++ b/manager/integration/tests/test_metric.py
@@ -6,7 +6,7 @@ from kubernetes.stream import stream
 from prometheus_client.parser import text_string_to_metric_families

-from common import client, core_api, pod, volume_name  # NOQA
+from common import client, core_api, pod, volume_name, batch_v1_api  # NOQA

 from common import crash_engine_process_with_sigkill
 from common import delete_replica_processes
@@ -35,6 +35,25 @@
 from common import DEFAULT_DISK_PATH
 from common import Gi

+from backupstore import set_random_backupstore  # NOQA
+from common import create_recurring_jobs
+from common import check_recurring_jobs
+from common import wait_for_cron_job_count
+from common import create_backup
+from common import wait_for_backup_count
+from common import delete_backup_volume
+
+RECURRING_JOB_NAME = "recurring-test"
+TASK = "task"
+GROUPS = "groups"
+CRON = "cron"
+RETAIN = "retain"
+BACKUP = "backup"
+CONCURRENCY = "concurrency"
+LABELS = "labels"
+DEFAULT = "default"
+SCHEDULE_1MIN = "* * * * *"
+
 # The dictionaries use float type of value because the value obtained from
 # prometheus_client is in float type.
 # https://github.com/longhorn/longhorn-tests/pull/1531#issuecomment-1833349994
@@ -138,6 +157,21 @@ def examine_metric_value(found_metric, metric_labels, expected_value=None):
         assert found_metric.value >= 0.0


+def wait_for_metric_sum_on_all_nodes(client, core_api, metric_name, metric_labels, expected_value):  # NOQA
+    for _ in range(RETRY_COUNTS):
+        time.sleep(RETRY_INTERVAL)
+
+        try:
+            check_metric_sum_on_all_nodes(client, core_api, metric_name,
+                                          metric_labels, expected_value)
+            return
+        except AssertionError:
+            continue
+
+    check_metric_sum_on_all_nodes(client, core_api, metric_name,
+                                  metric_labels, expected_value)
+
+
 def check_metric_sum_on_all_nodes(client, core_api, metric_name, expected_labels, expected_value=None):  # NOQA
     # Initialize total_metrics to store the sum of the metric values.
     total_metrics = {"labels": defaultdict(None), "value": 0.0}
@@ -440,12 +474,12 @@ def test_metric_longhorn_snapshot_actual_size_bytes(client, core_api, volume_nam

     When 1 snapshot is created by user
     And 1 snapshot is created by system

-    Then has a metric longhorn_snapshot_actual_size_bytes value equals to the
-         size of the user created snapshot,
+    Then has a metric longhorn_snapshot_actual_size_bytes value
+         equals to the size of the user created snapshot,
          and volume label is the volume name
          and user_created label is true
-    And has a metric longhorn_snapshot_actual_size_bytes value equals to the
-         size of the system created snapshot,
+    And has a metric longhorn_snapshot_actual_size_bytes value
+         equals to the size of the system created snapshot,
          and volume label is the volume name
          and user_created label is false
@@ -615,3 +649,126 @@ def test_node_metrics(client, core_api):  # NOQA
     wait_for_node_update(client, lht_hostId, "allowScheduling", False)
     check_metric_with_condition(core_api, "longhorn_node_status",
                                 metric_labels, 0.0)
+
+
+def test_metric_longhorn_backup(set_random_backupstore, client, core_api, batch_v1_api, volume_name):  # NOQA
+    """
+    Scenario: test metric longhorn_backup_actual_size_bytes and
+              longhorn_backup_state
+
+    Issue: https://github.com/longhorn/longhorn/issues/9429
+
+    Given a volume
+
+    When a backup is created by user
+    Then has a metric longhorn_backup_actual_size_bytes value
+         equals to the size of the backup,
+         and volume label is the volume name
+         and recurring_job label is empty
+    And has a metric longhorn_backup_state value equals to 3 (Completed),
+         and volume label is the volume name
+         and recurring_job label is empty
+
+    When a recurring backup job is created
+    Then should have a metric longhorn_backup_actual_size_bytes value
+         equals to the size of the backup,
+         and volume label is the volume name
+         and recurring_job label is the job name
+    And should have a metric longhorn_backup_state
+        value equals to 3 (Completed),
+         and volume label is the volume name
+         and recurring_job label is the job name
+    """
+    self_hostId = get_self_host_id()
+
+    # create a volume and attach it to a node.
+    volume_size = 50 * Mi
+    client.create_volume(name=volume_name,
+                         numberOfReplicas=1,
+                         size=str(volume_size))
+    volume = wait_for_volume_detached(client, volume_name)
+    volume.attach(hostId=self_hostId)
+    volume = wait_for_volume_healthy(client, volume_name)
+
+    # create the user backup.
+    data_size = 10 * Mi
+    backup_data = {'pos': 0,
+                   'len': data_size,
+                   'content': generate_random_data(data_size)}
+    write_volume_data(volume, backup_data)
+    create_backup(client, volume_name)
+    bv = client.by_id_backupVolume(volume_name)
+    wait_for_backup_count(bv, 1)
+
+    # get the backup size.
+    backup_size = 0
+    backups = bv.backupList().data
+    for backup in backups:
+        if backup['snapshotName'] == "volume-head":
+            continue
+
+        backup_size = int(backup['size'])
+        assert backup_size > 0
+
+    # assert the metric values for the user backup.
+    user_backup_metric_labels = {
+        "volume": volume_name,
+        "recurring_job": "",
+    }
+    wait_for_metric_sum_on_all_nodes(client, core_api,
+                                     "longhorn_backup_actual_size_bytes",
+                                     user_backup_metric_labels,
+                                     backup_size)
+
+    wait_for_metric_sum_on_all_nodes(client, core_api,
+                                     "longhorn_backup_state",
+                                     user_backup_metric_labels,
+                                     3)
+
+    # delete the existing backup before creating a recurring backup job.
+    delete_backup_volume(client, volume_name)
+
+    # create a recurring backup job.
+    recurring_jobs = {
+        RECURRING_JOB_NAME: {
+            TASK: BACKUP,
+            GROUPS: [DEFAULT],
+            CRON: SCHEDULE_1MIN,
+            RETAIN: 1,
+            CONCURRENCY: 1,
+            LABELS: {},
+        },
+    }
+    create_recurring_jobs(client, recurring_jobs)
+    check_recurring_jobs(client, recurring_jobs)
+    wait_for_cron_job_count(batch_v1_api, 1)
+
+    # wait for the recurring backup job to run.
+    time.sleep(60)
+    bv = client.by_id_backupVolume(volume_name)
+    wait_for_backup_count(bv, 1)
+
+    # get the recurring backup size.
+    recurring_backup_size = 0
+    backups = bv.backupList().data
+    for backup in backups:
+        if backup['snapshotName'] == "volume-head":
+            continue
+
+        recurring_backup_size = int(backup['size'])
+        assert recurring_backup_size > 0
+
+    # assert the metric values for the recurring backup.
+    recurring_backup_metric_labels = {
+        "volume": volume_name,
+        "recurring_job": RECURRING_JOB_NAME,
+    }
+    wait_for_metric_sum_on_all_nodes(client, core_api,
+                                     "longhorn_backup_actual_size_bytes",
+                                     recurring_backup_metric_labels,
+                                     recurring_backup_size)
+
+    wait_for_metric_sum_on_all_nodes(client, core_api,
+                                     "longhorn_backup_state",
+                                     recurring_backup_metric_labels,
+                                     3)

From fa8abb495b39089fef3bd2277dd1b68eb8f4f603 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Mon, 11 Nov 2024 00:55:37 +0000
Subject: [PATCH 4/5] chore(deps): update terraform azurerm to v3.117.0

---
 test_framework/terraform/azure/aks/main.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_framework/terraform/azure/aks/main.tf b/test_framework/terraform/azure/aks/main.tf
index 0d5870bdb2..fb01aab3d2 100644
--- a/test_framework/terraform/azure/aks/main.tf
+++ b/test_framework/terraform/azure/aks/main.tf
@@ -2,7 +2,7 @@ terraform {
   required_providers {
     azurerm = {
       source  = "hashicorp/azurerm"
-      version = "3.116.0"
+      version = "3.117.0"
     }
   }
 }

From 95b2041cca5d42ee2c09397105476845ffd4eef3 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Mon, 11 Nov 2024 00:55:30 +0000
Subject: [PATCH 5/5] chore(deps): update dependency boto3 to v1.35.57

---
 e2e/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/e2e/requirements.txt b/e2e/requirements.txt
index d877954200..099d7c8efd 100644
--- a/e2e/requirements.txt
+++ b/e2e/requirements.txt
@@ -4,6 +4,6 @@ directio==1.3
 flake8
 kubernetes==27.2.0
 requests==2.32.3
-boto3==1.35.54
+boto3==1.35.57
 pyyaml==6.0.2
 minio==5.0.10