diff --git a/.github/mergify.yml b/.github/mergify.yml
index 2dd1aee601..0da48caa09 100644
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -5,9 +5,8 @@ pull_request_rules:
- check-success=DCO
- check-success=CodeFactor
- check-success=codespell
- - "#approved-reviews-by>=1"
+ - "#approved-reviews-by>=2"
- approved-reviews-by=@longhorn/maintainer
- - label=ready-to-merge
actions:
merge:
method: rebase
@@ -17,18 +16,4 @@ pull_request_rules:
- conflict
actions:
comment:
- message: This pull request is now in conflicts. Could you fix it @{{author}}? 🙏
-
-# Comment on the PR to trigger backport. ex: @Mergifyio copy stable/3.1 stable/4.0
-- name: backport patches to stable branch
- conditions:
- - base=master
- actions:
- backport:
- title: "[BACKPORT][{{ destination_branch }}] {{ title }}"
- body: |
- This is an automatic backport of pull request #{{number}}.
-
- {{cherry_pick_error}}
- assignees:
- - "{{ author }}"
\ No newline at end of file
+ message: This pull request is now in conflict. Could you fix it @{{author}}? 🙏
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 0000000000..3e239f5e31
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,23 @@
+name: Codespell
+
+on:
+ pull_request:
+ branches:
+ - master
+ - main
+ - "v*.*.*"
+
+jobs:
+ codespell:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+ - name: Check code spell
+ uses: codespell-project/actions-codespell@v2
+ with:
+ check_filenames: true
+ skip: "*/**.yaml,*/**.yml,./scripts,./vendor,MAINTAINERS,LICENSE,go.mod,go.sum"
+ ignore_words_list: aks
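
The workflow above runs only in CI; contributors may want to reproduce the check locally before pushing. A minimal sketch, assuming codespell is installed from PyPI and invoked from the repository root with the same skip and ignore lists as the workflow:

```bash
# Install codespell locally (any recent release accepts these flags).
pip install codespell

# Mirror the CI configuration: also check filenames, skip generated/vendored
# paths, and ignore the "aks" false positive.
codespell \
  --check-filenames \
  --skip="*/**.yaml,*/**.yml,./scripts,./vendor,MAINTAINERS,LICENSE,go.mod,go.sum" \
  --ignore-words-list="aks"
```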
diff --git a/build_engine_test_images/terraform/aws/ubuntu/main.tf b/build_engine_test_images/terraform/aws/ubuntu/main.tf
index 82dab61c9f..70ece8d133 100644
--- a/build_engine_test_images/terraform/aws/ubuntu/main.tf
+++ b/build_engine_test_images/terraform/aws/ubuntu/main.tf
@@ -99,7 +99,7 @@ resource "aws_route_table" "build_engine_aws_public_rt" {
}
}
-# Assciate public subnet to public route table
+# Associate public subnet to public route table
resource "aws_route_table_association" "build_engine_aws_public_subnet_rt_association" {
depends_on = [
aws_subnet.build_engine_aws_public_subnet,
diff --git a/build_engine_test_images/terraform/aws/ubuntu/variables.tf b/build_engine_test_images/terraform/aws/ubuntu/variables.tf
index f1608de61a..c9bd3c38a2 100644
--- a/build_engine_test_images/terraform/aws/ubuntu/variables.tf
+++ b/build_engine_test_images/terraform/aws/ubuntu/variables.tf
@@ -56,7 +56,7 @@ variable "build_engine_aws_instance_name" {
variable "build_engine_aws_instance_type" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
default = ""
}
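
Because `build_engine_aws_instance_type` defaults to an empty string, the recommended type still has to be supplied at apply time. A minimal sketch of overriding the variable from the CLI (standard Terraform usage; credentials and region are assumed to come from the existing configuration):

```bash
# amd64 build host
terraform apply -var="build_engine_aws_instance_type=t3.xlarge"

# arm64 build host
terraform apply -var="build_engine_aws_instance_type=t4g.xlarge"
```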
diff --git a/docs/content/manual/functional-test-cases/backup.md b/docs/content/manual/functional-test-cases/backup.md
index 9795af967c..ea366600e9 100644
--- a/docs/content/manual/functional-test-cases/backup.md
+++ b/docs/content/manual/functional-test-cases/backup.md
@@ -28,7 +28,7 @@ Backup create operations test cases
|-----| --- | --- | --- |
| 1 | Create backup from existing snapshot | **Prerequisite:**
* Backup target is set to NFS server, or S3 compatible target.
1. Create a workload using Longhorn volume
2. Write data to volume, compute it’s checksum (checksum#1)
3. Create a snapshot (snapshot#1)
4. Create a backup from (snapshot#1)
5. Restore backup to a different volume
6. Attach volume to a node and check it’s data, and compute it’s checksum | * Backup should be created
* Restored volume data checksum should match (checksum#1) |
| 2 | Create volume backup for a volume attached to a node | **Prerequisite:**
* Backup target is set to NFS server, or S3 compatible target.
1. Create a volume, attach it to a node
2. Format volume using ext4/xfs filesystem and mount it to a directory on the node
3. Write data to volume, compute it’s checksum (checksum#1)
4. Create a backup
5. Restore backup to a different volume
6. Attach volume to a node and check it’s data, and compute it’s checksum
7. Check volume backup labels | * Backup should be created
* Restored volume data checksum should match (checksum#1)
* backup should have no backup labels |
-| 3 | Create volume backup used by Kubernetes workload | **Prerequisite:**
* Backup target is set to NFS server, or S3 compatible target.
1. Create a deployment workload with `nReplicas = 1` using Longhorn volume
2. Write data to volume, compute it’s checksum (checksum#1)
3. Create a backup
4. Check backup labels
5. Scale down deployment `nReplicas = 0`
6. Delete Longhorn volume
7. Restore backup to a volume with the same deleted volume name
8. Scale back deployment `nReplicas = 1`
9. Check volume data checksum | * Backup labels should contain the following informations about workload that was using the volume at time of backup.
* Namespace
* PV Name
* PVC Name
* PV Status
* Workloads Status
* Pod Name
Workload Name
Workload Type
Pod Status
* After volume restore, data checksum should match (checksum#1) |
+| 3 | Create volume backup used by Kubernetes workload | **Prerequisite:**
* Backup target is set to NFS server, or S3 compatible target.
1. Create a deployment workload with `nReplicas = 1` using Longhorn volume
2. Write data to volume, compute it’s checksum (checksum#1)
3. Create a backup
4. Check backup labels
5. Scale down deployment `nReplicas = 0`
6. Delete Longhorn volume
7. Restore backup to a volume with the same deleted volume name
8. Scale back deployment `nReplicas = 1`
9. Check volume data checksum | * Backup labels should contain the following information about workload that was using the volume at time of backup.
* Namespace
* PV Name
* PVC Name
* PV Status
* Workloads Status
* Pod Name
Workload Name
Workload Type
Pod Status
* After volume restore, data checksum should match (checksum#1) |
| 4 | Create volume backup with customized labels | **Prerequisite:**
* Backup target is set to NFS server, or S3 compatible target.
1. Create a volume, attach it to a node
2. Create a backup, add customized labels
key: `K1` value: `V1`
3. Check volume backup labels | * Backup should be created with customized labels |
| 5 | Create recurring backups | 1. Create a deployment workload with `nReplicas = 1` using Longhorn volume
2. Write data to volume , compute it’s checksum (checksum#1)
3. Create a recurring backup `every 5 minutes`. and set retain count to `5`
4. add customized labels key: `K1` value: `V1`
5. Wait for recurring backup to triggered (backup#1, backup#2 )
6. Scale down deployment `nReplicas = 0`
7. Delete the volume.
8. Restore backup to a volume with the same deleted volume name
9. Scale back deployment `nReplicas = 1`
10. Check volume data checksum | * backups should be created with Kubernetes status labels and customized labels
* After volume restore, data checksum should match (checksum#1)
* after restoring the backup recurring backups should continue to be created |
| 6 | Backup created using Longhorn behind proxy | **Prerequisite:**
* Setup a Proxy on an instance (Optional: use squid)
* Create a single node cluster in EC2
* Deploy Longhorn
1. Block outgoing traffic except for the proxy instance.
2. Create AWS secret in longhorn.
3. In UI Settings page, set backupstore target and backupstore credential secret
4. Create a volume, attach it to a node, format the volume, and mount it to a directory.
5. Write some data to the volume, and create a backup. | * Ensure backup is created |
@@ -99,7 +99,7 @@ Disaster Recovery test cases
| DR volume across the cluster #5 | Cluster A:
* Create volume Y
* Attach the volume Y
* Create a backup of Y
Cluster B:
* Backup Volume list page, click \`Create Disaster Recovery Volume\` from volume dropdown
* Create two DR volumes Ydr1 and Ydr2.
* Attach the volume Y to any node
* Mount the volume Y on the node
* Write a file of 10Mb into it, use \`/dev/urandom\` to generate the file
* Calculate the checksum of the file
* Make a Backup
* Attach Ydr1 and Ydr2 to any nodes | * DR volume's last backup should be updated automatically, after settings.BackupPollInterval passed.
* DR volume.LastBackup should be different from DR volume's controller\[0\].LastRestoredBackup temporarily (it's restoring the last backup)
* During the restoration, DR volume cannot be activated.
* Eventually, DR volume.LastBackup should equal to controller\[0\].LastRestoredBackup. |
| DR volume across the cluster #6 | \[follow #5\]
Cluster A:
* In the directory mounted volume Y, write a new file of 100Mb.
* Record the checksum of the file
* Create a backup of volume Y
Cluster B:
* Wait for restoration of volume Ydr1 and Ydr2 to complete
* Activate Ydr1
* Attach it to one node and verify the content | * DR volume's last backup should be updated automatically, after settings.BackupPollInterval passed.
* Eventually, DR volume.LastBackup should equal to controller\[0\].LastRestoredBackup.
* Ydr1 should have the same file checksum of volume Y |
| DR volume across the cluster #7 | \[follow #6\]
Cluster A
* In the directory mounted volume Y, remove all the files. Write a file of 50Mb
* Record the checksum of the file
Cluster B
* Change setting.BackupPollInterval to longer e.g. 1h
Cluster A
* Create a backup of volume Y
Cluster B
\[DO NOT CLICK BACKUP PAGE, which will update last backup as a side effect\]
* Before Ydr2's last backup updated, activate Ydr2 | * YBdr2's last backup should be immediately updated to the last backup of volume Y
* Activate should fail due to restoration is in progress | When user clicks on “activate DRV”, restoration happens
And the volume goes into detached state |
-| DR volume across the cluster #8 | Cluster A
* Create volume Z
* Attach the volume Z
* Create a backup of Z
Cluster B
* Backup Volume list page, click \`Create Disaster Recovery Volume\` from volume dropdown
* Create DR volumes Zdr1, Zdr2 and Zdr3
* Attach the volume Zdr1, Zdr2 and Zdr3 to any node
* Change setting.BackupPollInterval to approriate interval for multiple backups e.g. 15min
* Make sure LastBackup of Zdr is consistent with that of Z
Cluster A
* Create multiple backups for volume Z before Zdr's last backup updated. For each backup, write or modify at least one file then record the cheksum.
Cluster B
* Wait for restoration of volume Zdr1 to complete
* Activate Zdr1
* Attach it to one node and verify the content | * Zdr1's last backup should be updated after settings.BackupPollInterval passed.
* Zdr1 should have the same files with the the same checksums of volume Z |
+| DR volume across the cluster #8 | Cluster A
* Create volume Z
* Attach the volume Z
* Create a backup of Z
Cluster B
* Backup Volume list page, click \`Create Disaster Recovery Volume\` from volume dropdown
* Create DR volumes Zdr1, Zdr2 and Zdr3
* Attach the volume Zdr1, Zdr2 and Zdr3 to any node
* Change setting.BackupPollInterval to appropriate interval for multiple backups e.g. 15min
* Make sure LastBackup of Zdr is consistent with that of Z
Cluster A
* Create multiple backups for volume Z before Zdr's last backup updated. For each backup, write or modify at least one file then record the checksum.
Cluster B
* Wait for restoration of volume Zdr1 to complete
* Activate Zdr1
* Attach it to one node and verify the content | * Zdr1's last backup should be updated after settings.BackupPollInterval passed.
* Zdr1 should have the same files with the same checksums of volume Z |
| DR volume across the cluster #9 | \[follow #8\]
Cluster A
* Delete the latest backup of Volume Z | * Last backup of Zdr2 and Zdr3 should be empty after settings.BackupPollInterval passed. Field controller\[0\].LastRestoredBackup and controller\[0\].RequestedBackupRestore should retain. |
| DR volume across the cluster #10 | \[follow #9\]
Cluster B
* Activate Zdr2
* Attach it to one node and verify the content | * Zdr2 should have the same files with the the same checksums of volume Z | |
| DR volume across the cluster #11 | \[follow #10\]
Cluster A
* Create one more backup with at least one file modified.
Cluster B
* Wait for restoration of volume Zdr3 to complete
* Activate Zdr3
* Attach it to one node and verify the content | * Zdr3 should have the same files with the the same checksums of volume Z |
@@ -150,7 +150,7 @@ The setup requirements:
| 4 | Delete the backup with `DeletionPolicy` as delete | 1. Repeat the steps from test scenario 1.
2. Delete the `VolumeSnapshot` using `kubectl delete volumesnapshots test-snapshot-pvc` | 1. The `VolumeSnapshot` should be deleted.
2. By default the `DeletionPolicy` is delete, so the `VolumeSnapshotContent` should be deleted.
3. Verify in the backup store, the backup should be deleted. |
| 5 | Delete the backup with `DeletionPolicy` as retain | 1. Create a `VolumeSnapshotClass` class with `deletionPolicy` as Retain
kind: VolumeSnapshotClass
apiVersion: snapshot.storage.k8s.io/v1beta1
metadata:
name: longhorn
driver: driver.longhorn.io
deletionPolicy: Retain
2. Repeat the steps from test scenario 1.
3. Delete the `VolumeSnapshot` using `kubectl delete volumesnapshots test-snapshot-pvc` | 1. The `VolumeSnapshot` should be deleted.
2. `VolumeSnapshotContent` should NOT be deleted.
3. Verify in the backup store, the backup should NOT be deleted. |
| 6 | Take a backup from longhorn of a snapshot created by csi snapshotter. | 1. Create a volume test-vol and write into it.
1. Compute the md5sum
2. Create the below `VolumeSnapshot` object
apiVersion: snapshot.storage.k8s.io/v1beta1
kind: VolumeSnapshot
metadata:
name: test-snapshot-pvc
spec:
volumeSnapshotClassName: longhorn
source:
persistentVolumeClaimName: test-vol
3. Go to longhorn UI and click on the snapshot created and take another backup | 1. On creating a `VolumeSnapshot`, a backup should be created in the backup store.
2. On creating another backup from longhorn UI, one more backup should be created in backup store. |
-| 7 | Delete the `csi plugin` while a backup is in progress. | 1. Create a volume and write into it.
Compute the md5sum of the data.
2. Create the below `VolumeSnapshot` object
apiVersion: snapshot.storage.k8s.io/v1beta1
kind: VolumeSnapshot
metadata:
name: test-snapshot-pvc
spec:
volumeSnapshotClassName: longhorn
source:
persistentVolumeClaimName: test-vol
3. While the backup is in progress, delete the `csi plugin` | On deleting `csi plugin` , a new pod of `csi plugin` should get created and the bacup should continue to complete. |
+| 7 | Delete the `csi plugin` while a backup is in progress. | 1. Create a volume and write into it.
Compute the md5sum of the data.
2. Create the below `VolumeSnapshot` object
apiVersion: snapshot.storage.k8s.io/v1beta1
kind: VolumeSnapshot
metadata:
name: test-snapshot-pvc
spec:
volumeSnapshotClassName: longhorn
source:
persistentVolumeClaimName: test-vol
3. While the backup is in progress, delete the `csi plugin` | On deleting `csi plugin`, a new pod of `csi plugin` should get created and the backup should continue to complete. |
| 8 | Take a backup using csi snapshotter with backup store as NFS server. | | |
| 9 | Restore from NFS backup store. | | |
| 10 | Delete from NFS backup store. | | |
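
The `VolumeSnapshot` manifests referenced in rows 6 and 7 above are flattened into the table cells; a minimal sketch of applying the same object from the command line, assuming the PVC `test-vol` and the `longhorn` `VolumeSnapshotClass` already exist as described in the steps:

```bash
# Create the CSI snapshot used by the test scenarios above.
kubectl apply -f - <<EOF
apiVersion: snapshot.storage.k8s.io/v1beta1
kind: VolumeSnapshot
metadata:
  name: test-snapshot-pvc
spec:
  volumeSnapshotClassName: longhorn
  source:
    persistentVolumeClaimName: test-vol
EOF
```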
diff --git a/docs/content/manual/functional-test-cases/kubernetes.md b/docs/content/manual/functional-test-cases/kubernetes.md
index 5f13237538..a4c603d61f 100644
--- a/docs/content/manual/functional-test-cases/kubernetes.md
+++ b/docs/content/manual/functional-test-cases/kubernetes.md
@@ -43,13 +43,13 @@ title: 5. Kubernetes
| 2 | Persistent Volume: Create a PV | **Pre condition:**
* Longhorn is deployed in the cluster
**Steps:**
1. Create a Volume in Longhorn UI `test-volume`
2. Go to cluster → Storage → Persistent Volumes
3. Click on Add PV
4. Select Volume Plugin **Longhorn**
5. Give in other required parameters including replica count.
6. Give in Volume Plugin - `test-volume` which an existing volume in longhorn
7. Click on **Save**.
8. Verify **test-1** PV is created
9. Go to Cluster → Project (default) → Workloads
10. Deploy a workload
11. In the Volumes section → Add a New Volume Claim → Use an existing persistent volume → Select **test-1** from PV dropdown.
12. Click on Define
13. Enter Mount Point.
14. Click on create workload
15. Verify workload is created successfully.
16. Volume gets attached to the pod in the workload
17. Navigate to Longhorn UI.
18. Verify user is able to view the volume attached to the workload in the UI
19. Navigate to volume details page of the volume and Verify the replica count mentioned in Step 4 is available | * Longhorn PV should be created
* Workload should be deployed with the volume mounted from the PV
* Verify volume is available on the Longhorn UI.
* Verify the replica count is as mentioned during storage class creation. |
| 3 | Create Storage class in Rancher; From Longhorn create volumes from this storage class. | **Pre condition:**
* Longhorn is deployed in the cluster
**Steps:**
1. Go to cluster → Storage → Storage Classes
2. Click on Add class
3. Select Provisioner **Longhorn**
4. Give in other required parameters including replica count.
5. Click on **Save**.
6. Verify **test-1** storage class is created
7. Go to Longhorn UI
8. In the Settings page for “Default Longhorn Static StorageClass Name”, give in the value: “test-1”
9. Go to Volumes page, click on create volume.
10. Create a volume name : v1
11. Verify v1 is created
12. using kubectl -
13. kubectl get pv -o yaml
14. Verify “storageClassName:” ---> test-1 | * Longhorn storage class should be created
* Value of Default Longhorn Static StorageClass Name should be changed in the settings page
* volume should be created in longhorn UI
* “storageClassName:” value should be **test-1** |
| 4 | Create Storage Class using backup URL | 1. Create volume and PV/PVC/POD in Longhorn
2. Write `test_data` into pod
3. Create a snapshot and back it up. Get the backup URL
4. Create a new StorageClass `longhorn-from-backup` in rancher and set backup URL.
5. Use `longhorn-from-backup` to create a new PVC
6. Wait for the volume to be created and complete the restoration.
7. Create the pod using the PVC. Verify the data | |
-| 5 | Create Storage class - by using different values for the input list of paramters | **Pre condition:**
* Longhorn is deployed in the cluster
**Steps:**
1. Go to cluster → Storage → Storage Classes
2. Click on Add class
3. Select Provisioner **Longhorn**
4. Give in other required parameters.
5. Click on **Save**.
6. Use this storage class to create a PVC and deploy in a workload.
7. Verify the parameters of the volume created. | Volume parameters should match the storage class paramaters. |
+| 5 | Create Storage class - by using different values for the input list of parameters | **Pre condition:**
* Longhorn is deployed in the cluster
**Steps:**
1. Go to cluster → Storage → Storage Classes
2. Click on Add class
3. Select Provisioner **Longhorn**
4. Give in other required parameters.
5. Click on **Save**.
6. Use this storage class to create a PVC and deploy in a workload.
7. Verify the parameters of the volume created. | Volume parameters should match the storage class parameters. |
| 6 | StorageClass with `reclaimPolicy` parameter set to `Delete` - PVC from storage class | **Pre conditions:**
* Create PVC from “Longhorn” storage class in rancher.
* It will have a dynamic PV bound
**Steps**:
1. 'Delete PVC from Rancher
2. Verify PVC is deleted
3. Verify PV bound to this PVC is deleted - Rancher → Cluster → Storage → PV
4. Verify the volume(Dynamic PV) in Longhorn is deleted | |
| 7 | Volume/PV/PVC created in Longhorn | **Pre conditions:**
* Create volume, PV, PVC in longhorn
**Steps:**
1. 'Delete PVC from Rancher
2. Verify PVC is deleted
3. PV will NOT. be deleted but be in “released” state in Rancher UI
4. Verify Volume does not get deleted | |
| 8 | StorageClass with `reclaimPolicy` parameter set to `Retain` - PVC from storage class | **Pre conditions:**
* Create PVC from “Longhorn” storage class in rancher.
* It will have a dynamic PV bound
**Steps**:
1. 'Delete PVC from Rancher
2. Verify PVC is deleted
3. Verify PV bound to this PVC is NOT deleted - Rancher → Cluster → Storage → PV
4. Verify the volume(Dynamic PV) in Longhorn is NOT deleted | |
| 9 | StorageClass with `reclaimPolicy` parameter set to `Retain` - Volume/PV/PVC created in Longhorn | **Pre conditions:**
* Create volume, PV, PVC in longhorn
**Steps:**
1. 'Delete PVC from Rancher
2. Verify PVC is deleted
3. PV will NOT. be deleted but be in “released” state in Rancher UI
4. Verify Volume does not get deleted | |
-| 10 | Power down node | 1. Power down
2. Replica migrates
3. Power back on
4. Verify if the replicas in the node have been deleted | * When a node is powered down, the replica is rebuilt on the 4th wrker node.
* When the node is powered back on, and the replica on the powered down node is not available in Longhorn UI anymore, there is no data in `/var/lib/longhorn/replicas` folder in the powered on node. |
-| 11 | Power down node with. Node tag/disk tag | 1. Add a node tag/disk tag
2. Power down
3. Replica cannot migrate
4. Power back on
5. Replica should get rebuilt on this node | * When a node is powered down, the replica is rebuilt on the 4th wrker node.
* When the node is powered back on, and the replica on the powered down node is not available in Longhorn UI anymore, there is no data in `/var/lib/longhorn/replicas` folder in the powered on node.
* The new replica is rebuilt on a node which has a tag. |
+| 10 | Power down node | 1. Power down
2. Replica migrates
3. Power back on
4. Verify if the replicas in the node have been deleted | * When a node is powered down, the replica is rebuilt on the 4th worker node.
* When the node is powered back on, and the replica on the powered down node is not available in Longhorn UI anymore, there is no data in `/var/lib/longhorn/replicas` folder in the powered on node. |
+| 11 | Power down node with Node tag/disk tag | 1. Add a node tag/disk tag
2. Power down
3. Replica cannot migrate
4. Power back on
5. Replica should get rebuilt on this node | * When a node is powered down, the replica is rebuilt on the 4th worker node.
* When the node is powered back on, and the replica on the powered down node is not available in Longhorn UI anymore, there is no data in `/var/lib/longhorn/replicas` folder in the powered on node.
* The new replica is rebuilt on a node which has a tag. |
| 12 | Drain a node | 1. Drain use case — drain a worker node
2. Check if the State of the node reflects in the Longhorn UI —> Node
3. Verify if replica is rebuilt on another node?
4. Verify if the pod migrates
5. And the volume get migrated | All the components should be successfully drained. |
| 13 | kubectl - force drain | Using kubectl - force drain a node where the pod with the volume attached is available
Have snapshots before
Verify data after pod migrates | Volume attaches on the new pod
2 of the 3 replicas are in “Stopped” state - Caused replica rebuild. |
| 14 | Cordon a node | 1. Cordon state - cordon a worker node | |
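
For row 13 above, the force drain can be issued with standard kubectl flags; a minimal sketch, where the node name is a placeholder and the exact flag set may vary with the cluster's DaemonSets and local storage usage:

```bash
# Force-drain the worker node hosting the pod with the attached volume.
kubectl drain <worker-node-name> \
  --force \
  --ignore-daemonsets \
  --delete-emptydir-data
```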
diff --git a/docs/content/manual/functional-test-cases/monitoring.md b/docs/content/manual/functional-test-cases/monitoring.md
index ef0bcd4dce..1fd2b42146 100644
--- a/docs/content/manual/functional-test-cases/monitoring.md
+++ b/docs/content/manual/functional-test-cases/monitoring.md
@@ -157,8 +157,8 @@ spec:
| 6 | longhorn\_instance\_manager\_cpu\_usage\_millicpu | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create a volume and attach it to a pod.2. Write 1 Gi data into it.3. Set multiple recurring backup on the volume.4. Go to Prometheus web UI.5. Select `longhorn_instance_manager_cpu_usage_millicpu` and execute. | 1. The reading of cpu\_usage should be shown correctly2. The reading of other instance managers should not get impacted. |
| 7 | longhorn\_instance\_manager\_memory\_requests\_bytes | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create a volume and attach it to a pod.2. Write 1 Gi data into it.3. Set multiple recurring backup on the volume.4. Go to Prometheus web UI.5. Select `longhorn_instance_manager_memory_requests_bytes` and execute. | 1. The reading of memory\_requests should go up for the attached instance manager.2. The reading of other instance managers should not get impacted. |
| 8 | longhorn\_instance\_manager\_memory\_usage\_bytes | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create a volume and attach it to a pod.2. Write 1 Gi data into it.3. Set multiple recurring backup on the volume.4. Go to Prometheus web UI.5. Select `longhorn_instance_manager_memory_usage_bytes` and execute. | 1. The reading of memory\_usage should go up for the attached instance manager.2. The reading of other instance managers should not get impacted. |
-| 9 | longhorn\_manager\_cpu\_usage\_millicpu | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create 3 volumes of different sizes.2. Attach 1st volume to a pod and write 1 Gi data into it.3. Leave the 2rd volume to the detached state.4. Attach the 3th volume to pod and write 1.5 Gi data into it. Attach the volume in maintenance mode.5. Set a recurring backup on volume 1st.6. Perform revert to snapshot with 3rd volume.7. Go to Prometheus web UI.8. Select `longhorn_manager_cpu_usage_millicpu` and execute. | 1. Monitor the graph and the console on the Prometheus server, the cpu\_usage should go up. |
-| 10 | longhorn\_manager\_memory\_usage\_bytes | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create 3 volumes of different sizes.2. Attach 1st volume to a pod and write 1 Gi data into it.3. Leave the 2rd volume to the detached state.4. Attach the 3th volume to pod and write 1.5 Gi data into it. Attach the volume in maintenance mode.5. Set a recurring backup on volume 1st.6. Perform revert to snapshot with 3rd volume.7. Try to make disk full of a node where `longhorn-manager` is running.8. Go to Prometheus web UI.9. Select `longhorn_manager_memory_usage_bytes` and execute. | 1. Monitor the graph and the console on the Prometheus server, the memory\_usage should go up. |
+| 9 | longhorn\_manager\_cpu\_usage\_millicpu | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create 3 volumes of different sizes.2. Attach the 1st volume to a pod and write 1 Gi data into it.3. Leave the 2nd volume in the detached state.4. Attach the 3rd volume to a pod and write 1.5 Gi data into it. Attach the volume in maintenance mode.5. Set a recurring backup on the 1st volume.6. Perform revert to snapshot with the 3rd volume.7. Go to Prometheus web UI.8. Select `longhorn_manager_cpu_usage_millicpu` and execute. | 1. Monitor the graph and the console on the Prometheus server, the cpu\_usage should go up. |
+| 10 | longhorn\_manager\_memory\_usage\_bytes | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create 3 volumes of different sizes.2. Attach the 1st volume to a pod and write 1 Gi data into it.3. Leave the 2nd volume in the detached state.4. Attach the 3rd volume to a pod and write 1.5 Gi data into it. Attach the volume in maintenance mode.5. Set a recurring backup on the 1st volume.6. Perform revert to snapshot with the 3rd volume.7. Try to fill up the disk of a node where `longhorn-manager` is running.8. Go to Prometheus web UI.9. Select `longhorn_manager_memory_usage_bytes` and execute. | 1. Monitor the graph and the console on the Prometheus server, the memory\_usage should go up. |
| 11 | longhorn\_disk\_capacity\_bytes | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create volumes and attach them to each node.2. Add an additional disk to all the nodes. (Different size)3. Write into the volumes.4. Power down a node.5. Disable a node.6. Add a new node in the cluster.7. Delete a node from the cluster.8. Go to Prometheus web UI.9. Select `longhorn_disk_capacity_bytes` and execute. | 1. All the disks should be identified by Prometheus.2. All the disks should show the correct total size of the disks. |
| 12 | longhorn\_disk\_usage\_bytes | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create volumes and attach them to each node.2. Add an additional disk to all the nodes. (Different size)3. Write into the volumes.4. Power down a node.5. Disable a node.6. Add a new node in the cluster.7. Delete a node from the cluster.8. Go to Prometheus web UI.9. Select `longhorn_disk_usage_bytes` and execute. | 1. All the disks should be identified by Prometheus.2. All the disks should show the occupied size of the disks. |
| 13 | longhorn\_node\_capacity\_bytes | **Pre-requisite:**
1. Prometheus is setup is done and Prometheus web UI is accessible.
**Test Steps:**
1. Create volumes and attach them to each node.2. Add an additional disk to all the nodes. (Different size)3. Write into the volumes.4. Power down a node.5. Disable a node.6. Add a new node in the cluster.7. Delete a node from the cluster.8. Go to Prometheus web UI.9. Select `longhorn_node_capacity_bytes` and execute. | 1. All the nodes should be identified by Prometheus.2. All the nodes should show the total capacity available of disks available. |
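
Besides the Prometheus web UI used in the steps above, the same metrics can be read through the Prometheus HTTP API; a minimal sketch, assuming Prometheus is reachable at `<prometheus-host>:9090`:

```bash
# Query the current value of one of the Longhorn metrics listed above.
curl -s 'http://<prometheus-host>:9090/api/v1/query' \
  --data-urlencode 'query=longhorn_node_capacity_bytes'
```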
diff --git a/docs/content/manual/functional-test-cases/node.md b/docs/content/manual/functional-test-cases/node.md
index 645c7467d1..1129ea6a7e 100644
--- a/docs/content/manual/functional-test-cases/node.md
+++ b/docs/content/manual/functional-test-cases/node.md
@@ -24,7 +24,7 @@ Test cases
| | **Test Case** | **Test Instructions** | **Expected Results** |
| --- | --- | --- | --- |
| 1 | Node scheduling | * **Prerequisites:**
* Longhorn Deployed with 3 nodes
1. Disable Node Scheduling on a node
2. Create a volume with 3 replicas, and attach it to a node
3. Re-enabled node scheduling on the node | * Volume should be created and attached
* Volume replicas should be scheduled to Schedulable nodes only
* Re-enabling node scheduling will not affect existing scheduled replicas, it will only affect new replicas being created, or rebuilt. | |
-| 2 | Disk Scheduling | * **Prerequisites:**
* Longhorn Deployed with 3 nodes
* Add additional disk (Disk#1) ,attach it and mounted to Node-01.
1. Create a New Disk, Keep Disk Scheduling disabled
2. Create a volume (vol#1), set replica count to `4` and attach it to a node
3. Check (vol#1) replica paths
4. Enable Scheduling on (disk#1)
5. Create a volume (vol#2), set replica count to `4` and attach it to a node
6. Check (vol#2) replica paths | * (vol#1) replicas should be scheduled only to Disks withe Scheduling enabled, no replicas should be scheduled to (disk#1)
* One of (vol#2) replica paths will be scheduled to (disk#1) | Pass
Case of vol#2 - Not necessarily replica will exists on disk#1 provided soft anti affinity is enabled. It might scheduled on disk#1 |
+| 2 | Disk Scheduling | * **Prerequisites:**
* Longhorn Deployed with 3 nodes
* Add an additional disk (Disk#1), attach it, and mount it to Node-01.
1. Create a New Disk, Keep Disk Scheduling disabled
2. Create a volume (vol#1), set replica count to `4` and attach it to a node
3. Check (vol#1) replica paths
4. Enable Scheduling on (disk#1)
5. Create a volume (vol#2), set replica count to `4` and attach it to a node
6. Check (vol#2) replica paths | * (vol#1) replicas should be scheduled only to Disks with Scheduling enabled, no replicas should be scheduled to (disk#1)
* One of (vol#2) replica paths will be scheduled to (disk#1) | Pass
Case of vol#2 - A replica will not necessarily exist on disk#1, provided soft anti-affinity is enabled. It might be scheduled on disk#1 |
| 3 | Volume Created with Node Tags | * **Prerequisites:**
* Longhorn Deployed with 3 nodes
1. Create Node tags as follows:
1. Node-01: fast
2. Node-02: slow
3. Node-02: fast
2. Create a volume (vol#1), set Node tags to slow
3. Create a volume (vol#2), set Node tags to fast
4. Check Volumes replicas paths
5. Check Volume detail `Node Tags` | * vol#1 replicas should only be scheduled to Node-02
* vol#2 replicas should only be scheduled to Node-01 and Node-03
* Node Tag volume detail should contain Node tag specified in volume creation request. |
| 4 | Volumes created with Disk Tags | * **Prerequisites:**
* Longhorn Deployed with 3 nodes, with default disks (disk#01-1, disk#02-1, disk#03-1)
* `disk#0X-Y` indicate that disk is attached to `Node-0X` , and it is disk number `Y` on that node.
* Create 3 additional disks (disk#01-2, disk#02-2, disk#03-2), attach each one to a different node, and mount it to a directory on that node.
1. Create Disk tags as follows:
1. disk#01-1: fast
2. disk#01-2: fast
3. disk#02-1: slow
4. disk#02-2: slow
5. disk#03-1: fast
6. disk#01-2: fast
2. Create a volume (vol#1), set Disk tags to slow
3. Create a volume (vol#2), set Disk tags to fast
4. Check Volumes replicas paths
5. Check Volume detail `Disk Tags` | * vol#1 replicas should only be scheduled to disks have slow tag (disk#02-1 and disk#02-2)
* vol#2 replicas should can be scheduled to disks have fast Tag
(disk#01-1, disk#01-2, disk#03-1, disk#03-2)
* Disk Tag volume detail should contain Disk tag specified in volume creation request. |
| 5 | Volumes created with both DIsk and Node Tags | * Create a volume, set Disk and node tags, and attach it to a node | * Volume replicas should be scheduled only to node that have Node tags, and only on disks that have Disk tags specified on volume creation request
* If No Node match both Node and Disk tags, volume replicas will not be created. |
diff --git a/docs/content/manual/functional-test-cases/volume.md b/docs/content/manual/functional-test-cases/volume.md
index 9692941fd9..c9ccefb14f 100644
--- a/docs/content/manual/functional-test-cases/volume.md
+++ b/docs/content/manual/functional-test-cases/volume.md
@@ -12,7 +12,7 @@ title: 3. Volume
| 5 | Attach multiple volumes in maintenance mode | * **Prerequisite:**
* Create multiple volumes
1. Select multiple volumes and Attach them to a node in maintenance mode | * All Volumes should be attached in maintenance mode to the same node specified in volume attach request. |
| 6 | Detach multiple volumes | * **Prerequisite:**
* Multiple attached volumes
* Select multiple volumes and detach | * Volumes should be detached |
| 7 | Backup multiple Volumes | * **Prerequisite:**
* Longhorn should be configured to point to a backupstore
* Multiple volumes existed and attached to node/used buy kubernetes workload
* Write some data to multiple volumes and compute it’s checksum
* Select multiple volumes and Create a backup
* restore volumes backups and check its data checksum | * Volume backups should be created
* Restored volumes from backup should contain the same data when backup is created |
-| 8 | Create PV/PVC for multiple volumes | **Prerequisite:**
* Create multiple volumes
1. Select multiple volumes
2. Create a PV, specify filesysem
3. Check PV in Lonhgorn UI and in Kubernetes
4. Create PVC
5. Check PVC in Lonhgorn UI and in Kubernetes
6. Delete PVC
7. Check PV in Lonhgorn UI and in Kubernetes | * For all selected volumes
* PV should created
* PV/PVC status in UI should be `Available`
* PV `spec.csi.fsType` should match filesystem specified in PV creation request
* PV `spec.storageClassName` should match the setting in `Default Longhorn Static StorageClass Name`
* PV `spec.csi.volumeHandle` should be the volume name
* PV/PVC status in UI should be `Bound` in Longhorn UI
* PVC namespace should match namespace specified in PVC creation request
* After Deleting PVC, PV/PVC status should be `Relased` in Longhorn UI. |
+| 8 | Create PV/PVC for multiple volumes | **Prerequisite:**
* Create multiple volumes
1. Select multiple volumes
2. Create a PV, specify filesystem
3. Check PV in Longhorn UI and in Kubernetes
4. Create PVC
5. Check PVC in Longhorn UI and in Kubernetes
6. Delete PVC
7. Check PV in Longhorn UI and in Kubernetes | * For all selected volumes
* PV should be created
* PV/PVC status in UI should be `Available`
* PV `spec.csi.fsType` should match filesystem specified in PV creation request
* PV `spec.storageClassName` should match the setting in `Default Longhorn Static StorageClass Name`
* PV `spec.csi.volumeHandle` should be the volume name
* PV/PVC status in UI should be `Bound` in Longhorn UI
* PVC namespace should match namespace specified in PVC creation request
* After Deleting PVC, PV/PVC status should be `Released` in Longhorn UI. |
| 9 | Volume expansion | Check Multiple Volume expansion test cases work for multiple volumes
[Test Cases in Volume Details page](https://rancher.atlassian.net/wiki/spaces/LON/pages/354453117/Volume+detail+page) | Volume expansion should work for multiple volumes. |
| 10 | Engine Offline Upgrade For Multiple Volumes | **Prerequisite:**
* Volume is consumed by Kubernetes deployment workload
* Volume use old Longhorn Engine
1. Write data to volume, compute it’s checksum (checksum#1)
2. Scale down deployment , volume gets detached
3. Upgrade Longhorn engine image to use new deployed engine image
4. Scale up deployment, volume gets attached | * Volume read/write operations should work before and after engine upgrade.
* Old Engine `Reference Count` will be decreased by 1
* New Engine `Reference Count` will be increased by 1 |
| 12 | Show System Hidden | **Prerequisite**:
* Volume is created and attached to a pod.
1. Click the volume appearing on volume list page, it takes user to volume.
2. Take snapshot and upgrade the replicas.
3. Under snapshot section, enable option 'Show System Hidden | Enabling this option will show system created snapshots while rebuilding of replicas. |
diff --git a/docs/content/manual/pre-release/backup-and-restore/sync-up-with-backup-target-during-dr-volume-activation.md b/docs/content/manual/pre-release/backup-and-restore/sync-up-with-backup-target-during-dr-volume-activation.md
new file mode 100644
index 0000000000..d316e20ed9
--- /dev/null
+++ b/docs/content/manual/pre-release/backup-and-restore/sync-up-with-backup-target-during-dr-volume-activation.md
@@ -0,0 +1,16 @@
+---
+title: "Sync up with backup target during DR volume activation"
+---
+
+#### Related Issue:
+- https://github.com/longhorn/longhorn/issues/5292
+- https://github.com/longhorn/longhorn/issues/7945
+
+1. Launch 2 clusters and both have Longhorn installed
+1. Set up a backup target.
+1. Create a volume and write data in the `1st cluster`. Then create `1st backup`.
+1. Restore the backup as a DR volume in the `2nd cluster`.
+1. Modify the backup poll interval to a large value.
+1. Write more data for the volume in the `1st cluster`, and create the `2nd backup`.
+1. Activate the DR volume in the `2nd cluster`. Then verify the data
+1. The activated DR volume should contain the latest data.
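
Step 5 (enlarging the backup poll interval) can be done from the Longhorn UI settings page or by patching the setting resource directly; a minimal sketch, assuming the default `longhorn-system` namespace and the `backupstore-poll-interval` setting name:

```bash
# Raise the poll interval (in seconds) so the DR volume does not pick up
# the 2nd backup before it is activated.
kubectl -n longhorn-system patch settings.longhorn.io backupstore-poll-interval \
  --type=merge -p '{"value": "3600"}'
```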
diff --git a/docs/content/manual/pre-release/cluster-restore/restore-to-an-old-cluster.md b/docs/content/manual/pre-release/cluster-restore/restore-to-an-old-cluster.md
index 10fb2f80cf..838522dd97 100644
--- a/docs/content/manual/pre-release/cluster-restore/restore-to-an-old-cluster.md
+++ b/docs/content/manual/pre-release/cluster-restore/restore-to-an-old-cluster.md
@@ -35,15 +35,15 @@ This test may need to be validated for both kind of cluster.
4. Deploy a StatefulSet with volume D. Write some data and do some snapshot operations. (Validate 2 cases: <1> volume can be recovered automatically if some replicas are removed and some new replicas are replenished; <2> snapshot info will be resynced;)
5. Deploy a Deployment with volume E. Write some data and do some snapshot operations. (Validate 4 cases: <1> engine upgrade; <2> offline expansion)
3. Create a cluster snapshot via Rancher.
-4. Do the followings before the restore:
+4. Do the following before the restore:
1. Delete volume A.
2. Write more data to volume B and create more backups.
3. Remove all current replicas one by one for volume C. Then all replicas of volume C are new replicas.
4. Remove some replicas for volume D. Do snapshot creation, deletion, and revert.
5. Scale down the workload. Upgrade volume E from the default image to another engine image. And do expansion.
- 6. Create and attach volume F via UI. Write some data and do some snapshot operations. (Validate 1 case: Users need to manuall recover the volume if it's created after the cluster snapshot)
+ 6. Create and attach volume F via UI. Write some data and do some snapshot operations. (Validate 1 case: Users need to manually recover the volume if it's created after the cluster snapshot)
5. Restore the cluster.
-6. Check the followings according to the doc:
+6. Check the following according to the doc:
1. Volume A is back. But there is no data in it. And users can re-delete it.
2. Volume B can be reattached or keep attached with correct data. The backup info of volume B is resynced when the volume is reattahed. The pod can use the volume after restart.
3. All old removed replicas are back and all newly rebuilt replicas in step4-3 disappear for volume C. There is no data in volume C. The data directories of the disappeared replicas are still on the node. Hence the data are be recovered by exporting a single replica volume.
diff --git a/docs/content/manual/pre-release/node-not-ready/node-down/single-replica-node-down.md b/docs/content/manual/pre-release/node-not-ready/node-down/single-replica-node-down.md
index 9bc72860b6..53a3c0fb27 100644
--- a/docs/content/manual/pre-release/node-not-ready/node-down/single-replica-node-down.md
+++ b/docs/content/manual/pre-release/node-not-ready/node-down/single-replica-node-down.md
@@ -20,7 +20,7 @@ https://github.com/longhorn/longhorn/issues/3957
6. Power up node or delete the workload pod so that kubernetes will recreate pod on another node.
7. Verify auto salvage finishes (i.e pod completes start).
8. Verify volume attached & accessible by pod (i.e test data is available).
- - For data locality = strict-local volume, volume wiil keep in detaching, attaching status for about 10 minutes, after volume attached to node which replica located, check volume healthy and pod status.
+ - For a data locality = strict-local volume, the volume will stay in detaching/attaching status for about 10 minutes; after the volume is attached to the node where its replica is located, check the volume health and pod status.
## Node restart/down scenario with `Pod Deletion Policy When Node is Down` set to `delete-both-statefulset-and-deployment-pod`
1. Create RWO|RWX volume with replica count = 1 & data locality = enabled|disabled|strict-local.
diff --git a/docs/content/manual/pre-release/node/degraded-availability.md b/docs/content/manual/pre-release/node/degraded-availability.md
index 26f2b17a43..fedbc8b3c1 100644
--- a/docs/content/manual/pre-release/node/degraded-availability.md
+++ b/docs/content/manual/pre-release/node/degraded-availability.md
@@ -15,8 +15,8 @@ title: Degraded availability with added nodes
##### Steps:
1. Create a Deployment Pod with a volume and three replicas.
1. After the volume is attached, on Volume page it should be displayed as `Degraded`
- 1. Hover the crusor to the red circle exclamation mark, the tooltip will says, "The volume cannot be scheduled".
- 1. Click into the volume detail page it will display `Scheduling Failure` but the volume remain fuctional as expected.
+ 1. Hover the cursor over the red circle exclamation mark; the tooltip will say, "The volume cannot be scheduled".
+ 1. Click into the volume detail page; it will display `Scheduling Failure` but the volume remains functional as expected.
1. Write data to the Pod.
1. Scale down the deployment to 0 to detach the volume.
1. Volume return to `Detached` state.
diff --git a/docs/content/manual/pre-release/upgrade/backing-image-during-upgrade.md b/docs/content/manual/pre-release/upgrade/backing-image-during-upgrade.md
index ec44abb402..9ca7dc0132 100644
--- a/docs/content/manual/pre-release/upgrade/backing-image-during-upgrade.md
+++ b/docs/content/manual/pre-release/upgrade/backing-image-during-upgrade.md
@@ -38,7 +38,7 @@ title: Test Backing Image during Longhorn upgrade
1. Deploy Longhorn.
2. Create a backing images. Wait for the backing image being ready in the 1st disk.
3. Create and attach volumes with the backing image. Wait for all disk files of the backing image being ready.
-4. Run `kubectl -n longhorn system get pod -w` in a seperate session.
+4. Run `kubectl -n longhorn-system get pod -w` in a separate session.
5. Upgrade Longhorn manager but with the backing image manager image unchanged. (Actually we can mock this upgrade by removing all longhorn manager pods simultaneously.)
6. Check if all disk file status of the backing image becomes `unknown` then `ready` during the longhorn manager pods termination and restart. (May need to refresh the UI page after restart.)
7. After the longhorn manager pods restart, Verify there is no backing image data source pod launched for the backing image in the output of step4.
diff --git a/docs/content/manual/release-specific/v1.2.0/label-driven-recurring-job.md b/docs/content/manual/release-specific/v1.2.0/label-driven-recurring-job.md
index 30f6134599..6f448fd7e8 100644
--- a/docs/content/manual/release-specific/v1.2.0/label-driven-recurring-job.md
+++ b/docs/content/manual/release-specific/v1.2.0/label-driven-recurring-job.md
@@ -15,11 +15,11 @@ https://github.com/longhorn/longhorn/issues/467
*And* create volume `test-job-4`.
*And* create volume `test-job-5`.
-**Then** moniter the cron job pod log.
+**Then** monitor the cron job pod log.
*And* should see 2 jobs created concurrently.
**When** update `snapshot1` recurring job with `concurrency` set to `3`.
-**Then** moniter the cron job pod log.
+**Then** monitor the cron job pod log.
*And* should see 3 jobs created concurrently.
diff --git a/docs/content/manual/release-specific/v1.2.0/test-backing-image-upload.md b/docs/content/manual/release-specific/v1.2.0/test-backing-image-upload.md
index e42178b19f..5b8740893c 100644
--- a/docs/content/manual/release-specific/v1.2.0/test-backing-image-upload.md
+++ b/docs/content/manual/release-specific/v1.2.0/test-backing-image-upload.md
@@ -37,7 +37,7 @@ title: Test backing image
1. Create a valid backing image
2. Create a StorageClass, which use the same backing image name but different data source type/parameters.
3. Create a PVC with the StorageClass.
- ==> The corresponding creation should fail. The longhorn-csi-plugin will repeatly print out error logs like this `existing backing image %v data source is different from the parameters in the creation request or StorageClass`.
+ ==> The corresponding creation should fail. The longhorn-csi-plugin will repeatedly print out error logs like this `existing backing image %v data source is different from the parameters in the creation request or StorageClass`.
4. Delete the PVC and the StorageClass.
5. Recreate a StorageClass in which the backing image fields match the existing backing image.
6. Create a PVC with the StorageClass.
diff --git a/docs/content/manual/release-specific/v1.2.3/test-backing-image-checksum-mismatching.md b/docs/content/manual/release-specific/v1.2.3/test-backing-image-checksum-mismatching.md
index fe096f8e5c..89f015c376 100644
--- a/docs/content/manual/release-specific/v1.2.3/test-backing-image-checksum-mismatching.md
+++ b/docs/content/manual/release-specific/v1.2.3/test-backing-image-checksum-mismatching.md
@@ -3,7 +3,7 @@ title: Test backing image checksum mismatching
---
### Test step
-1. Modify setting `Backing Image Recovery Wait Interval` to a shorter value so that the backing image will start auto recovery eariler.
+1. Modify setting `Backing Image Recovery Wait Interval` to a shorter value so that the backing image will start auto recovery earlier.
2. Create a backing image file with type `Download From URL`.
3. Launch a volume using the backing image file so that there are 2 disk records for the backing image.
4. Modify one disk file for the backing image and make sure the file size is not changed. This will lead to data inconsistency/corruption later. e.g.,
diff --git a/docs/content/manual/release-specific/v1.3.0/extend_CSI_snapshot_support.md b/docs/content/manual/release-specific/v1.3.0/extend_CSI_snapshot_support.md
index d096cdd538..d70c46ecdc 100644
--- a/docs/content/manual/release-specific/v1.3.0/extend_CSI_snapshot_support.md
+++ b/docs/content/manual/release-specific/v1.3.0/extend_CSI_snapshot_support.md
@@ -132,7 +132,7 @@ https://github.com/longhorn/longhorn/issues/2534
* Scale down the workload to detach the `test-vol`
* Create the same PVC `test-restore-pvc` as in the `Source volume is attached && Longhorn snapshot exist` section
* Verify that PVC provisioning failed because the source volume is detached so Longhorn cannot verify the existence of the Longhorn snapshot in the source volume.
- * Scale up the workload to attache `test-vol`
+ * Scale up the workload to attach `test-vol`
* Wait for PVC to finish provisioning and be bounded
* Attach the PVC `test-restore-pvc` and verify the data
* Delete the PVC
diff --git a/docs/content/manual/release-specific/v1.6.0/test-engine-version-enforcement.md b/docs/content/manual/release-specific/v1.6.0/test-engine-version-enforcement.md
index ba4f32d956..0d2b543e78 100644
--- a/docs/content/manual/release-specific/v1.6.0/test-engine-version-enforcement.md
+++ b/docs/content/manual/release-specific/v1.6.0/test-engine-version-enforcement.md
@@ -22,7 +22,7 @@ longhorn-manager-grhsf 0/1 CrashLoopBackOff
```
And should see incompatible version error in longhorn-manager Pod logs
```
-time="2023-08-17T03:03:20Z" level=fatal msg="Error starting manager: failed checking Engine upgarde path: incompatible Engine ei-7fa7c208 client API version: found version 7 is below required minimal version 8"
+time="2023-08-17T03:03:20Z" level=fatal msg="Error starting manager: failed checking Engine upgrade path: incompatible Engine ei-7fa7c208 client API version: found version 7 is below required minimal version 8"
```
**When** downgraded Longhorn to v1.5.x
@@ -39,5 +39,5 @@ ei-7fa7c208 true deployed longhornio/longhorn-engine:v1.4.1 0
ei-ad420081 false deployed c3y1huang/research:2017-lh-ei 0 44h 24s
```
-**When** update existing volume/engine/replica custom resourcs `spec.image` with `longhornio/longhorn-engine:v1.4.x`
+**When** update existing volume/engine/replica custom resources `spec.image` with `longhornio/longhorn-engine:v1.4.x`
**Then** should be blocked
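
A minimal sketch of the update that should be rejected, assuming an existing volume CR named `vol-1` in the `longhorn-system` namespace; the admission webhook is expected to block the patch with an upgrade-path error:

```bash
# Attempt to set a downgraded engine image on an existing volume CR;
# this request should be blocked.
kubectl -n longhorn-system patch volumes.longhorn.io vol-1 \
  --type=merge -p '{"spec": {"image": "longhornio/longhorn-engine:v1.4.1"}}'
```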
diff --git a/docs/content/manual/release-specific/v1.6.0/test-rebuild-in-meta-blocks-engine-start.md b/docs/content/manual/release-specific/v1.6.0/test-rebuild-in-meta-blocks-engine-start.md
index f81a56c604..a1cfaed7e0 100644
--- a/docs/content/manual/release-specific/v1.6.0/test-rebuild-in-meta-blocks-engine-start.md
+++ b/docs/content/manual/release-specific/v1.6.0/test-rebuild-in-meta-blocks-engine-start.md
@@ -32,7 +32,7 @@ index b48ddd46..c4523f11 100644
**And** the `auto-salvage` setting is set to `true`.
**And** a new StorageClass is created with `NumberOfReplica` set to `1`.
**And** a StatefulSet is created with `Replica` set to `1`.
-**And** the node of the StatefulSet Pod and the node of its volume Replica are different. This is necessary to trigger the rebuilding in reponse to the data locality setting update later.
+**And** the node of the StatefulSet Pod and the node of its volume Replica are different. This is necessary to trigger the rebuilding in response to the data locality setting update later.
**And** Volume have 1 running Replica.
**And** data exists in the volume.
diff --git a/docs/content/manual/release-specific/v1.7.0/_index.md b/docs/content/manual/release-specific/v1.7.0/_index.md
new file mode 100644
index 0000000000..f4fc9aeb62
--- /dev/null
+++ b/docs/content/manual/release-specific/v1.7.0/_index.md
@@ -0,0 +1,3 @@
+---
+title: v1.7.0
+---
diff --git a/docs/content/manual/release-specific/v1.7.0/test-kubelet-restart-no-pending-pod-event.md b/docs/content/manual/release-specific/v1.7.0/test-kubelet-restart-no-pending-pod-event.md
new file mode 100644
index 0000000000..2758cd8b67
--- /dev/null
+++ b/docs/content/manual/release-specific/v1.7.0/test-kubelet-restart-no-pending-pod-event.md
@@ -0,0 +1,22 @@
+---
+title: Restarting Kubelet should not result in repeated "no Pending workload pods ..." events for the workload pod.
+---
+
+## Related issues
+
+- https://github.com/longhorn/longhorn/issues/8072
+
+## Test step
+
+**Given** A [deployment](https://github.com/longhorn/longhorn/blob/master/examples/deployment.yaml) is created.
+
+**When** Kubelet on the node with the attached volume of the deployment is restarted.
+```bash
+systemctl restart k3s-agent.service
+```
+
+**Then** Observe the events of the deployment pod.
+```
+kubectl get events --field-selector involvedObject.name=${POD_NAME} -w
+```
+**And** There are no recurring `no Pending workload pods for volume xxx to be mounted` events.
diff --git a/e2e/Dockerfile b/e2e/Dockerfile
index f2f69efa65..8a28d9e1a9 100644
--- a/e2e/Dockerfile
+++ b/e2e/Dockerfile
@@ -1,4 +1,4 @@
-FROM registry.suse.com/bci/python:3.9
+FROM registry.suse.com/bci/python:3.11
ARG KUBECTL_VERSION=v1.17.0
ARG YQ_VERSION=v4.24.2
diff --git a/e2e/requirements.txt b/e2e/requirements.txt
index 81eac4a39a..dc39de7348 100644
--- a/e2e/requirements.txt
+++ b/e2e/requirements.txt
@@ -1,8 +1,8 @@
robotframework==6.1.1
-argcomplete==1.10.0
-directio==1.2
+argcomplete==1.12.3
+directio==1.3
flake8
kubernetes==27.2.0
requests==2.31.0
-boto3==1.26.86
+boto3==1.34.51
pyyaml==6.0.1
diff --git a/engine/environment-setup/setupRancher.py b/engine/environment-setup/setupRancher.py
index 8882c14343..4c49b28428 100644
--- a/engine/environment-setup/setupRancher.py
+++ b/engine/environment-setup/setupRancher.py
@@ -32,7 +32,7 @@ def silent_remove_file(filename):
os.remove(filename)
except OSError as e:
if e.errno != errno.ENOENT: # errno.ENOENT = no such file or directory
- raise # re-raise exception if a different error occured
+ raise # re-raise exception if a different error occurred
def gce_create_instance(compute, name, gce_startup_script):
diff --git a/engine/validation-test/requirements.txt b/engine/validation-test/requirements.txt
index a8a1d16462..c7a9aedfcb 100644
--- a/engine/validation-test/requirements.txt
+++ b/engine/validation-test/requirements.txt
@@ -1,10 +1,10 @@
-flake8==2.5.1
+flake8==2.6.2
paramiko
pytest==2.9.2
pytest-xdist
-requests==2.20.0
-cattle==0.5.1
-selenium==2.33.0
-websocket-client==0.23.0
-docker-py==1.2.3
+requests==2.31.0
+cattle==0.5.4
+selenium==2.53.6
+websocket-client==0.59.0
+docker-py==1.10.6
boto
diff --git a/manager/integration/Dockerfile b/manager/integration/Dockerfile
index 8541f5edd9..a6f5a218d1 100644
--- a/manager/integration/Dockerfile
+++ b/manager/integration/Dockerfile
@@ -1,12 +1,12 @@
-FROM registry.suse.com/bci/python:3.9
+FROM registry.suse.com/bci/python:3.11
-ARG KUBECTL_VERSION=v1.17.0
+ARG KUBECTL_VERSION=v1.28.4
ARG YQ_VERSION=v4.24.2
ARG TERRAFORM_VERSION=1.3.5
ARG ARCH=amd64
RUN zypper ref -f
-RUN zypper in -y vim-small nfs-client xfsprogs e2fsprogs util-linux-systemd gcc python39-devel gawk java-11-openjdk tar awk gzip wget unzip && \
+RUN zypper in -y vim-small nfs-client xfsprogs e2fsprogs util-linux-systemd gcc python311-devel gawk java-11-openjdk tar awk gzip wget unzip && \
rm -rf /var/cache/zypp/*
RUN curl -sO https://storage.googleapis.com/kubernetes-release/release/$KUBECTL_VERSION/bin/linux/${ARCH}/kubectl && \
diff --git a/manager/integration/README.md b/manager/integration/README.md
index 864698964f..77fcef7cf5 100644
--- a/manager/integration/README.md
+++ b/manager/integration/README.md
@@ -18,7 +18,8 @@ Requirement:
Run the test:
1. Deploy all backupstore servers(including `NFS` server and `Minio` as s3 server) for test purposes.
```
-kubectl create -Rf integration/deploy/backupstores
+kubectl create -f https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/minio-backupstore.yaml \
+ -f https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/nfs-backupstore.yaml
```
2. Deploy the test script to the Kubernetes cluster.
```
diff --git a/manager/integration/deploy/backupstores/minio-backupstore.yaml b/manager/integration/deploy/backupstores/minio-backupstore.yaml
deleted file mode 100644
index 0654bfbab2..0000000000
--- a/manager/integration/deploy/backupstores/minio-backupstore.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
-apiVersion: v1
-kind: Secret
-metadata:
- name: minio-secret
- namespace: default
-type: Opaque
-data:
- AWS_ACCESS_KEY_ID: bG9uZ2hvcm4tdGVzdC1hY2Nlc3Mta2V5 # longhorn-test-access-key
- AWS_SECRET_ACCESS_KEY: bG9uZ2hvcm4tdGVzdC1zZWNyZXQta2V5 # longhorn-test-secret-key
- AWS_ENDPOINTS: aHR0cHM6Ly9taW5pby1zZXJ2aWNlLmRlZmF1bHQ6OTAwMA== # https://minio-service.default:9000
- AWS_CERT: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURMRENDQWhTZ0F3SUJBZ0lSQU1kbzQycGhUZXlrMTcvYkxyWjVZRHN3RFFZSktvWklodmNOQVFFTEJRQXcKR2pFWU1CWUdBMVVFQ2hNUFRHOXVaMmh2Y200Z0xTQlVaWE4wTUNBWERUSXdNRFF5TnpJek1EQXhNVm9ZRHpJeApNakF3TkRBek1qTXdNREV4V2pBYU1SZ3dGZ1lEVlFRS0V3OU1iMjVuYUc5eWJpQXRJRlJsYzNRd2dnRWlNQTBHCkNTcUdTSWIzRFFFQkFRVUFBNElCRHdBd2dnRUtBb0lCQVFEWHpVdXJnUFpEZ3pUM0RZdWFlYmdld3Fvd2RlQUQKODRWWWF6ZlN1USs3K21Oa2lpUVBvelVVMmZvUWFGL1BxekJiUW1lZ29hT3l5NVhqM1VFeG1GcmV0eDBaRjVOVgpKTi85ZWFJNWRXRk9teHhpMElPUGI2T0RpbE1qcXVEbUVPSXljdjRTaCsvSWo5Zk1nS0tXUDdJZGxDNUJPeThkCncwOVdkckxxaE9WY3BKamNxYjN6K3hISHd5Q05YeGhoRm9tb2xQVnpJbnlUUEJTZkRuSDBuS0lHUXl2bGhCMGsKVHBHSzYxc2prZnFTK3hpNTlJeHVrbHZIRXNQcjFXblRzYU9oaVh6N3lQSlorcTNBMWZoVzBVa1JaRFlnWnNFbQovZ05KM3JwOFhZdURna2kzZ0UrOElXQWRBWHExeWhqRDdSSkI4VFNJYTV0SGpKUUtqZ0NlSG5HekFnTUJBQUdqCmF6QnBNQTRHQTFVZER3RUIvd1FFQXdJQ3BEQVRCZ05WSFNVRUREQUtCZ2dyQmdFRkJRY0RBVEFQQmdOVkhSTUIKQWY4RUJUQURBUUgvTURFR0ExVWRFUVFxTUNpQ0NXeHZZMkZzYUc5emRJSVZiV2x1YVc4dGMyVnlkbWxqWlM1awpaV1poZFd4MGh3Ui9BQUFCTUEwR0NTcUdTSWIzRFFFQkN3VUFBNElCQVFDbUZMMzlNSHVZMzFhMTFEajRwMjVjCnFQRUM0RHZJUWozTk9kU0dWMmQrZjZzZ3pGejFXTDhWcnF2QjFCMVM2cjRKYjJQRXVJQkQ4NFlwVXJIT1JNU2MKd3ViTEppSEtEa0Jmb2U5QWI1cC9VakpyS0tuajM0RGx2c1cvR3AwWTZYc1BWaVdpVWorb1JLbUdWSTI0Q0JIdgpnK0JtVzNDeU5RR1RLajk0eE02czNBV2xHRW95YXFXUGU1eHllVWUzZjFBWkY5N3RDaklKUmVWbENtaENGK0JtCmFUY1RSUWN3cVdvQ3AwYmJZcHlERFlwUmxxOEdQbElFOW8yWjZBc05mTHJVcGFtZ3FYMmtYa2gxa3lzSlEralAKelFadHJSMG1tdHVyM0RuRW0yYmk0TktIQVFIcFc5TXUxNkdRakUxTmJYcVF0VEI4OGpLNzZjdEg5MzRDYWw2VgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0t
- AWS_CERT_KEY: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRRFh6VXVyZ1BaRGd6VDMKRFl1YWViZ2V3cW93ZGVBRDg0VllhemZTdVErNyttTmtpaVFQb3pVVTJmb1FhRi9QcXpCYlFtZWdvYU95eTVYagozVUV4bUZyZXR4MFpGNU5WSk4vOWVhSTVkV0ZPbXh4aTBJT1BiNk9EaWxNanF1RG1FT0l5Y3Y0U2grL0lqOWZNCmdLS1dQN0lkbEM1Qk95OGR3MDlXZHJMcWhPVmNwSmpjcWIzeit4SEh3eUNOWHhoaEZvbW9sUFZ6SW55VFBCU2YKRG5IMG5LSUdReXZsaEIwa1RwR0s2MXNqa2ZxUyt4aTU5SXh1a2x2SEVzUHIxV25Uc2FPaGlYejd5UEpaK3EzQQoxZmhXMFVrUlpEWWdac0VtL2dOSjNycDhYWXVEZ2tpM2dFKzhJV0FkQVhxMXloakQ3UkpCOFRTSWE1dEhqSlFLCmpnQ2VIbkd6QWdNQkFBRUNnZ0VBZlVyQ1hrYTN0Q2JmZjNpcnp2cFFmZnVEbURNMzV0TmlYaDJTQVpSVW9FMFYKbSsvZ1UvdnIrN2s2eUgvdzhMOXhpZXFhQTljVkZkL0JuTlIrMzI2WGc2dEpCNko2ZGZxODJZdmZOZ0VDaUFMaQpqalNGemFlQmhnT3ZsWXZHbTR5OTU1Q0FGdjQ1cDNac1VsMTFDRXJlL1BGbGtaWHRHeGlrWFl6NC85UTgzblhZCnM2eDdPYTgyUjdwT2lraWh3Q0FvVTU3Rjc4ZWFKOG1xTmkwRlF2bHlxSk9QMTFCbVp4dm54ZU11S2poQjlPTnAKTFNwMWpzZXk5bDZNR2pVbjBGTG53RHZkVWRiK0ZlUEkxTjdWYUNBd3hJK3JHa3JTWkhnekhWWE92VUpON2t2QQpqNUZPNW9uNGgvK3hXbkYzM3lxZ0VvWWZ0MFFJL2pXS2NOV1d1a2pCd1FLQmdRRGVFNlJGRUpsT2Q1aVcxeW1qCm45RENnczVFbXFtRXN3WU95bkN3U2RhK1lNNnZVYmlac1k4WW9wMVRmVWN4cUh2NkFQWGpVd2NBUG1QVE9KRW8KMlJtS0xTYkhsTnc4bFNOMWJsWDBEL3Mzamc1R3VlVW9nbW5TVnhMa0h1OFhKR0o3VzFReEUzZG9IUHRrcTNpagpoa09QTnJpZFM0UmxqNTJwYkhscjUvQzRjUUtCZ1FENHhFYmpuck1heFV2b0xxVTRvT2xiOVc5UytSUllTc0cxCmxJUmgzNzZTV0ZuTTlSdGoyMTI0M1hkaE4zUFBtSTNNeiswYjdyMnZSUi9LMS9Cc1JUQnlrTi9kbkVuNVUxQkEKYm90cGZIS1Jvc1FUR1hIQkEvM0JrNC9qOWplU3RmVXgzZ2x3eUI0L2hORy9KM1ZVV2FXeURTRm5qZFEvcGJsRwp6VWlsSVBmK1l3S0JnUUNwMkdYYmVJMTN5TnBJQ3psS2JqRlFncEJWUWVDQ29CVHkvUHRncUtoM3BEeVBNN1kyCnZla09VMWgyQVN1UkhDWHRtQXgzRndvVXNxTFFhY1FEZEw4bXdjK1Y5eERWdU02TXdwMDBjNENVQmE1L2d5OXoKWXdLaUgzeFFRaVJrRTZ6S1laZ3JqSkxYYXNzT1BHS2cxbEFYV1NlckRaV3R3MEEyMHNLdXQ0NlEwUUtCZ0hGZQpxZHZVR0ZXcjhvTDJ0dzlPcmVyZHVJVTh4RnZVZmVFdHRRTVJ2N3pjRE5qT0gxUnJ4Wk9aUW0ySW92dkp6MTIyCnFKMWhPUXJtV3EzTHFXTCtTU3o4L3pqMG4vWERWVUIzNElzTFR2ODJDVnVXN2ZPRHlTSnVDRlpnZ0VVWkxZd3oKWDJRSm4xZGRSV1Z6S3hKczVJbDNXSERqL3dXZWxnaEJSOGtSZEZOM0FvR0FJNldDdjJQQ1lUS1ZZNjAwOFYwbgpyTDQ3YTlPanZ0Yy81S2ZxSjFpMkpKTUgyQi9jbU1WRSs4M2dpODFIU1FqMWErNnBjektmQVppZWcwRk9nL015ClB6VlZRYmpKTnY0QzM5KzdxSDg1WGdZTXZhcTJ0aDFEZWUvQ3NsMlM4QlV0cW5mc0VuMUYwcWhlWUJZb2RibHAKV3RUaE5oRi9oRVhzbkJROURyWkJKT1U9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K
----
-# same secret for longhorn-system namespace
-apiVersion: v1
-kind: Secret
-metadata:
- name: minio-secret
- namespace: longhorn-system
-type: Opaque
-data:
- AWS_ACCESS_KEY_ID: bG9uZ2hvcm4tdGVzdC1hY2Nlc3Mta2V5 # longhorn-test-access-key
- AWS_SECRET_ACCESS_KEY: bG9uZ2hvcm4tdGVzdC1zZWNyZXQta2V5 # longhorn-test-secret-key
- AWS_ENDPOINTS: aHR0cHM6Ly9taW5pby1zZXJ2aWNlLmRlZmF1bHQ6OTAwMA== # https://minio-service.default:9000
- AWS_CERT: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURMRENDQWhTZ0F3SUJBZ0lSQU1kbzQycGhUZXlrMTcvYkxyWjVZRHN3RFFZSktvWklodmNOQVFFTEJRQXcKR2pFWU1CWUdBMVVFQ2hNUFRHOXVaMmh2Y200Z0xTQlVaWE4wTUNBWERUSXdNRFF5TnpJek1EQXhNVm9ZRHpJeApNakF3TkRBek1qTXdNREV4V2pBYU1SZ3dGZ1lEVlFRS0V3OU1iMjVuYUc5eWJpQXRJRlJsYzNRd2dnRWlNQTBHCkNTcUdTSWIzRFFFQkFRVUFBNElCRHdBd2dnRUtBb0lCQVFEWHpVdXJnUFpEZ3pUM0RZdWFlYmdld3Fvd2RlQUQKODRWWWF6ZlN1USs3K21Oa2lpUVBvelVVMmZvUWFGL1BxekJiUW1lZ29hT3l5NVhqM1VFeG1GcmV0eDBaRjVOVgpKTi85ZWFJNWRXRk9teHhpMElPUGI2T0RpbE1qcXVEbUVPSXljdjRTaCsvSWo5Zk1nS0tXUDdJZGxDNUJPeThkCncwOVdkckxxaE9WY3BKamNxYjN6K3hISHd5Q05YeGhoRm9tb2xQVnpJbnlUUEJTZkRuSDBuS0lHUXl2bGhCMGsKVHBHSzYxc2prZnFTK3hpNTlJeHVrbHZIRXNQcjFXblRzYU9oaVh6N3lQSlorcTNBMWZoVzBVa1JaRFlnWnNFbQovZ05KM3JwOFhZdURna2kzZ0UrOElXQWRBWHExeWhqRDdSSkI4VFNJYTV0SGpKUUtqZ0NlSG5HekFnTUJBQUdqCmF6QnBNQTRHQTFVZER3RUIvd1FFQXdJQ3BEQVRCZ05WSFNVRUREQUtCZ2dyQmdFRkJRY0RBVEFQQmdOVkhSTUIKQWY4RUJUQURBUUgvTURFR0ExVWRFUVFxTUNpQ0NXeHZZMkZzYUc5emRJSVZiV2x1YVc4dGMyVnlkbWxqWlM1awpaV1poZFd4MGh3Ui9BQUFCTUEwR0NTcUdTSWIzRFFFQkN3VUFBNElCQVFDbUZMMzlNSHVZMzFhMTFEajRwMjVjCnFQRUM0RHZJUWozTk9kU0dWMmQrZjZzZ3pGejFXTDhWcnF2QjFCMVM2cjRKYjJQRXVJQkQ4NFlwVXJIT1JNU2MKd3ViTEppSEtEa0Jmb2U5QWI1cC9VakpyS0tuajM0RGx2c1cvR3AwWTZYc1BWaVdpVWorb1JLbUdWSTI0Q0JIdgpnK0JtVzNDeU5RR1RLajk0eE02czNBV2xHRW95YXFXUGU1eHllVWUzZjFBWkY5N3RDaklKUmVWbENtaENGK0JtCmFUY1RSUWN3cVdvQ3AwYmJZcHlERFlwUmxxOEdQbElFOW8yWjZBc05mTHJVcGFtZ3FYMmtYa2gxa3lzSlEralAKelFadHJSMG1tdHVyM0RuRW0yYmk0TktIQVFIcFc5TXUxNkdRakUxTmJYcVF0VEI4OGpLNzZjdEg5MzRDYWw2VgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0t
----
-apiVersion: v1
-kind: Pod
-metadata:
- name: longhorn-test-minio
- namespace: default
- labels:
- app: longhorn-test-minio
-spec:
- volumes:
- - name: minio-volume
- emptyDir: {}
- - name: minio-certificates
- secret:
- secretName: minio-secret
- items:
- - key: AWS_CERT
- path: public.crt
- - key: AWS_CERT_KEY
- path: private.key
-
- containers:
- - name: minio
- image: minio/minio:RELEASE.2022-02-01T18-00-14Z
- command: ["sh", "-c", "mkdir -p /storage/backupbucket && mkdir -p /root/.minio/certs && ln -s /root/certs/private.key /root/.minio/certs/private.key && ln -s /root/certs/public.crt /root/.minio/certs/public.crt && exec minio server /storage"]
- env:
- - name: MINIO_ROOT_USER
- valueFrom:
- secretKeyRef:
- name: minio-secret
- key: AWS_ACCESS_KEY_ID
- - name: MINIO_ROOT_PASSWORD
- valueFrom:
- secretKeyRef:
- name: minio-secret
- key: AWS_SECRET_ACCESS_KEY
- ports:
- - containerPort: 9000
- volumeMounts:
- - name: minio-volume
- mountPath: "/storage"
- - name: minio-certificates
- mountPath: "/root/certs"
- readOnly: true
----
-apiVersion: v1
-kind: Service
-metadata:
- name: minio-service
- namespace: default
-spec:
- selector:
- app: longhorn-test-minio
- ports:
- - port: 9000
- targetPort: 9000
- protocol: TCP
- sessionAffinity: ClientIP
diff --git a/manager/integration/deploy/backupstores/nfs-backupstore.yaml b/manager/integration/deploy/backupstores/nfs-backupstore.yaml
deleted file mode 100644
index e351c5075a..0000000000
--- a/manager/integration/deploy/backupstores/nfs-backupstore.yaml
+++ /dev/null
@@ -1,52 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
- name: longhorn-test-nfs
- namespace: default
- labels:
- app: longhorn-test-nfs
-spec:
- volumes:
- - name: nfs-volume
- emptyDir: {}
- containers:
- - name: longhorn-test-nfs-container
- image: longhornio/nfs-ganesha:latest
- imagePullPolicy: Always
- env:
- - name: EXPORT_ID
- value: "14"
- - name: EXPORT_PATH
- value: /opt/backupstore
- - name: PSEUDO_PATH
- value: /opt/backupstore
- - name: NFS_DISK_IMAGE_SIZE_MB
- value: "4096"
- command: ["bash", "-c", "chmod 700 /opt/backupstore && /opt/start_nfs.sh | tee /var/log/ganesha.log"]
- securityContext:
- privileged: true
- capabilities:
- add: ["SYS_ADMIN", "DAC_READ_SEARCH"]
- volumeMounts:
- - name: nfs-volume
- mountPath: "/opt/backupstore"
- livenessProbe:
- exec:
- command: ["bash", "-c", "grep \"No export entries found\" /var/log/ganesha.log > /dev/null 2>&1 ; [ $? -ne 0 ]"]
- initialDelaySeconds: 5
- periodSeconds: 5
- timeoutSeconds: 4
----
-kind: Service
-apiVersion: v1
-metadata:
- name: longhorn-test-nfs-svc
- namespace: default
-spec:
- selector:
- app: longhorn-test-nfs
- clusterIP: None
- ports:
- - name: notnecessary
- port: 1234
- targetPort: 1234
diff --git a/manager/integration/tests/common.py b/manager/integration/tests/common.py
index 28503ae13e..47dd4daee9 100644
--- a/manager/integration/tests/common.py
+++ b/manager/integration/tests/common.py
@@ -216,6 +216,7 @@
"allow-empty-node-selector-volume"
SETTING_REPLICA_DISK_SOFT_ANTI_AFFINITY = "replica-disk-soft-anti-affinity"
SETTING_ALLOW_EMPTY_DISK_SELECTOR_VOLUME = "allow-empty-disk-selector-volume"
+SETTING_NODE_DRAIN_POLICY = "node-drain-policy"
DEFAULT_BACKUP_COMPRESSION_METHOD = "lz4"
BACKUP_COMPRESSION_METHOD_LZ4 = "lz4"
@@ -1315,7 +1316,7 @@ def check_pvc_in_specific_status(api, pvc_name, status):
claim = \
api.read_namespaced_persistent_volume_claim(name=pvc_name,
namespace='default')
- if claim.status.phase == "bound":
+ if claim.status.phase == status:
break
time.sleep(RETRY_INTERVAL)
@@ -1970,7 +1971,7 @@ def wait_for_volume_faulted(client, name):
def wait_for_volume_status(client, name, key, value,
- retry_count=RETRY_COUNTS):
+ retry_count=RETRY_COUNTS_LONG):
wait_for_volume_creation(client, name)
for i in range(retry_count):
volume = client.by_id_volume(name)
@@ -2220,10 +2221,10 @@ def wait_for_engine_image_condition(client, image_name, state):
# This helps to prevent the flaky test case in which the ENGINE_NAME
# is flapping between ready and not ready a few times before settling
# down to the ready state
- # https://github.com/longhorn/longhorn-tests/pull/1638
+ # https://github.com/longhorn/longhorn/issues/7438
state_count = 1
if state == "True":
- state_count = 5
+ state_count = 60
c = 0
for i in range(RETRY_COUNTS):
@@ -2287,7 +2288,7 @@ class AssertErrorCheckThread(threading.Thread):
Parameters:
target : The threading function.
- args : Arguments of the target fucntion.
+ args : Arguments of the target function.
"""
def __init__(self, target, args):
threading.Thread.__init__(self)
@@ -3113,17 +3114,20 @@ def check_volume_endpoint(v):
return endpoint
+def find_backup_volume(client, volume_name):
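+    # Return the backup volume whose name matches volume_name and whose creation
+    # timestamp is set; return None if it has not appeared in the backupstore yet.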
+ bvs = client.list_backupVolume()
+ for bv in bvs:
+ if bv.name == volume_name and bv.created != "":
+ return bv
+ return None
+
+
def wait_for_backup_volume_backing_image_synced(
client, volume_name, backing_image, retry_count=RETRY_BACKUP_COUNTS):
- def find_backup_volume():
- bvs = client.list_backupVolume()
- for bv in bvs:
- if bv.name == volume_name:
- return bv
- return None
+
completed = False
for _ in range(retry_count):
- bv = find_backup_volume()
+ bv = find_backup_volume(client, volume_name)
assert bv is not None
if bv.backingImageName == backing_image:
completed = True
@@ -3756,17 +3760,10 @@ def find_backup(client, vol_name, snap_name):
been completed successfully
"""
- def find_backup_volume():
- bvs = client.list_backupVolume()
- for bv in bvs:
- if bv.name == vol_name and bv.created != "":
- return bv
- return None
-
bv = None
for i in range(120):
if bv is None:
- bv = find_backup_volume()
+ bv = find_backup_volume(client, vol_name)
if bv is not None:
backups = bv.backupList().data
for b in backups:
@@ -5161,15 +5158,8 @@ def wait_for_instance_manager_desire_state(client, core_api, im_name,
def wait_for_backup_delete(client, volume_name, backup_name):
- def find_backup_volume():
- bvs = client.list_backupVolume()
- for bv in bvs:
- if bv.name == volume_name:
- return bv
- return None
-
def backup_exists():
- bv = find_backup_volume()
+ bv = find_backup_volume(client, volume_name)
if bv is not None:
backups = bv.backupList()
for b in backups:
@@ -5758,7 +5748,7 @@ def generate_support_bundle(case_name): # NOQA
Generate support bundle into folder ./support_bundle/case_name.zip
Won't generate support bundle if current support bundle count
- greate than MAX_SUPPORT_BINDLE_NUMBER.
+ greater than MAX_SUPPORT_BINDLE_NUMBER.
Args:
case_name: support bundle will named case_name.zip
"""
@@ -5808,7 +5798,7 @@ def generate_support_bundle(case_name): # NOQA
with open('./support_bundle/{0}.zip'.format(case_name), 'wb') as f:
f.write(r.content)
except Exception as e:
- warnings.warn("Error occured while downloading support bundle {}.zip\n\
+ warnings.warn("Error occurred when downloading support bundle {}.zip\n\
The error was {}".format(case_name, e))
@@ -6122,3 +6112,42 @@ def wait_for_instance_manager_count(client, number, retry_counts=120):
time.sleep(RETRY_INTERVAL_LONG)
return len(ims)
+
+
+def create_deployment_and_write_data(client, # NOQA
+ core_api, # NOQA
+ make_deployment_with_pvc, # NOQA
+ volume_name, # NOQA
+ size, # NOQA
+ replica_count, # NOQA
+ data_size, # NOQA
+ attach_node_id=None): # NOQA
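+    # Create a volume with the given size/replica count, expose it via PV/PVC,
+    # run a deployment that mounts it (optionally pinned to attach_node_id),
+    # write data_size MB of random data to /data/test, and return the volume,
+    # the pod name, and the md5 checksum of the written data.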
+ apps_api = get_apps_api_client()
+ volume = client.create_volume(name=volume_name,
+ size=size,
+ numberOfReplicas=replica_count)
+ volume = wait_for_volume_detached(client, volume_name)
+
+ pvc_name = volume_name + "-pvc"
+ create_pv_for_volume(client, core_api, volume, volume_name)
+ create_pvc_for_volume(client, core_api, volume, pvc_name)
+ deployment_name = volume_name + "-dep"
+ deployment = make_deployment_with_pvc(deployment_name, pvc_name)
+ if attach_node_id:
+ deployment["spec"]["template"]["spec"]["nodeSelector"] \
+ = {"kubernetes.io/hostname": attach_node_id}
+
+ create_and_wait_deployment(apps_api, deployment)
+
+ data_path = '/data/test'
+ deployment_pod_names = get_deployment_pod_names(core_api,
+ deployment)
+ write_pod_volume_random_data(core_api,
+ deployment_pod_names[0],
+ data_path,
+ data_size)
+ checksum = get_pod_data_md5sum(core_api,
+ deployment_pod_names[0],
+ data_path)
+
+ return client.by_id_volume(volume_name), deployment_pod_names[0], checksum
diff --git a/manager/integration/tests/requirements.txt b/manager/integration/tests/requirements.txt
index 51fdbdb828..b0bb8357b5 100644
--- a/manager/integration/tests/requirements.txt
+++ b/manager/integration/tests/requirements.txt
@@ -4,11 +4,11 @@ argcomplete==1.10.0
directio==1.2
flake8
kubernetes==25.3.0
-pytest==5.3.1
+pytest==6.2.4
pytest-repeat==0.9.1
pytest-order==1.0.1
six==1.12.0
minio==5.0.10
-pyyaml==5.4.1
+pyyaml==6.0
pandas
prometheus_client
diff --git a/manager/integration/tests/test_backing_image.py b/manager/integration/tests/test_backing_image.py
index 5fac7c272b..118fbfccbe 100644
--- a/manager/integration/tests/test_backing_image.py
+++ b/manager/integration/tests/test_backing_image.py
@@ -431,7 +431,7 @@ def test_backing_image_with_disk_migration(): # NOQA
`-` is removed.
9. Remount the host disk to another path. Then create another Longhorn disk
based on the migrated path (disk migration).
- 10. Verify the followings.
+ 10. Verify the following.
1. The disk added in step3 (before the migration) should
be "unschedulable".
2. The disk added in step9 (after the migration) should
diff --git a/manager/integration/tests/test_basic.py b/manager/integration/tests/test_basic.py
index bedd863e03..d6acfd7faa 100644
--- a/manager/integration/tests/test_basic.py
+++ b/manager/integration/tests/test_basic.py
@@ -101,6 +101,7 @@
from common import create_and_wait_deployment
from common import get_custom_object_api_client
from common import RETRY_COUNTS_SHORT
+from common import scale_up_engine_image_daemonset
from backupstore import backupstore_delete_volume_cfg_file
from backupstore import backupstore_cleanup
@@ -342,7 +343,7 @@ def test_volume_iscsi_basic(client, volume_name): # NOQA
1. Create and attach a volume with iscsi frontend
2. Check the volume endpoint and connect it using the iscsi
- initator on the node.
+ initiator on the node.
3. Write then read back volume data for validation
"""
@@ -985,26 +986,32 @@ def test_dr_volume_with_backup_block_deletion_abort_during_backup_in_progress(se
check_volume_data(dr_vol, final_data, False)
-def test_dr_volume_with_all_backup_blocks_deleted(set_random_backupstore, client, core_api, volume_name): # NOQA
+def test_dr_volume_with_backup_and_backup_volume_deleted(set_random_backupstore, client, core_api, volume_name): # NOQA
"""
- Test DR volume can be activate after delete all backups.
+    Test DR volume can be activated after deleting all backups.
Context:
- We want to make sure that DR volume can activate after delete all backups.
+ We want to make sure that DR volume can activate after deleting
+ some/all backups or the backup volume.
Steps:
1. Create a volume and attach to the current node.
2. Write 4 MB to the beginning of the volume (2 x 2MB backup blocks).
- 3. Create backup(0) of the volume.
- 6. Verify backup block count == 2.
- 7. Create DR volume from backup(0).
- 8. Verify DR volume last backup is backup(0).
- 9. Delete backup(0).
- 10. Verify backup block count == 0.
- 11. Verify DR volume last backup is empty.
- 15. Activate and verify DR volume data is data(0).
+ 3. Create backup(0) then backup(1) for the volume.
+ 6. Verify backup block count == 4.
+ 7. Create DR volume(1) and DR volume(2) from backup(1).
+ 8. Verify DR volumes last backup is backup(1).
+ 9. Delete backup(1).
+ 10. Verify backup block count == 2.
+ 11. Verify DR volumes last backup becomes backup(0).
+ 12. Activate and verify DR volume(1) data is data(0).
+ 13. Delete backup(0).
+ 14. Verify backup block count == 0.
+ 15. Verify DR volume last backup is empty.
+ 16. Delete the backup volume.
+ 17. Activate and verify DR volume data is data(0).
"""
backupstore_cleanup(client)
@@ -1020,30 +1027,57 @@ def test_dr_volume_with_all_backup_blocks_deleted(set_random_backupstore, client
'content': common.generate_random_data(2 * BACKUP_BLOCK_SIZE)}
_, backup0, _, data0 = create_backup(
client, volume_name, data0)
+ data1 = {'pos': 0, 'len': 2 * BACKUP_BLOCK_SIZE,
+ 'content': common.generate_random_data(2 * BACKUP_BLOCK_SIZE)}
+ _, backup1, _, data1 = create_backup(
+ client, volume_name, data1)
backup_blocks_count = backupstore_count_backup_block_files(client,
core_api,
volume_name)
- assert backup_blocks_count == 2
+ assert backup_blocks_count == 4
- dr_vol_name = "dr-" + volume_name
- client.create_volume(name=dr_vol_name, size=SIZE,
- numberOfReplicas=2, fromBackup=backup0.url,
+ dr_vol_name1 = "dr-" + volume_name + "1"
+ dr_vol_name2 = "dr-" + volume_name + "2"
+ client.create_volume(name=dr_vol_name1, size=SIZE,
+ numberOfReplicas=2, fromBackup=backup1.url,
frontend="", standby=True)
- check_volume_last_backup(client, dr_vol_name, backup0.name)
- wait_for_backup_restore_completed(client, dr_vol_name, backup0.name)
+ client.create_volume(name=dr_vol_name2, size=SIZE,
+ numberOfReplicas=2, fromBackup=backup1.url,
+ frontend="", standby=True)
+ check_volume_last_backup(client, dr_vol_name1, backup1.name)
+ wait_for_backup_restore_completed(client, dr_vol_name1, backup1.name)
+ check_volume_last_backup(client, dr_vol_name2, backup1.name)
+ wait_for_backup_restore_completed(client, dr_vol_name2, backup1.name)
+
+ delete_backup(client, volume_name, backup1.name)
+ assert backupstore_count_backup_block_files(client,
+ core_api,
+ volume_name) == 2
+ check_volume_last_backup(client, dr_vol_name1, backup0.name)
+ wait_for_backup_restore_completed(client, dr_vol_name1, backup0.name)
+ check_volume_last_backup(client, dr_vol_name2, backup0.name)
+ wait_for_backup_restore_completed(client, dr_vol_name2, backup0.name)
+
+ activate_standby_volume(client, dr_vol_name1)
+ dr_vol1 = client.by_id_volume(dr_vol_name1)
+ dr_vol1.attach(hostId=host_id)
+ dr_vol1 = common.wait_for_volume_healthy(client, dr_vol_name1)
+ check_volume_data(dr_vol1, data0, False)
delete_backup(client, volume_name, backup0.name)
assert backupstore_count_backup_block_files(client,
core_api,
volume_name) == 0
- check_volume_last_backup(client, dr_vol_name, "")
+ check_volume_last_backup(client, dr_vol_name2, "")
- activate_standby_volume(client, dr_vol_name)
- dr_vol = client.by_id_volume(dr_vol_name)
- dr_vol.attach(hostId=host_id)
- dr_vol = common.wait_for_volume_healthy(client, dr_vol_name)
- check_volume_data(dr_vol, data0, False)
+ delete_backup_volume(client, volume_name)
+
+ activate_standby_volume(client, dr_vol_name2)
+ dr_vol2 = client.by_id_volume(dr_vol_name2)
+ dr_vol2.attach(hostId=host_id)
+ dr_vol2 = common.wait_for_volume_healthy(client, dr_vol_name2)
+ check_volume_data(dr_vol2, data0, False)
def test_backup_volume_list(set_random_backupstore, client, core_api): # NOQA
@@ -3421,7 +3455,7 @@ def test_allow_volume_creation_with_degraded_availability(client, volume_name):
2. `node-level-soft-anti-affinity` to false.
Steps:
- (degraded availablity)
+ (degraded availability)
1. Disable scheduling for node 2 and 3.
2. Create a volume with three replicas.
1. Volume should be `ready` after creation and `Scheduled` is true.
@@ -4393,10 +4427,7 @@ def test_backuptarget_available_during_engine_image_not_ready(client, apps_api):
common.wait_for_backup_target_available(client, False)
# Scale up the engine image DaemonSet
- body = [{"op": "remove",
- "path": "/spec/template/spec/nodeSelector/foo"}]
- apps_api.patch_namespaced_daemon_set(
- name=ds_name, namespace='longhorn-system', body=body)
+ scale_up_engine_image_daemonset(client)
common.wait_for_backup_target_available(client, True)
# Sleep 1 second to prevent the same time
@@ -5022,7 +5053,7 @@ def test_space_usage_for_rebuilding_only_volume(client, volume_name, request):
snap_offset = 1
volume_endpoint = get_volume_endpoint(volume)
write_volume_dev_random_mb_data(volume_endpoint,
- snap_offset, 3000, 5)
+ snap_offset, 3000, 10)
snap2 = create_snapshot(client, volume_name)
volume.snapshotDelete(name=snap2.name)
@@ -5030,7 +5061,7 @@ def test_space_usage_for_rebuilding_only_volume(client, volume_name, request):
wait_for_snapshot_purge(client, volume_name, snap2.name)
write_volume_dev_random_mb_data(volume_endpoint,
- snap_offset, 3000, 5)
+ snap_offset, 3000, 10)
for r in volume.replicas:
if r.hostId != lht_hostId:
@@ -5073,14 +5104,14 @@ def test_space_usage_for_rebuilding_only_volume_worst_scenario(client, volume_na
snap_offset = 1
volume_endpoint = get_volume_endpoint(volume)
write_volume_dev_random_mb_data(volume_endpoint,
- snap_offset, 2000)
+ snap_offset, 2000, 10)
snap1 = create_snapshot(client, volume_name)
volume.snapshotDelete(name=snap1.name)
volume.snapshotPurge()
wait_for_snapshot_purge(client, volume_name, snap1.name)
write_volume_dev_random_mb_data(volume_endpoint,
- snap_offset, 2000)
+ snap_offset, 2000, 10)
for r in volume.replicas:
if r.hostId != lht_hostId:
@@ -5090,7 +5121,7 @@ def test_space_usage_for_rebuilding_only_volume_worst_scenario(client, volume_na
wait_for_volume_degraded(client, volume_name)
wait_for_rebuild_start(client, volume_name)
write_volume_dev_random_mb_data(volume_endpoint,
- snap_offset, 2000)
+ snap_offset, 2000, 10)
wait_for_rebuild_complete(client, volume_name)
volume = client.by_id_volume(volume_name)
diff --git a/manager/integration/tests/test_csi_snapshotter.py b/manager/integration/tests/test_csi_snapshotter.py
index b1bf905f26..a6cc4f33bd 100644
--- a/manager/integration/tests/test_csi_snapshotter.py
+++ b/manager/integration/tests/test_csi_snapshotter.py
@@ -29,7 +29,6 @@
from common import make_deployment_with_pvc, apps_api # NOQA
from common import check_pvc_in_specific_status # NOQA
from common import wait_for_pvc_phase
-from common import RETRY_COMMAND_COUNT
from common import BACKING_IMAGE_QCOW2_URL, BACKING_IMAGE_QCOW2_CHECKSUM
from common import BACKING_IMAGE_RAW_URL, BACKING_IMAGE_RAW_CHECKSUM
from common import BACKING_IMAGE_SOURCE_TYPE_DOWNLOAD, RETRY_COUNTS_SHORT
@@ -280,7 +279,7 @@ def get_volumesnapshotcontent(volumesnapshot_uid):
def wait_volumesnapshot_deleted(name,
namespace,
- retry_counts=RETRY_COMMAND_COUNT,
+ retry_counts=RETRY_COUNTS,
can_be_deleted=True):
api = get_custom_object_api_client()
api_group = "snapshot.storage.k8s.io"
@@ -435,7 +434,7 @@ def csi_volumesnapshot_creation_test(snapshotClass=longhorn|custom):
4. check creation of a new longhorn snapshot named `snapshot-uuid`
5. check for `VolumeSnapshotContent` named `snapcontent-uuid`
6. wait for `VolumeSnapshotContent.readyToUse` flag to be set to **true**
- 7. check for backup existance on the backupstore
+ 7. check for backup existence on the backupstore
# the csi snapshot restore sets the fromBackup field same as
# the StorageClass based restore approach.
@@ -860,16 +859,16 @@ def test_csi_snapshot_snap_create_volume_from_snapshot(apps_api, # NOQA
- Attach the PVC and verify data
- Source volume is detached
- Scale down the workload
- - Create PVC from VolumeSnapshot generated from step beggining
+ - Create PVC from VolumeSnapshot generated from step beginning
- Verify PVC provision failed
- Scale up the workload
- Wait for PVC to finish provisioning and be bounded
- Attach the PVC test-restore-pvc and verify the data
- Source volume is attached && Longhorn snapshot doesn’t exist
- Use VolumeSnapshotContent.snapshotHandle to
- specify Longhorn snapshot generated in step beggining
+ specify Longhorn snapshot generated in step beginning
- Delete the Longhorn snapshot
- - Create PVC from VolumeSnapshot generated from step beggining
+ - Create PVC from VolumeSnapshot generated from step beginning
- PVC should be stuck in provisioning state
"""
vol, deployment, csisnapclass, expected_md5sum = \
@@ -1349,8 +1348,7 @@ def finalizer():
delete_and_wait_pvc(core_api, restore_pvc_name)
delete_volumesnapshot(csivolsnap_name, "default")
wait_volumesnapshot_deleted(csivolsnap_name,
- "default",
- retry_counts=RETRY_COUNTS_SHORT)
+ "default")
request.addfinalizer(finalizer)
@@ -1661,7 +1659,6 @@ def finalizer():
delete_and_wait_pvc(core_api, pvc['metadata']['name'])
delete_volumesnapshot(csivolsnap_name, "default")
wait_volumesnapshot_deleted(csivolsnap_name,
- "default",
- retry_counts=RETRY_COUNTS_SHORT)
+ "default")
request.addfinalizer(finalizer)
diff --git a/manager/integration/tests/test_engine_upgrade.py b/manager/integration/tests/test_engine_upgrade.py
index 2c16d6a8c1..31fc22859a 100644
--- a/manager/integration/tests/test_engine_upgrade.py
+++ b/manager/integration/tests/test_engine_upgrade.py
@@ -43,7 +43,7 @@ def test_engine_image(client, core_api, volume_name): # NOQA
"""
Test Engine Image deployment
- 1. List Engine Images and validate basic properities.
+ 1. List Engine Images and validate basic properties.
2. Try deleting default engine image and it should fail.
3. Try creating a duplicate engine image as default and it should fail
4. Get upgrade test image for the same versions
@@ -1196,3 +1196,19 @@ def test_engine_live_upgrade_while_replica_concurrent_rebuild(client, # NOQA
for replica in volume2.replicas:
assert replica.image == engine_upgrade_image
assert replica.currentImage == engine_upgrade_image
+
+@pytest.mark.skip(reason="TODO") # NOQA
+def test_engine_crash_during_live_upgrade():
+ """
+ 1. Create and attach a volume to a workload, then write data into the
+ volume.
+ 2. Deploy an extra engine image.
+ 3. Send live upgrade request then immediately delete the related engine
+ manager pod/engine process (The new replicas are not in active in this
+ case).
+ 4. Verify the workload will be restarted and the volume will be reattached
+ automatically.
+ 5. Verify the upgrade is done during the reattachment.
+       (It actually becomes an offline upgrade.)
+ 6. Verify volume healthy and the data is correct.
+ """
diff --git a/manager/integration/tests/test_ha.py b/manager/integration/tests/test_ha.py
index 6c1a794515..c5ee886185 100644
--- a/manager/integration/tests/test_ha.py
+++ b/manager/integration/tests/test_ha.py
@@ -1033,7 +1033,7 @@ def test_inc_restoration_with_multiple_rebuild_and_expansion(set_random_backupst
wait_for_volume_healthy(client, std_volume_name)
# Step 9:
- # When the total writen data size is more than 1Gi, there must be data in
+ # When the total written data size is more than 1Gi, there must be data in
# the expanded part.
data_path2 = "/data/test2"
write_pod_volume_random_data(core_api, std_pod_name,
@@ -1093,7 +1093,7 @@ def test_inc_restoration_with_multiple_rebuild_and_expansion(set_random_backupst
wait_for_volume_expansion(client, std_volume_name)
# Step 15:
- # When the total writen data size is more than 2Gi, there must be data in
+ # When the total written data size is more than 2Gi, there must be data in
# the 2nd expanded part.
data_path3 = "/data/test3"
write_pod_volume_random_data(core_api, std_pod_name,
@@ -1689,7 +1689,7 @@ def test_engine_crash_for_restore_volume(set_random_backupstore, client, core_ap
# The complete state transition would be like:
# detaching -> detached -> attaching -> attached -> restore -> detached .
# Now the state change too fast, script eventually caught final detach
- # So temporaly comment out below line of code
+ # So temporarily comment out below line of code
# wait_for_volume_detached(client, res_name)
res_volume = wait_for_volume_healthy_no_frontend(client, res_name)
@@ -1806,7 +1806,7 @@ def test_engine_crash_for_dr_volume(set_random_backupstore, client, core_api, vo
# The complete state transition would be like:
# detaching -> detached -> attaching -> attached -> restore -> detached .
# Now the state change too fast, script eventually caught final detach
- # So temporaly comment out below line of code
+ # So temporarily comment out below line of code
# wait_for_volume_detached(client, dr_volume_name)
# Check if the DR volume is auto reattached then continue
@@ -1943,10 +1943,10 @@ def test_extra_replica_cleanup(client, volume_name, settings_reset): # NOQA
save the checksum.
4. Increase the volume replica number to 4.
5. Volume should show failed to schedule and an extra stop replica.
- 6. Decrease the volume replica nubmer to 3.
+ 6. Decrease the volume replica number to 3.
7. Volume should show healthy and the extra failed to scheduled replica
should be removed.
- 8. Check the data in the volume and make sure it's same as the chechsum.
+    8. Check the data in the volume and make sure it's the same as the checksum.
"""
replica_node_soft_anti_affinity_setting = \
client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
@@ -1984,7 +1984,7 @@ def test_extra_replica_cleanup(client, volume_name, settings_reset): # NOQA
wait_for_volume_replica_count(client, volume_name, 3)
volume = client.by_id_volume(volume_name)
- assert volume.robustness == "healthy"
+ wait_for_volume_healthy(client, volume_name)
check_volume_data(volume, data)
diff --git a/manager/integration/tests/test_infra.py b/manager/integration/tests/test_infra.py
index 6842db3090..73f0995ca4 100644
--- a/manager/integration/tests/test_infra.py
+++ b/manager/integration/tests/test_infra.py
@@ -184,7 +184,7 @@ def test_offline_node(reset_cluster_ready_status):
"""
Test offline node
- 1. Bring down one of the nodes in Kuberntes cluster (avoid current node)
+ 1. Bring down one of the nodes in Kubernetes cluster (avoid current node)
2. Make sure the Longhorn node state become `down`
"""
pod_lable_selector = "longhorn-test=test-job"
diff --git a/manager/integration/tests/test_metric.py b/manager/integration/tests/test_metric.py
index 3210cf1f00..217fc24486 100644
--- a/manager/integration/tests/test_metric.py
+++ b/manager/integration/tests/test_metric.py
@@ -8,6 +8,7 @@
from common import client, core_api, volume_name # NOQA
from common import delete_replica_processes
+from common import check_volume_data
from common import create_pv_for_volume
from common import create_pvc_for_volume
from common import create_snapshot
@@ -82,7 +83,7 @@ def find_metrics(metric_data, metric_name):
def check_metric_with_condition(core_api, metric_name, metric_labels, expected_value=None, metric_node_id=get_self_host_id()): # NOQA)
"""
- Some metric have multiple conditions, for exameple metric
+ Some metric have multiple conditions, for example metric
longhorn_node_status have condition
- allowScheduling
- mountpropagation
@@ -194,6 +195,21 @@ def filter_metric_by_labels(metrics, labels):
assert total_metrics["value"] >= 0.0
+def wait_for_metric_volume_actual_size(core_api, metric_name, metric_labels, actual_size): # NOQA
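+    # Poll check_metric until the given metric reports actual_size; the final
+    # call outside the retry loop re-raises the AssertionError if the metric
+    # never converges.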
+ for _ in range(RETRY_COUNTS):
+ time.sleep(RETRY_INTERVAL)
+
+ try:
+ check_metric(core_api, metric_name,
+ metric_labels, actual_size)
+ return
+ except AssertionError:
+ continue
+
+ check_metric(core_api, metric_name,
+ metric_labels, actual_size)
+
+
def wait_for_metric_count_all_nodes(client, core_api, metric_name, metric_labels, expected_count): # NOQA
for _ in range(RETRY_COUNTS):
time.sleep(RETRY_INTERVAL)
@@ -271,7 +287,8 @@ def test_volume_metrics(client, core_api, volume_name, pvc_namespace): # NOQA
volume = client.by_id_volume(volume_name)
volume.attach(hostId=lht_hostId)
volume = wait_for_volume_healthy(client, volume_name)
- write_volume_random_data(volume)
+ data = write_volume_random_data(volume)
+ check_volume_data(volume, data)
volume = client.by_id_volume(volume_name)
actual_size = float(volume.controllers[0].actualSize)
capacity_size = float(volume.size)
@@ -284,8 +301,9 @@ def test_volume_metrics(client, core_api, volume_name, pvc_namespace): # NOQA
}
# check volume metric basic
- check_metric(core_api, "longhorn_volume_actual_size_bytes",
- metric_labels, actual_size)
+ wait_for_metric_volume_actual_size(core_api,
+ "longhorn_volume_actual_size_bytes",
+ metric_labels, actual_size)
check_metric(core_api, "longhorn_volume_capacity_bytes",
metric_labels, capacity_size)
check_metric(core_api, "longhorn_volume_read_throughput",
diff --git a/manager/integration/tests/test_node.py b/manager/integration/tests/test_node.py
index 1e4ad5dd32..7e199e4f5b 100644
--- a/manager/integration/tests/test_node.py
+++ b/manager/integration/tests/test_node.py
@@ -3,6 +3,7 @@
import os
import subprocess
import time
+import yaml
from random import choice
from string import ascii_lowercase, digits
@@ -47,8 +48,13 @@
from common import set_node_scheduling_eviction
from common import update_node_disks
from common import update_setting
+from common import SETTING_NODE_DRAIN_POLICY, DATA_SIZE_IN_MB_3
+from common import make_deployment_with_pvc # NOQA
+from common import prepare_host_disk, wait_for_volume_degraded
+from common import create_deployment_and_write_data
from backupstore import set_random_backupstore # NOQA
+from concurrent.futures import ThreadPoolExecutor, TimeoutError
CREATE_DEFAULT_DISK_LABEL = "node.longhorn.io/create-default-disk"
@@ -190,7 +196,7 @@ def test_node_disk_update(client): # NOQA
3. Create two disks `disk1` and `disk2`, attach them to the current node.
4. Add two disks to the current node.
5. Verify two extra disks have been added to the node
- 6. Disbale the two disks' scheduling, and set StorageReserved
+ 6. Disable the two disks' scheduling, and set StorageReserved
7. Update the two disks.
8. Validate all the disks properties.
9. Delete other two disks. Validate deletion works.
@@ -1919,7 +1925,7 @@ def test_node_config_annotation_missing(client, core_api, reset_default_disk_lab
3. Verify disk update works.
4. Verify tag update works
5. Verify using tag annotation for configuration works.
- 6. After remove the tag annotaion, verify unset tag node works fine.
+ 6. After remove the tag annotation, verify unset tag node works fine.
7. Set tag annotation again. Verify node updated for the tag.
"""
setting = client.by_id_setting(SETTING_CREATE_DEFAULT_DISK_LABELED_NODES)
@@ -2012,7 +2018,7 @@ def test_replica_scheduler_rebuild_restore_is_too_big(set_random_backupstore, cl
data cannot fit in the small disk
6. Delete a replica of volume.
1. Verify the volume reports `scheduled = false` due to unable to find
- a suitable disk for rebuliding replica, since the replica with the
+ a suitable disk for rebuilding replica, since the replica with the
existing data cannot fit in the small disk
6. Enable the scheduling for other disks, disable scheduling for small disk
7. Verify the volume reports `scheduled = true`. And verify the data.
@@ -2680,8 +2686,120 @@ def finalizer():
request.addfinalizer(finalizer)
-@pytest.mark.skip(reason="TODO") # NOQA
-def test_drain_with_block_for_eviction_success():
+
+def drain_node(core_api, node): # NOQA
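+    # Cordon the node first, then block on `kubectl drain` until it finishes;
+    # check=True makes a failed drain raise CalledProcessError.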
+ set_node_cordon(core_api, node.id, True)
+
+ command = [
+ "kubectl",
+ "drain",
+ node.id,
+ "--ignore-daemonsets",
+ "--delete-emptydir-data",
+ "--grace-period=-1"
+ ]
+
+ subprocess.run(command, check=True)
+
+
+def get_replica_detail(replica_name):
+ """
+    Get all the information of the specified replica
+ """
+ command = ["kubectl", "get",
+ "replicas.longhorn.io",
+ "-n",
+ "longhorn-system",
+ replica_name,
+ "-o",
+ "yaml"]
+ output = subprocess.check_output(command, text=True)
+ replica_info = yaml.safe_load(output)
+ return replica_info
+
+
+def check_node_auto_evict_state(client, target_node, expect_state): # NOQA
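+    # Poll the node until node.autoEvicting matches expect_state, then assert
+    # on it so a timeout surfaces as a test failure.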
+ def get_specific_node(client, target_node):
+ nodes = client.list_node()
+ for node in nodes:
+ if node.id == target_node.id:
+ return node
+
+ for i in range(RETRY_COUNTS):
+ node = get_specific_node(client, target_node)
+ if node.autoEvicting is expect_state:
+ break
+ time.sleep(RETRY_INTERVAL)
+ assert node.autoEvicting is expect_state
+
+
+def check_replica_evict_state(client, volume_name, node, expect_state): # NOQA
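+    # Find the replica of the volume scheduled on the given node and assert
+    # that its spec.evictionRequested equals expect_state.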
+ volume = client.by_id_volume(volume_name)
+ for replica in volume.replicas:
+ if replica.hostId == node.id:
+ replica_name = replica.name
+ break
+
+ replica_info = get_replica_detail(replica_name)
+ eviction_requested = replica_info["spec"]["evictionRequested"]
+ assert eviction_requested is expect_state
+
+
+def wait_drain_complete(future, timeout, completed=True):
+ """
+    Wait for a concurrent.futures object to complete within the given duration
+ """
+ def stop_drain_process():
+ """
+ Both future.cancel() and executer.shutdown(wait=False) can not really
+ stop the drain process.
+ Use this function to stop drain process
+ """
+ command = ["pkill", "-f", "kubectl drain"]
+ subprocess.check_output(command, text=True)
+
+ thread_timeout = timeout
+ try:
+ future.result(timeout=thread_timeout)
+ drain_complete = True
+ except TimeoutError:
+ print("drain node thread exceed timeout ({})s".format(thread_timeout))
+ drain_complete = False
+ stop_drain_process()
+ finally:
+        assert drain_complete is completed
+
+
+def make_replica_on_specific_node(client, volume_name, node): # NOQA
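+    # Reduce the volume to a single replica, keeping only the replica that
+    # resides on the given node.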
+ volume = client.by_id_volume(volume_name)
+ volume.updateReplicaCount(replicaCount=1)
+ for replica in volume.replicas:
+ if replica.hostId != node.id:
+ volume.replicaRemove(name=replica.name)
+ wait_for_volume_replica_count(client, volume_name, 1)
+
+
+def get_all_replica_name(client, volume_name): # NOQA
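+    # Return the names of all current replicas of the volume.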
+ volume_replicas = []
+ volume = client.by_id_volume(volume_name)
+ for replica in volume.replicas:
+ volume_replicas.append(replica.name)
+
+ return volume_replicas
+
+
+def check_all_replicas_evict_state(client, volume_name, expect_state): # NOQA
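+    # Assert that spec.evictionRequested on every replica of the volume equals
+    # expect_state.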
+ volume = client.by_id_volume(volume_name)
+ for replica in volume.replicas:
+ replica_info = get_replica_detail(replica.name)
+ eviction_requested = replica_info["spec"]["evictionRequested"]
+ assert eviction_requested is expect_state
+
+
+def test_drain_with_block_for_eviction_success(client, # NOQA
+ core_api, # NOQA
+ volume_name, # NOQA
+ make_deployment_with_pvc): # NOQA
"""
Test drain completes after evicting replica with node-drain-policy
block-for-eviction
@@ -2693,7 +2811,6 @@ def test_drain_with_block_for_eviction_success():
4. Write data to the volume.
5. Drain a node one of the volume's replicas is scheduled to.
6. While the drain is ongoing:
- - Verify that the volume never becomes degraded.
- Verify that `node.status.autoEvicting == true`.
- Optionally verify that `replica.spec.evictionRequested == true`.
7. Verify the drain completes.
@@ -2703,9 +2820,74 @@ def test_drain_with_block_for_eviction_success():
11. Verify that `replica.spec.evictionRequested == false`.
12. Verify the volume's data.
"""
+ host_id = get_self_host_id()
+ nodes = client.list_node()
+ evict_nodes = [node for node in nodes if node.id != host_id][:2]
+ evict_source_node = evict_nodes[0]
+ evict_target_node = evict_nodes[1]
-@pytest.mark.skip(reason="TODO") # NOQA
-def test_drain_with_block_for_eviction_if_contains_last_replica_success():
+ # Step 1
+ setting = client.by_id_setting(
+ SETTING_NODE_DRAIN_POLICY)
+ client.update(setting, value="block-for-eviction")
+
+ # Step 2, 3, 4
+ volume, pod, checksum = create_deployment_and_write_data(client,
+ core_api,
+ make_deployment_with_pvc, # NOQA
+ volume_name,
+ str(1 * Gi),
+ 3,
+ DATA_SIZE_IN_MB_3, host_id) # NOQA
+
+    # Make sure no replica is located on the eviction target node
+ volume.updateReplicaCount(replicaCount=2)
+ for replica in volume.replicas:
+ if replica.hostId == evict_target_node.id:
+ volume.replicaRemove(name=replica.name)
+ break
+
+ wait_for_volume_replica_count(client, volume_name, 2)
+
+ # Step 5
+ # drain eviction source node
+ executor = ThreadPoolExecutor(max_workers=5)
+ future = executor.submit(drain_node, core_api, evict_source_node)
+
+ # Step 6
+ check_replica_evict_state(client, volume_name, evict_source_node, True)
+ check_node_auto_evict_state(client, evict_source_node, True)
+
+ # Step 7
+ wait_drain_complete(future, 60)
+ wait_for_volume_replica_count(client, volume_name, 2)
+
+ # Step 8
+ set_node_cordon(core_api, evict_source_node.id, False)
+
+ # Step 9
+ volume = wait_for_volume_healthy(client, volume_name)
+ assert len(volume.replicas) == 2
+ for replica in volume.replicas:
+ assert replica.hostId != evict_source_node.id
+
+    # Step 10
+ check_node_auto_evict_state(client, evict_source_node, False)
+
+ # Step 11
+ check_replica_evict_state(client, volume_name, evict_target_node, False)
+
+ # Step 12
+    data_path = '/data/test'
+ test_data_checksum = get_pod_data_md5sum(core_api,
+ pod,
+ data_path)
+ assert checksum == test_data_checksum
+
+
+def test_drain_with_block_for_eviction_if_contains_last_replica_success(client, # NOQA
+ core_api, # NOQA
+ make_deployment_with_pvc): # NOQA
"""
Test drain completes after evicting replicas with node-drain-policy
block-for-eviction-if-contains-last-replica
@@ -2719,7 +2901,6 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
4. Write data to the volumes.
5. Drain a node both volumes have a replica scheduled to.
6. While the drain is ongoing:
- - Verify that the volume with one replica never becomes degraded.
- Verify that the volume with three replicas becomes degraded.
- Verify that `node.status.autoEvicting == true`.
- Optionally verify that `replica.spec.evictionRequested == true` on the
@@ -2735,9 +2916,116 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
12. Verify that `replica.spec.evictionRequested == false` on all replicas.
13. Verify the data in both volumes.
"""
+ host_id = get_self_host_id()
+ nodes = client.list_node()
+ evict_nodes = [node for node in nodes if node.id != host_id][:2]
+ evict_source_node = evict_nodes[0]
+ # Create extra disk on current node
+ node = client.by_id_node(host_id)
+ disks = node.disks
-@pytest.mark.skip(reason="TODO") # NOQA
-def test_drain_with_block_for_eviction_failure():
+ disk_volume_name = 'vol-disk'
+ disk_volume = client.create_volume(name=disk_volume_name,
+ size=str(2 * Gi),
+ numberOfReplicas=1,
+ dataLocality="strict-local")
+ disk_volume = wait_for_volume_detached(client, disk_volume_name)
+
+ disk_volume.attach(hostId=host_id)
+ disk_volume = wait_for_volume_healthy(client, disk_volume_name)
+ disk_path = prepare_host_disk(get_volume_endpoint(disk_volume),
+ disk_volume_name)
+ disk = {"path": disk_path, "allowScheduling": True}
+
+ update_disk = get_update_disks(disks)
+ update_disk["disk1"] = disk
+
+ node = update_node_disks(client, node.name, disks=update_disk, retry=True)
+ node = wait_for_disk_update(client, host_id, len(update_disk))
+ assert len(node.disks) == len(update_disk)
+
+ # Step 1
+ setting = client.by_id_setting(
+ SETTING_NODE_DRAIN_POLICY)
+ client.update(setting, value="block-for-eviction-if-contains-last-replica")
+
+ # Step 2, 3
+ volume1_name = "vol-1"
+ volume2_name = "vol-2"
+ volume1, pod1, checksum1 = create_deployment_and_write_data(client,
+ core_api,
+ make_deployment_with_pvc, # NOQA
+ volume1_name,
+ str(1 * Gi),
+ 3,
+ DATA_SIZE_IN_MB_3, # NOQA
+ host_id) # NOQA
+ volume2, pod2, checksum2 = create_deployment_and_write_data(client,
+ core_api,
+ make_deployment_with_pvc, # NOQA
+ volume2_name,
+ str(1 * Gi),
+ 3,
+ DATA_SIZE_IN_MB_3, # NOQA
+ host_id) # NOQA
+    # Make volume 1 keep only one replica, located on evict_source_node
+ make_replica_on_specific_node(client, volume1_name, evict_source_node)
+ volume2_replicas = get_all_replica_name(client, volume2_name)
+
+ # Step 5
+ executor = ThreadPoolExecutor(max_workers=5)
+ future = executor.submit(drain_node, core_api, evict_source_node)
+
+ # Step 6
+ check_replica_evict_state(client, volume1_name, evict_source_node, True)
+ check_node_auto_evict_state(client, evict_source_node, True)
+
+ volume2 = wait_for_volume_degraded(client, volume2_name)
+ check_all_replicas_evict_state(client, volume2_name, False)
+
+ # Step 7
+ wait_drain_complete(future, 60)
+
+ # Step 8
+ set_node_cordon(core_api, evict_source_node.id, False)
+
+ # Step 9
+ volume1 = client.by_id_volume(volume1_name)
+ wait_for_volume_replica_count(client, volume1_name, 1)
+ for replica in volume1.replicas:
+ assert replica.hostId != evict_source_node.id
+
+ # Step 10
+ # Verify volume2 replicas not moved by check replica name
+ # stored before the node drain
+ volume2 = wait_for_volume_healthy(client, volume2_name)
+ for replica in volume2.replicas:
+ assert replica.name in volume2_replicas
+
+ # Step 11
+ check_node_auto_evict_state(client, evict_source_node, False)
+
+ # Step 12
+ check_all_replicas_evict_state(client, volume1_name, False)
+ check_all_replicas_evict_state(client, volume2_name, False)
+
+ # Step 13
+ data_path = '/data/test'
+ test_data_checksum1 = get_pod_data_md5sum(core_api,
+ pod1,
+ data_path)
+ assert checksum1 == test_data_checksum1
+
+ test_data_checksum2 = get_pod_data_md5sum(core_api,
+ pod2,
+ data_path)
+ assert checksum2 == test_data_checksum2
+
+
+def test_drain_with_block_for_eviction_failure(client, # NOQA
+ core_api, # NOQA
+ volume_name, # NOQA
+ make_deployment_with_pvc): # NOQA
"""
Test drain never completes with node-drain-policy block-for-eviction
@@ -2752,7 +3040,47 @@ def test_drain_with_block_for_eviction_failure():
- Verify that `node.status.autoEvicting == true`.
- Verify that `replica.spec.evictionRequested == true`.
7. Verify the drain never completes.
+    8. Stop the drain, then check that the volume is healthy and the data is correct
"""
+ host_id = get_self_host_id()
+ nodes = client.list_node()
+ evict_nodes = [node for node in nodes if node.id != host_id][:2]
+ evict_source_node = evict_nodes[0]
+
+ # Step 1
+ setting = client.by_id_setting(
+ SETTING_NODE_DRAIN_POLICY)
+ client.update(setting, value="block-for-eviction")
+
+ # Step 2, 3, 4
+ volume, pod, checksum = create_deployment_and_write_data(client,
+ core_api,
+ make_deployment_with_pvc, # NOQA
+ volume_name,
+ str(1 * Gi),
+ 3,
+ DATA_SIZE_IN_MB_3, host_id) # NOQA
+
+ # Step 5
+ executor = ThreadPoolExecutor(max_workers=5)
+ future = executor.submit(drain_node, core_api, evict_source_node)
+
+ # Step 6
+ check_replica_evict_state(client, volume_name, evict_source_node, True)
+ check_node_auto_evict_state(client, evict_source_node, True)
+
+ # Step 7
+ wait_drain_complete(future, 90, False)
+
+ # Step 8
+ set_node_cordon(core_api, evict_source_node.id, False)
+ wait_for_volume_healthy(client, volume_name)
+ data_path = '/data/test'
+ test_data_checksum = get_pod_data_md5sum(core_api,
+ pod,
+ data_path)
+ assert checksum == test_data_checksum
+
@pytest.mark.node # NOQA
def test_auto_detach_volume_when_node_is_cordoned(client, core_api, volume_name): # NOQA
diff --git a/manager/integration/tests/test_rwx.py b/manager/integration/tests/test_rwx.py
index 79ea321117..2132acf020 100644
--- a/manager/integration/tests/test_rwx.py
+++ b/manager/integration/tests/test_rwx.py
@@ -538,7 +538,7 @@ def test_rwx_online_expansion(): # NOQA
- Create a rwx pvc using longhorn storage class of size 1 Gi.
And
- - Atach it to a workload (deployment) and write some data.
+ - Attach it to a workload (deployment) and write some data.
When
- Expand the volume to 5 Gi
@@ -566,7 +566,7 @@ def test_rwx_offline_expansion(client, core_api, pvc, make_deployment_with_pvc):
- Create a rwx pvc using longhorn storage class of size 1 Gi.
And
- - Atach it to a workload (deployment) and write some data.
+ - Attach it to a workload (deployment) and write some data.
- Scale down the workload, wait volume detached
- Share manager pod will terminate automatically
- Expand the volume to 4 Gi, wait expansion complete
diff --git a/manager/integration/tests/test_scheduling.py b/manager/integration/tests/test_scheduling.py
index 2c164cad38..c95ce6101e 100644
--- a/manager/integration/tests/test_scheduling.py
+++ b/manager/integration/tests/test_scheduling.py
@@ -463,6 +463,7 @@ def test_replica_rebuild_per_volume_limit(client, core_api, storage_class, sts_n
vol = common.wait_for_volume_replicas_mode(client, vol_name, 'RW',
replica_count=r_count)
+ wait_for_volume_healthy(client, vol_name)
# Delete 4 volume replicas
del vol.replicas[0]
@@ -1917,7 +1918,7 @@ def test_global_disk_soft_anti_affinity(client, volume_name, request): # NOQA
assert num_running == 2
# After enable SETTING_REPLICA_DISK_SOFT_ANTI_AFFINITY to true,
- # replicas can schedule on the same disk, threrefore volume become healthy
+    # replicas can schedule on the same disk, therefore the volume becomes healthy
update_setting(client, SETTING_REPLICA_DISK_SOFT_ANTI_AFFINITY, "true")
volume = wait_for_volume_healthy(client, volume_name)
@@ -2088,7 +2089,7 @@ def test_volume_disk_soft_anti_affinity(client, volume_name, request): # NOQA
assert num_running == 2
# After set update volume.updateReplicaDiskSoftAntiAffinity to enabled,
- # replicas can schedule on the same disk, threrefore volume become healthy
+    # replicas can schedule on the same disk, therefore the volume becomes healthy
volume = volume.updateReplicaDiskSoftAntiAffinity(
replicaDiskSoftAntiAffinity="enabled")
assert volume.replicaDiskSoftAntiAffinity == "enabled"
diff --git a/manager/integration/tests/test_settings.py b/manager/integration/tests/test_settings.py
index 1f025b2fae..aff12a2732 100644
--- a/manager/integration/tests/test_settings.py
+++ b/manager/integration/tests/test_settings.py
@@ -995,7 +995,7 @@ def setting_concurrent_volume_backup_restore_limit_concurrent_restoring_test(cli
break
assert is_case_tested, \
- f"Unexpected cocurrent count: {concurrent_count}\n"
+ f"Unexpected concurrent count: {concurrent_count}\n"
for restore_volume_name in restore_volume_names:
if is_DR_volumes:
@@ -1197,7 +1197,7 @@ def test_setting_update_with_invalid_value_via_configmap(core_api, request): #
2. Initialize longhorn-default-setting configmap containing
valid and invalid settings
3. Update longhorn-default-setting configmap with invalid settings.
- The invalid settings SETTING_TAINT_TOLERATION will be ingored
+ The invalid settings SETTING_TAINT_TOLERATION will be ignored
when there is an attached volume.
4. Validate the default settings values.
"""
diff --git a/manager/integration/tests/test_statefulset.py b/manager/integration/tests/test_statefulset.py
index a4a216dbd7..428119cfae 100644
--- a/manager/integration/tests/test_statefulset.py
+++ b/manager/integration/tests/test_statefulset.py
@@ -100,7 +100,7 @@ def test_statefulset_mount(client, core_api, storage_class, statefulset): # NOQ
1. Create a StatefulSet using dynamic provisioned Longhorn volume.
2. Wait for pods to become running
- 3. Check volume properites are consistent with the StorageClass
+ 3. Check volume properties are consistent with the StorageClass
"""
statefulset_name = 'statefulset-mount-test'
@@ -138,7 +138,7 @@ def test_statefulset_scaling(client, core_api, storage_class, statefulset): # N
1. Create a StatefulSet with VolumeClaimTemplate and Longhorn.
2. Wait for pods to run.
- 3. Verify the properities of volumes.
+ 3. Verify the properties of volumes.
4. Scale the StatefulSet to 3 replicas
5. Wait for the new pod to become ready.
6. Verify the new volume properties.
@@ -259,7 +259,7 @@ def test_statefulset_backup(set_random_backupstore, client, core_api, storage_cl
4. Create a third snapshot
5. Backup the snapshot `backup_snapshot`
6. Wait for backup to show up.
- 1 Verify the backup informations
+ 1 Verify the backup information
"""
statefulset_name = 'statefulset-backup-test'
diff --git a/mirror_csi_images/Dockerfile.setup b/mirror_csi_images/Dockerfile.setup
index 9f4511b6c3..23aa8e287a 100644
--- a/mirror_csi_images/Dockerfile.setup
+++ b/mirror_csi_images/Dockerfile.setup
@@ -6,6 +6,6 @@ WORKDIR $WORKSPACE
RUN apk add --no-cache skopeo docker jq bash grep
-COPY --from=docker/buildx-bin:v0.8 /buildx /usr/libexec/docker/cli-plugins/docker-buildx
+COPY --from=docker/buildx-bin:v0.13 /buildx /usr/libexec/docker/cli-plugins/docker-buildx
COPY [".", "$WORKSPACE"]
diff --git a/mirror_csi_images/scripts/publish.sh b/mirror_csi_images/scripts/publish.sh
index 4d918fb099..8ff6bf755c 100755
--- a/mirror_csi_images/scripts/publish.sh
+++ b/mirror_csi_images/scripts/publish.sh
@@ -9,7 +9,7 @@ if [[ -n "${LONGHORN_IMAGES_FILE_URL}" ]]; then
wget "${LONGHORN_IMAGES_FILE_URL}" -O "${LONGHORN_IMAGES_FILE}"
while read -r LINE; do
- if [[ "${LINE}" =~ "csi-" ]]; then
+ if [[ "${LINE}" =~ csi-|livenessprobe ]]; then
CSI_IMAGE=$(echo "${LINE}" | sed -e "s/longhornio\///g")
IFS=: read -ra IMAGE_TAG_PAIR <<< "${CSI_IMAGE}"
echo "registry.k8s.io/sig-storage/${IMAGE_TAG_PAIR[0]}" "longhornio/${IMAGE_TAG_PAIR[0]}" "${IMAGE_TAG_PAIR[1]}" >> "${INFILE}"
@@ -23,7 +23,7 @@ else
IFS=, read -ra CSI_IMAGES_ARR <<< "${CSI_IMAGES}"
for CSI_IMAGE in "${CSI_IMAGES_ARR[@]}"; do
IFS=: read -ra IMAGE_TAG_PAIR <<< "$CSI_IMAGE"
- if [[ "${CSI_IMAGE}" =~ "csi-" ]]; then
+ if [[ "${CSI_IMAGE}" =~ csi-|livenessprobe ]]; then
echo "registry.k8s.io/sig-storage/${IMAGE_TAG_PAIR[0]}" "longhornio/${IMAGE_TAG_PAIR[0]}" "${IMAGE_TAG_PAIR[1]}" >> "${INFILE}"
elif [[ "${CSI_IMAGE}" =~ "support-bundle-kit" ]]; then
echo "rancher/${IMAGE_TAG_PAIR[0]}" "longhornio/${IMAGE_TAG_PAIR[0]}" "${IMAGE_TAG_PAIR[1]}" >> "${INFILE}"
diff --git a/pipelines/gke/scripts/longhorn-setup.sh b/pipelines/gke/scripts/longhorn-setup.sh
index 163f9cc4bb..b9c6b6346c 100755
--- a/pipelines/gke/scripts/longhorn-setup.sh
+++ b/pipelines/gke/scripts/longhorn-setup.sh
@@ -139,8 +139,8 @@ create_longhorn_namespace(){
install_backupstores(){
- MINIO_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn-tests/master/manager/integration/deploy/backupstores/minio-backupstore.yaml"
- NFS_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn-tests/master/manager/integration/deploy/backupstores/nfs-backupstore.yaml"
+ MINIO_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/minio-backupstore.yaml"
+ NFS_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/nfs-backupstore.yaml"
kubectl create -f ${MINIO_BACKUPSTORE_URL} \
-f ${NFS_BACKUPSTORE_URL}
}
diff --git a/pipelines/storage_network/Dockerfile.setup b/pipelines/storage_network/Dockerfile.setup
new file mode 100644
index 0000000000..4e7e9b10e1
--- /dev/null
+++ b/pipelines/storage_network/Dockerfile.setup
@@ -0,0 +1,31 @@
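+# Setup image for the storage_network pipeline: bundles kubectl, rke,
+# terraform, yq and the AWS CLI, and pre-generates an SSH key pair.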
+FROM alpine:latest
+
+ARG KUBECTL_VERSION=v1.20.2
+
+ARG RKE_VERSION=v1.3.4
+
+ARG TERRAFORM_VERSION=1.3.5
+
+ARG YQ_VERSION=v4.24.2
+
+ENV WORKSPACE /src/longhorn-tests
+
+WORKDIR $WORKSPACE
+
+RUN wget -q https://storage.googleapis.com/kubernetes-release/release/$KUBECTL_VERSION/bin/linux/amd64/kubectl && \
+ mv kubectl /usr/local/bin/kubectl && \
+ chmod +x /usr/local/bin/kubectl && \
+ wget -q https://github.com/rancher/rke/releases/download/$RKE_VERSION/rke_linux-amd64 && \
+ mv rke_linux-amd64 /usr/bin/rke && \
+ chmod +x /usr/bin/rke && \
+ wget -q https://releases.hashicorp.com/terraform/${TERRAFORM_VERSION}/terraform_${TERRAFORM_VERSION}_linux_amd64.zip && \
+ unzip terraform_${TERRAFORM_VERSION}_linux_amd64.zip && rm terraform_${TERRAFORM_VERSION}_linux_amd64.zip && \
+ mv terraform /usr/bin/terraform && \
+ chmod +x /usr/bin/terraform && \
+ wget -q "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64" && \
+ mv yq_linux_amd64 /usr/local/bin/yq && \
+ chmod +x /usr/local/bin/yq && \
+ apk add openssl openssh-client ca-certificates git rsync bash curl jq python3 py3-pip aws-cli && \
+ ssh-keygen -t rsa -b 4096 -N "" -f ~/.ssh/id_rsa
+
+COPY [".", "$WORKSPACE"]
diff --git a/pipelines/storage_network/Jenkinsfile b/pipelines/storage_network/Jenkinsfile
new file mode 100644
index 0000000000..0824204ccf
--- /dev/null
+++ b/pipelines/storage_network/Jenkinsfile
@@ -0,0 +1,218 @@
+def imageName = "${JOB_BASE_NAME}-${env.BUILD_NUMBER}"
+def summary
+def WORKSPACE = "/src/longhorn-tests"
+def BUILD_TRIGGER_BY = "\n${currentBuild.getBuildCauses()[0].shortDescription}"
+
+// define optional parameters
+def SELINUX_MODE = params.SELINUX_MODE ? params.SELINUX_MODE : ""
+
+def CREDS_ID = JOB_BASE_NAME == "longhorn-tests-regression" ? "AWS_CREDS_RANCHER_QA" : "AWS_CREDS"
+def REGISTRATION_CODE_ID = params.ARCH == "amd64" ? "REGISTRATION_CODE" : "REGISTRATION_CODE_ARM64"
+
+// parameters for air gap installation
+def AIR_GAP_INSTALLATION = params.AIR_GAP_INSTALLATION ? params.AIR_GAP_INSTALLATION : false
+def LONGHORN_INSTALL_VERSION = params.LONGHORN_INSTALL_VERSION ? params.LONGHORN_INSTALL_VERSION : "master"
+def LONGHORN_TRANSIENT_VERSION = params.LONGHORN_TRANSIENT_VERSION ? params.LONGHORN_TRANSIENT_VERSION : ""
+def CIS_HARDENING = params.CIS_HARDENING ? params.CIS_HARDENING : false
+def REGISTRY_URL
+def REGISTRY_USERNAME
+def REGISTRY_PASSWORD
+
+// parameter for hdd test
+def USE_HDD = params.USE_HDD ? params.USE_HDD : false
+
+node {
+
+ withCredentials([
+ usernamePassword(credentialsId: CREDS_ID, passwordVariable: 'AWS_SECRET_KEY', usernameVariable: 'AWS_ACCESS_KEY'),
+ string(credentialsId: 'DO_CREDS', variable: 'DO_TOKEN'),
+ string(credentialsId: REGISTRATION_CODE_ID, variable: 'REGISTRATION_CODE'),
+ ]) {
+
+ if (params.SEND_SLACK_NOTIFICATION) {
+ notifyBuild('STARTED', BUILD_TRIGGER_BY, params.NOTIFY_SLACK_CHANNEL)
+ }
+
+ checkout scm
+
+ try {
+
+ if (params.AIR_GAP_INSTALLATION) {
+
+ stage('airgap build') {
+ sh "airgap/scripts/build.sh"
+ sh """ docker run -itd --name airgap-${JOB_BASE_NAME}-${BUILD_NUMBER} \
+ --env TF_VAR_longhorn_version=${LONGHORN_INSTALL_VERSION} \
+ --env TF_VAR_do_token=${DO_TOKEN} \
+ --env TF_VAR_aws_access_key=${AWS_ACCESS_KEY} \
+ --env TF_VAR_aws_secret_key=${AWS_SECRET_KEY} \
+ airgap-${JOB_BASE_NAME}-${BUILD_NUMBER}
+ """
+ }
+
+ stage ('airgap setup') {
+ sh "docker exec airgap-${JOB_BASE_NAME}-${BUILD_NUMBER} ./airgap/scripts/terraform-setup.sh"
+ REGISTRY_URL = sh (
+ script: "docker exec airgap-${JOB_BASE_NAME}-${BUILD_NUMBER} terraform -chdir=./airgap/terraform output -raw registry_url",
+ returnStdout: true
+ )
+ println REGISTRY_URL
+ REGISTRY_USERNAME = sh (
+ script: "docker exec airgap-${JOB_BASE_NAME}-${BUILD_NUMBER} terraform -chdir=./airgap/terraform output -raw registry_username",
+ returnStdout: true
+ )
+ REGISTRY_PASSWORD = sh (
+ script: "docker exec airgap-${JOB_BASE_NAME}-${BUILD_NUMBER} terraform -chdir=./airgap/terraform output -raw registry_password",
+ returnStdout: true
+ )
+ }
+
+ }
+
+ stage('build') {
+
+ echo "Using credentials: $CREDS_ID"
+ echo "Using registration code: $REGISTRATION_CODE_ID"
+
+ sh "pipelines/storage_network/scripts/build.sh"
+ sh """ docker run -itd --name ${JOB_BASE_NAME}-${BUILD_NUMBER} \
+ --env AIR_GAP_INSTALLATION=${AIR_GAP_INSTALLATION} \
+ --env REGISTRY_URL=${REGISTRY_URL} \
+ --env REGISTRY_USERNAME=${REGISTRY_USERNAME} \
+ --env REGISTRY_PASSWORD=${REGISTRY_PASSWORD} \
+ --env LONGHORN_INSTALL_VERSION=${LONGHORN_INSTALL_VERSION} \
+ --env CUSTOM_LONGHORN_ENGINE_IMAGE=${CUSTOM_LONGHORN_ENGINE_IMAGE} \
+ --env CUSTOM_LONGHORN_INSTANCE_MANAGER_IMAGE=${CUSTOM_LONGHORN_INSTANCE_MANAGER_IMAGE} \
+ --env CUSTOM_LONGHORN_MANAGER_IMAGE=${CUSTOM_LONGHORN_MANAGER_IMAGE} \
+ --env CUSTOM_LONGHORN_SHARE_MANAGER_IMAGE=${CUSTOM_LONGHORN_SHARE_MANAGER_IMAGE} \
+ --env CUSTOM_LONGHORN_BACKING_IMAGE_MANAGER_IMAGE=${CUSTOM_LONGHORN_BACKING_IMAGE_MANAGER_IMAGE} \
+ --env LONGHORN_TESTS_CUSTOM_IMAGE=${LONGHORN_TESTS_CUSTOM_IMAGE} \
+ --env DISTRO=${DISTRO} \
+ --env LONGHORN_REPO_URI=${LONGHORN_REPO_URI} \
+ --env LONGHORN_REPO_BRANCH=${LONGHORN_REPO_BRANCH} \
+ --env LONGHORN_STABLE_VERSION=${LONGHORN_STABLE_VERSION} \
+ --env LONGHORN_TRANSIENT_VERSION=${LONGHORN_TRANSIENT_VERSION} \
+ --env LONGHORN_TEST_CLOUDPROVIDER=${LONGHORN_TEST_CLOUDPROVIDER} \
+ --env LONGHORN_UPGRADE_TEST=${LONGHORN_UPGRADE_TEST} \
+ --env PYTEST_CUSTOM_OPTIONS="${PYTEST_CUSTOM_OPTIONS}" \
+ --env BACKUP_STORE_TYPE="${BACKUP_STORE_TYPE}" \
+ --env TF_VAR_use_hdd=${USE_HDD} \
+ --env TF_VAR_arch=${ARCH} \
+ --env TF_VAR_k8s_distro_name=${K8S_DISTRO_NAME} \
+ --env TF_VAR_k8s_distro_version=${K8S_DISTRO_VERSION} \
+ --env TF_VAR_aws_availability_zone=${AWS_AVAILABILITY_ZONE} \
+ --env TF_VAR_aws_region=${AWS_REGION} \
+ --env TF_VAR_os_distro_version=${DISTRO_VERSION} \
+ --env TF_VAR_do_token=${env.TF_VAR_do_token} \
+ --env TF_VAR_aws_access_key=${AWS_ACCESS_KEY} \
+ --env TF_VAR_lh_aws_instance_name_controlplane="${JOB_BASE_NAME}-ctrl" \
+ --env TF_VAR_lh_aws_instance_name_worker="${JOB_BASE_NAME}-wrk" \
+ --env TF_VAR_lh_aws_instance_type_controlplane=${CONTROLPLANE_INSTANCE_TYPE} \
+ --env TF_VAR_lh_aws_instance_type_worker=${WORKER_INSTANCE_TYPE}\
+ --env TF_VAR_aws_secret_key=${AWS_SECRET_KEY} \
+ --env AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY} \
+ --env AWS_SECRET_ACCESS_KEY=${AWS_SECRET_KEY} \
+ --env AWS_DEFAULT_REGION=${AWS_REGION} \
+ --env TF_VAR_selinux_mode=${SELINUX_MODE} \
+ --env TF_VAR_registration_code=${REGISTRATION_CODE} \
+ --env TF_VAR_cis_hardening=${CIS_HARDENING} \
+ --env TF_VAR_mtu=${MTU_SIZE} \
+ --env TF_VAR_multus_version=${MULTUS_VERSION} \
+ --env TF_VAR_thick_plugin=${THICK_PLUGIN} \
+ ${imageName}
+ """
+ }
+
+ timeout(60) {
+ stage ('terraform') {
+ sh "docker exec ${JOB_BASE_NAME}-${BUILD_NUMBER} pipelines/storage_network/scripts/terraform-setup.sh"
+ }
+ }
+
+ stage ('longhorn setup & tests') {
+ sh "docker exec ${JOB_BASE_NAME}-${BUILD_NUMBER} pipelines/storage_network/scripts/longhorn-setup.sh"
+ }
+
+ stage ('download support bundle') {
+ sh "docker exec ${JOB_BASE_NAME}-${BUILD_NUMBER} pipelines/storage_network/scripts/download-support-bundle.sh ${JOB_BASE_NAME}-${BUILD_NUMBER}-bundle.zip"
+ sh "docker cp ${JOB_BASE_NAME}-${BUILD_NUMBER}:${WORKSPACE}/${JOB_BASE_NAME}-${BUILD_NUMBER}-bundle.zip ."
+ archiveArtifacts allowEmptyArchive: true, artifacts: '**/*.zip', followSymlinks: false
+ }
+
+ stage ('report generation') {
+ sh "docker cp ${JOB_BASE_NAME}-${BUILD_NUMBER}:${WORKSPACE}/longhorn-test-junit-report.xml ."
+
+ if(params.LONGHORN_UPGRADE_TEST && params.LONGHORN_TRANSIENT_VERSION) {
+ sh "docker cp ${JOB_BASE_NAME}-${BUILD_NUMBER}:${WORKSPACE}/longhorn-test-upgrade-from-stable-junit-report.xml ."
+ sh "docker cp ${JOB_BASE_NAME}-${BUILD_NUMBER}:${WORKSPACE}/longhorn-test-upgrade-from-transient-junit-report.xml ."
+ summary = junit 'longhorn-test-upgrade-from-stable-junit-report.xml, longhorn-test-upgrade-from-transient-junit-report.xml, longhorn-test-junit-report.xml'
+ }
+ else if(params.LONGHORN_UPGRADE_TEST) {
+ sh "docker cp ${JOB_BASE_NAME}-${BUILD_NUMBER}:${WORKSPACE}/longhorn-test-upgrade-from-stable-junit-report.xml ."
+ summary = junit 'longhorn-test-upgrade-from-stable-junit-report.xml, longhorn-test-junit-report.xml'
+ }
+ else {
+ summary = junit 'longhorn-test-junit-report.xml'
+ }
+ }
+
+ } catch (e) {
+ currentBuild.result = "FAILED"
+ throw e
+ } finally {
+ stage ('releasing resources') {
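+ // tear down the airgap helper container (if one was created) and the test container, then post the result summary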
+ if (sh (script: "docker container inspect airgap-${JOB_BASE_NAME}-${BUILD_NUMBER} > /dev/null 2>&1", returnStatus: true) == 0) {
+ sh "docker exec airgap-${JOB_BASE_NAME}-${BUILD_NUMBER} ./airgap/scripts/cleanup.sh"
+ sh "docker stop airgap-${JOB_BASE_NAME}-${BUILD_NUMBER}"
+ sh "docker rm -v airgap-${JOB_BASE_NAME}-${BUILD_NUMBER}"
+ sh "docker rmi airgap-${JOB_BASE_NAME}-${BUILD_NUMBER}"
+ }
+
+ if (sh (script: "docker container inspect ${JOB_BASE_NAME}-${BUILD_NUMBER} > /dev/null 2>&1", returnStatus: true) == 0) {
+ sh "docker exec ${JOB_BASE_NAME}-${BUILD_NUMBER} pipelines/storage_network/scripts/cleanup.sh"
+ sh "docker stop ${JOB_BASE_NAME}-${BUILD_NUMBER}"
+ sh "docker rm -v ${JOB_BASE_NAME}-${BUILD_NUMBER}"
+ sh "docker rmi ${imageName}"
+ }
+
+ if (summary) {
+ summary_msg = "\nTest Summary - Failures: ${summary.failCount}, Skipped: ${summary.skipCount}, Passed: ${summary.passCount} -- Job completed in ${currentBuild.durationString.replace(' and counting', '')}"
+ } else {
+ summary_msg = "\n Test Failed: No Junit report"
+ }
+
+ if(params.SEND_SLACK_NOTIFICATION){
+ notifyBuild(currentBuild.result, summary_msg, params.NOTIFY_SLACK_CHANNEL)
+ }
+ }
+ }
+ }
+
+}
+
+
+def notifyBuild(String buildStatus = 'STARTED', String summary_msg, String slack_channel) {
+ // build status of null means successful
+ buildStatus = buildStatus ?: 'SUCCESSFUL'
+
+ // Default values
+ def colorName = 'RED'
+ def colorCode = '#FF0000'
+ def subject = "${buildStatus}: Job '${env.JOB_BASE_NAME} [${env.BUILD_NUMBER}]'"
+ def summary = "${subject} (${env.BUILD_URL})" + summary_msg
+
+ // Override default values based on build status
+ if (buildStatus == 'STARTED') {
+ color = 'YELLOW'
+ colorCode = '#FFFF00'
+ } else if (buildStatus == 'SUCCESSFUL') {
+ color = 'GREEN'
+ colorCode = '#00FF00'
+ } else {
+ color = 'RED'
+ colorCode = '#FF0000'
+ }
+
+ // Send notifications
+ slackSend (color: colorCode, message: summary, channel: slack_channel, tokenCredentialId: 'longhorn-tests-slack-token')
+}
diff --git a/pipelines/storage_network/scripts/build.sh b/pipelines/storage_network/scripts/build.sh
new file mode 100755
index 0000000000..74f825bdef
--- /dev/null
+++ b/pipelines/storage_network/scripts/build.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+docker build --no-cache -f ./pipelines/storage_network/Dockerfile.setup -t "${JOB_BASE_NAME}-${BUILD_NUMBER}" .
diff --git a/pipelines/storage_network/scripts/cleanup.sh b/pipelines/storage_network/scripts/cleanup.sh
new file mode 100755
index 0000000000..6d41aeeabd
--- /dev/null
+++ b/pipelines/storage_network/scripts/cleanup.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+# terminate any terraform processes
+TERRAFORM_PIDS=( $(ps aux | grep -i terraform | grep -v grep | awk '{printf("%s ",$2)}') )
+if [[ ${#TERRAFORM_PIDS[@]} -gt 0 ]] ; then
+  for PID in "${TERRAFORM_PIDS[@]}"; do
+    kill "${PID}"
+  done
+fi
+
+# wait 30 seconds for graceful terraform termination
+sleep 30
+
+terraform -chdir=pipelines/storage_network/terraform destroy -auto-approve -no-color
diff --git a/pipelines/storage_network/scripts/download-support-bundle.sh b/pipelines/storage_network/scripts/download-support-bundle.sh
new file mode 100755
index 0000000000..1bac81d5e0
--- /dev/null
+++ b/pipelines/storage_network/scripts/download-support-bundle.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+set -e
+
+SUPPORT_BUNDLE_FILE_NAME=${1:-"lh-support-bundle.zip"}
+SUPPORT_BUNDLE_ISSUE_URL=${2:-""}
+SUPPORT_BUNDLE_ISSUE_DESC=${3:-"Auto-generated support bundle"}
+
+set_kubeconfig_envvar(){
+ export KUBECONFIG="${PWD}/pipelines/storage_network/terraform/k3s.yaml"
+}
+
+set_kubeconfig_envvar
+
+LH_FRONTEND_ADDR=`kubectl get svc -n longhorn-system longhorn-frontend -o json | jq -r '.spec.clusterIP + ":" + (.spec.ports[0].port|tostring)'`
+
+JSON_PAYLOAD="{\"issueURL\": \"${SUPPORT_BUNDLE_ISSUE_URL}\", \"description\": \"${SUPPORT_BUNDLE_ISSUE_DESC}\"}"
+
+CURL_CMD="curl -XPOST http://${LH_FRONTEND_ADDR}/v1/supportbundles -H 'Accept: application/json' -H 'Accept-Encoding: gzip, deflate' -d '"${JSON_PAYLOAD}"'"
+
+SUPPORT_BUNDLE_URL=`kubectl exec -n longhorn-system svc/longhorn-frontend -- bash -c "${CURL_CMD}" | jq -r '.links.self + "/" + .name'`
+
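+# poll the support bundle until its generation reports 100% progress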
+SUPPORT_BUNDLE_READY=false
+while [[ ${SUPPORT_BUNDLE_READY} == false ]]; do
+ PERCENT=`kubectl exec -n longhorn-system svc/longhorn-frontend -- curl -H 'Accept: application/json' ${SUPPORT_BUNDLE_URL} | jq -r '.progressPercentage' || true`
+ echo ${PERCENT}
+
+ if [[ ${PERCENT} == 100 ]]; then SUPPORT_BUNDLE_READY=true; fi
+done
+
+kubectl exec -n longhorn-system svc/longhorn-frontend -- curl -H 'Accept-Encoding: gzip, deflate' ${SUPPORT_BUNDLE_URL}/download > ${SUPPORT_BUNDLE_FILE_NAME}
diff --git a/pipelines/storage_network/scripts/longhorn-setup.sh b/pipelines/storage_network/scripts/longhorn-setup.sh
new file mode 100755
index 0000000000..115d6ffbd1
--- /dev/null
+++ b/pipelines/storage_network/scripts/longhorn-setup.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+
+set -x
+
+source pipelines/utilities/selinux_workaround.sh
+source pipelines/utilities/install_csi_snapshotter.sh
+source pipelines/utilities/create_aws_secret.sh
+source pipelines/utilities/install_backupstores.sh
+source pipelines/utilities/storage_network.sh
+source pipelines/utilities/create_longhorn_namespace.sh
+source pipelines/utilities/longhorn_manifest.sh
+source pipelines/utilities/run_longhorn_test.sh
+
+# create and clean tmpdir
+TMPDIR="/tmp/longhorn"
+mkdir -p ${TMPDIR}
+rm -rf "${TMPDIR}/"
+
+export LONGHORN_NAMESPACE="longhorn-system"
+export LONGHORN_INSTALL_METHOD="manifest"
+
+set_kubeconfig_envvar(){
+ export KUBECONFIG="${PWD}/pipelines/storage_network/terraform/k3s.yaml"
+}
+
+main(){
+ set_kubeconfig_envvar
+
+ if [[ ${DISTRO} == "rhel" ]] || [[ ${DISTRO} == "rockylinux" ]] || [[ ${DISTRO} == "oracle" ]]; then
+ apply_selinux_workaround
+ fi
+
+ # set debugging mode off to avoid leaking aws secrets to the logs.
+ # DON'T REMOVE!
+ set +x
+ create_aws_secret
+ set -x
+
+ if [[ "${TF_VAR_thick_plugin}" == true ]]; then
+ deploy_multus_thick_plugin_daemonset
+ else
+ deploy_multus_thin_plugin_daemonset
+ fi
+ deploy_network_attachment_definition
+
+ create_longhorn_namespace
+ install_backupstores
+ install_csi_snapshotter
+
+ generate_longhorn_yaml_manifest
+ install_longhorn_by_manifest
+
+ update_storage_network_setting
+
+ run_longhorn_test
+}
+
+main
diff --git a/pipelines/storage_network/scripts/terraform-setup.sh b/pipelines/storage_network/scripts/terraform-setup.sh
new file mode 100755
index 0000000000..0b7f9b3a8f
--- /dev/null
+++ b/pipelines/storage_network/scripts/terraform-setup.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+set -x
+
+terraform -chdir=pipelines/storage_network/terraform init
+terraform -chdir=pipelines/storage_network/terraform apply -auto-approve -no-color
+
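+# disable source/dest checking on the ENIs so the nodes can forward storage-network traffic for each other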
+NETWORK_INTERFACE_IDS=$(terraform -chdir=pipelines/storage_network/terraform output -json network_interface_ids | tr -d '"')
+for id in ${NETWORK_INTERFACE_IDS}; do
+ aws ec2 modify-network-interface-attribute --network-interface-id "${id}" --no-source-dest-check
+done
+
+exit $?
diff --git a/pipelines/storage_network/terraform/data.tf b/pipelines/storage_network/terraform/data.tf
new file mode 100644
index 0000000000..aeee2eae84
--- /dev/null
+++ b/pipelines/storage_network/terraform/data.tf
@@ -0,0 +1,52 @@
+locals {
+ aws_ami_sles_arch = var.arch == "amd64" ? "x86_64" : var.arch
+}
+
+data "aws_ami" "aws_ami_sles" {
+ most_recent = true
+ owners = [var.aws_ami_sles_account_number]
+ name_regex = "^suse-sles-${var.os_distro_version}-v\\d+-hvm-ssd-${local.aws_ami_sles_arch}"
+}
+
+# Generate template file for k3s server
+data "template_file" "provision_k3s_server" {
+ template = var.k8s_distro_name == "k3s" ? file("${path.module}/user-data-scripts/provision_k3s_server.sh.tpl") : null
+ vars = {
+ k3s_cluster_secret = random_password.cluster_secret.result
+ k3s_server_public_ip = aws_eip.aws_eip[0].public_ip
+ k3s_version = var.k8s_distro_version
+ thick_plugin = var.thick_plugin
+ }
+}
+
+# Generate template file for k3s agent
+data "template_file" "provision_k3s_agent" {
+ template = var.k8s_distro_name == "k3s" ? file("${path.module}/user-data-scripts/provision_k3s_agent.sh.tpl") : null
+ vars = {
+ k3s_server_url = "https://${aws_eip.aws_eip[0].public_ip}:6443"
+ k3s_cluster_secret = random_password.cluster_secret.result
+ k3s_version = var.k8s_distro_version
+ thick_plugin = var.thick_plugin
+ }
+}
+
+# Generate template file for flannel
+data "template_file" "flannel" {
+ template = var.k8s_distro_name == "k3s" ? file("${path.module}/user-data-scripts/flannel.sh.tpl") : null
+ vars = {
+ N1 = aws_network_interface.instance_eth1[0].private_ip
+ N2 = aws_network_interface.instance_eth1[1].private_ip
+ N3 = aws_network_interface.instance_eth1[2].private_ip
+ mtu = var.mtu
+ }
+}
+
+# Generate template file for routes
+data "template_file" "routes" {
+ template = var.k8s_distro_name == "k3s" ? file("${path.module}/user-data-scripts/routes.sh.tpl") : null
+ vars = {
+ N1 = aws_network_interface.instance_eth1[0].private_ip
+ N2 = aws_network_interface.instance_eth1[1].private_ip
+ N3 = aws_network_interface.instance_eth1[2].private_ip
+ }
+}
diff --git a/pipelines/storage_network/terraform/main.tf b/pipelines/storage_network/terraform/main.tf
new file mode 100644
index 0000000000..ce8a22ebeb
--- /dev/null
+++ b/pipelines/storage_network/terraform/main.tf
@@ -0,0 +1,308 @@
+terraform {
+ required_providers {
+ aws = {
+ source = "hashicorp/aws"
+ version = "~> 3.0"
+ }
+ }
+}
+
+provider "aws" {
+ region = var.aws_region
+ access_key = var.aws_access_key
+ secret_key = var.aws_secret_key
+}
+
+resource "random_string" "random_suffix" {
+ length = 8
+ special = false
+ lower = true
+ upper = false
+}
+
+resource "random_password" "cluster_secret" {
+ length = 64
+ special = false
+}
+
+resource "aws_vpc" "aws_vpc" {
+ cidr_block = "10.0.0.0/16"
+
+ tags = {
+ Name = "${var.aws_vpc_name}-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
+resource "aws_internet_gateway" "aws_igw" {
+ vpc_id = aws_vpc.aws_vpc.id
+
+ tags = {
+ Name = "lh_igw-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
+resource "aws_route_table" "aws_public_rt" {
+ depends_on = [
+ aws_internet_gateway.aws_igw,
+ ]
+
+ vpc_id = aws_vpc.aws_vpc.id
+
+ route {
+ cidr_block = "0.0.0.0/0"
+ gateway_id = aws_internet_gateway.aws_igw.id
+ }
+
+ tags = {
+ Name = "lh_aws_public_rt-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
+resource "aws_subnet" "aws_subnet_1" {
+ vpc_id = aws_vpc.aws_vpc.id
+ availability_zone = "us-east-1c"
+ cidr_block = "10.0.1.0/24"
+
+ tags = {
+ Name = "lh_subnet_1-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
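+# second subnet, used by the dedicated storage-network interfaces (eth1)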
+resource "aws_subnet" "aws_subnet_2" {
+ vpc_id = aws_vpc.aws_vpc.id
+ availability_zone = "us-east-1c"
+ cidr_block = "10.0.2.0/24"
+
+ tags = {
+ Name = "lh_subnet_2-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
+resource "aws_route_table_association" "aws_subnet_1_rt_association" {
+ depends_on = [
+ aws_subnet.aws_subnet_1,
+ aws_route_table.aws_public_rt
+ ]
+
+ subnet_id = aws_subnet.aws_subnet_1.id
+ route_table_id = aws_route_table.aws_public_rt.id
+}
+
+resource "aws_route_table_association" "aws_subnet_2_rt_association" {
+ depends_on = [
+ aws_subnet.aws_subnet_2,
+ aws_route_table.aws_public_rt
+ ]
+
+ subnet_id = aws_subnet.aws_subnet_2.id
+ route_table_id = aws_route_table.aws_public_rt.id
+}
+
+resource "aws_security_group" "aws_secgrp" {
+ name = "lh_aws_secgrp"
+ description = "Allow all inbound traffic"
+ vpc_id = aws_vpc.aws_vpc.id
+
+ ingress {
+ description = "Allow SSH"
+ from_port = 22
+ to_port = 22
+ protocol = "tcp"
+ cidr_blocks = ["0.0.0.0/0"]
+ }
+
+ ingress {
+ description = "Allow all ports"
+ from_port = 0
+ to_port = 0
+ protocol = "-1"
+ cidr_blocks = ["0.0.0.0/0"]
+ }
+
+ egress {
+ from_port = 0
+ to_port = 0
+ protocol = "-1"
+ cidr_blocks = ["0.0.0.0/0"]
+ }
+
+ tags = {
+ Name = "lh_aws_secgrp-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
+resource "aws_key_pair" "aws_pair_key" {
+ key_name = format("%s_%s", "aws_key_pair", random_string.random_suffix.id)
+ public_key = file(var.aws_ssh_public_key_file_path)
+}
+
+resource "aws_network_interface" "instance_eth0" {
+ subnet_id = aws_subnet.aws_subnet_1.id
+ security_groups = [aws_security_group.aws_secgrp.id]
+
+ count = var.aws_instance_count
+
+ tags = {
+ Name = "instance_eth0-${count.index}-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
+resource "aws_instance" "aws_instance" {
+ depends_on = [
+ aws_subnet.aws_subnet_1,
+ aws_subnet.aws_subnet_2,
+ aws_network_interface.instance_eth0
+ ]
+
+ ami = data.aws_ami.aws_ami_sles.id
+ instance_type = var.aws_instance_type
+
+ count = var.aws_instance_count
+
+ network_interface {
+ network_interface_id = aws_network_interface.instance_eth0[count.index].id
+ device_index = 0
+ }
+
+ root_block_device {
+ delete_on_termination = true
+ volume_size = var.aws_instance_root_block_device_size
+ }
+
+ key_name = aws_key_pair.aws_pair_key.key_name
+ user_data = count.index == 0 ? data.template_file.provision_k3s_server.rendered : data.template_file.provision_k3s_agent.rendered
+
+ tags = {
+ Name = "${var.aws_instance_name}-${count.index}-${random_string.random_suffix.id}"
+ DoNotDelete = "true"
+ Owner = var.resources_owner
+ }
+}
+
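+# secondary ENI attached as eth1 on each instance for the storage network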
+resource "aws_network_interface" "instance_eth1" {
+ depends_on = [
+ aws_subnet.aws_subnet_1,
+ aws_subnet.aws_subnet_2,
+ aws_instance.aws_instance
+ ]
+
+ subnet_id = aws_subnet.aws_subnet_2.id
+ security_groups = [aws_security_group.aws_secgrp.id]
+
+ count = var.aws_instance_count
+
+ attachment {
+ instance = aws_instance.aws_instance[count.index].id
+ device_index = 1
+ }
+
+ tags = {
+ Name = "instance_eth1-${count.index}-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
+resource "aws_eip" "aws_eip" {
+ vpc = true
+
+ count = var.aws_instance_count
+
+ tags = {
+ Name = "aws_eip-${count.index}-${random_string.random_suffix.id}"
+ Owner = var.resources_owner
+ }
+}
+
+resource "aws_eip_association" "aws_eip_assoc" {
+ depends_on = [
+ aws_instance.aws_instance,
+ aws_eip.aws_eip
+ ]
+
+ count = var.aws_instance_count
+
+ network_interface_id = aws_network_interface.instance_eth0[count.index].id
+ allocation_id = aws_eip.aws_eip[count.index].id
+}
+
+resource "null_resource" "rsync_kubeconfig_file" {
+
+ depends_on = [
+ aws_instance.aws_instance,
+ aws_eip.aws_eip,
+ aws_eip_association.aws_eip_assoc
+ ]
+
+ provisioner "remote-exec" {
+
+ inline = [
+ "cloud-init status --wait",
+ "if [ \"`cloud-init status | grep error`\" ]; then sudo cat /var/log/cloud-init-output.log; fi",
+ "RETRY=0; MAX_RETRY=450; until([ -f /etc/rancher/k3s/k3s.yaml ] && [ `sudo /usr/local/bin/kubectl get node -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == \"Ready\").status' | grep -ci true` -eq ${var.aws_instance_count} ]); do echo \"waiting for k3s cluster nodes to be running\"; sleep 2; if [ $RETRY -eq $MAX_RETRY ]; then break; fi; RETRY=$((RETRY+1)); done"
+ ]
+
+ connection {
+ type = "ssh"
+ user = "ec2-user"
+ host = aws_eip.aws_eip[0].public_ip
+ private_key = file(var.aws_ssh_private_key_file_path)
+ }
+ }
+
+ provisioner "local-exec" {
+ command = "rsync -aPvz --rsync-path=\"sudo rsync\" -e \"ssh -o StrictHostKeyChecking=no -l ec2-user -i ${var.aws_ssh_private_key_file_path}\" ${aws_eip.aws_eip[0].public_ip}:/etc/rancher/k3s/k3s.yaml . && sed -i 's#https://127.0.0.1:6443#https://${aws_eip.aws_eip[0].public_ip}:6443#' k3s.yaml"
+ }
+}
+
+# setup flannel
+resource "null_resource" "cluster_setup_flannel" {
+ count = var.aws_instance_count
+
+ depends_on = [
+ aws_instance.aws_instance,
+ null_resource.rsync_kubeconfig_file
+ ]
+
+ provisioner "remote-exec" {
+
+ inline = [data.template_file.flannel.rendered]
+
+ connection {
+ type = "ssh"
+ user = "ec2-user"
+ host = aws_eip.aws_eip[count.index].public_ip
+ private_key = file(var.aws_ssh_private_key_file_path)
+ }
+ }
+
+}
+
+# setup routes
+resource "null_resource" "cluster_setup_routes" {
+ count = var.aws_instance_count
+
+ depends_on = [
+ aws_instance.aws_instance,
+ null_resource.cluster_setup_flannel
+ ]
+
+ provisioner "remote-exec" {
+
+ inline = [data.template_file.routes.rendered]
+
+ connection {
+ type = "ssh"
+ user = "ec2-user"
+ host = aws_eip.aws_eip[count.index].public_ip
+ private_key = file(var.aws_ssh_private_key_file_path)
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/pipelines/storage_network/terraform/output.tf b/pipelines/storage_network/terraform/output.tf
new file mode 100644
index 0000000000..e5b811e065
--- /dev/null
+++ b/pipelines/storage_network/terraform/output.tf
@@ -0,0 +1,7 @@
+output "network_interface_ids" {
+ depends_on = [
+ aws_network_interface.instance_eth0,
+ aws_network_interface.instance_eth1
+ ]
+ value = join(" ", concat(aws_network_interface.instance_eth0[*].id, aws_network_interface.instance_eth1[*].id))
+}
\ No newline at end of file
diff --git a/pipelines/storage_network/terraform/user-data-scripts/flannel.sh.tpl b/pipelines/storage_network/terraform/user-data-scripts/flannel.sh.tpl
new file mode 100644
index 0000000000..0e28086680
--- /dev/null
+++ b/pipelines/storage_network/terraform/user-data-scripts/flannel.sh.tpl
@@ -0,0 +1,19 @@
+#!/bin/bash
+STORAGE_NETWORK_PREFIX="192.168"
+
+ETH1_IP=`ip a | grep eth1 | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | awk '{print $2}'`
+
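+# pick this node's /24 within the storage network by matching its eth1 address against the node list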
+count=1
+for n in ${N1} ${N2} ${N3}; do
+ [[ $ETH1_IP != $n ]] && ((count=count+1)) && continue
+
+ NET=$count
+ break
+done
+
+cat << EOF | sudo tee -a /run/flannel/multus-subnet-$STORAGE_NETWORK_PREFIX.0.0.env
+FLANNEL_NETWORK=$STORAGE_NETWORK_PREFIX.0.0/16
+FLANNEL_SUBNET=$STORAGE_NETWORK_PREFIX.$NET.0/24
+FLANNEL_MTU=${mtu}
+FLANNEL_IPMASQ=true
+EOF
\ No newline at end of file
diff --git a/pipelines/storage_network/terraform/user-data-scripts/provision_k3s_agent.sh.tpl b/pipelines/storage_network/terraform/user-data-scripts/provision_k3s_agent.sh.tpl
new file mode 100755
index 0000000000..35b22c900b
--- /dev/null
+++ b/pipelines/storage_network/terraform/user-data-scripts/provision_k3s_agent.sh.tpl
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+set -e
+
+sudo systemctl restart guestregister # Sometimes registration fails on first boot.
+sudo zypper ref
+sudo zypper install -y -t pattern devel_basis
+sudo zypper install -y open-iscsi nfs-client
+sudo systemctl -q enable iscsid
+sudo systemctl start iscsid
+
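+# format and mount the extra data disk (if any) for Longhorn data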
+if [ -b "/dev/nvme1n1" ]; then
+ mkfs.ext4 -E nodiscard /dev/nvme1n1
+ mkdir /mnt/sda1
+ mount /dev/nvme1n1 /mnt/sda1
+
+ mkdir /mnt/sda1/local
+ mkdir /opt/local-path-provisioner
+ mount --bind /mnt/sda1/local /opt/local-path-provisioner
+
+ mkdir /mnt/sda1/longhorn
+ mkdir /var/lib/longhorn
+ mount --bind /mnt/sda1/longhorn /var/lib/longhorn
+elif [ -b "/dev/xvdh" ]; then
+ mkfs.ext4 -E nodiscard /dev/xvdh
+ mkdir /var/lib/longhorn
+ mount /dev/xvdh /var/lib/longhorn
+fi
+
+# TODO: It looks like "set -e" will break the intended functionality of the remaining code. Consider a refactor.
+set +e
+
+until (curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="agent --token ${k3s_cluster_secret}" K3S_URL="${k3s_server_url}" INSTALL_K3S_VERSION="${k3s_version}" sh -); do
+ echo 'k3s agent did not install correctly'
+ sleep 2
+done
+
+if [[ "${thick_plugin}" == true ]]; then
+ ln -s /var/lib/rancher/k3s/agent/etc/cni/net.d /etc/cni
+ ln -s /var/lib/rancher/k3s/data/current/bin /opt/cni
+fi
+
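+# install the ipvlan CNI plugin used by the storage-network NetworkAttachmentDefinition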
+curl -OL https://github.com/containernetworking/plugins/releases/download/v1.3.0/cni-plugins-linux-amd64-v1.3.0.tgz
+tar -zxvf cni-plugins-linux-amd64-v1.3.0.tgz
+cp ipvlan /var/lib/rancher/k3s/data/current/bin/
\ No newline at end of file
diff --git a/pipelines/storage_network/terraform/user-data-scripts/provision_k3s_server.sh.tpl b/pipelines/storage_network/terraform/user-data-scripts/provision_k3s_server.sh.tpl
new file mode 100755
index 0000000000..f3533c959d
--- /dev/null
+++ b/pipelines/storage_network/terraform/user-data-scripts/provision_k3s_server.sh.tpl
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+set -e
+
+sudo systemctl restart guestregister # Sometimes registration fails on first boot.
+sudo zypper ref
+sudo zypper install -y -t pattern devel_basis
+sudo zypper install -y open-iscsi nfs-client jq
+sudo systemctl -q enable iscsid
+sudo systemctl start iscsid
+
+if [ -b "/dev/nvme1n1" ]; then
+ mkfs.ext4 -E nodiscard /dev/nvme1n1
+ mkdir /mnt/sda1
+ mount /dev/nvme1n1 /mnt/sda1
+
+ mkdir /mnt/sda1/local
+ mkdir /opt/local-path-provisioner
+ mount --bind /mnt/sda1/local /opt/local-path-provisioner
+
+ mkdir /mnt/sda1/longhorn
+ mkdir /var/lib/longhorn
+ mount --bind /mnt/sda1/longhorn /var/lib/longhorn
+elif [ -b "/dev/xvdh" ]; then
+ mkfs.ext4 -E nodiscard /dev/xvdh
+ mkdir /var/lib/longhorn
+ mount /dev/xvdh /var/lib/longhorn
+fi
+
+until (curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server --tls-san ${k3s_server_public_ip} --write-kubeconfig-mode 644 --token ${k3s_cluster_secret}" INSTALL_K3S_VERSION="${k3s_version}" sh -); do
+ echo 'k3s server did not install correctly'
+ sleep 2
+done
+
+RETRY=0
+MAX_RETRY=180
+until (kubectl get pods -A | grep 'Running'); do
+ echo 'Waiting for k3s startup'
+ sleep 5
+ if [ $RETRY -eq $MAX_RETRY ]; then
+ break
+ fi
+ RETRY=$((RETRY+1))
+done
+
+if [[ "${thick_plugin}" == true ]]; then
+ ln -s /var/lib/rancher/k3s/agent/etc/cni/net.d /etc/cni
+ ln -s /var/lib/rancher/k3s/data/current/bin /opt/cni
+fi
+
+curl -OL https://github.com/containernetworking/plugins/releases/download/v1.3.0/cni-plugins-linux-amd64-v1.3.0.tgz
+tar -zxvf cni-plugins-linux-amd64-v1.3.0.tgz
+cp ipvlan /var/lib/rancher/k3s/data/current/bin/
\ No newline at end of file
diff --git a/pipelines/storage_network/terraform/user-data-scripts/routes.sh.tpl b/pipelines/storage_network/terraform/user-data-scripts/routes.sh.tpl
new file mode 100644
index 0000000000..f6585ede8e
--- /dev/null
+++ b/pipelines/storage_network/terraform/user-data-scripts/routes.sh.tpl
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+STORAGE_NETWORK_PREFIX="192.168"
+ACTION="add"
+
+ETH1_IP=`ip a | grep eth1 | grep -Eo 'inet (addr:)?([0-9]*\.){3}[0-9]*' | awk '{print $2}'`
+
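+# add a static route to each of the other nodes' storage-network /24s via their eth1 address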
+if [[ $ETH1_IP != ${N1} ]]; then
+ sudo ip r $ACTION $STORAGE_NETWORK_PREFIX.1.0/24 via ${N1} dev eth1
+fi
+
+if [[ $ETH1_IP != ${N2} ]]; then
+ sudo ip r $ACTION $STORAGE_NETWORK_PREFIX.2.0/24 via ${N2} dev eth1
+fi
+
+if [[ $ETH1_IP != ${N3} ]]; then
+ sudo ip r $ACTION $STORAGE_NETWORK_PREFIX.3.0/24 via ${N3} dev eth1
+fi
\ No newline at end of file
diff --git a/pipelines/storage_network/terraform/variables.tf b/pipelines/storage_network/terraform/variables.tf
new file mode 100644
index 0000000000..3d807d04dc
--- /dev/null
+++ b/pipelines/storage_network/terraform/variables.tf
@@ -0,0 +1,113 @@
+variable "aws_access_key" {
+ type = string
+ description = "AWS ACCESS_KEY"
+}
+
+variable "aws_secret_key" {
+ type = string
+ description = "AWS SECRET_KEY"
+}
+
+variable "aws_region" {
+ type = string
+ default = "us-east-1"
+}
+
+variable "aws_availability_zone" {
+ type = string
+ default = "us-east-1a"
+}
+
+variable "aws_vpc_name" {
+ type = string
+ default = "vpc-lh-storage-network-tests"
+}
+
+variable "arch" {
+ type = string
+ description = "available values (amd64, arm64)"
+ default = "amd64"
+}
+
+variable "os_distro_version" {
+ type = string
+ default = "15-sp5"
+}
+
+variable "aws_ami_sles_account_number" {
+ type = string
+ default = "amazon"
+}
+
+variable "aws_instance_count" {
+ type = number
+ default = 3
+}
+
+variable "aws_instance_type" {
+ type = string
+ description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ default = "t2.xlarge"
+}
+
+variable "aws_ssh_public_key_file_path" {
+ type = string
+ default = "~/.ssh/id_rsa.pub"
+}
+
+variable "aws_ssh_private_key_file_path" {
+ type = string
+ default = "~/.ssh/id_rsa"
+}
+
+variable "aws_instance_name" {
+ type = string
+ default = "lh-storage-network-tests"
+}
+
+variable "aws_instance_root_block_device_size" {
+ type = number
+ default = 40
+}
+
+variable "k8s_distro_name" {
+ type = string
+ default = "k3s"
+ description = "kubernetes distro version to install [rke, k3s, rke2] (default: k3s)"
+}
+
+variable "k8s_distro_version" {
+ type = string
+ default = "v1.27.1+k3s1"
+ description = <<-EOT
+ kubernetes version that will be deployed
+ rke: (default: v1.22.5-rancher1-1)
+ k3s: (default: v1.27.1+k3s1)
+ rke2: (default: v1.27.2+rke2r1)
+ EOT
+}
+
+variable "resources_owner" {
+ type = string
+ default = "longhorn-infra"
+}
+
+variable "cis_hardening" {
+ type = bool
+ default = false
+}
+
+variable "mtu" {
+ type = string
+ default = "8951"
+}
+
+variable "multus_version" {
+ type = string
+ default = "v4.0.2"
+}
+
+variable "thick_plugin" {
+ type = bool
+ default = true
+}
\ No newline at end of file
diff --git a/pipelines/utilities/argocd.sh b/pipelines/utilities/argocd.sh
index cb6d6c0e8d..fa56809e31 100755
--- a/pipelines/utilities/argocd.sh
+++ b/pipelines/utilities/argocd.sh
@@ -48,8 +48,8 @@ spec:
targetRevision: ${REVISION}
helm:
values: |
- helmPreUpgradeCheckerJob:
- enabled: false
+ preUpgradeChecker:
+ jobEnabled: false
destination:
server: https://kubernetes.default.svc
namespace: ${LONGHORN_NAMESPACE}
diff --git a/pipelines/utilities/install_backupstores.sh b/pipelines/utilities/install_backupstores.sh
index 9ad06e8938..7f043e8a8d 100755
--- a/pipelines/utilities/install_backupstores.sh
+++ b/pipelines/utilities/install_backupstores.sh
@@ -1,6 +1,6 @@
install_backupstores(){
- MINIO_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn-tests/master/manager/integration/deploy/backupstores/minio-backupstore.yaml"
- NFS_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn-tests/master/manager/integration/deploy/backupstores/nfs-backupstore.yaml"
+ MINIO_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/minio-backupstore.yaml"
+ NFS_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/nfs-backupstore.yaml"
kubectl create -f ${MINIO_BACKUPSTORE_URL} \
-f ${NFS_BACKUPSTORE_URL}
}
\ No newline at end of file
diff --git a/pipelines/utilities/storage_network.sh b/pipelines/utilities/storage_network.sh
new file mode 100755
index 0000000000..c9e36b481d
--- /dev/null
+++ b/pipelines/utilities/storage_network.sh
@@ -0,0 +1,52 @@
+deploy_multus_thin_plugin_daemonset(){
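+ # point the upstream Multus manifest at the k3s CNI config and binary paths before applying it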
+ curl -O "https://raw.githubusercontent.com/k8snetworkplumbingwg/multus-cni/${TF_VAR_multus_version}/deployments/multus-daemonset.yml"
+ sed -Ei 's@"kubeconfig":.+@"kubeconfig": "/var/lib/rancher/k3s/agent/etc/cni/net.d/multus.d/multus.kubeconfig"@g' multus-daemonset.yml
+ yq e -i 'select(.kind == "DaemonSet" and .metadata.name == "kube-multus-ds").spec.template.spec.containers[0].args += "--multus-kubeconfig-file-host=/var/lib/rancher/k3s/agent/etc/cni/net.d/multus.d/multus.kubeconfig"' multus-daemonset.yml
+ yq e -i "select(.kind == \"DaemonSet\" and .metadata.name == \"kube-multus-ds\").spec.template.spec.containers[0].image=\"ghcr.io/k8snetworkplumbingwg/multus-cni:${TF_VAR_multus_version}\"" multus-daemonset.yml
+ yq e -i "select(.kind == \"DaemonSet\" and .metadata.name == \"kube-multus-ds\").spec.template.spec.initContainers[0].image=\"ghcr.io/k8snetworkplumbingwg/multus-cni:${TF_VAR_multus_version}\"" multus-daemonset.yml
+ sed -Ei 's@path: /etc/cni/net.d@path: /var/lib/rancher/k3s/agent/etc/cni/net.d@g' multus-daemonset.yml
+ sed -Ei 's@path: /opt/cni/bin@path: /var/lib/rancher/k3s/data/current/bin@g' multus-daemonset.yml
+ kubectl apply -f multus-daemonset.yml
+}
+
+deploy_multus_thick_plugin_daemonset(){
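+ # mount the cnibin volume into the Multus container and switch to the -thick image variant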
+ curl -O "https://raw.githubusercontent.com/k8snetworkplumbingwg/multus-cni/${TF_VAR_multus_version}/deployments/multus-daemonset-thick.yml"
+ yq e -i 'select(.kind == "DaemonSet" and .metadata.name == "kube-multus-ds").spec.template.spec.containers[0].volumeMounts += {"name": "cnibin", "mountPath": "/opt/cni/bin"}' multus-daemonset-thick.yml
+ yq e -i "select(.kind == \"DaemonSet\" and .metadata.name == \"kube-multus-ds\").spec.template.spec.containers[0].image=\"ghcr.io/k8snetworkplumbingwg/multus-cni:${TF_VAR_multus_version}-thick\"" multus-daemonset-thick.yml
+ yq e -i "select(.kind == \"DaemonSet\" and .metadata.name == \"kube-multus-ds\").spec.template.spec.initContainers[0].image=\"ghcr.io/k8snetworkplumbingwg/multus-cni:${TF_VAR_multus_version}-thick\"" multus-daemonset-thick.yml
+ kubectl apply -f multus-daemonset-thick.yml
+}
+
+deploy_network_attachment_definition(){
+cat << EOF > nad-192-168-0-0.yaml
+apiVersion: "k8s.cni.cncf.io/v1"
+kind: NetworkAttachmentDefinition
+metadata:
+ name: demo-192-168-0-0
+ namespace: kube-system
+spec:
+ config: '{
+ "cniVersion": "0.3.1",
+ "type": "flannel",
+ "subnetFile": "/run/flannel/multus-subnet-192.168.0.0.env",
+ "dataDir": "/var/lib/cni/multus-subnet-192.168.0.0",
+ "delegate": {
+ "type": "ipvlan",
+ "master": "eth1",
+ "mode": "l3",
+ "capabilities": {
+ "ips": true
+ }
+ },
+ "kubernetes": {
+ "kubeconfig": "/etc/cni/net.d/multus.d/multus.kubeconfig"
+ }
+ }'
+EOF
+kubectl apply -f nad-192-168-0-0.yaml
+}
+
+
+update_storage_network_setting(){
+ kubectl -n longhorn-system patch -p '{"value": "kube-system/demo-192-168-0-0"}' --type=merge setting.longhorn.io/storage-network
+}
\ No newline at end of file
diff --git a/renovate.json b/renovate.json
new file mode 100644
index 0000000000..c297cdcff1
--- /dev/null
+++ b/renovate.json
@@ -0,0 +1,3 @@
+{
+ "extends": ["github>longhorn/release:renovate-default"]
+}
diff --git a/scalability_test/script/monitor.py b/scalability_test/script/monitor.py
index de03533460..cc195d7b2a 100644
--- a/scalability_test/script/monitor.py
+++ b/scalability_test/script/monitor.py
@@ -50,7 +50,7 @@ def update_data(self):
node_list = []
try:
pod_list = self.core_api_v1.list_namespaced_pod("default")
- # TODO: change to catch any exeption and count the number of api exceptions
+ # TODO: change to catch any exception and count the number of api exceptions
except client.ApiException as e:
print("Exception when calling CoreV1Api->list_namespaced_pod: %s\n" % e)
print("Skipping this update")
@@ -58,7 +58,7 @@ def update_data(self):
try:
node_list = self.custom_objects_api.list_cluster_custom_object("metrics.k8s.io", "v1beta1", "nodes")
- # TODO: change to catch any exeption and count the number of api exceptions
+ # TODO: change to catch any exception and count the number of api exceptions
except client.ApiException as e:
print("Exception when calling custom_objects_api->list_cluster_custom_object: %s\n" % e)
print("Will set node metrics to 0")
@@ -76,12 +76,12 @@ def update_data(self):
if pod_with_valid_starting_time_count < running_pod_count and MAX_POD_STARTING_TIME_POINT not in self.annotating_points:
self.annotating_points[MAX_POD_STARTING_TIME_POINT] = {
"xy": (diff.total_seconds(),
- pod_with_valid_starting_time_count), "desciption": "(1) "+str(pod_with_valid_starting_time_count)+" pods",
+ pod_with_valid_starting_time_count), "description": "(1) "+str(pod_with_valid_starting_time_count)+" pods",
"color": "tab:orange"}
if crashing_pod_count > self.max_pod_crashing_count and MAX_POD_CRASHING_POINT not in self.annotating_points:
self.annotating_points[MAX_POD_CRASHING_POINT] = {
"xy": (diff.total_seconds(),
- pod_with_valid_starting_time_count), "desciption": "(2) "+str(pod_with_valid_starting_time_count)+" pods",
+ pod_with_valid_starting_time_count), "description": "(2) "+str(pod_with_valid_starting_time_count)+" pods",
"color": "tab:red"}
for node in node_list['items']:
@@ -101,7 +101,7 @@ def update_data(self):
self.cpu_metrics[node_name] = cpu_metric
self.ram_metrics[node_name] = ram_metric
- # update node metrics with value 0 if the infomation is missing in the above update
+ # update node metrics with value 0 if the information is missing in the above update
for metric in self.cpu_metrics.values():
if len(metric) < len(self.time_diffs):
cpu_metric.extend([0]*(len(self.time_diffs)-len(metric)))
@@ -192,10 +192,10 @@ def draw(self):
ax1, ax2, ax3 = self.axes
ax1.plot(self.time_diffs, self.running_pod_metric)
- ax1.set_ylabel('Number of running pods')
+ ax1.set_ylabel('Number of running pods')
for point in self.annotating_points.values():
- ax1.annotate(point["desciption"],
+ ax1.annotate(point["description"],
xy= point["xy"], xycoords='data',
xytext=(0, 20), textcoords='offset points',
arrowprops=dict(facecolor=point["color"], shrink=0.05),
diff --git a/scalability_test/script/scale-test.py b/scalability_test/script/scale-test.py
index d3164f7442..e343617ffd 100644
--- a/scalability_test/script/scale-test.py
+++ b/scalability_test/script/scale-test.py
@@ -33,7 +33,7 @@ def get_node_capacities():
# hugepages-2Mi: '0'
# memory: 32412804Ki
# pods: '110'
- cpu = int(i.status.capacity["cpu"])*1000**3 # conver to nano cpu
+ cpu = int(i.status.capacity["cpu"])*1000**3 # convert to nano cpu
ram = int(i.status.capacity["memory"][:-2])
node_capacities[i.metadata.name] = {"cpu": cpu, "ram": ram}
diff --git a/scalability_test/terraform/variables.tf b/scalability_test/terraform/variables.tf
index 388235c0ac..9defd21ea1 100644
--- a/scalability_test/terraform/variables.tf
+++ b/scalability_test/terraform/variables.tf
@@ -45,12 +45,12 @@ variable "lh_aws_instance_name_controlplane" {
variable "lh_aws_instance_type_controlplane" {
type = string
- default = "t2.xlarge"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_type_worker" {
type = string
- default = "t2.xlarge"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_root_block_device_size_controlplane" {
diff --git a/secscan/terraform/aws/main.tf b/secscan/terraform/aws/main.tf
index 81fed9e5a1..52bd9be406 100644
--- a/secscan/terraform/aws/main.tf
+++ b/secscan/terraform/aws/main.tf
@@ -90,7 +90,7 @@ resource "aws_route_table" "lh-secscan_aws_public_rt" {
}
}
-# Assciate public subnet to public route table
+# Associate public subnet to public route table
resource "aws_route_table_association" "lh-secscan_aws_public_subnet_rt_association" {
depends_on = [
aws_subnet.lh-secscan_aws_public_subnet,
diff --git a/test_framework/Jenkinsfile b/test_framework/Jenkinsfile
index 3f2c7bd508..3b1de30efa 100644
--- a/test_framework/Jenkinsfile
+++ b/test_framework/Jenkinsfile
@@ -23,6 +23,9 @@ def REGISTRY_URL
def REGISTRY_USERNAME
def REGISTRY_PASSWORD
+// parameter for mTls
+def ENABLE_MTLS = params.ENABLE_MTLS ? params.ENABLE_MTLS : false
+
// parameter for hdd test
def USE_HDD = params.USE_HDD ? params.USE_HDD : false
@@ -115,6 +118,7 @@ node {
--env PYTEST_CUSTOM_OPTIONS="${PYTEST_CUSTOM_OPTIONS}" \
--env BACKUP_STORE_TYPE="${BACKUP_STORE_TYPE}" \
--env TF_VAR_use_hdd=${USE_HDD} \
+ --env TF_VAR_enable_mtls=${ENABLE_MTLS} \
--env TF_VAR_arch=${ARCH} \
--env TF_VAR_k8s_distro_name=${K8S_DISTRO_NAME} \
--env TF_VAR_k8s_distro_version=${K8S_DISTRO_VERSION} \
diff --git a/test_framework/scripts/longhorn-setup.sh b/test_framework/scripts/longhorn-setup.sh
index 691dedabf3..99f2dcd29c 100755
--- a/test_framework/scripts/longhorn-setup.sh
+++ b/test_framework/scripts/longhorn-setup.sh
@@ -54,6 +54,11 @@ install_cluster_autoscaler(){
}
+enable_mtls(){
+ kubectl apply -f "${TF_VAR_tf_workspace}/templates/longhorn-grpc-tls.yml" -n ${LONGHORN_NAMESPACE}
+}
+
+
install_csi_snapshotter_crds(){
CSI_SNAPSHOTTER_REPO_URL="https://github.com/kubernetes-csi/external-snapshotter.git"
CSI_SNAPSHOTTER_REPO_DIR="${TMPDIR}/k8s-csi-external-snapshotter"
@@ -266,8 +271,8 @@ create_longhorn_namespace(){
install_backupstores(){
- MINIO_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn-tests/master/manager/integration/deploy/backupstores/minio-backupstore.yaml"
- NFS_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn-tests/master/manager/integration/deploy/backupstores/nfs-backupstore.yaml"
+ MINIO_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/minio-backupstore.yaml"
+ NFS_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/nfs-backupstore.yaml"
kubectl create -f ${MINIO_BACKUPSTORE_URL} \
-f ${NFS_BACKUPSTORE_URL}
}
@@ -323,6 +328,7 @@ run_longhorn_upgrade_test(){
yq e -i 'select(.spec.containers[0] != null).spec.containers[0].env[4].value="'${LONGHORN_UPGRADE_TYPE}'"' ${LONGHORN_UPGRADE_TESTS_MANIFEST_FILE_PATH}
+ RESOURCE_SUFFIX=$(terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw resource_suffix)
yq e -i 'select(.spec.containers[0] != null).spec.containers[0].env[7].value="'${RESOURCE_SUFFIX}'"' ${LONGHORN_UPGRADE_TESTS_MANIFEST_FILE_PATH}
kubectl apply -f ${LONGHORN_UPGRADE_TESTS_MANIFEST_FILE_PATH}
@@ -340,6 +346,9 @@ run_longhorn_upgrade_test(){
# get upgrade test junit xml report
kubectl cp ${LONGHORN_UPGRADE_TEST_POD_NAME}:${LONGHORN_JUNIT_REPORT_PATH} "${TF_VAR_tf_workspace}/${LONGHORN_UPGRADE_TEST_POD_NAME}-junit-report.xml" -c longhorn-test-report
+
+ # delete upgrade test pod
+ kubectl delete -f ${LONGHORN_UPGRADE_TESTS_MANIFEST_FILE_PATH}
}
@@ -389,6 +398,9 @@ run_longhorn_tests(){
LONGHORN_TEST_POD_NAME=`yq e 'select(.spec.containers[0] != null).metadata.name' ${LONGHORN_TESTS_MANIFEST_FILE_PATH}`
+ RESOURCE_SUFFIX=$(terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw resource_suffix)
+ yq e -i 'select(.spec.containers[0] != null).spec.containers[0].env[7].value="'${RESOURCE_SUFFIX}'"' ${LONGHORN_TESTS_MANIFEST_FILE_PATH}
+
kubectl apply -f ${LONGHORN_TESTS_MANIFEST_FILE_PATH}
local RETRY_COUNTS=60
@@ -434,7 +446,9 @@ main(){
install_backupstores
fi
install_csi_snapshotter_crds
-
+ if [[ "${TF_VAR_enable_mtls}" == true ]]; then
+ enable_mtls
+ fi
if [[ "${AIR_GAP_INSTALLATION}" == true ]]; then
if [[ "${LONGHORN_INSTALL_METHOD}" == "manifest-file" ]]; then
create_registry_secret
diff --git a/test_framework/scripts/terraform-setup.sh b/test_framework/scripts/terraform-setup.sh
index 3d28f7c2c5..6c7beee307 100755
--- a/test_framework/scripts/terraform-setup.sh
+++ b/test_framework/scripts/terraform-setup.sh
@@ -32,8 +32,6 @@ terraform_setup(){
if [[ "${TF_VAR_create_load_balancer}" == true ]]; then
terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw load_balancer_url > ${TF_VAR_tf_workspace}/load_balancer_url
fi
-
- export RESOURCE_SUFFIX=$(terraform -chdir=${TF_VAR_tf_workspace}/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw resource_suffix)
}
diff --git a/test_framework/templates/longhorn-grpc-tls.yml b/test_framework/templates/longhorn-grpc-tls.yml
new file mode 100644
index 0000000000..cf612e29f9
--- /dev/null
+++ b/test_framework/templates/longhorn-grpc-tls.yml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Secret
+metadata:
+ name: longhorn-grpc-tls
+ namespace: longhorn-system
+type: kubernetes.io/tls
+data:
+ ca.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUREekNDQWZlZ0F3SUJBZ0lVTHN6YnlpMVN6dSs5UlJmZkl3TDJlNFkzSytzd0RRWUpLb1pJaHZjTkFRRUwKQlFBd0ZqRVVNQklHQTFVRUF3d0xiRzl1WjJodmNtNHRZMkV3SUJjTk1qUXdNVEkyTVRjMU5EUTVXaGdQTXpBeQpNekExTWpreE56VTBORGxhTUJZeEZEQVNCZ05WQkFNTUMyeHZibWRvYjNKdUxXTmhNSUlCSWpBTkJna3Foa2lHCjl3MEJBUUVGQUFPQ0FROEFNSUlCQ2dLQ0FRRUFucGFFbG4xRjFhT2hzekZTcVJ0TUliNWdLTkNZSUhzRml4WGEKZTJiZ2hKRThQdUZ0bUdzekhlQVpBeWNueHM1L1J3cU1ieVBPbDFuL3FlU2RJMUg5QnMvNUQwZk1tUEFQMm94aQozZ3B2cXhRbzZwdE5PMGwxUnVBcmZmKytQKzNqd2RNdWpDMDdWVW9HZUpsbWoxNUpLbTZRQWJ1cURnejEyaDNjCmYvUzg5bWJWeXowZXMwMktTQnRqVm5RRTBlSVdGakg1SnVyVEU0bEJpT1hWbktHSUZnQXYzZ3pxeXZsdUo3VVgKUml5TC9UaVp1VS9aSnFtQlJpQyttWGpiUndlVTRvMW1mNGlrN1dPQXIzY2FNOUUzQVgvaDlMbzhYTXhDM1hqVAphdkZta2NnWXZhSlhicWhqak9VWVhlNmo1UmN6dnNUVk8wOXBsL3RlTld3Mkx4ZmFsUUlEQVFBQm8xTXdVVEFkCkJnTlZIUTRFRmdRVTNybmVhNVBoVFVzMVJCSG1ZT0lSdmdpR0p1Z3dId1lEVlIwakJCZ3dGb0FVM3JuZWE1UGgKVFVzMVJCSG1ZT0lSdmdpR0p1Z3dEd1lEVlIwVEFRSC9CQVV3QXdFQi96QU5CZ2txaGtpRzl3MEJBUXNGQUFPQwpBUUVBR3F4emxpdHVrVlFhUFZENmFZSC9xQ0IyRnh5WW1WU3pBVGJncGlwU3JrZHFBWmJRVUNQdXUvSnVMdGlSCjBuYXFicHkzZ2J6K0IzY1VPZlJjQWpxd2VQclpRMUVOTVF4TUZGZEJ2MG51Tko2TllFWWlKUEVhSFlhdE1IZlgKaXVndTZwcXNmZW56dlROMG1MeGx0eDBycVdXNnFiT1k4OGdVKzA1bXl2c0dTUjdWUldsQ2Yyc1FnQmtteWJHbgozSTBuaFFMVHd1N2Y2VkUrd21GeEhlUDl3OWN1Mk8wbFdMV1ZHTno1ZExybGdDcCsrdWttZDlMOFlPbW1tT3lVCkhVVm5rOGY5Ykk2NG9ENjNNS0M2UU83Kzk0ZnRETFBSRFZxVHBReE5pV25QOWl2M0lIVlQvUS93TkN5OVNYQUIKRzJ3Qm1nLzJ0eFY0S09HSHRCamxlb1BxcUE9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==
+ tls.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUZBekNDQSt1Z0F3SUJBZ0lCQVRBTkJna3Foa2lHOXcwQkFRc0ZBREFXTVJRd0VnWURWUVFEREF0c2IyNW4KYUc5eWJpMWpZVEFnRncweU5EQXhNall4TnpVNU16aGFHQTh6TURJek1EVXlPVEUzTlRrek9Gb3dHekVaTUJjRwpBMVVFQXd3UWJHOXVaMmh2Y200dFltRmphMlZ1WkRDQ0FTSXdEUVlKS29aSWh2Y05BUUVCQlFBRGdnRVBBRENDCkFRb0NnZ0VCQUpOcXpVVnlHUG8wK3ZoNHl1d0lHYkkydXZNRm4wc3ZaMmlBZjlGRmdHRmFTU25PbnAycElWajkKcVl0UjluM1JwT0lDdUhvS1hucmQ5OWJxZlpTRXBwam1tTTIvYXFMcWZPVkRLWkQ2eURkZ2FhQ3U3NWo4UHBoQgpKOGFHKzlUMGJzaEhHbDRRamZHb0wrK3ZtemxQdk9vWGcwMW5uN29IRjVWcmtjNmNMRm1qazRGM0J4Z0lGL25aCmNiVVRlMFV0anBMazRvV1RHNll6aUVzbTY0cEJHWkc0TzZidkZpWnZzeFlqSy83RFVHUEdHOS9GTUw2SC9RNXEKQ3NKMjNsbU5MdnJtOUNCb3pUTWcvbUpPcDVyOVVkdDdHbGExM1BEcG0rUEpwMVpreENmdlZOUzd2ZWtmUXM4ZAovbXlkQ2xRLzQ4RUZHTm0vVkluM1NXeUhZOUhXTEowQ0F3RUFBYU9DQWxNd2dnSlBNSUlDQ3dZRFZSMFJCSUlDCkFqQ0NBZjZDRUd4dmJtZG9iM0p1TFdKaFkydGxibVNDSUd4dmJtZG9iM0p1TFdKaFkydGxibVF1Ykc5dVoyaHYKY200dGMzbHpkR1Z0Z2lSc2IyNW5hRzl5YmkxaVlXTnJaVzVrTG14dmJtZG9iM0p1TFhONWMzUmxiUzV6ZG1PQwpFV3h2Ym1kb2IzSnVMV1p5YjI1MFpXNWtnaUZzYjI1bmFHOXliaTFtY205dWRHVnVaQzVzYjI1bmFHOXliaTF6CmVYTjBaVzJDSld4dmJtZG9iM0p1TFdaeWIyNTBaVzVrTG14dmJtZG9iM0p1TFhONWMzUmxiUzV6ZG1PQ0YyeHYKYm1kb2IzSnVMV1Z1WjJsdVpTMXRZVzVoWjJWeWdpZHNiMjVuYUc5eWJpMWxibWRwYm1VdGJXRnVZV2RsY2k1cwpiMjVuYUc5eWJpMXplWE4wWlcyQ0syeHZibWRvYjNKdUxXVnVaMmx1WlMxdFlXNWhaMlZ5TG14dmJtZG9iM0p1CkxYTjVjM1JsYlM1emRtT0NHR3h2Ym1kb2IzSnVMWEpsY0d4cFkyRXRiV0Z1WVdkbGNvSW9iRzl1WjJodmNtNHQKY21Wd2JHbGpZUzF0WVc1aFoyVnlMbXh2Ym1kb2IzSnVMWE41YzNSbGJZSXNiRzl1WjJodmNtNHRjbVZ3YkdsagpZUzF0WVc1aFoyVnlMbXh2Ym1kb2IzSnVMWE41YzNSbGJTNXpkbU9DREd4dmJtZG9iM0p1TFdOemFZSWNiRzl1CloyaHZjbTR0WTNOcExteHZibWRvYjNKdUxYTjVjM1JsYllJZ2JHOXVaMmh2Y200dFkzTnBMbXh2Ym1kb2IzSnUKTFhONWMzUmxiUzV6ZG1PQ0VHeHZibWRvYjNKdUxXSmhZMnRsYm1TSEJIOEFBQUV3SFFZRFZSME9CQllFRklkdwpxZlQ5WmxUcVQrYkk5QnhuYnJtS3V1R1BNQjhHQTFVZEl3UVlNQmFBRk42NTNtdVQ0VTFMTlVRUjVtRGlFYjRJCmhpYm9NQTBHQ1NxR1NJYjNEUUVCQ3dVQUE0SUJBUUJ5UFdBTzlZbjI3Ym84QmgwbGVKWGNieUpDWjV1aGczREUKdzZuRU9rT3ZndCtkUXNYSDdqL0F1K3F0V0I1b0owY01aOVJjUkhEczZ4ZVp3S2Q3c1FxZE92dVJGUUZ3SW9tdgpDTGd4L1F6TzJucDlQZnNGV253ODNILzM5N3pyNnpSd2thWXRSYlZISGNSbGd4c1orLzhjc2FVZVhXdEZvQWdkCnNMckpWR2IwTWdkc0s4RlJFa2JpUWJLZDd6YXg0RDdzQVFWaUVYMmw2NUpBOG5WcUx1U2ZsWENZNDZGUWs4RXEKT3hWdGFWeE00bS9hWW1tQkxOVklrakMvVVZzL1NadGxrRFNOQjFqaFlkVWkralZvYlZFZURNS0Jhakl1bzAxUwpVWDZXUCt2dEFWZEVVb1Nqc0dqZzRMTVlNWGhpUDlRMnZlK1dDOExCeGZBaHZIRUUzaGo3Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
+ tls.key: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2Z0lCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktnd2dnU2tBZ0VBQW9JQkFRQ1RhczFGY2hqNk5QcjQKZU1yc0NCbXlOcnJ6Qlo5TEwyZG9nSC9SUllCaFdra3B6cDZkcVNGWS9hbUxVZlo5MGFUaUFyaDZDbDU2M2ZmVwo2bjJVaEthWTVwak52MnFpNm56bFF5bVErc2czWUdtZ3J1K1kvRDZZUVNmR2h2dlU5RzdJUnhwZUVJM3hxQy92CnI1czVUN3pxRjROTlo1KzZCeGVWYTVIT25DeFpvNU9CZHdjWUNCZjUyWEcxRTN0RkxZNlM1T0tGa3h1bU00aEwKSnV1S1FSbVJ1RHVtN3hZbWI3TVdJeXYrdzFCanhodmZ4VEMraC8wT2FnckNkdDVaalM3NjV2UWdhTTB6SVA1aQpUcWVhL1ZIYmV4cFd0ZHp3Nlp2anlhZFdaTVFuNzFUVXU3M3BIMExQSGY1c25RcFVQK1BCQlJqWnYxU0o5MGxzCmgyUFIxaXlkQWdNQkFBRUNnZ0VBUjVYTzdXQ3RHVjg5MmdmS1Bsam1wWUJuUXhqaFZDREdYZHc4ZFZLRE40TWcKMFNEcExGVlRnTVBaSDNMak9CRGtPTzRxUi9VZUVSc1Z2WExzSFlGVzV4dmZhdFgvZ2ZKTlNRVld1M1RVWWZPNwpCMUM3djdZSjdXU0NYS2p5eEdRWUljQkpZUkUzNUhnUUl4dkt6RWRZelBJekRCVDhYdGtQempySXVLUms4dmU3CnVNNkY0TE9tNEhtL0xIWlZteVNpNGhxQkhtSWEzS1diVEhGRGk5ODBqZm0vQjVORWNzV0sxSk96TW1DeS9lV0gKSU9jK0p4Nmk5dFk3YTliQ3ladlBzVFFOazV3dXlTaUQvMFloTVhBalBUVGNnRDlYL2xSRGtKRjVzejd5UXk4Ngpyemw0UU9QMXpSWG04Ykt6WUxCcFpxc2M1em4wcEdrTXJzd2ZXYmxXbndLQmdRREZhQWZQWXExRVdpMmd4WFE2ClFHZkRWQk1UK0pNSGIyZE4wL3k2NkVzS2huc2dEN2tFOVFqdm4vSnVCd3haRXZveDhPcHhzdFU0UjM2YmNHYnQKYUUyOTFyU3BDckpwK1R3OVVmamg0SlB3c1R0bjZvZXNjaVZHcDZGMzVlaFZTQlNnaVJ2L2hEdXhQaThwVWFRagpGS1FDbFhFTkliU3MwNy9oNFdYdzFjVmNYd0tCZ1FDL0xGVUdSSGl4bnNYelJsRkhITHZ1Znk1eSswSmNTcFVnCmFncFN3MFFNRE04VWpybnVIc3lxWXlBbkk4c1UyVXdJUVlFNkU5cEwrWWNVRVVrYUJsU2wyYkNibWVFVkpLZVkKOWlpUmwvejZ5T2Y1WlJ0Y09MdVhBRUtabTU4WVd0bFRaWGJvYnd3RVZNa1N0KzJNWml1SjZrQjlyMnRoOWxySwpMNG16SVRFWWd3S0JnUUNmTVA5clhGWUIwdjhNc1c3RE13RDZZYWhvNklJWTh0dkp4WFAvZmloVnVwRThEN0hTCnI0K2ZQY3NRczVwZmtwQTFDZVRsLzZNMm1XRWVGSXpNVXRxdWhxQjEyV3g3VFVRbzV4dmZlMjJTSWpxWDJHZkUKeHVBTWxFNEFGR1ZCc0xrQnBNL3hSRCttOVZDdTcybC82THRDWWlVaXc5V2hzYmtCZlBUcVBGbkYzUUtCZ0RidwpkSmJTZ3FUNDdnWlZ4UEhjemgxaUsyVWIxQnhWeXJsLy8rdDg5a2RJUHhLM1diT1c0bFp0R2tabFFPMkM3UmpLClNtcjRYWm5MNGdmZ1Y5UEUwZnEvcnNObzI0aUoraWc1UmJ0aHBIQWw0SlNKZSsxcTJHNHl3dkVHQ2hpanN5VUcKV2IrK2VnT2NvaFJoQzBGMzh6YzFQTWRoN0VoQTFpS1l1c2ZoMkF3bEFvR0JBTDBtOW9od1lhK3N0aytQTUI0SgpSaE1WeHNGUzlBRENXQ01jVHFrVktHQnRseVc3S1Q0ZVh4NGRFVUpYQktnaVJURVI1VCtyMzI4OVdEd05HWTIzCmFuN0dHTThCSHJ4WVdKdGtpOEFnNE1scHkvbS9YN1c4bkFjUjZpSGVVWEpPL21pa21ydjR4M0ZKODNJK2RUZlAKLy9QaU4rOFkyR1VHMGNYSzlsbFFaT0dKCi0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K
\ No newline at end of file
diff --git a/test_framework/terraform/aws/centos/main.tf b/test_framework/terraform/aws/centos/main.tf
index 758a625be2..7097a5a1fd 100644
--- a/test_framework/terraform/aws/centos/main.tf
+++ b/test_framework/terraform/aws/centos/main.tf
@@ -250,7 +250,7 @@ resource "aws_route_table" "lh_aws_private_rt" {
}
}
-# Assciate public subnet to public route table
+# Associate public subnet to public route table
resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_public_subnet,
@@ -261,7 +261,7 @@ resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
route_table_id = aws_route_table.lh_aws_public_rt.id
}
-# Assciate private subnet to private route table
+# Associate private subnet to private route table
resource "aws_route_table_association" "lh_aws_private_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_private_subnet,
diff --git a/test_framework/terraform/aws/centos/variables.tf b/test_framework/terraform/aws/centos/variables.tf
index 07d56257be..238748d20c 100644
--- a/test_framework/terraform/aws/centos/variables.tf
+++ b/test_framework/terraform/aws/centos/variables.tf
@@ -10,12 +10,12 @@ variable "lh_aws_secret_key" {
variable "aws_region" {
type = string
- default = "us-east-2"
+ default = "us-east-1"
}
variable "aws_availability_zone" {
type = string
- default = "us-east-2c"
+ default = "us-east-1c"
}
variable "lh_aws_vpc_name" {
@@ -55,12 +55,12 @@ variable "lh_aws_instance_name_controlplane" {
variable "lh_aws_instance_type_controlplane" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
}
variable "lh_aws_instance_type_worker" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
}
variable "lh_aws_instance_root_block_device_size_controlplane" {
@@ -96,12 +96,12 @@ variable "k8s_distro_name" {
variable "k8s_distro_version" {
type = string
- default = "v1.25.3+k3s1"
+ default = "v1.28.4+k3s1"
description = <<-EOT
kubernetes version that will be deployed
rke: (default: v1.22.5-rancher1-1)
- k3s: (default: v1.25.3+k3s1)
- rke2: (default: v1.25.3+rke2r1)
+ k3s: (default: v1.28.4+k3s1)
+ rke2: (default: v1.28.4+rke2r1)
EOT
}
diff --git a/test_framework/terraform/aws/eks/main.tf b/test_framework/terraform/aws/eks/main.tf
index 95eb33647d..403abe5212 100644
--- a/test_framework/terraform/aws/eks/main.tf
+++ b/test_framework/terraform/aws/eks/main.tf
@@ -120,7 +120,7 @@ resource "aws_eks_node_group" "node_group" {
subnet_ids = module.vpc.public_subnets
ami_type = var.arch == "amd64" ? "AL2_x86_64" : "AL2_ARM_64"
capacity_type = "ON_DEMAND"
- instance_types = [var.arch == "amd64" ? "t2.xlarge" : "a1.xlarge"]
+ instance_types = [var.arch == "amd64" ? "t3.xlarge" : "t4g.xlarge"]
disk_size = 40
scaling_config {
desired_size = 3
diff --git a/test_framework/terraform/aws/oracle/main.tf b/test_framework/terraform/aws/oracle/main.tf
index 4b22f7a21f..3fddf19914 100644
--- a/test_framework/terraform/aws/oracle/main.tf
+++ b/test_framework/terraform/aws/oracle/main.tf
@@ -250,7 +250,7 @@ resource "aws_route_table" "lh_aws_private_rt" {
}
}
-# Assciate public subnet to public route table
+# Associate public subnet to public route table
resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_public_subnet,
@@ -261,7 +261,7 @@ resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
route_table_id = aws_route_table.lh_aws_public_rt.id
}
-# Assciate private subnet to private route table
+# Associate private subnet to private route table
resource "aws_route_table_association" "lh_aws_private_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_private_subnet,
diff --git a/test_framework/terraform/aws/oracle/variables.tf b/test_framework/terraform/aws/oracle/variables.tf
index aa588e6f1d..48664199cf 100644
--- a/test_framework/terraform/aws/oracle/variables.tf
+++ b/test_framework/terraform/aws/oracle/variables.tf
@@ -10,12 +10,12 @@ variable "lh_aws_secret_key" {
variable "aws_region" {
type = string
- default = "us-east-2"
+ default = "us-east-1"
}
variable "aws_availability_zone" {
type = string
- default = "us-east-2a"
+ default = "us-east-1a"
}
variable "lh_aws_vpc_name" {
@@ -26,11 +26,12 @@ variable "lh_aws_vpc_name" {
variable "arch" {
type = string
description = "available values (amd64, arm64)"
+ default = "amd64"
}
variable "distro_version" {
type = string
- default = "8.6"
+ default = "9.3"
}
variable "aws_ami_oraclelinux_account_number" {
@@ -55,12 +56,14 @@ variable "lh_aws_instance_name_controlplane" {
variable "lh_aws_instance_type_controlplane" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_type_worker" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_root_block_device_size_controlplane" {
@@ -96,12 +99,12 @@ variable "k8s_distro_name" {
variable "k8s_distro_version" {
type = string
- default = "v1.25.3+k3s1"
+ default = "v1.28.4+k3s1"
description = <<-EOT
kubernetes version that will be deployed
rke: (default: v1.22.5-rancher1-1)
- k3s: (default: v1.25.3+k3s1)
- rke2: (default: v1.25.3+rke2r1)
+ k3s: (default: v1.28.4+k3s1)
+ rke2: (default: v1.28.4+rke2r1)
EOT
}
diff --git a/test_framework/terraform/aws/rhel/main.tf b/test_framework/terraform/aws/rhel/main.tf
index 4b22f7a21f..3fddf19914 100644
--- a/test_framework/terraform/aws/rhel/main.tf
+++ b/test_framework/terraform/aws/rhel/main.tf
@@ -250,7 +250,7 @@ resource "aws_route_table" "lh_aws_private_rt" {
}
}
-# Assciate public subnet to public route table
+# Associate public subnet to public route table
resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_public_subnet,
@@ -261,7 +261,7 @@ resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
route_table_id = aws_route_table.lh_aws_public_rt.id
}
-# Assciate private subnet to private route table
+# Associate private subnet to private route table
resource "aws_route_table_association" "lh_aws_private_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_private_subnet,
diff --git a/test_framework/terraform/aws/rhel/variables.tf b/test_framework/terraform/aws/rhel/variables.tf
index 98f211e362..72ff0a3734 100644
--- a/test_framework/terraform/aws/rhel/variables.tf
+++ b/test_framework/terraform/aws/rhel/variables.tf
@@ -10,12 +10,12 @@ variable "lh_aws_secret_key" {
variable "aws_region" {
type = string
- default = "us-east-2"
+ default = "us-east-1"
}
variable "aws_availability_zone" {
type = string
- default = "us-east-2a"
+ default = "us-east-1a"
}
variable "lh_aws_vpc_name" {
@@ -26,11 +26,12 @@ variable "lh_aws_vpc_name" {
variable "arch" {
type = string
description = "available values (amd64, arm64)"
+ default = "amd64"
}
variable "os_distro_version" {
type = string
- default = "8.6.0"
+ default = "9.3.0"
}
variable "aws_ami_rhel_account_number" {
@@ -55,12 +56,14 @@ variable "lh_aws_instance_name_controlplane" {
variable "lh_aws_instance_type_controlplane" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_type_worker" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_root_block_device_size_controlplane" {
@@ -96,12 +99,12 @@ variable "k8s_distro_name" {
variable "k8s_distro_version" {
type = string
- default = "v1.25.3+k3s1"
+ default = "v1.28.4+k3s1"
description = <<-EOT
kubernetes version that will be deployed
rke: (default: v1.22.5-rancher1-1)
- k3s: (default: v1.25.3+k3s1)
- rke2: (default: v1.25.3+rke2r1)
+ k3s: (default: v1.28.4+k3s1)
+ rke2: (default: v1.28.4+rke2r1)
EOT
}
diff --git a/test_framework/terraform/aws/rockylinux/main.tf b/test_framework/terraform/aws/rockylinux/main.tf
index 02cf5120a5..e8e7be4756 100644
--- a/test_framework/terraform/aws/rockylinux/main.tf
+++ b/test_framework/terraform/aws/rockylinux/main.tf
@@ -251,7 +251,7 @@ resource "aws_route_table" "lh_aws_private_rt" {
}
}
-# Assciate public subnet to public route table
+# Associate public subnet to public route table
resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_public_subnet,
@@ -262,7 +262,7 @@ resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
route_table_id = aws_route_table.lh_aws_public_rt.id
}
-# Assciate private subnet to private route table
+# Associate private subnet to private route table
resource "aws_route_table_association" "lh_aws_private_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_private_subnet,
diff --git a/test_framework/terraform/aws/rockylinux/variables.tf b/test_framework/terraform/aws/rockylinux/variables.tf
index dfce28e0ae..eb81c3b6f5 100644
--- a/test_framework/terraform/aws/rockylinux/variables.tf
+++ b/test_framework/terraform/aws/rockylinux/variables.tf
@@ -10,12 +10,12 @@ variable "lh_aws_secret_key" {
variable "aws_region" {
type = string
- default = "us-east-2"
+ default = "us-east-1"
}
variable "aws_availability_zone" {
type = string
- default = "us-east-2a"
+ default = "us-east-1a"
}
variable "lh_aws_vpc_name" {
@@ -26,11 +26,12 @@ variable "lh_aws_vpc_name" {
variable "arch" {
type = string
description = "available values (amd64, arm64)"
+ default = "amd64"
}
variable "os_distro_version" {
type = string
- default = "9.2"
+ default = "9.3"
}
variable "aws_ami_rockylinux_account_number" {
@@ -55,12 +56,14 @@ variable "lh_aws_instance_name_controlplane" {
variable "lh_aws_instance_type_controlplane" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_type_worker" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_root_block_device_size_controlplane" {
@@ -96,12 +99,12 @@ variable "k8s_distro_name" {
variable "k8s_distro_version" {
type = string
- default = "v1.25.3+k3s1"
+ default = "v1.28.4+k3s1"
description = <<-EOT
kubernetes version that will be deployed
rke: (default: v1.22.5-rancher1-1)
- k3s: (default: v1.25.3+k3s1)
- rke2: (default: v1.25.3+rke2r1)
+ k3s: (default: v1.28.4+k3s1)
+ rke2: (default: v1.28.4+rke2r1)
EOT
}
diff --git a/test_framework/terraform/aws/sle-micro/variables.tf b/test_framework/terraform/aws/sle-micro/variables.tf
index b00745d94b..503bbd0cef 100644
--- a/test_framework/terraform/aws/sle-micro/variables.tf
+++ b/test_framework/terraform/aws/sle-micro/variables.tf
@@ -31,7 +31,7 @@ variable "arch" {
variable "os_distro_version" {
type = string
- default = "5.3"
+ default = "5.5"
}
variable "aws_ami_sles_account_number" {
@@ -56,14 +56,14 @@ variable "lh_aws_instance_name_controlplane" {
variable "lh_aws_instance_type_controlplane" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
- default = "t2.xlarge"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_type_worker" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
- default = "t2.xlarge"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_root_block_device_size_controlplane" {
@@ -99,12 +99,12 @@ variable "k8s_distro_name" {
variable "k8s_distro_version" {
type = string
- default = "v1.25.3+k3s1"
+ default = "v1.28.4+k3s1"
description = <<-EOT
kubernetes version that will be deployed
rke: (default: v1.22.5-rancher1-1)
- k3s: (default: v1.25.3+k3s1)
- rke2: (default: v1.25.3+rke2r1)
+ k3s: (default: v1.28.4+k3s1)
+ rke2: (default: v1.28.4+rke2r1)
EOT
}
diff --git a/test_framework/terraform/aws/sles/main.tf b/test_framework/terraform/aws/sles/main.tf
index 665dd5b946..0e78f0a6c6 100644
--- a/test_framework/terraform/aws/sles/main.tf
+++ b/test_framework/terraform/aws/sles/main.tf
@@ -258,7 +258,7 @@ resource "aws_route_table" "lh_aws_private_rt" {
}
}
-# Assciate public subnet to public route table
+# Associate public subnet to public route table
resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_public_subnet,
@@ -269,7 +269,7 @@ resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
route_table_id = aws_route_table.lh_aws_public_rt.id
}
-# Assciate private subnet to private route table
+# Associate private subnet to private route table
resource "aws_route_table_association" "lh_aws_private_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_private_subnet,
diff --git a/test_framework/terraform/aws/sles/variables.tf b/test_framework/terraform/aws/sles/variables.tf
index 1a435ac39b..640304259a 100644
--- a/test_framework/terraform/aws/sles/variables.tf
+++ b/test_framework/terraform/aws/sles/variables.tf
@@ -56,14 +56,14 @@ variable "lh_aws_instance_name_controlplane" {
variable "lh_aws_instance_type_controlplane" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
- default = "t2.xlarge"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_type_worker" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
- default = "t2.xlarge"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_root_block_device_size_controlplane" {
diff --git a/test_framework/terraform/aws/ubuntu/main.tf b/test_framework/terraform/aws/ubuntu/main.tf
index 5ce977a111..956411de0f 100644
--- a/test_framework/terraform/aws/ubuntu/main.tf
+++ b/test_framework/terraform/aws/ubuntu/main.tf
@@ -252,7 +252,7 @@ resource "aws_route_table" "lh_aws_private_rt" {
}
}
-# Assciate public subnet to public route table
+# Associate public subnet to public route table
resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_public_subnet,
@@ -263,7 +263,7 @@ resource "aws_route_table_association" "lh_aws_public_subnet_rt_association" {
route_table_id = aws_route_table.lh_aws_public_rt.id
}
-# Assciate private subnet to private route table
+# Associate private subnet to private route table
resource "aws_route_table_association" "lh_aws_private_subnet_rt_association" {
depends_on = [
aws_subnet.lh_aws_private_subnet,
diff --git a/test_framework/terraform/aws/ubuntu/variables.tf b/test_framework/terraform/aws/ubuntu/variables.tf
index 468a06b00e..6ee716104f 100644
--- a/test_framework/terraform/aws/ubuntu/variables.tf
+++ b/test_framework/terraform/aws/ubuntu/variables.tf
@@ -10,12 +10,12 @@ variable "lh_aws_secret_key" {
variable "aws_region" {
type = string
- default = "us-east-2"
+ default = "us-east-1"
}
variable "aws_availability_zone" {
type = string
- default = "us-east-2a"
+ default = "us-east-1a"
}
variable "lh_aws_vpc_name" {
@@ -26,11 +26,12 @@ variable "lh_aws_vpc_name" {
variable "arch" {
type = string
description = "available values (amd64, arm64)"
+ default = "amd64"
}
variable "os_distro_version" {
type = string
- default = "20.04"
+ default = "22.04"
}
variable "aws_ami_ubuntu_account_number" {
@@ -55,13 +56,14 @@ variable "lh_aws_instance_name_controlplane" {
variable "lh_aws_instance_type_controlplane" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_type_worker" {
type = string
- description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
-
+ description = "Recommended instance types t3.xlarge for amd64 & t4g.xlarge for arm64"
+ default = "t3.xlarge"
}
variable "lh_aws_instance_root_block_device_size_controlplane" {
@@ -97,12 +99,12 @@ variable "k8s_distro_name" {
variable "k8s_distro_version" {
type = string
- default = "v1.25.3+k3s1"
+ default = "v1.28.4+k3s1"
description = <<-EOT
kubernetes version that will be deployed
rke: (default: v1.22.5-rancher1-1)
- k3s: (default: v1.25.3+k3s1)
- rke2: (default: v1.25.3+rke2r1)
+ k3s: (default: v1.28.4+k3s1)
+ rke2: (default: v1.28.4+rke2r1)
EOT
}
diff --git a/test_framework/terraform/azure/aks/main.tf b/test_framework/terraform/azure/aks/main.tf
index 35376b8faa..748f0f6b06 100644
--- a/test_framework/terraform/azure/aks/main.tf
+++ b/test_framework/terraform/azure/aks/main.tf
@@ -2,7 +2,7 @@ terraform {
required_providers {
azurerm = {
source = "hashicorp/azurerm"
- version = "=3.0.0"
+ version = "3.94.0"
}
}
}
diff --git a/test_tools/gen_data/README.md b/test_tools/gen_data/README.md
index 3e73e6720a..f46e6661c3 100644
--- a/test_tools/gen_data/README.md
+++ b/test_tools/gen_data/README.md
@@ -7,7 +7,7 @@ Modify config.yaml
storage: 1Gi # Each volume size
storageClass: longhorn-test # Need to prepare your own storage class first
dataSizeInMb: 500
-namespace: default # Nees to prepare first before run script
+namespace: default # Needs to be prepared before running the script
statefulSet: # Single RWO/RWX statefulset and its replica counts
rwo:
replicas: 1
diff --git a/test_tools/gen_data/run.sh b/test_tools/gen_data/run.sh
index d9b786786a..aa5da23629 100755
--- a/test_tools/gen_data/run.sh
+++ b/test_tools/gen_data/run.sh
@@ -120,7 +120,7 @@ check_config_input() {
DEPLOYMENT_RWX_REPLICAS=$(yq eval '.deployment.rwx.deploymentReplicas' config.yaml)
msg="$CONFIG_FILE is not correct, please check"
- # varialbe = "null" when yq not find yaml field
+  # variable = "null" when yq does not find the yaml field
[ "$STORAGE_SIZE" = "null" -o ${#STORAGE_SIZE} -eq 0 ] && error "$msg" && exit 2
[ "$NAMESPACE" = "null" -o ${#NAMESPACE} -eq 0 ] && error "$msg" && exit 2
[ "$STORAGE_CLASS_NAME" = "null" -o ${#STORAGE_CLASS_NAME} -eq 0 ] && error "$msg" && exit 2