From cc44f8843acba89c408543143b15b117736a1899 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Tue, 9 Jul 2024 08:51:36 +0100 Subject: [PATCH 001/106] lxd/auth/generate: Add dry run functionality. Signed-off-by: Mark Laing --- lxd/auth/generate/main.go | 63 ++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/lxd/auth/generate/main.go b/lxd/auth/generate/main.go index 11a26b46e741..370cf87e37dd 100644 --- a/lxd/auth/generate/main.go +++ b/lxd/auth/generate/main.go @@ -7,6 +7,8 @@ package main import ( "bufio" + "encoding/json" + "flag" "fmt" "io" "os" @@ -24,8 +26,8 @@ var relationRegexp = regexp.MustCompile(`^\s+define\s+(\w+):\s+.+$`) var commentRegexp = regexp.MustCompile(`^\s*#\s*(.*)$`) type entitlement struct { - relation string - description string + Relation string `json:"name"` + Description string `json:"description"` } // snakeToPascal converts a snake case (hello_world) string to a Pascal case string (HelloWorld). @@ -63,7 +65,46 @@ func snakeToPascal(str string) string { return s } +var flagDryRun bool + +func init() { + flag.BoolVar(&flagDryRun, "dry-run", false, "Output json file to stdout") + flag.Parse() +} + func main() { + if flagDryRun { + err := func() error { + f, err := os.Open("drivers/openfga_model.openfga") + if err != nil { + return fmt.Errorf("Failed to open OpenFGA model file: %w", err) + } + + defer f.Close() + + entityToEntitlements, _, err := scanOpenFGAModel(f) + if err != nil { + return err + } + + err = f.Close() + if err != nil { + return fmt.Errorf("Failed to close OpenFGA model file: %w", err) + } + + err = json.NewEncoder(os.Stdout).Encode(entityToEntitlements) + if err != nil { + return fmt.Errorf("Failed to write entitlement json to stdout: %w", err) + } + + return nil + }() + if err != nil { + fmt.Printf("Failed to generate entitlements from OpenFGA model (dry run): %v\n", err) + os.Exit(1) + } + } + err := func() error { f, err := os.Open("drivers/openfga_model.openfga") if 
err != nil { @@ -131,7 +172,7 @@ func writeOutput(w io.Writer, entityToEntitlements map[entity.Type][]entitlement var entityTypes []string for entityType, entitlements := range entityToEntitlements { for _, e := range entitlements { - if entitlement.relation == e.relation { + if entitlement.Relation == e.Relation { entityTypes = append(entityTypes, string(entityType)) break } @@ -144,12 +185,12 @@ func writeOutput(w io.Writer, entityToEntitlements map[entity.Type][]entitlement sort.Strings(entityTypes) - builder.WriteString(fmt.Sprintf("\t// Entitlement%s is the \"%s\" entitlement. It applies to the following entities: %s.\n", snakeToPascal(entitlement.relation), entitlement.relation, strings.Join(entityTypes, ", "))) + builder.WriteString(fmt.Sprintf("\t// Entitlement%s is the \"%s\" entitlement. It applies to the following entities: %s.\n", snakeToPascal(entitlement.Relation), entitlement.Relation, strings.Join(entityTypes, ", "))) if i == len(allEntitlements)-1 { - builder.WriteString(fmt.Sprintf("\tEntitlement%s Entitlement = \"%s\"\n", snakeToPascal(entitlement.relation), entitlement.relation)) + builder.WriteString(fmt.Sprintf("\tEntitlement%s Entitlement = \"%s\"\n", snakeToPascal(entitlement.Relation), entitlement.Relation)) } else { - builder.WriteString(fmt.Sprintf("\tEntitlement%s Entitlement = \"%s\"\n\n", snakeToPascal(entitlement.relation), entitlement.relation)) + builder.WriteString(fmt.Sprintf("\tEntitlement%s Entitlement = \"%s\"\n\n", snakeToPascal(entitlement.Relation), entitlement.Relation)) } } @@ -170,8 +211,8 @@ func writeOutput(w io.Writer, entityToEntitlements map[entity.Type][]entitlement builder.WriteString(fmt.Sprintf("\tentity.Type%s: {\n", snakeToPascal(entityType))) for _, entitlement := range entitlements { // Here we can add the comment from the OpenFGA model. 
- builder.WriteString(fmt.Sprintf("\t\t// %s\n", entitlement.description)) - builder.WriteString(fmt.Sprintf("\t\tEntitlement%s,\n", snakeToPascal(entitlement.relation))) + builder.WriteString(fmt.Sprintf("\t\t// %s\n", entitlement.Description)) + builder.WriteString(fmt.Sprintf("\t\tEntitlement%s,\n", snakeToPascal(entitlement.Relation))) } builder.WriteString("\t},\n") @@ -238,15 +279,15 @@ scan: } entitlement := entitlement{ - relation: submatch[1], - description: strings.Join(curComment, " "), + Relation: submatch[1], + Description: strings.Join(curComment, " "), } entityToEntitlements[curType] = append(entityToEntitlements[curType], entitlement) var found bool for _, e := range allEntitlements { - if submatch[1] == e.relation { + if submatch[1] == e.Relation { found = true break } From 3076591cdc8c18930dd0b889311e0776365cd7ec Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Tue, 9 Jul 2024 08:52:04 +0100 Subject: [PATCH 002/106] lxd-metadata: Add entitlements to output json. Signed-off-by: Mark Laing --- lxd/lxd-metadata/lxd_metadata.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lxd/lxd-metadata/lxd_metadata.go b/lxd/lxd-metadata/lxd_metadata.go index 1ec827cd1c72..facb94421149 100644 --- a/lxd/lxd-metadata/lxd_metadata.go +++ b/lxd/lxd-metadata/lxd_metadata.go @@ -9,6 +9,7 @@ import ( "go/token" "log" "os" + "os/exec" "path/filepath" "regexp" "sort" @@ -38,7 +39,8 @@ type IterableAny interface { // doc is the structure of the JSON file that contains the generated configuration metadata. type doc struct { - Configs map[string]map[string]map[string][]any `json:"configs"` + Configs map[string]map[string]map[string][]any `json:"configs"` + Entities json.RawMessage `json:"entities"` } // detectType detects the type of a string and returns the corresponding value. 
@@ -324,6 +326,15 @@ func parse(path string, outputJSONPath string, excludedPaths []string, substitut // sort the config keys alphabetically sortConfigKeys(allEntries) jsonDoc.Configs = allEntries + + cmd := exec.Command("go", "run", "./generate/main.go", "--dry-run") + cmd.Dir = "./lxd/auth" + entities, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("Error while getting entitlement data: %w", err) + } + + jsonDoc.Entities = entities data, err := json.MarshalIndent(jsonDoc, "", "\t") if err != nil { return nil, fmt.Errorf("Error while marshaling project documentation: %v", err) From 115d2e603c5227b8401c16a489eee08dccd1f912 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 11 Jul 2024 13:08:22 +0200 Subject: [PATCH 003/106] lxd/apparmor/instance_qemu: support external qemu snap Signed-off-by: Alexander Mikhalitsyn --- lxd/apparmor/instance.go | 23 ++++++++++++----------- lxd/apparmor/instance_qemu.go | 7 +++++++ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/lxd/apparmor/instance.go b/lxd/apparmor/instance.go index 2595ecc428bc..ee056ce092e3 100644 --- a/lxd/apparmor/instance.go +++ b/lxd/apparmor/instance.go @@ -206,17 +206,18 @@ func instanceProfile(sysOS *sys.OS, inst instance) (string, error) { } err = qemuProfileTpl.Execute(sb, map[string]any{ - "devicesPath": inst.DevicesPath(), - "exePath": execPath, - "libraryPath": strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":"), - "logPath": inst.LogPath(), - "name": InstanceProfileName(inst), - "path": path, - "raw": rawContent, - "rootPath": rootPath, - "snap": shared.InSnap(), - "userns": sysOS.RunningInUserNS, - "qemuFwPaths": qemuFwPathsArr, + "devicesPath": inst.DevicesPath(), + "exePath": execPath, + "libraryPath": strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":"), + "logPath": inst.LogPath(), + "name": InstanceProfileName(inst), + "path": path, + "raw": rawContent, + "rootPath": rootPath, + "snap": shared.InSnap(), + "userns": sysOS.RunningInUserNS, + "qemuFwPaths": 
qemuFwPathsArr, + "snapExtQemuPrefix": os.Getenv("SNAP_QEMU_PREFIX"), }) if err != nil { return "", err diff --git a/lxd/apparmor/instance_qemu.go b/lxd/apparmor/instance_qemu.go index 5f00c1e2e190..ae7d2916c3db 100644 --- a/lxd/apparmor/instance_qemu.go +++ b/lxd/apparmor/instance_qemu.go @@ -83,6 +83,13 @@ profile "{{ .name }}" flags=(attach_disconnected,mediate_deleted) { # Snap-specific libraries /snap/lxd/*/lib/**.so* mr, + +{{- if .snapExtQemuPrefix }} + /snap/lxd/*/{{ .snapExtQemuPrefix }}/lib/**.so* mr, + /snap/lxd/*/{{ .snapExtQemuPrefix }}/bin/qemu-system-* mrix, + /snap/lxd/*/{{ .snapExtQemuPrefix }}/share/** r, +{{- end }} + {{- end }} {{if .libraryPath -}} From 4a7f1152748f76bd5d4d582a218bbe2a31d0f989 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 11 Jul 2024 13:09:06 +0200 Subject: [PATCH 004/106] lxd/instance/drivers/driver_qemu: support external qemu snap Signed-off-by: Alexander Mikhalitsyn --- lxd/instance/drivers/driver_qemu.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index ca56d180fc96..83b61ecf5b5b 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -2033,29 +2033,34 @@ func (d *qemu) setupNvram() error { } func (d *qemu) qemuArchConfig(arch int) (path string, bus string, err error) { + basePath := "" + if shared.InSnap() && os.Getenv("SNAP_QEMU_PREFIX") != "" { + basePath = filepath.Join(os.Getenv("SNAP"), os.Getenv("SNAP_QEMU_PREFIX")) + "/bin/" + } + if arch == osarch.ARCH_64BIT_INTEL_X86 { - path, err := exec.LookPath("qemu-system-x86_64") + path, err := exec.LookPath(basePath + "qemu-system-x86_64") if err != nil { return "", "", err } return path, "pcie", nil } else if arch == osarch.ARCH_64BIT_ARMV8_LITTLE_ENDIAN { - path, err := exec.LookPath("qemu-system-aarch64") + path, err := exec.LookPath(basePath + "qemu-system-aarch64") if err != nil { return "", "", err } 
return path, "pcie", nil } else if arch == osarch.ARCH_64BIT_POWERPC_LITTLE_ENDIAN { - path, err := exec.LookPath("qemu-system-ppc64") + path, err := exec.LookPath(basePath + "qemu-system-ppc64") if err != nil { return "", "", err } return path, "pci", nil } else if arch == osarch.ARCH_64BIT_S390_BIG_ENDIAN { - path, err := exec.LookPath("qemu-system-s390x") + path, err := exec.LookPath(basePath + "qemu-system-s390x") if err != nil { return "", "", err } @@ -8530,6 +8535,10 @@ func (d *qemu) Info() instance.Info { data.Version = "unknown" // Not necessarily an error that should prevent us using driver. } + if shared.InSnap() && os.Getenv("SNAP_QEMU_PREFIX") != "" { + data.Version = data.Version + " (external)" + } + data.Features, err = d.checkFeatures(hostArch, qemuPath) if err != nil { logger.Errorf("Unable to run feature checks during QEMU initialization: %v", err) From b53dcb05165eedd39fdc1e36946411ac0acef14d Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Wed, 10 Jul 2024 18:08:54 +0100 Subject: [PATCH 005/106] lxd-metadata: Add entities/entitlements to generated doc file. 
Signed-off-by: Mark Laing --- lxd/lxd-metadata/lxd_metadata.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lxd/lxd-metadata/lxd_metadata.go b/lxd/lxd-metadata/lxd_metadata.go index facb94421149..a8fcaf384d94 100644 --- a/lxd/lxd-metadata/lxd_metadata.go +++ b/lxd/lxd-metadata/lxd_metadata.go @@ -494,6 +494,31 @@ func writeDocFile(inputJSONPath, outputTxtPath string) error { } } + entities := make(map[string][]map[string]string) + err = json.Unmarshal(jsonDoc.Entities, &entities) + if err != nil { + return err + } + + sortedEntityNames := make([]string, 0, len(entities)) + for entityName := range entities { + sortedEntityNames = append(sortedEntityNames, entityName) + } + + sort.Strings(sortedEntityNames) + + for _, entityName := range sortedEntityNames { + entitlements := entities[entityName] + buffer.WriteString(fmt.Sprintf("<!-- entity group %s start -->\n", entityName)) + for _, entitlement := range entitlements { + buffer.WriteString(fmt.Sprintf("`%s`\n", entitlement["name"])) + buffer.WriteString(fmt.Sprintf(": %s\n\n", entitlement["description"])) + } + + buffer.WriteString("\n") + buffer.WriteString(fmt.Sprintf("<!-- entity group %s end -->\n", entityName)) + } + err = os.WriteFile(outputTxtPath, buffer.Bytes(), 0644) if err != nil { return fmt.Errorf("Error while writing the Markdown project documentation: %v", err) From dea337495e29588a66e317afd203f6a9ff99d8a5 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Wed, 10 Jul 2024 18:09:21 +0100 Subject: [PATCH 006/106] doc: Rename config_options.txt -> metadata.txt. 
Signed-off-by: Mark Laing --- doc/contributing.md | 8 ++--- doc/howto/network_acls.md | 8 ++--- doc/howto/network_forwards.md | 8 ++--- doc/howto/network_load_balancers.md | 12 +++---- doc/howto/network_ovn_peers.md | 4 +-- doc/howto/network_zones.md | 8 ++--- doc/{config_options.txt => metadata.txt} | 0 doc/reference/cluster_member_config.md | 4 +-- doc/reference/devices_disk.md | 4 +-- doc/reference/devices_gpu.md | 16 +++++----- doc/reference/devices_infiniband.md | 4 +-- doc/reference/devices_nic.md | 32 +++++++++---------- doc/reference/devices_pci.md | 4 +-- doc/reference/devices_proxy.md | 4 +-- doc/reference/devices_tpm.md | 4 +-- doc/reference/devices_unix_block.md | 4 +-- doc/reference/devices_unix_char.md | 4 +-- doc/reference/devices_unix_hotplug.md | 4 +-- doc/reference/devices_usb.md | 4 +-- doc/reference/instance_options.md | 40 ++++++++++++------------ doc/reference/instance_properties.md | 4 +-- doc/reference/network_bridge.md | 4 +-- doc/reference/network_macvlan.md | 4 +-- doc/reference/network_ovn.md | 4 +-- doc/reference/network_physical.md | 4 +-- doc/reference/network_sriov.md | 4 +-- doc/reference/projects.md | 16 +++++----- doc/reference/storage_btrfs.md | 12 +++---- doc/reference/storage_ceph.md | 8 ++--- doc/reference/storage_cephfs.md | 8 ++--- doc/reference/storage_cephobject.md | 8 ++--- doc/reference/storage_dir.md | 8 ++--- doc/reference/storage_lvm.md | 12 +++---- doc/reference/storage_powerflex.md | 8 ++--- doc/reference/storage_zfs.md | 12 +++---- doc/server.md | 28 ++++++++--------- 36 files changed, 160 insertions(+), 160 deletions(-) rename doc/{config_options.txt => metadata.txt} (100%) diff --git a/doc/contributing.md b/doc/contributing.md index b3d88b3525ef..b560c4f71aab 100644 --- a/doc/contributing.md +++ b/doc/contributing.md @@ -147,15 +147,15 @@ Look for comments that start with `lxdmeta:generate` in the code. When you add or change a configuration option, make sure to include the required documentation comment for it. 
See the [`lxd-metadata` README file](https://github.com/canonical/lxd/blob/main/lxd/lxd-metadata/README.md) for information about the format. -Then run `make generate-config` to re-generate the `doc/config_options.txt` file. +Then run `make generate-config` to re-generate the `doc/metadata.txt` file. The updated file should be checked in. -The documentation includes sections from the `doc/config_options.txt` to display a group of configuration options. +The documentation includes sections from the `doc/metadata.txt` to display a group of configuration options. For example, to include the core server options: ```` -% Include content from [config_options.txt](config_options.txt) -```{include} config_options.txt +% Include content from [metadata.txt](metadata.txt) +```{include} metadata.txt :start-after: :end-before: ``` diff --git a/doc/howto/network_acls.md b/doc/howto/network_acls.md index 4522f6696579..26153a6bab79 100644 --- a/doc/howto/network_acls.md +++ b/doc/howto/network_acls.md @@ -42,8 +42,8 @@ Valid network ACL names must adhere to the following rules: ACLs have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -92,8 +92,8 @@ If one of the rules in the ACLs matches, the action for that rule is taken and n ACL rules have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/howto/network_forwards.md b/doc/howto/network_forwards.md index 3c54347eee1c..ebd8b918c099 100644 --- a/doc/howto/network_forwards.md +++ b/doc/howto/network_forwards.md @@ -50,8 +50,8 @@ Note that this target address must be within the same subnet as the network that 
Network forwards have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -94,8 +94,8 @@ If you want to forward the traffic to different ports, you have two options: Network forward ports have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/howto/network_load_balancers.md b/doc/howto/network_load_balancers.md index 93ab5051afd3..f60f60defc90 100644 --- a/doc/howto/network_load_balancers.md +++ b/doc/howto/network_load_balancers.md @@ -35,8 +35,8 @@ If a listen address is not given, the `--allocate` flag must be provided. Network load balancers have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -74,8 +74,8 @@ If you want to forward the traffic to different ports, you have two options: Network load balancer backends have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -98,8 +98,8 @@ The backend(s) specified must have target port(s) settings compatible with the p Network load balancer ports have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: 
``` diff --git a/doc/howto/network_ovn_peers.md b/doc/howto/network_ovn_peers.md index 579e2cbe35d3..32d2948cd1af 100644 --- a/doc/howto/network_ovn_peers.md +++ b/doc/howto/network_ovn_peers.md @@ -46,8 +46,8 @@ This behavior prevents users in a different project from discovering whether a p Peer routing relationships have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/howto/network_zones.md b/doc/howto/network_zones.md index b0449848d613..e3c81cf2745e 100644 --- a/doc/howto/network_zones.md +++ b/doc/howto/network_zones.md @@ -155,8 +155,8 @@ lxc network zone edit The following configuration options are available for network zones: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -205,8 +205,8 @@ This command creates an empty record without entries and adds it to a network zo Records have the following properties: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/config_options.txt b/doc/metadata.txt similarity index 100% rename from doc/config_options.txt rename to doc/metadata.txt diff --git a/doc/reference/cluster_member_config.md b/doc/reference/cluster_member_config.md index c5171c79ef9a..cb6bd65c3e54 100644 --- a/doc/reference/cluster_member_config.md +++ b/doc/reference/cluster_member_config.md @@ -8,8 +8,8 @@ Each cluster member has its own key/value configuration with the following suppo The following keys are currently supported: -% Include content from 
[../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_disk.md b/doc/reference/devices_disk.md index 732bb968556d..b26085db0140 100644 --- a/doc/reference/devices_disk.md +++ b/doc/reference/devices_disk.md @@ -91,8 +91,8 @@ Note that you cannot use initial volume configurations with custom volume option `disk` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_gpu.md b/doc/reference/devices_gpu.md index 26058b60e547..c044dcc56236 100644 --- a/doc/reference/devices_gpu.md +++ b/doc/reference/devices_gpu.md @@ -35,8 +35,8 @@ A `physical` GPU device passes an entire GPU through into the instance. GPU devices of type `physical` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -68,8 +68,8 @@ You can check the list of available `mdev` profiles by running [`lxc info --reso GPU devices of type `mdev` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -97,8 +97,8 @@ Currently, this requires NVIDIA MIG instances to be pre-created. 
GPU devices of type `mig` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -127,8 +127,8 @@ An `sriov` GPU device passes a virtual function of an SR-IOV-enabled GPU into th GPU devices of type `sriov` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_infiniband.md b/doc/reference/devices_infiniband.md index e21490fcd26f..e647ca137c2e 100644 --- a/doc/reference/devices_infiniband.md +++ b/doc/reference/devices_infiniband.md @@ -25,8 +25,8 @@ LXD supports two different kinds of network types for InfiniBand devices: `infiniband` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_nic.md b/doc/reference/devices_nic.md index 6321b8d5bf7a..b19148ef67a5 100644 --- a/doc/reference/devices_nic.md +++ b/doc/reference/devices_nic.md @@ -74,8 +74,8 @@ A `bridged` NIC uses an existing bridge on the host and creates a virtual device NIC devices of type `bridged` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -111,8 +111,8 @@ Both the host and the instances can talk to the gateway, but they cannot communi NIC devices of type `macvlan` have the following device 
options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -162,8 +162,8 @@ VF allocation NIC devices of type `sriov` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -196,8 +196,8 @@ The targeted device will vanish from the host and appear in the instance (which NIC devices of type `physical` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -263,8 +263,8 @@ VDPA hardware acceleration NIC devices of type `ovn` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -317,8 +317,8 @@ DNS NIC devices of type `ipvlan` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -347,8 +347,8 @@ A `p2p` NIC creates a virtual device pair, putting one side in the instance and NIC devices of type `p2p` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ 
-428,8 +428,8 @@ Parent interface NIC devices of type `routed` have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_pci.md b/doc/reference/devices_pci.md index 270b629e6dd2..6bef15e3e105 100644 --- a/doc/reference/devices_pci.md +++ b/doc/reference/devices_pci.md @@ -19,8 +19,8 @@ In theory, you can also use them for more advanced PCI devices like GPUs or netw `pci` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_proxy.md b/doc/reference/devices_proxy.md index 3d4e9150ad24..1be14ec0bb69 100644 --- a/doc/reference/devices_proxy.md +++ b/doc/reference/devices_proxy.md @@ -80,8 +80,8 @@ However, when using NAT mode, you must specify an IP address on the LXD host. 
`proxy` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_tpm.md b/doc/reference/devices_tpm.md index 5c7551177b39..32693cc949ae 100644 --- a/doc/reference/devices_tpm.md +++ b/doc/reference/devices_tpm.md @@ -22,8 +22,8 @@ For virtual machines, TPM can be used both for sealing certificates and for vali `tpm` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_unix_block.md b/doc/reference/devices_unix_block.md index 5ae80aa36c58..d48401b7e1fc 100644 --- a/doc/reference/devices_unix_block.md +++ b/doc/reference/devices_unix_block.md @@ -17,8 +17,8 @@ You can read from the device and write to it. `unix-block` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_unix_char.md b/doc/reference/devices_unix_char.md index 5a0c82e9b9b9..dfe51f7a9623 100644 --- a/doc/reference/devices_unix_char.md +++ b/doc/reference/devices_unix_char.md @@ -17,8 +17,8 @@ You can read from the device and write to it. 
`unix-char` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_unix_hotplug.md b/doc/reference/devices_unix_hotplug.md index 77d4728bc648..0e56614be038 100644 --- a/doc/reference/devices_unix_hotplug.md +++ b/doc/reference/devices_unix_hotplug.md @@ -19,8 +19,8 @@ The implementation depends on `systemd-udev` to be run on the host. `unix-hotplug` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/devices_usb.md b/doc/reference/devices_usb.md index 9b4da1fe08d6..6106fff2af73 100644 --- a/doc/reference/devices_usb.md +++ b/doc/reference/devices_usb.md @@ -24,8 +24,8 @@ When a device is passed to the instance, it vanishes from the host. 
`usb` devices have the following device options: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/instance_options.md b/doc/reference/instance_options.md index 5437867d4ef6..f9610d041ab5 100644 --- a/doc/reference/instance_options.md +++ b/doc/reference/instance_options.md @@ -26,8 +26,8 @@ Note that while a type is defined for each option, all values are stored as stri In addition to the configuration options listed in the following sections, these instance options are supported: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -46,8 +46,8 @@ These are then set for [`lxc exec`](lxc_exec.md). The following instance options control the boot-related behavior of the instance: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -57,8 +57,8 @@ The following instance options control the boot-related behavior of the instance The following instance options control the [`cloud-init`](cloud-init) configuration of the instance: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -73,8 +73,8 @@ Therefore, make sure that the `cloud-init` configuration you specify in those op The following instance options specify resource limits for the instance: -% Include content from [../config_options.txt](../config_options.txt) -```{include} 
../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -217,8 +217,8 @@ Note that this inheritance is not enforced by LXD but by the kernel. The following instance options control the behavior if the instance is {ref}`moved from one LXD server to another `: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -228,8 +228,8 @@ The following instance options control the behavior if the instance is {ref}`mov The following instance options specify the NVIDIA and CUDA configuration of the instance: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -239,8 +239,8 @@ The following instance options specify the NVIDIA and CUDA configuration of the The following instance options allow direct interaction with the backend features that LXD itself uses: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -342,8 +342,8 @@ value = "0" The following instance options control the {ref}`security` policies of the instance: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -353,8 +353,8 @@ The following instance options control the {ref}`security` policies of the insta The following instance options control the creation and expiry of {ref}`instance snapshots `: -% Include content from 
[../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -369,8 +369,8 @@ The following instance options control the creation and expiry of {ref}`instance The following volatile keys are currently used internally by LXD to store internal data specific to an instance: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/instance_properties.md b/doc/reference/instance_properties.md index 2152fb1926ad..115c3d2033b6 100644 --- a/doc/reference/instance_properties.md +++ b/doc/reference/instance_properties.md @@ -6,8 +6,8 @@ They cannot be part of a {ref}`profile `. The following instance properties are available: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/network_bridge.md b/doc/reference/network_bridge.md index a3191ae106d0..7bffd854d2d4 100644 --- a/doc/reference/network_bridge.md +++ b/doc/reference/network_bridge.md @@ -58,8 +58,8 @@ The following configuration key namespaces are currently supported for the `brid The following configuration options are available for the `bridge` network type: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/network_macvlan.md b/doc/reference/network_macvlan.md index 98afb14be072..dedf51e2fbd2 100644 --- a/doc/reference/network_macvlan.md +++ b/doc/reference/network_macvlan.md @@ 
-28,8 +28,8 @@ The following configuration key namespaces are currently supported for the `macv The following configuration options are available for the `macvlan` network type: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/network_ovn.md b/doc/reference/network_ovn.md index 81746cd1c538..2884b7df1c1c 100644 --- a/doc/reference/network_ovn.md +++ b/doc/reference/network_ovn.md @@ -78,8 +78,8 @@ The following configuration key namespaces are currently supported for the `ovn` The following configuration options are available for the `ovn` network type: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/network_physical.md b/doc/reference/network_physical.md index 56e278c3a65f..588e17929531 100644 --- a/doc/reference/network_physical.md +++ b/doc/reference/network_physical.md @@ -27,8 +27,8 @@ The following configuration key namespaces are currently supported for the `phys The following configuration options are available for the `physical` network type: -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/network_sriov.md b/doc/reference/network_sriov.md index 727953f41e97..f3eed5e1aaa2 100644 --- a/doc/reference/network_sriov.md +++ b/doc/reference/network_sriov.md @@ -22,8 +22,8 @@ The following configuration key namespaces are currently supported for the `srio The following configuration options are available for the `sriov` network type: -% Include content from 
[../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/projects.md b/doc/reference/projects.md index 9f3f6832e841..2a7eab2464c7 100644 --- a/doc/reference/projects.md +++ b/doc/reference/projects.md @@ -26,8 +26,8 @@ However, if you unset one of the `feature.*` options, it does not go back to the The default value for all `feature.*` options is `false`. ``` -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -52,8 +52,8 @@ When using project limits, the following conditions must be fulfilled: This means that to use {config:option}`project-limits:limits.cpu` on a project, the {config:option}`instance-resource-limits:limits.cpu` configuration of each instance in the project must be set to a number of CPUs, not a set or a range of CPUs. - The {config:option}`project-limits:limits.memory` configuration must be set to an absolute value, not a percentage. -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -81,8 +81,8 @@ If `restricted` is set to `false`, changing a `restricted.*` option has no effec Setting all `restricted.*` keys to `allow` is equivalent to setting `restricted` itself to `false`. 
``` -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -93,8 +93,8 @@ Setting all `restricted.*` keys to `allow` is equivalent to setting `restricted` There are some {ref}`server` options that you can override for a project. In addition, you can add user metadata for a project. -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/storage_btrfs.md b/doc/reference/storage_btrfs.md index 90a1b48e9d23..8c39a37a4ce8 100644 --- a/doc/reference/storage_btrfs.md +++ b/doc/reference/storage_btrfs.md @@ -62,8 +62,8 @@ The following configuration options are available for storage pools that use the (storage-btrfs-pool-config)= ### Storage pool configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -72,8 +72,8 @@ The following configuration options are available for storage pools that use the ### Storage volume configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -82,8 +82,8 @@ The following configuration options are available for storage pools that use the To enable storage buckets for local storage pool drivers and allow applications to access the buckets via the S3 protocol, you must configure the {config:option}`server-core:core.storage_buckets_address` server setting. 
-% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/storage_ceph.md b/doc/reference/storage_ceph.md index 1d40930ec719..e35ba71683e0 100644 --- a/doc/reference/storage_ceph.md +++ b/doc/reference/storage_ceph.md @@ -89,8 +89,8 @@ The following configuration options are available for storage pools that use the (storage-ceph-pool-config)= ### Storage pool configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -100,8 +100,8 @@ The following configuration options are available for storage pools that use the (storage-ceph-vol-config)= ### Storage volume configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/storage_cephfs.md b/doc/reference/storage_cephfs.md index a3e912847ce4..f1b11c5ee423 100644 --- a/doc/reference/storage_cephfs.md +++ b/doc/reference/storage_cephfs.md @@ -65,8 +65,8 @@ The following configuration options are available for storage pools that use the (storage-cephfs-pool-config)= ### Storage pool configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -75,8 +75,8 @@ The following configuration options are available for storage pools that use the ### Storage volume configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} 
../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/storage_cephobject.md b/doc/reference/storage_cephobject.md index 8ba936cd90fd..619827205514 100644 --- a/doc/reference/storage_cephobject.md +++ b/doc/reference/storage_cephobject.md @@ -65,16 +65,16 @@ The following configuration options are available for storage pools that use the (storage-cephobject-pool-config)= ### Storage pool configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` ### Storage bucket configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/storage_dir.md b/doc/reference/storage_dir.md index 414d34241a14..665a009640cc 100644 --- a/doc/reference/storage_dir.md +++ b/doc/reference/storage_dir.md @@ -28,8 +28,8 @@ The following configuration options are available for storage pools that use the ### Storage pool configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -38,8 +38,8 @@ The following configuration options are available for storage pools that use the ### Storage volume configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/storage_lvm.md b/doc/reference/storage_lvm.md index 
f2b570728e70..27c5a899102c 100644 --- a/doc/reference/storage_lvm.md +++ b/doc/reference/storage_lvm.md @@ -44,8 +44,8 @@ The following configuration options are available for storage pools that use the (storage-lvm-pool-config)= ### Storage pool configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -55,8 +55,8 @@ The following configuration options are available for storage pools that use the (storage-lvm-vol-config)= ### Storage volume configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -65,8 +65,8 @@ The following configuration options are available for storage pools that use the To enable storage buckets for local storage pool drivers and allow applications to access the buckets via the S3 protocol, you must configure the {config:option}`server-core:core.storage_buckets_address` server setting. 
-% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/storage_powerflex.md b/doc/reference/storage_powerflex.md index ed63d1c1dd27..cde181cff064 100644 --- a/doc/reference/storage_powerflex.md +++ b/doc/reference/storage_powerflex.md @@ -109,8 +109,8 @@ The following configuration options are available for storage pools that use the (storage-powerflex-pool-config)= ### Storage pool configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -120,8 +120,8 @@ The following configuration options are available for storage pools that use the (storage-powerflex-vol-config)= ### Storage volume configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/reference/storage_zfs.md b/doc/reference/storage_zfs.md index d8cf487c8805..09c1274fe84d 100644 --- a/doc/reference/storage_zfs.md +++ b/doc/reference/storage_zfs.md @@ -98,8 +98,8 @@ The following configuration options are available for storage pools that use the (storage-zfs-pool-config)= ### Storage pool configuration -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -109,8 +109,8 @@ The following configuration options are available for storage pools that use the (storage-zfs-vol-config)= ### Storage volume configuration -% Include content from 
[../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` @@ -119,8 +119,8 @@ The following configuration options are available for storage pools that use the To enable storage buckets for local storage pool drivers and allow applications to access the buckets via the S3 protocol, you must configure the {config:option}`server-core:core.storage_buckets_address` server setting. -% Include content from [../config_options.txt](../config_options.txt) -```{include} ../config_options.txt +% Include content from [../metadata.txt](../metadata.txt) +```{include} ../metadata.txt :start-after: :end-before: ``` diff --git a/doc/server.md b/doc/server.md index 74620c82bcf5..99a05f03053d 100644 --- a/doc/server.md +++ b/doc/server.md @@ -26,8 +26,8 @@ Options with a `local` scope must be set on a per-member basis. The following server options control the core daemon configuration: -% Include content from [config_options.txt](config_options.txt) -```{include} config_options.txt +% Include content from [metadata.txt](metadata.txt) +```{include} metadata.txt :start-after: :end-before: ``` @@ -37,8 +37,8 @@ The following server options control the core daemon configuration: The following server options control the {ref}`ACME ` configuration: -% Include content from [config_options.txt](config_options.txt) -```{include} config_options.txt +% Include content from [metadata.txt](metadata.txt) +```{include} metadata.txt :start-after: :end-before: ``` @@ -48,8 +48,8 @@ The following server options control the {ref}`ACME :end-before: ``` @@ -59,8 +59,8 @@ The following server options configure external user authentication through {ref The following server options control {ref}`clustering`: -% Include content from [config_options.txt](config_options.txt) -```{include} config_options.txt +% Include content from [metadata.txt](metadata.txt) 
+```{include} metadata.txt :start-after: :end-before: ``` @@ -70,8 +70,8 @@ The following server options control {ref}`clustering`: The following server options configure how to handle {ref}`images`: -% Include content from [config_options.txt](config_options.txt) -```{include} config_options.txt +% Include content from [metadata.txt](metadata.txt) +```{include} metadata.txt :start-after: :end-before: ``` @@ -81,8 +81,8 @@ The following server options configure how to handle {ref}`images`: The following server options configure the external log aggregation system: -% Include content from [config_options.txt](config_options.txt) -```{include} config_options.txt +% Include content from [metadata.txt](metadata.txt) +```{include} metadata.txt :start-after: :end-before: ``` @@ -92,8 +92,8 @@ The following server options configure the external log aggregation system: The following server options configure server-specific settings for {ref}`instances`, MAAS integration, {ref}`OVN ` integration, {ref}`Backups ` and {ref}`storage`: -% Include content from [config_options.txt](config_options.txt) -```{include} config_options.txt +% Include content from [metadata.txt](metadata.txt) +```{include} metadata.txt :start-after: :end-before: ``` From 0b4af811e4f28e3ff5dd34fdaf3e64053a08b31f Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Wed, 10 Jul 2024 18:09:42 +0100 Subject: [PATCH 007/106] workflows: Rename config_options.txt -> metadata.txt. 
Signed-off-by: Mark Laing --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dc812c6f7d1d..709a151d8626 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -160,7 +160,7 @@ jobs: run: | set -eux sudo chmod o+w ./lxd/metadata/configuration.json - sudo chmod o+w ./doc/config_options.txt + sudo chmod o+w ./doc/metadata.txt sudo chmod o+w ./po/* make static-analysis From 55125556bd5db1e3596a4ee36f246e954670fc91 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Wed, 10 Jul 2024 18:09:56 +0100 Subject: [PATCH 008/106] Makefile: Rename config_options.txt -> metadata.txt. Signed-off-by: Mark Laing --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 957995e7c994..e9b8a5d669bd 100644 --- a/Makefile +++ b/Makefile @@ -118,7 +118,7 @@ endif .PHONY: update-metadata update-metadata: build @echo "Generating golang documentation metadata" - $(GOPATH)/bin/lxd-metadata . --json ./lxd/metadata/configuration.json --txt ./doc/config_options.txt --substitution-db ./doc/substitutions.yaml + $(GOPATH)/bin/lxd-metadata . --json ./lxd/metadata/configuration.json --txt ./doc/metadata.txt --substitution-db ./doc/substitutions.yaml .PHONY: doc doc: doc-clean doc-install doc-html doc-objects From 4d233f7a4cb03d909fa5c18fa42795b0e904069d Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Wed, 10 Jul 2024 18:15:21 +0100 Subject: [PATCH 009/106] test/lint: Rename config_options.txt -> metadata.txt. 
Signed-off-by: Mark Laing --- test/lint/metadata-up-to-date.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/lint/metadata-up-to-date.sh b/test/lint/metadata-up-to-date.sh index 6744d0d571d9..b1fcf03c9667 100755 --- a/test/lint/metadata-up-to-date.sh +++ b/test/lint/metadata-up-to-date.sh @@ -4,7 +4,7 @@ set -eu hash_before="lxd/metadata-before.txt" hash_after="lxd/metadata-after.txt" json_metadata="lxd/metadata/configuration.json" -doc_config_options="doc/config_options.txt" +doc_config_options="doc/metadata.txt" metadata_hash() { files_to_check="${json_metadata} ${doc_config_options}" From bcb1f9fd09ce5b2669415cbc3cc7f93fa3b235a0 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Jul 2024 11:21:44 +0100 Subject: [PATCH 010/106] {doc,lxd/metadata}: Run make update-metadata. Signed-off-by: Mark Laing --- doc/metadata.txt | 459 +++++++++++++++++++++++++ lxd/metadata/configuration.json | 582 ++++++++++++++++++++++++++++++++ 2 files changed, 1041 insertions(+) diff --git a/doc/metadata.txt b/doc/metadata.txt index 6beb74d03048..340a678415f4 100644 --- a/doc/metadata.txt +++ b/doc/metadata.txt @@ -5814,3 +5814,462 @@ container or containers that use it. This allows using the `zfs` command in the ``` + +`can_view` +: Grants permission to view the certificate. + +`can_edit` +: Grants permission to edit the certificate. + +`can_delete` +: Grants permission to delete the certificate. + + + + +`can_view` +: Grants permission to view the group. Identities can always view groups that they are a member of. + +`can_edit` +: Grants permission to edit the group. + +`can_delete` +: Grants permission to delete the group. + + + + +`can_view` +: Grants permission to view the identity. + +`can_edit` +: Grants permission to edit the identity. + +`can_delete` +: Grants permission to delete the identity. + + + + +`can_view` +: Grants permission to view the identity provider group. + +`can_edit` +: Grants permission to edit the identity provider group. 
+ +`can_delete` +: Grants permission to delete the identity provider group. + + + + +`can_edit` +: Grants permission to edit the image. + +`can_delete` +: Grants permission to delete the image. + +`can_view` +: Grants permission to view the image. + + + + +`can_edit` +: Grants permission to edit the image alias. + +`can_delete` +: Grants permission to delete the image alias. + +`can_view` +: Grants permission to view the image alias. + + + + +`user` +: Grants permission to view the instance, to access files, and to start a terminal or console session. + +`operator` +: Grants permission to view the instance, to access files, start a terminal or console session, and to manage snapshots and backups. + +`can_edit` +: Grants permission to edit the instance. + +`can_delete` +: Grants permission to delete the instance. + +`can_view` +: Grants permission to view the instance. + +`can_update_state` +: Grants permission to change the instance state. + +`can_manage_snapshots` +: Grants permission to create and delete snapshots of the instance. + +`can_manage_backups` +: Grants permission to create and delete backups of the instance. + +`can_connect_sftp` +: Grants permission to get an SFTP client for the instance. + +`can_access_files` +: Grants permission to push or pull files into or out of the instance. + +`can_access_console` +: Grants permission to start a console session. + +`can_exec` +: Grants permission to start a terminal session. + + + + +`can_edit` +: Grants permission to edit the network. + +`can_delete` +: Grants permission to delete the network. + +`can_view` +: Grants permission to view the network. + + + + +`can_edit` +: Grants permission to edit the network ACL. + +`can_delete` +: Grants permission to delete the network ACL. + +`can_view` +: Grants permission to view the network ACL. + + + + +`can_edit` +: Grants permission to edit the network zone. + +`can_delete` +: Grants permission to delete the network zone. 
+ +`can_view` +: Grants permission to view the network zone. + + + + +`can_edit` +: Grants permission to edit the profile. + +`can_delete` +: Grants permission to delete the profile. + +`can_view` +: Grants permission to view the profile. + + + + +`operator` +: Grants permission to create, view, edit, and delete all resources belonging to the project, but does not grant permission to edit the project configuration itself. + +`viewer` +: Grants permission to view all resources belonging to the project. + +`can_view` +: Grants permission to view the project. + +`can_edit` +: Grants permission to edit the project. + +`can_delete` +: Grants permission to delete the project. + +`image_manager` +: Grants permission to create, view, edit, and delete all images belonging to the project. + +`can_create_images` +: Grants permission to create images. + +`can_view_images` +: Grants permission to view images. + +`can_edit_images` +: Grants permission to edit images. + +`can_delete_images` +: Grants permission to delete images. + +`image_alias_manager` +: Grants permission to create, view, edit, and delete all image aliases belonging to the project. + +`can_create_image_aliases` +: Grants permission to create image aliases. + +`can_view_image_aliases` +: Grants permission to view image aliases. + +`can_edit_image_aliases` +: Grants permission to edit image aliases. + +`can_delete_image_aliases` +: Grants permission to delete image aliases. + +`instance_manager` +: Grants permission to create, view, edit, and delete all instances belonging to the project. + +`can_create_instances` +: Grants permission to create instances. + +`can_view_instances` +: Grants permission to view instances. + +`can_edit_instances` +: Grants permission to edit instances. + +`can_delete_instances` +: Grants permission to delete instances. 
+ +`can_operate_instances` +: Grants permission to view instances, manage their state, manage their snapshots and backups, start terminal or console sessions, and access their files. + +`network_manager` +: Grants permission to create, view, edit, and delete all networks belonging to the project. + +`can_create_networks` +: Grants permission to create networks. + +`can_view_networks` +: Grants permission to view networks. + +`can_edit_networks` +: Grants permission to edit networks. + +`can_delete_networks` +: Grants permission to delete networks. + +`network_acl_manager` +: Grants permission to create, view, edit, and delete all network ACLs belonging to the project. + +`can_create_network_acls` +: Grants permission to create network ACLs. + +`can_view_network_acls` +: Grants permission to view network ACLs. + +`can_edit_network_acls` +: Grants permission to edit network ACLs. + +`can_delete_network_acls` +: Grants permission to delete network ACLs. + +`network_zone_manager` +: Grants permission to create, view, edit, and delete all network zones belonging to the project. + +`can_create_network_zones` +: Grants permission to create network zones. + +`can_view_network_zones` +: Grants permission to view network zones. + +`can_edit_network_zones` +: Grants permission to edit network zones. + +`can_delete_network_zones` +: Grants permission to delete network zones. + +`profile_manager` +: Grants permission to create, view, edit, and delete all profiles belonging to the project. + +`can_create_profiles` +: Grants permission to create profiles. + +`can_view_profiles` +: Grants permission to view profiles. + +`can_edit_profiles` +: Grants permission to edit profiles. + +`can_delete_profiles` +: Grants permission to delete profiles. + +`storage_volume_manager` +: Grants permission to create, view, edit, and delete all storage volumes belonging to the project. + +`can_create_storage_volumes` +: Grants permission to create storage volumes. 
+ +`can_view_storage_volumes` +: Grants permission to view storage volumes. + +`can_edit_storage_volumes` +: Grants permission to edit storage volumes. + +`can_delete_storage_volumes` +: Grants permission to delete storage volumes. + +`storage_bucket_manager` +: Grants permission to create, view, edit, and delete all storage buckets belonging to the project. + +`can_create_storage_buckets` +: Grants permission to create storage buckets. + +`can_view_storage_buckets` +: Grants permission to view storage buckets. + +`can_edit_storage_buckets` +: Grants permission to edit storage buckets. + +`can_delete_storage_buckets` +: Grants permission to delete storage buckets. + +`can_view_operations` +: Grants permission to view operations relating to the project. + +`can_view_events` +: Grants permission to view events relating to the project. + +`can_view_metrics` +: Grants permission to view project level metrics. + + + + +`admin` +: Grants full access to LXD as if via Unix socket. + +`viewer` +: Grants access to view all resources in the LXD server. + +`can_edit` +: Grants permission to edit server configuration, to edit cluster member configuration, to update the state of a cluster member, to create, edit, and delete cluster groups, to update cluster member certificates, and to edit or delete warnings. + +`permission_manager` +: Grants permission to view permissions, to create, edit, and delete identities, to view, create, edit, and delete authorization groups, and to view, create, edit, and delete identity provider groups. Note that clients with this permission are able to elevate their own privileges. + +`can_view_permissions` +: Grants permission to view permissions. + +`can_create_identities` +: Grants permission to create identities. + +`can_view_identities` +: Grants permission to view identities. + +`can_edit_identities` +: Grants permission to edit identities. + +`can_delete_identities` +: Grants permission to delete identities. 
+ +`can_create_groups` +: Grants permission to create authorization groups. + +`can_view_groups` +: Grants permission to view authorization groups. + +`can_edit_groups` +: Grants permission to edit authorization groups. + +`can_delete_groups` +: Grants permission to delete authorization groups. + +`can_create_identity_provider_groups` +: Grants permission to create identity provider groups. + +`can_view_identity_provider_groups` +: Grants permission to view identity provider groups. + +`can_edit_identity_provider_groups` +: Grants permission to edit identity provider groups. + +`can_delete_identity_provider_groups` +: Grants permission to delete identity provider groups. + +`storage_pool_manager` +: Grants permission to create, edit, and delete storage pools. + +`can_create_storage_pools` +: Grants permission to create storage pools. + +`can_edit_storage_pools` +: Grants permission to edit storage pools. + +`can_delete_storage_pools` +: Grants permission to delete storage pools. + +`project_manager` +: Grants permission to view, create, edit, and delete projects, and to create, edit, and delete any resources that are owned by those projects. + +`can_create_projects` +: Grants permission to create projects. + +`can_view_projects` +: Grants permission to view projects, and all resources within those projects. + +`can_edit_projects` +: Grants permission to edit projects, and all resources within those projects. + +`can_delete_projects` +: Grants permission to delete projects. + +`can_override_cluster_target_restriction` +: If a project is configured with `restricted.cluster.target`, clients with this permission can override the restriction. + +`can_view_privileged_events` +: Grants permission to view privileged event types, such as logging events. + +`can_view_resources` +: Grants permission to view server and storage pool resource usage information. + +`can_view_metrics` +: Grants permission to view all server and project level metrics. 
+ +`can_view_warnings` +: Grants permission to view warnings. + + + + +`can_edit` +: Grants permission to edit the storage bucket. + +`can_delete` +: Grants permission to delete the storage bucket. + +`can_view` +: Grants permission to view the storage bucket. + + + + +`can_edit` +: Grants permission to edit the storage pool. + +`can_delete` +: Grants permission to delete the storage pool. + + + + +`can_edit` +: Grants permission to edit the storage volume. + +`can_delete` +: Grants permission to delete the storage volume. + +`can_view` +: Grants permission to view the storage volume. + +`can_manage_snapshots` +: Grants permission to create and delete snapshots of the storage volume. + +`can_manage_backups` +: Grants permission to create and delete backups of the storage volume. + + + diff --git a/lxd/metadata/configuration.json b/lxd/metadata/configuration.json index ba442bc9155d..40d6e1bd6334 100644 --- a/lxd/metadata/configuration.json +++ b/lxd/metadata/configuration.json @@ -6514,5 +6514,587 @@ ] } } + }, + "entities": { + "certificate": [ + { + "name": "can_view", + "description": "Grants permission to view the certificate." + }, + { + "name": "can_edit", + "description": "Grants permission to edit the certificate." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the certificate." + } + ], + "group": [ + { + "name": "can_view", + "description": "Grants permission to view the group. Identities can always view groups that they are a member of." + }, + { + "name": "can_edit", + "description": "Grants permission to edit the group." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the group." + } + ], + "identity": [ + { + "name": "can_view", + "description": "Grants permission to view the identity." + }, + { + "name": "can_edit", + "description": "Grants permission to edit the identity." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the identity." 
+ } + ], + "identity_provider_group": [ + { + "name": "can_view", + "description": "Grants permission to view the identity provider group." + }, + { + "name": "can_edit", + "description": "Grants permission to edit the identity provider group." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the identity provider group." + } + ], + "image": [ + { + "name": "can_edit", + "description": "Grants permission to edit the image." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the image." + }, + { + "name": "can_view", + "description": "Grants permission to view the image." + } + ], + "image_alias": [ + { + "name": "can_edit", + "description": "Grants permission to edit the image alias." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the image alias." + }, + { + "name": "can_view", + "description": "Grants permission to view the image alias." + } + ], + "instance": [ + { + "name": "user", + "description": "Grants permission to view the instance, to access files, and to start a terminal or console session." + }, + { + "name": "operator", + "description": "Grants permission to view the instance, to access files, start a terminal or console session, and to manage snapshots and backups." + }, + { + "name": "can_edit", + "description": "Grants permission to edit the instance." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the instance." + }, + { + "name": "can_view", + "description": "Grants permission to view the instance." + }, + { + "name": "can_update_state", + "description": "Grants permission to change the instance state." + }, + { + "name": "can_manage_snapshots", + "description": "Grants permission to create and delete snapshots of the instance." + }, + { + "name": "can_manage_backups", + "description": "Grants permission to create and delete backups of the instance." 
+ }, + { + "name": "can_connect_sftp", + "description": "Grants permission to get an SFTP client for the instance." + }, + { + "name": "can_access_files", + "description": "Grants permission to push or pull files into or out of the instance." + }, + { + "name": "can_access_console", + "description": "Grants permission to start a console session." + }, + { + "name": "can_exec", + "description": "Grants permission to start a terminal session." + } + ], + "network": [ + { + "name": "can_edit", + "description": "Grants permission to edit the network." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the network." + }, + { + "name": "can_view", + "description": "Grants permission to view the network." + } + ], + "network_acl": [ + { + "name": "can_edit", + "description": "Grants permission to edit the network ACL." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the network ACL." + }, + { + "name": "can_view", + "description": "Grants permission to view the network ACL." + } + ], + "network_zone": [ + { + "name": "can_edit", + "description": "Grants permission to edit the network zone." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the network zone." + }, + { + "name": "can_view", + "description": "Grants permission to view the network zone." + } + ], + "profile": [ + { + "name": "can_edit", + "description": "Grants permission to edit the profile." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the profile." + }, + { + "name": "can_view", + "description": "Grants permission to view the profile." + } + ], + "project": [ + { + "name": "operator", + "description": "Grants permission to create, view, edit, and delete all resources belonging to the project, but does not grant permission to edit the project configuration itself." + }, + { + "name": "viewer", + "description": "Grants permission to view all resources belonging to the project." 
+ }, + { + "name": "can_view", + "description": "Grants permission to view the project." + }, + { + "name": "can_edit", + "description": "Grants permission to edit the project." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the project." + }, + { + "name": "image_manager", + "description": "Grants permission to create, view, edit, and delete all images belonging to the project." + }, + { + "name": "can_create_images", + "description": "Grants permission to create images." + }, + { + "name": "can_view_images", + "description": "Grants permission to view images." + }, + { + "name": "can_edit_images", + "description": "Grants permission to edit images." + }, + { + "name": "can_delete_images", + "description": "Grants permission to delete images." + }, + { + "name": "image_alias_manager", + "description": "Grants permission to create, view, edit, and delete all image aliases belonging to the project." + }, + { + "name": "can_create_image_aliases", + "description": "Grants permission to create image aliases." + }, + { + "name": "can_view_image_aliases", + "description": "Grants permission to view image aliases." + }, + { + "name": "can_edit_image_aliases", + "description": "Grants permission to edit image aliases." + }, + { + "name": "can_delete_image_aliases", + "description": "Grants permission to delete image aliases." + }, + { + "name": "instance_manager", + "description": "Grants permission to create, view, edit, and delete all instances belonging to the project." + }, + { + "name": "can_create_instances", + "description": "Grants permission to create instances." + }, + { + "name": "can_view_instances", + "description": "Grants permission to view instances." + }, + { + "name": "can_edit_instances", + "description": "Grants permission to edit instances." + }, + { + "name": "can_delete_instances", + "description": "Grants permission to delete instances." 
+ }, + { + "name": "can_operate_instances", + "description": "Grants permission to view instances, manage their state, manage their snapshots and backups, start terminal or console sessions, and access their files." + }, + { + "name": "network_manager", + "description": "Grants permission to create, view, edit, and delete all networks belonging to the project." + }, + { + "name": "can_create_networks", + "description": "Grants permission to create networks." + }, + { + "name": "can_view_networks", + "description": "Grants permission to view networks." + }, + { + "name": "can_edit_networks", + "description": "Grants permission to edit networks." + }, + { + "name": "can_delete_networks", + "description": "Grants permission to delete networks." + }, + { + "name": "network_acl_manager", + "description": "Grants permission to create, view, edit, and delete all network ACLs belonging to the project." + }, + { + "name": "can_create_network_acls", + "description": "Grants permission to create network ACLs." + }, + { + "name": "can_view_network_acls", + "description": "Grants permission to view network ACLs." + }, + { + "name": "can_edit_network_acls", + "description": "Grants permission to edit network ACLs." + }, + { + "name": "can_delete_network_acls", + "description": "Grants permission to delete network ACLs." + }, + { + "name": "network_zone_manager", + "description": "Grants permission to create, view, edit, and delete all network zones belonging to the project." + }, + { + "name": "can_create_network_zones", + "description": "Grants permission to create network zones." + }, + { + "name": "can_view_network_zones", + "description": "Grants permission to view network zones." + }, + { + "name": "can_edit_network_zones", + "description": "Grants permission to edit network zones." + }, + { + "name": "can_delete_network_zones", + "description": "Grants permission to delete network zones." 
+ }, + { + "name": "profile_manager", + "description": "Grants permission to create, view, edit, and delete all profiles belonging to the project." + }, + { + "name": "can_create_profiles", + "description": "Grants permission to create profiles." + }, + { + "name": "can_view_profiles", + "description": "Grants permission to view profiles." + }, + { + "name": "can_edit_profiles", + "description": "Grants permission to edit profiles." + }, + { + "name": "can_delete_profiles", + "description": "Grants permission to delete profiles." + }, + { + "name": "storage_volume_manager", + "description": "Grants permission to create, view, edit, and delete all storage volumes belonging to the project." + }, + { + "name": "can_create_storage_volumes", + "description": "Grants permission to create storage volumes." + }, + { + "name": "can_view_storage_volumes", + "description": "Grants permission to view storage volumes." + }, + { + "name": "can_edit_storage_volumes", + "description": "Grants permission to edit storage volumes." + }, + { + "name": "can_delete_storage_volumes", + "description": "Grants permission to delete storage volumes." + }, + { + "name": "storage_bucket_manager", + "description": "Grants permission to create, view, edit, and delete all storage buckets belonging to the project." + }, + { + "name": "can_create_storage_buckets", + "description": "Grants permission to create storage buckets." + }, + { + "name": "can_view_storage_buckets", + "description": "Grants permission to view storage buckets." + }, + { + "name": "can_edit_storage_buckets", + "description": "Grants permission to edit storage buckets." + }, + { + "name": "can_delete_storage_buckets", + "description": "Grants permission to delete storage buckets." + }, + { + "name": "can_view_operations", + "description": "Grants permission to view operations relating to the project." + }, + { + "name": "can_view_events", + "description": "Grants permission to view events relating to the project." 
+ }, + { + "name": "can_view_metrics", + "description": "Grants permission to view project level metrics." + } + ], + "server": [ + { + "name": "admin", + "description": "Grants full access to LXD as if via Unix socket." + }, + { + "name": "viewer", + "description": "Grants access to view all resources in the LXD server." + }, + { + "name": "can_edit", + "description": "Grants permission to edit server configuration, to edit cluster member configuration, to update the state of a cluster member, to create, edit, and delete cluster groups, to update cluster member certificates, and to edit or delete warnings." + }, + { + "name": "permission_manager", + "description": "Grants permission to view permissions, to create, edit, and delete identities, to view, create, edit, and delete authorization groups, and to view, create, edit, and delete identity provider groups. Note that clients with this permission are able to elevate their own privileges." + }, + { + "name": "can_view_permissions", + "description": "Grants permission to view permissions." + }, + { + "name": "can_create_identities", + "description": "Grants permission to create identities." + }, + { + "name": "can_view_identities", + "description": "Grants permission to view identities." + }, + { + "name": "can_edit_identities", + "description": "Grants permission to edit identities." + }, + { + "name": "can_delete_identities", + "description": "Grants permission to delete identities." + }, + { + "name": "can_create_groups", + "description": "Grants permission to create authorization groups." + }, + { + "name": "can_view_groups", + "description": "Grants permission to view authorization groups." + }, + { + "name": "can_edit_groups", + "description": "Grants permission to edit authorization groups." + }, + { + "name": "can_delete_groups", + "description": "Grants permission to delete authorization groups." 
+ }, + { + "name": "can_create_identity_provider_groups", + "description": "Grants permission to create identity provider groups." + }, + { + "name": "can_view_identity_provider_groups", + "description": "Grants permission to view identity provider groups." + }, + { + "name": "can_edit_identity_provider_groups", + "description": "Grants permission to edit identity provider groups." + }, + { + "name": "can_delete_identity_provider_groups", + "description": "Grants permission to delete identity provider groups." + }, + { + "name": "storage_pool_manager", + "description": "Grants permission to create, edit, and delete storage pools." + }, + { + "name": "can_create_storage_pools", + "description": "Grants permission to create storage pools." + }, + { + "name": "can_edit_storage_pools", + "description": "Grants permission to edit storage pools." + }, + { + "name": "can_delete_storage_pools", + "description": "Grants permission to delete storage pools." + }, + { + "name": "project_manager", + "description": "Grants permission to view, create, edit, and delete projects, and to create, edit, and delete any resources that are owned by those projects." + }, + { + "name": "can_create_projects", + "description": "Grants permission to create projects." + }, + { + "name": "can_view_projects", + "description": "Grants permission to view projects, and all resources within those projects." + }, + { + "name": "can_edit_projects", + "description": "Grants permission to edit projects, and all resources within those projects." + }, + { + "name": "can_delete_projects", + "description": "Grants permission to delete projects." + }, + { + "name": "can_override_cluster_target_restriction", + "description": "If a project is configured with `restricted.cluster.target`, clients with this permission can override the restriction." + }, + { + "name": "can_view_privileged_events", + "description": "Grants permission to view privileged event types, such as logging events." 
+ }, + { + "name": "can_view_resources", + "description": "Grants permission to view server and storage pool resource usage information." + }, + { + "name": "can_view_metrics", + "description": "Grants permission to view all server and project level metrics." + }, + { + "name": "can_view_warnings", + "description": "Grants permission to view warnings." + } + ], + "storage_bucket": [ + { + "name": "can_edit", + "description": "Grants permission to edit the storage bucket." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the storage bucket." + }, + { + "name": "can_view", + "description": "Grants permission to view the storage bucket." + } + ], + "storage_pool": [ + { + "name": "can_edit", + "description": "Grants permission to edit the storage pool." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the storage pool." + } + ], + "storage_volume": [ + { + "name": "can_edit", + "description": "Grants permission to edit the storage volume." + }, + { + "name": "can_delete", + "description": "Grants permission to delete the storage volume." + }, + { + "name": "can_view", + "description": "Grants permission to view the storage volume." + }, + { + "name": "can_manage_snapshots", + "description": "Grants permission to create and delete snapshots of the storage volume." + }, + { + "name": "can_manage_backups", + "description": "Grants permission to create and delete backups of the storage volume." 
+ } + ] } } \ No newline at end of file From 3d75871d2fed798991d7073f65ccb1c6235d05a1 Mon Sep 17 00:00:00 2001 From: Din Music Date: Mon, 15 Apr 2024 10:46:05 +0200 Subject: [PATCH 011/106] api: Add api extension instance_import_conversion Signed-off-by: Din Music --- doc/api-extensions.md | 4 ++++ shared/version/api.go | 1 + 2 files changed, 5 insertions(+) diff --git a/doc/api-extensions.md b/doc/api-extensions.md index 41c1a3d86bea..bfe703418e79 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -2424,3 +2424,7 @@ The OVN driver will allocate IP addresses from the subnets specified in the upli Adds the ability to explicitly specify a trust token when creating a certificate and joining an existing cluster. + +## `instance_import_conversion` + +Adds the ability to convert images from different formats (e.g. VMDK or QCow2) into RAW image format and import them as LXD instances. diff --git a/shared/version/api.go b/shared/version/api.go index 41c97037c691..9f20873381bc 100644 --- a/shared/version/api.go +++ b/shared/version/api.go @@ -408,6 +408,7 @@ var APIExtensions = []string{ "device_usb_serial", "network_allocate_external_ips", "explicit_trust_token", + "instance_import_conversion", } // APIExtensionsCount returns the number of available API extensions. 
From 8b3dc26feece41e7bfadd80b6533b3f1abd1188c Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 25 Apr 2024 13:42:17 +0000 Subject: [PATCH 012/106] shared/api/instance: Add ConversionOptions and SourceDiskSize fields to the InstanceSource struct Signed-off-by: Din Music --- shared/api/instance.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/shared/api/instance.go b/shared/api/instance.go index efb92359d942..839f0c69724e 100644 --- a/shared/api/instance.go +++ b/shared/api/instance.go @@ -418,6 +418,19 @@ type InstanceSource struct { // // API extension: instance_allow_inconsistent_copy AllowInconsistent bool `json:"allow_inconsistent" yaml:"allow_inconsistent"` + + // Source disk size in bytes used to set the instance's volume size to accommodate the transferred root + // disk. This value is ignored if the root disk device has a size explicitly configured (for conversion). + // Example: 12345 + // + // API extension: instance_import_conversion + SourceDiskSize int64 `json:"sourceDiskSize" yaml:"sourceDiskSize"` + + // Optional list of options that are used during image conversion (for conversion). + // Example: ["format"] + // + // API extension: instance_import_conversion + ConversionOptions []string `json:"conversion_options" yaml:"conversion_options"` } // InstanceUEFIVars represents the UEFI variables of a LXD virtual machine. 
From a0c713672d011ee82069faadf47f20bd751097b9 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 25 Apr 2024 09:59:34 +0000 Subject: [PATCH 013/106] lxd/migration: Add field conversion options to volume target arguments Signed-off-by: Din Music --- lxd/migration/migration_volumes.go | 1 + 1 file changed, 1 insertion(+) diff --git a/lxd/migration/migration_volumes.go b/lxd/migration/migration_volumes.go index e13bab1b2443..b2b71eed37de 100644 --- a/lxd/migration/migration_volumes.go +++ b/lxd/migration/migration_volumes.go @@ -72,6 +72,7 @@ type VolumeTargetArgs struct { MigrationType Type TrackProgress bool Refresh bool + ConversionOptions []string Live bool VolumeSize int64 ContentType string From ac4c550870c7d9e0230acc7c906a51110feff261 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 25 Apr 2024 10:03:40 +0000 Subject: [PATCH 014/106] lxd/storage: Create instance from conversion The volume size of the imported VM disk is: 1. User specified storage pool and volume size - respect even if image won't fit in the pool (error out in such case). 2. User did not specify volume size - depends on conversion option format: a) format is enabled: Use the size of the uncompressed image (we get that info from qemu-img info) b) format is disabled: Use the value from SourceDiskSize field - We need to know the volume size before copying the image / disk as it will go directly into the volume. 
Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 334 ++++++++++++++++++++++++++++++++++ lxd/storage/backend_mock.go | 5 + lxd/storage/pool_interface.go | 1 + 3 files changed, 340 insertions(+) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index ebafe4447918..36e2e8dfa99e 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -12,6 +12,7 @@ import ( "net/url" "os" "path/filepath" + "slices" "strings" "sync" "time" @@ -22,6 +23,7 @@ import ( "golang.org/x/sync/errgroup" "gopkg.in/yaml.v2" + "github.com/canonical/lxd/lxd/apparmor" "github.com/canonical/lxd/lxd/backup" backupConfig "github.com/canonical/lxd/lxd/backup/config" "github.com/canonical/lxd/lxd/cluster/request" @@ -36,6 +38,7 @@ import ( "github.com/canonical/lxd/lxd/operations" "github.com/canonical/lxd/lxd/project" "github.com/canonical/lxd/lxd/response" + "github.com/canonical/lxd/lxd/rsync" "github.com/canonical/lxd/lxd/state" "github.com/canonical/lxd/lxd/storage/block" "github.com/canonical/lxd/lxd/storage/drivers" @@ -1806,6 +1809,120 @@ func (b *lxdBackend) isoFiller(data io.Reader) func(vol drivers.Volume, rootBloc } } +// imageConversionFiller returns a function that converts an image from the given path to the instance's volume. +// Function returns the configured size of the volume in bytes on success. Otherwise, it returns -1 for size and an error. +func (b *lxdBackend) imageConversionFiller(imgPath string, imgFormat string) func(vol drivers.Volume, rootBlockPath string, allowUnsafeResize bool) (sizeInBytes int64, err error) { + return func(vol drivers.Volume, rootBlockPath string, allowUnsafeResize bool) (int64, error) { + diskPath, err := b.driver.GetVolumeDiskPath(vol) + if err != nil { + return -1, fmt.Errorf("Failed getting instance volume disk path: %v", err) + } + + // Ensure conversion supports the uploaded image format.
+ supportedImageFormats := []string{"qcow", "qcow2", "raw", "vdi", "vhdx", "vmdk"} + if !shared.ValueInSlice(imgFormat, supportedImageFormats) { + return -1, fmt.Errorf("Unsupported image format %q, allowed formats are [%s]", imgFormat, strings.Join(supportedImageFormats, ", ")) + } + + // Convert uploaded image from backups directory into RAW format on the instance volume. + cmd := []string{ + // Use prlimit to limit QEMU to 1 GiB address space and 120 seconds of CPU time. + "prlimit", "--cpu=120", "--as=1073741824", + "qemu-img", "convert", "-f", imgFormat, "-O", "raw", imgPath, diskPath, + } + + b.logger.Debug("Image conversion started") + defer b.logger.Debug("Image conversion finished") + + out, err := apparmor.QemuImg(b.state.OS, cmd, imgPath, diskPath) + if err != nil { + b.logger.Debug("Image conversion failed", logger.Ctx{"error": out}) + return -1, fmt.Errorf("qemu-img convert: failed to convert image from %q to %q format: %v", imgFormat, "raw", err) + } + + // Convert volume size to bytes. + volSizeBytes, err := units.ParseByteSizeString(vol.ConfigSize()) + if err != nil { + return -1, err + } + + return volSizeBytes, nil + } +} + +// recvVolumeFiller returns a function that receives the instance's volume. +// Function returns the volume size on success. Otherwise, it returns -1 for size and an error. +func (b *lxdBackend) recvVolumeFiller(conn io.ReadWriteCloser, contentType drivers.ContentType, args migration.VolumeTargetArgs, op *operations.Operation) func(vol drivers.Volume, rootBlockPath string, allowUnsafeResize bool) (sizeInBytes int64, err error) { + return func(vol drivers.Volume, rootBlockPath string, allowUnsafeResize bool) (int64, error) { + if contentType == drivers.ContentTypeFS { + // Receive filesystem. + err := b.recvFS(vol.MountPath(), vol.Name(), conn, args, op) + if err != nil { + return -1, err + } + } else { + // Receive block volume. 
+ to, err := os.OpenFile(rootBlockPath, os.O_WRONLY|os.O_TRUNC, 0) + if err != nil { + return -1, fmt.Errorf("Error opening file for writing %q: %w", rootBlockPath, err) + } + + defer func() { _ = to.Close() }() + + err = b.recvBlockVol(to, vol.Name(), conn, args, op) + if err != nil { + return -1, err + } + } + + // Convert volume size to bytes. + volSizeBytes, err := units.ParseByteSizeString(vol.ConfigSize()) + if err != nil { + return -1, err + } + + return volSizeBytes, nil + } +} + +func (b *lxdBackend) recvBlockVol(toFile *os.File, volName string, conn io.ReadWriteCloser, args migration.VolumeTargetArgs, op *operations.Operation) error { + b.logger.Debug("Receive block volume started", logger.Ctx{"volName": volName}) + defer b.logger.Debug("Receive block volume finished", logger.Ctx{"volName": volName}) + + var wrapper *ioprogress.ProgressTracker + if args.TrackProgress { + wrapper = migration.ProgressTracker(op, "block_progress", volName) + } + + // Setup progress tracker. + fromPipe := io.ReadCloser(conn) + if wrapper != nil { + fromPipe = &ioprogress.ProgressReader{ + ReadCloser: fromPipe, + Tracker: wrapper, + } + } + + _, err := io.Copy(toFile, fromPipe) + if err != nil { + return fmt.Errorf("Error copying from migration connection to %q: %w", toFile.Name(), err) + } + + return toFile.Close() +} + +func (b *lxdBackend) recvFS(path string, volName string, conn io.ReadWriteCloser, args migration.VolumeTargetArgs, op *operations.Operation) error { + b.logger.Debug("Receiving filesystem volume started", logger.Ctx{"volName": volName, "path": path, "features": args.MigrationType.Features}) + defer b.logger.Debug("Receiving filesystem volume stopped", logger.Ctx{"volName": volName, "path": path}) + + var wrapper *ioprogress.ProgressTracker + if args.TrackProgress { + wrapper = migration.ProgressTracker(op, "fs_progress", volName) + } + + return rsync.Recv(shared.AddSlash(path), conn, wrapper, args.MigrationType.Features) +} + // CreateInstanceFromImage 
creates a new volume for an instance populated with the image requested. // On failure caller is expected to call DeleteInstance() to clean up. func (b *lxdBackend) CreateInstanceFromImage(inst instance.Instance, fingerprint string, op *operations.Operation) error { @@ -2224,6 +2341,223 @@ func (b *lxdBackend) CreateInstanceFromMigration(inst instance.Instance, conn io return nil } +// CreateInstanceFromConversion receives an image and creates an instance from it. +// Depending on provided conversionOptions, the image is also converted into the +// raw format. +func (b *lxdBackend) CreateInstanceFromConversion(inst instance.Instance, conn io.ReadWriteCloser, args migration.VolumeTargetArgs, op *operations.Operation) error { + l := b.logger.AddContext(logger.Ctx{"project": inst.Project().Name, "instance": inst.Name(), "args": fmt.Sprintf("%+v", args)}) + l.Debug("CreateInstanceFromConversion started") + defer l.Debug("CreateInstanceFromConversion finished") + + err := b.isStatusReady() + if err != nil { + return err + } + + if args.Config != nil { + return fmt.Errorf("VolumeTargetArgs.Config cannot be set for conversion") + } + + if args.Refresh { + return fmt.Errorf("Volume cannot be refreshed during conversion") + } + + if len(args.Snapshots) > 0 { + return fmt.Errorf("Snapshots cannot be received during conversion") + } + + isRemoteClusterMove := args.ClusterMoveSourceName != "" && b.driver.Info().Remote + if isRemoteClusterMove { + return fmt.Errorf("Conversion cannot be used for moving instances between members") + } + + contentType := InstanceContentType(inst) + volType, err := InstanceTypeToVolumeType(inst.Type()) + if err != nil { + return err + } + + volStorageName := project.Instance(inst.Project().Name, inst.Name()) + volConfig := make(map[string]string) + vol := b.GetNewVolume(volType, contentType, volStorageName, volConfig) + + // Ensure storage volume settings are honored when doing conversion.
+ vol.SetHasSource(false) + err = b.driver.FillVolumeConfig(vol) + if err != nil { + return fmt.Errorf("Failed filling volume config: %w", err) + } + + // Check if the volume exists in database + dbVol, err := VolumeDBGet(b, inst.Project().Name, inst.Name(), volType) + if err != nil && !response.IsNotFoundError(err) { + return err + } + + if dbVol != nil { + return fmt.Errorf("Volume for instance %q already exists in database", inst.Name()) + } + + // Check if the volume exists on storage. + volExists, err := b.driver.HasVolume(vol) + if err != nil { + return err + } + + if volExists { + return fmt.Errorf("Volume already exists on storage but not in database") + } + + revert := revert.New() + defer revert.Fail() + + // Validate config and create database entry for new storage volume if not refreshing. + // Strip unsupported config keys (in case the export was made from a different type of storage pool). + err = VolumeDBCreate(b, inst.Project().Name, inst.Name(), args.Description, volType, false, vol.Config(), inst.CreationDate(), time.Time{}, contentType, false, true) + if err != nil { + return err + } + + revert.Add(func() { _ = VolumeDBDelete(b, inst.Project().Name, inst.Name(), volType) }) + + // Generate the effective root device volume for instance. + err = b.applyInstanceRootDiskOverrides(inst, &vol) + if err != nil { + return err + } + + // Override args.Name and args.Config to ensure volume is created based on instance. + args.Config = vol.Config() + args.Name = inst.Name() + + // Get instance's root disk device from local devices. Do not use expanded devices, as we want + // to determine whether the root disk volume size was explicitly set by the client. + canResizeRootDiskSize := true + _, rootDiskConf, err := instancetype.GetRootDiskDevice(inst.LocalDevices().CloneNative()) + if err == nil && rootDiskConf != nil && rootDiskConf["size"] != "" { + // User has explicitly configured the root disk device. 
Therefore, we should not mess + // with the root disk configuration. + canResizeRootDiskSize = false + } + + var srcDiskSize int64 + var volFiller drivers.VolumeFiller + + if slices.Contains(args.ConversionOptions, "format") { + // When conversion option "format" is enabled, we need to upload the image + // to a temporary location before converting it into the desired format. + // The conversion cannot be done in-place, therefore the image has to be + // saved in an intermediate location. + conversionID := fmt.Sprintf("conversion_%s_%s", inst.Project().Name, inst.Name()) + imgPath := filepath.Join(shared.VarPath("backups"), conversionID) + + // Create new file in backups directory. + to, err := os.OpenFile(imgPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) + if err != nil { + return fmt.Errorf("Error opening file for writing %q: %w", imgPath, err) + } + + // Ensure temporary image in backups directory is removed regardless of the conversion success. + defer func() { + _ = to.Close() + _ = os.Remove(imgPath) + }() + + // Receive the image for conversion. + err = b.recvBlockVol(to, vol.Name(), conn, args, op) + if err != nil { + return err + } + + // Extract image format and size. + cmd := []string{ + // Use prlimit because qemu-img can consume considerable RAM & CPU time if fed + // a maliciously crafted disk image. Since cloud tenants are not to be trusted, + // ensure QEMU is limited to 1 GiB address space and 2 seconds of CPU time. + // This should be more than enough for real world images. 
+ "prlimit", "--cpu=2", "--as=1073741824", + "qemu-img", "info", imgPath, "--output", "json", + } + + out, err := apparmor.QemuImg(b.state.OS, cmd, imgPath, "") + if err != nil { + return fmt.Errorf("qemu-img info: %v", err) + } + + imgInfo := struct { + Format string `json:"format"` + Bytes int64 `json:"virtual-size"` + }{} + + err = json.Unmarshal([]byte(out), &imgInfo) + if err != nil { + return fmt.Errorf("Failed to parse image information: %v", err) + } + + srcDiskSize = imgInfo.Bytes + + if canResizeRootDiskSize { + // Set size of the volume to the uncompressed image size. + l.Debug("Setting volume size to uncompressed image size", logger.Ctx{"size": fmt.Sprintf("%d", imgInfo.Bytes)}) + args.Config["size"] = fmt.Sprintf("%d", imgInfo.Bytes) + } + + // Convert received image into instance volume. + volFiller.Fill = b.imageConversionFiller(imgPath, imgInfo.Format) + } else { + // If volume size is provided, then use that as block volume size instead of pool default. + // This way if the volume being received is larger than the pool default size, the created + // block volume will still be able to accommodate it. + if canResizeRootDiskSize && contentType == drivers.ContentTypeBlock && args.VolumeSize > 0 { + l.Debug("Setting volume size to source disk size", logger.Ctx{"size": args.VolumeSize}) + args.Config["size"] = fmt.Sprintf("%d", args.VolumeSize) + } + + srcDiskSize = args.VolumeSize + + // If formatting is not required, receive the volume (block / FS) directly + // into the instance volume. + volFiller.Fill = b.recvVolumeFiller(conn, contentType, args, op) + } + + // Parse volume size into bytes. + volBytes, err := units.ParseByteSizeString(vol.ConfigSize()) + if err != nil { + return fmt.Errorf("Failed parsing instance volume size") + } + + // Parse source disk size into bytes.
+ srcSize, err := units.ParseByteSizeString(fmt.Sprintf("%d", srcDiskSize)) + if err != nil { + return fmt.Errorf("Failed parsing source disk size") + } + + // Ensure source disk will fit into the instance volume. + if volBytes < srcSize { + // Convert to IEC format for nicer error. + imgSize := units.GetByteSizeStringIEC(srcSize, 2) + volSize := units.GetByteSizeStringIEC(volBytes, 2) + return fmt.Errorf("Volume size (%s) is lower then source disk size (%s)", volSize, imgSize) + } + + volCopy := drivers.NewVolumeCopy(vol) + + err = b.driver.CreateVolume(volCopy.Volume, &volFiller, op) + if err != nil { + return err + } + + revert.Add(func() { _ = b.driver.DeleteVolume(volCopy.Volume, op) }) + + err = b.ensureInstanceSymlink(inst.Type(), inst.Project().Name, inst.Name(), vol.MountPath()) + if err != nil { + return err + } + + revert.Success() + return nil +} + // RenameInstance renames the instance's root volume and any snapshot volumes. func (b *lxdBackend) RenameInstance(inst instance.Instance, newName string, op *operations.Operation) error { l := b.logger.AddContext(logger.Ctx{"project": inst.Project().Name, "instance": inst.Name(), "newName": newName}) diff --git a/lxd/storage/backend_mock.go b/lxd/storage/backend_mock.go index 860556bca112..19129702dc03 100644 --- a/lxd/storage/backend_mock.go +++ b/lxd/storage/backend_mock.go @@ -152,6 +152,11 @@ func (b *mockBackend) CreateInstanceFromMigration(inst instance.Instance, conn i return nil } +// CreateInstanceFromConversion ... +func (b *mockBackend) CreateInstanceFromConversion(inst instance.Instance, conn io.ReadWriteCloser, args migration.VolumeTargetArgs, op *operations.Operation) error { + return nil +} + // RenameInstance ... 
func (b *mockBackend) RenameInstance(inst instance.Instance, newName string, op *operations.Operation) error { return nil diff --git a/lxd/storage/pool_interface.go b/lxd/storage/pool_interface.go index 9393e9e39d44..cfd0913d1aca 100644 --- a/lxd/storage/pool_interface.go +++ b/lxd/storage/pool_interface.go @@ -68,6 +68,7 @@ type Pool interface { CreateInstanceFromCopy(inst instance.Instance, src instance.Instance, snapshots bool, allowInconsistent bool, op *operations.Operation) error CreateInstanceFromImage(inst instance.Instance, fingerprint string, op *operations.Operation) error CreateInstanceFromMigration(inst instance.Instance, conn io.ReadWriteCloser, args migration.VolumeTargetArgs, op *operations.Operation) error + CreateInstanceFromConversion(inst instance.Instance, conn io.ReadWriteCloser, args migration.VolumeTargetArgs, op *operations.Operation) error RenameInstance(inst instance.Instance, newName string, op *operations.Operation) error DeleteInstance(inst instance.Instance, op *operations.Operation) error UpdateInstance(inst instance.Instance, newDesc string, newConfig map[string]string, op *operations.Operation) error From 00b519925cfb6300a9e3ce20e76e88044d67f1ee Mon Sep 17 00:00:00 2001 From: Din Music Date: Mon, 15 Jul 2024 14:59:13 +0000 Subject: [PATCH 015/106] lxd/instance/drivers: Helper function to retrieve instance storage pool from devices Signed-off-by: Din Music --- lxd/instance/drivers/driver_common.go | 15 +++++++++++++++ lxd/instance/drivers/driver_lxc.go | 12 +++--------- lxd/instance/drivers/driver_qemu.go | 12 +++--------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/lxd/instance/drivers/driver_common.go b/lxd/instance/drivers/driver_common.go index 013ba308b6d4..9d547032a899 100644 --- a/lxd/instance/drivers/driver_common.go +++ b/lxd/instance/drivers/driver_common.go @@ -1297,6 +1297,21 @@ func (d *common) getStoragePool() (storagePools.Pool, error) { return d.storagePool, nil } +// getParentStoragePool 
retrieves the storage pool name from the root disk device in the expanded devices. +func (d *common) getParentStoragePool() (string, error) { + parentStoragePool := "" + parentLocalRootDiskDeviceKey, parentLocalRootDiskDevice, _ := instancetype.GetRootDiskDevice(d.ExpandedDevices().CloneNative()) + if parentLocalRootDiskDeviceKey != "" { + parentStoragePool = parentLocalRootDiskDevice["pool"] + } + + if parentStoragePool == "" { + return "", fmt.Errorf("Instance's root device is missing the pool property") + } + + return parentStoragePool, nil +} + // deviceLoad instantiates and validates a new device and returns it along with enriched config. func (d *common) deviceLoad(inst instance.Instance, deviceName string, rawConfig deviceConfig.Device) (device.Device, error) { var configCopy deviceConfig.Device diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index 0f09e81ad47b..ee1b1770c1e5 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -6078,15 +6078,9 @@ func (d *lxc) MigrateReceive(args instance.MigrateReceiveArgs) error { // At this point we have already figured out the parent container's root // disk device so we can simply retrieve it from the expanded devices. 
- parentStoragePool := "" - parentExpandedDevices := d.ExpandedDevices() - parentLocalRootDiskDeviceKey, parentLocalRootDiskDevice, _ := instancetype.GetRootDiskDevice(parentExpandedDevices.CloneNative()) - if parentLocalRootDiskDeviceKey != "" { - parentStoragePool = parentLocalRootDiskDevice["pool"] - } - - if parentStoragePool == "" { - return fmt.Errorf("Instance's root device is missing the pool property") + parentStoragePool, err := d.getParentStoragePool() + if err != nil { + return err } // A zero length Snapshots slice indicates volume only migration in diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 83b61ecf5b5b..7da9d8ce5ca9 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -7285,15 +7285,9 @@ func (d *qemu) MigrateReceive(args instance.MigrateReceiveArgs) error { // At this point we have already figured out the parent instances's root // disk device so we can simply retrieve it from the expanded devices. 
- parentStoragePool := "" - parentExpandedDevices := d.ExpandedDevices() - parentLocalRootDiskDeviceKey, parentLocalRootDiskDevice, _ := instancetype.GetRootDiskDevice(parentExpandedDevices.CloneNative()) - if parentLocalRootDiskDeviceKey != "" { - parentStoragePool = parentLocalRootDiskDevice["pool"] - } - - if parentStoragePool == "" { - return fmt.Errorf("Instance's root device is missing the pool property") + parentStoragePool, err := d.getParentStoragePool() + if err != nil { + return err } // A zero length Snapshots slice indicates volume only migration in From 30720d929f5b909386171e661e313eb43ddf0fe6 Mon Sep 17 00:00:00 2001 From: Din Music Date: Wed, 10 Jul 2024 11:43:44 +0000 Subject: [PATCH 016/106] lxd/instance/drivers: Add ConversionReceive function Signed-off-by: Din Music --- lxd/instance/drivers/driver_lxc.go | 46 +++++++++++++++++++++++++++++ lxd/instance/drivers/driver_qemu.go | 41 +++++++++++++++++++++++++ lxd/instance/instance_interface.go | 16 ++++++++++ 3 files changed, 103 insertions(+) diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index ee1b1770c1e5..5a832bda3e0f 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -6315,6 +6315,52 @@ func (d *lxc) MigrateReceive(args instance.MigrateReceiveArgs) error { } } +// ConversionReceive establishes the filesystem connection, transfers the filesystem / block volume, +// and creates an instance from it. +func (d *lxc) ConversionReceive(args instance.ConversionReceiveArgs) error { + d.logger.Info("Conversion receive starting") + defer d.logger.Info("Conversion receive stopped") + + // Wait for essential migration connections before negotiation. 
+ ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + + filesystemConn, err := args.FilesystemConn(ctx) + if err != nil { + return err + } + + pool, err := storagePools.LoadByInstance(d.state, d) + if err != nil { + return err + } + + // Ensure that configured root disk device is valid. + _, err = d.getParentStoragePool() + if err != nil { + return err + } + + volTargetArgs := migration.VolumeTargetArgs{ + IndexHeaderVersion: 0, + Name: d.Name(), + MigrationType: migration.Type{ + FSType: migration.MigrationFSType_RSYNC, + Features: []string{"xattrs", "delete", "compress"}, + }, + TrackProgress: true, // Use a progress tracker on receiver to get progress information. + VolumeSize: args.SourceDiskSize, // Block volume size override. + ConversionOptions: nil, // Containers do not support conversion options. + } + + err = pool.CreateInstanceFromConversion(d, filesystemConn, volTargetArgs, d.op) + if err != nil { + return fmt.Errorf("Failed creating instance on target: %w", err) + } + + return nil +} + // Migrate migrates the instance to another node. func (d *lxc) migrate(args *instance.CriuMigrationArgs) error { ctxMap := logger.Ctx{ diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 7da9d8ce5ca9..8214422e9efa 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -7441,6 +7441,47 @@ func (d *qemu) MigrateReceive(args instance.MigrateReceiveArgs) error { } } +// ConversionReceive establishes the filesystem connection, transfers the filesystem / block volume, +// and creates an instance from it. +func (d *qemu) ConversionReceive(args instance.ConversionReceiveArgs) error { + d.logger.Info("Conversion receive starting") + defer d.logger.Info("Conversion receive stopped") + + // Wait for filesystem connection. 
+ ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + + filesystemConn, err := args.FilesystemConn(ctx) + if err != nil { + return err + } + + pool, err := storagePools.LoadByInstance(d.state, d) + if err != nil { + return err + } + + // Ensure that configured root disk device is valid. + _, err = d.getParentStoragePool() + if err != nil { + return err + } + + volTargetArgs := migration.VolumeTargetArgs{ + Name: d.Name(), + TrackProgress: true, // Use a progress tracker on receiver to get progress information. + VolumeSize: args.SourceDiskSize, // Block volume size override. + ConversionOptions: args.ConversionOptions, // Non-nil options indicate image conversion. + } + + err = pool.CreateInstanceFromConversion(d, filesystemConn, volTargetArgs, d.op) + if err != nil { + return fmt.Errorf("Failed creating instance on target: %w", err) + } + + return nil +} + // CGroup is not implemented for VMs. func (d *qemu) CGroup() (*cgroup.CGroup, error) { return nil, instance.ErrNotImplemented diff --git a/lxd/instance/instance_interface.go b/lxd/instance/instance_interface.go index c6636fff190a..cc26005da6df 100644 --- a/lxd/instance/instance_interface.go +++ b/lxd/instance/instance_interface.go @@ -161,6 +161,9 @@ type Instance interface { MigrateSend(args MigrateSendArgs) error MigrateReceive(args MigrateReceiveArgs) error + // Conversion. + ConversionReceive(args ConversionReceiveArgs) error + // Progress reporting. SetOperation(op *operations.Operation) Operation() *operations.Operation @@ -242,3 +245,16 @@ type MigrateReceiveArgs struct { InstanceOperation *operationlock.InstanceOperation Refresh bool } + +// ConversionArgs represent arguments for instance conversion send and receive. +type ConversionArgs struct { + FilesystemConn func(ctx context.Context) (io.ReadWriteCloser, error) + Disconnect func() +} + +// ConversionReceiveArgs represent arguments for instance conversion receive. 
+type ConversionReceiveArgs struct { + ConversionArgs + SourceDiskSize int64 // Size of the disk in bytes. + ConversionOptions []string +} From d86e53d3a6bb35078d9ce896c53348f7b96fa334 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 25 Apr 2024 09:57:33 +0000 Subject: [PATCH 017/106] lxd/convert_instance: Add conversion sink for receiving root disk over conversion API Signed-off-by: Din Music --- lxd/convert_instance.go | 125 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 lxd/convert_instance.go diff --git a/lxd/convert_instance.go b/lxd/convert_instance.go new file mode 100644 index 000000000000..c23882db6707 --- /dev/null +++ b/lxd/convert_instance.go @@ -0,0 +1,125 @@ +package main + +import ( + "context" + "fmt" + "io" + "net/http" + + "github.com/canonical/lxd/lxd/instance" + "github.com/canonical/lxd/lxd/instance/operationlock" + "github.com/canonical/lxd/lxd/operations" + "github.com/canonical/lxd/lxd/state" + "github.com/canonical/lxd/shared" + "github.com/canonical/lxd/shared/api" + "github.com/canonical/lxd/shared/logger" +) + +type conversionSink struct { + fsConn *migrationConn + url string + instance instance.Instance + + sourceDiskSize int64 + conversionOptions []string +} + +// conversionSinkArgs arguments to configure conversion sink. +type conversionSinkArgs struct { + // General conversion fields. + secrets map[string]string + url string + instance instance.Instance + + // Conversion specific fields. + conversionOptions []string + + // Storage specific fields. 
+ sourceDiskSize int64 +} + +func newConversionSink(args *conversionSinkArgs) (*conversionSink, error) { + sink := conversionSink{ + instance: args.instance, + url: args.url, + sourceDiskSize: args.sourceDiskSize, + conversionOptions: args.conversionOptions, + } + + secret, err := shared.RandomCryptoString() + if err != nil { + return nil, fmt.Errorf("Failed creating conversion sink secret for %q connection: %w", api.SecretNameFilesystem, err) + } + + sink.fsConn = newMigrationConn(secret, nil, nil) + + return &sink, nil +} + +// Metadata returns metadata for the conversion sink. +func (s *conversionSink) Metadata() any { + return shared.Jmap{ + api.SecretNameFilesystem: s.fsConn.Secret(), + } +} + +// Do performs the conversion operation on the target side (sink) for the given +// state and instance operation. It sets up the necessary websocket connection +// for filesystem, and then receives the conversion data. +func (s *conversionSink) Do(state *state.State, instOp *operationlock.InstanceOperation) error { + l := logger.AddContext(logger.Ctx{"project": s.instance.Project().Name, "instance": s.instance.Name()}) + + defer l.Info("Conversion channels disconnected on target") + defer s.fsConn.Close() + + filesystemConnFunc := func(ctx context.Context) (io.ReadWriteCloser, error) { + if s.fsConn == nil { + return nil, fmt.Errorf("Conversion target filesystem connection not initialized") + } + + wsConn, err := s.fsConn.WebsocketIO(ctx) + if err != nil { + return nil, fmt.Errorf("Failed getting conversion target filesystem connection: %w", err) + } + + return wsConn, nil + } + + args := instance.ConversionReceiveArgs{ + ConversionArgs: instance.ConversionArgs{ + FilesystemConn: filesystemConnFunc, + Disconnect: func() { s.fsConn.Close() }, + }, + SourceDiskSize: s.sourceDiskSize, + ConversionOptions: s.conversionOptions, + } + + err := s.instance.ConversionReceive(args) + if err != nil { + l.Error("Failed conversion on target", logger.Ctx{"err": err}) + return 
fmt.Errorf("Failed conversion on target: %w", err) + } + + return nil +} + +// Connect connects to the conversion source. +func (s *conversionSink) Connect(op *operations.Operation, r *http.Request, w http.ResponseWriter) error { + incomingSecret := r.FormValue("secret") + if incomingSecret == "" { + return api.StatusErrorf(http.StatusBadRequest, "Missing conversion sink secret") + } + + if incomingSecret == s.fsConn.Secret() { + err := s.fsConn.AcceptIncoming(r, w) + if err != nil { + return fmt.Errorf("Failed accepting incoming conversion sink %q connection: %w", api.SecretNameFilesystem, err) + } + + return nil + } + + // If we didn't find the right secret, the user provided a bad one, so return 403, not 404, since this + // operation actually exists. + return api.StatusErrorf(http.StatusForbidden, "Invalid conversion sink secret") +} From 43aa8dcef4995eacf6e50c54b17e14a84ba35405 Mon Sep 17 00:00:00 2001 From: Din Music Date: Mon, 8 Jul 2024 13:50:20 +0000 Subject: [PATCH 018/106] lxd/instances_post: Extract instance arguments setup from migration Signed-off-by: Din Music --- lxd/instances_post.go | 118 +++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 54 deletions(-) diff --git a/lxd/instances_post.go b/lxd/instances_post.go index 67b0dad1b371..0e491f02e44f 100644 --- a/lxd/instances_post.go +++ b/lxd/instances_post.go @@ -214,12 +214,6 @@ func createFromMigration(s *state.State, r *http.Request, projectName string, pr return response.NotImplemented(fmt.Errorf("Mode %q not implemented", req.Source.Mode)) } - // Parse the architecture name - architecture, err := osarch.ArchitectureId(req.Architecture) - if err != nil { - return response.BadRequest(err) - } - dbType, err := instancetype.New(string(req.Type)) if err != nil { return response.BadRequest(err) @@ -229,57 +223,11 @@ func createFromMigration(s *state.State, r *http.Request, projectName string, pr return response.BadRequest(fmt.Errorf("Instance type not supported %q", 
req.Type)) } - // Prepare the instance creation request. - args := db.InstanceArgs{ - Project: projectName, - Architecture: architecture, - BaseImage: req.Source.BaseImage, - Config: req.Config, - Type: dbType, - Devices: deviceConfig.NewDevices(req.Devices), - Description: req.Description, - Ephemeral: req.Ephemeral, - Name: req.Name, - Profiles: profiles, - Stateful: req.Stateful, - } - - storagePool, storagePoolProfile, localRootDiskDeviceKey, localRootDiskDevice, resp := instanceFindStoragePool(s, projectName, req) + storagePool, args, resp := setupInstanceArgs(s, dbType, projectName, profiles, req) if resp != nil { return resp } - if storagePool == "" { - return response.BadRequest(fmt.Errorf("Can't find a storage pool for the instance to use")) - } - - if localRootDiskDeviceKey == "" && storagePoolProfile == "" { - // Give the container it's own local root disk device with a pool property. - rootDev := map[string]string{} - rootDev["type"] = "disk" - rootDev["path"] = "/" - rootDev["pool"] = storagePool - if args.Devices == nil { - args.Devices = deviceConfig.Devices{} - } - - // Make sure that we do not overwrite a device the user is currently using under the - // name "root". - rootDevName := "root" - for i := 0; i < 100; i++ { - if args.Devices[rootDevName] == nil { - break - } - - rootDevName = fmt.Sprintf("root%d", i) - continue - } - - args.Devices[rootDevName] = rootDev - } else if localRootDiskDeviceKey != "" && localRootDiskDevice["pool"] == "" { - args.Devices[localRootDiskDeviceKey]["pool"] = storagePool - } - var inst instance.Instance var instOp *operationlock.InstanceOperation var cleanup revert.Hook @@ -326,7 +274,7 @@ func createFromMigration(s *state.State, r *http.Request, projectName string, pr // Note: At this stage we do not yet know if snapshots are going to be received and so we cannot // create their DB records. This will be done if needed in the migrationSink.Do() function called // as part of the operation below. 
- inst, instOp, cleanup, err = instance.CreateInternal(s, args, true) + inst, instOp, cleanup, err = instance.CreateInternal(s, *args, true) if err != nil { return response.InternalError(fmt.Errorf("Failed creating instance record: %w", err)) } @@ -798,6 +746,68 @@ func createFromBackup(s *state.State, r *http.Request, projectName string, data return operations.OperationResponse(op) } +// setupInstanceArgs sets the database instance arguments and determines the storage pool to use. +func setupInstanceArgs(s *state.State, instType instancetype.Type, projectName string, profiles []api.Profile, req *api.InstancesPost) (storagePool string, instArgs *db.InstanceArgs, resp response.Response) { + // Parse the architecture name + architecture, err := osarch.ArchitectureId(req.Architecture) + if err != nil { + return "", nil, response.BadRequest(err) + } + + // Prepare the instance creation request. + args := db.InstanceArgs{ + Project: projectName, + Architecture: architecture, + BaseImage: req.Source.BaseImage, + Config: req.Config, + Type: instType, + Devices: deviceConfig.NewDevices(req.Devices), + Description: req.Description, + Ephemeral: req.Ephemeral, + Name: req.Name, + Profiles: profiles, + Stateful: req.Stateful, + } + + storagePool, storagePoolProfile, localRootDiskDeviceKey, localRootDiskDevice, resp := instanceFindStoragePool(s, projectName, req) + if resp != nil { + return "", nil, resp + } + + if storagePool == "" { + return "", nil, response.BadRequest(fmt.Errorf("Can't find a storage pool for the instance to use")) + } + + if localRootDiskDeviceKey == "" && storagePoolProfile == "" { + // Give the instance its own local root disk device with a pool property. + rootDev := map[string]string{} + rootDev["type"] = "disk" + rootDev["path"] = "/" + rootDev["pool"] = storagePool + if args.Devices == nil { + args.Devices = deviceConfig.Devices{} + } + + // Make sure that we do not overwrite a device the user is currently using + // under the name "root". 
+ rootDevName := "root" + for i := 0; i < 100; i++ { + if args.Devices[rootDevName] == nil { + break + } + + rootDevName = fmt.Sprintf("root%d", i) + continue + } + + args.Devices[rootDevName] = rootDev + } else if localRootDiskDeviceKey != "" && localRootDiskDevice["pool"] == "" { + args.Devices[localRootDiskDeviceKey]["pool"] = storagePool + } + + return storagePool, &args, nil +} + // swagger:operation POST /1.0/instances instances instances_post // // Create a new instance From 6c8c7a25f2096de34d3d3980b50d5d7c39466bad Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 25 Apr 2024 10:00:45 +0000 Subject: [PATCH 019/106] lxd/instances_post: Convert image if migration source type is conversion Signed-off-by: Din Music --- lxd/instances_post.go | 109 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/lxd/instances_post.go b/lxd/instances_post.go index 0e491f02e44f..e2509b4dfa2e 100644 --- a/lxd/instances_post.go +++ b/lxd/instances_post.go @@ -8,6 +8,7 @@ import ( "net/http" "net/url" "os" + "slices" "strings" petname "github.com/dustinkirkland/golang-petname" @@ -364,6 +365,112 @@ func createFromMigration(s *state.State, r *http.Request, projectName string, pr return operations.OperationResponse(op) } +// createFromConversion receives the root disk (container FS or VM block volume) from the client and creates an +// instance from it. Conversion options also allow the uploaded image to be converted into a raw format. +func createFromConversion(s *state.State, r *http.Request, projectName string, profiles []api.Profile, req *api.InstancesPost) response.Response { + if s.DB.Cluster.LocalNodeIsEvacuated() { + return response.Forbidden(fmt.Errorf("Cluster member is evacuated")) + } + + // Validate migration mode. 
+ if req.Source.Mode != "push" { + return response.NotImplemented(fmt.Errorf("Mode %q not implemented", req.Source.Mode)) + } + + dbType, err := instancetype.New(string(req.Type)) + if err != nil { + return response.BadRequest(err) + } + + // Only virtual machines support additional conversion options. + if dbType != instancetype.VM && len(req.Source.ConversionOptions) > 0 { + return response.BadRequest(fmt.Errorf("Conversion options can only be used with virtual machines. Instance type %q does not support conversion options", req.Type)) + } + + // Validate conversion options. + for _, opt := range req.Source.ConversionOptions { + if !slices.Contains([]string{"format"}, opt) { + return response.BadRequest(fmt.Errorf("Invalid conversion option %q", opt)) + } + } + + storagePool, args, resp := setupInstanceArgs(s, dbType, projectName, profiles, req) + if resp != nil { + return resp + } + + revert := revert.New() + defer revert.Fail() + + _, err = storagePools.LoadByName(s, storagePool) + if err != nil { + return response.InternalError(err) + } + + // Create the instance DB record for main instance. + inst, instOp, cleanup, err := instance.CreateInternal(s, *args, true) + if err != nil { + return response.InternalError(fmt.Errorf("Failed creating instance record: %w", err)) + } + + revert.Add(cleanup) + if err != nil { + return response.SmartError(fmt.Errorf("Failed getting exclusive access to instance: %w", err)) + } + + revert.Add(func() { instOp.Done(err) }) + + conversionArgs := conversionSinkArgs{ + url: req.Source.Operation, + secrets: req.Source.Websockets, + sourceDiskSize: req.Source.SourceDiskSize, + conversionOptions: req.Source.ConversionOptions, + instance: inst, + } + + sink, err := newConversionSink(&conversionArgs) + if err != nil { + return response.InternalError(err) + } + + // Copy reverter so far so we can use it inside run after this function has finished. 
+ runRevert := revert.Clone() + + run := func(op *operations.Operation) error { + defer runRevert.Fail() + + sink.instance.SetOperation(op) + + // And finally run the migration. + err = sink.Do(s, instOp) + if err != nil { + err = fmt.Errorf("Error transferring instance data: %w", err) + instOp.Done(err) // Complete operation that was created earlier, to release lock. + + return err + } + + instOp.Done(nil) // Complete operation that was created earlier, to release lock. + runRevert.Success() + return nil + } + + resources := map[string][]api.URL{} + resources["instances"] = []api.URL{*api.NewURL().Path(version.APIVersion, "instances", req.Name)} + + if dbType == instancetype.Container { + resources["containers"] = resources["instances"] + } + + op, err := operations.OperationCreate(s, projectName, operations.OperationClassWebsocket, operationtype.InstanceCreate, resources, sink.Metadata(), run, nil, sink.Connect, r) + if err != nil { + return response.InternalError(err) + } + + revert.Success() + return operations.OperationResponse(op) +} + func createFromCopy(s *state.State, r *http.Request, projectName string, profiles []api.Profile, req *api.InstancesPost) response.Response { if s.DB.Cluster.LocalNodeIsEvacuated() { return response.Forbidden(fmt.Errorf("Cluster member is evacuated")) @@ -1208,6 +1315,8 @@ func instancesPost(d *Daemon, r *http.Request) response.Response { return createFromNone(s, r, targetProjectName, profiles, &req) case "migration": return createFromMigration(s, r, targetProjectName, profiles, &req) + case "conversion": + return createFromConversion(s, r, targetProjectName, profiles, &req) case "copy": return createFromCopy(s, r, targetProjectName, profiles, &req) default: From 33c49585f9db4a7935fedae74c5c90625a4c0c67 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 25 Apr 2024 10:16:00 +0000 Subject: [PATCH 020/106] lxd-migrate: Add support for image conversion Signed-off-by: Din Music --- lxd-migrate/main_migrate.go | 67 
++++++++++++++++++++++++++++++++++--- lxd-migrate/utils.go | 26 ++++++++++++++ 2 files changed, 88 insertions(+), 5 deletions(-) diff --git a/lxd-migrate/main_migrate.go b/lxd-migrate/main_migrate.go index 9e0d56bbb36a..3e0cf34e5af4 100644 --- a/lxd-migrate/main_migrate.go +++ b/lxd-migrate/main_migrate.go @@ -10,6 +10,7 @@ import ( "os/signal" "path/filepath" "runtime" + "slices" "sort" "strings" @@ -18,6 +19,7 @@ import ( "gopkg.in/yaml.v2" "github.com/canonical/lxd/client" + "github.com/canonical/lxd/lxd/storage/block" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" cli "github.com/canonical/lxd/shared/cmd" @@ -30,7 +32,8 @@ import ( type cmdMigrate struct { global *cmdGlobal - flagRsyncArgs string + flagRsyncArgs string + flagConversionOpts []string } func (c *cmdMigrate) command() *cobra.Command { @@ -51,6 +54,7 @@ func (c *cmdMigrate) command() *cobra.Command { ` cmd.RunE = c.run cmd.Flags().StringVar(&c.flagRsyncArgs, "rsync-args", "", "Extra arguments to pass to rsync"+"``") + cmd.Flags().StringSliceVar(&c.flagConversionOpts, "conversion", []string{"format"}, "List of conversion opts. Allowed values are: [format]") return cmd } @@ -261,11 +265,20 @@ func (c *cmdMigrate) runInteractive(server lxd.InstanceServer) (cmdMigrateData, config.InstanceArgs = api.InstancesPost{ Source: api.InstanceSource{ - Type: "migration", - Mode: "push", + Type: "conversion", + Mode: "push", + ConversionOptions: c.flagConversionOpts, }, } + // If server does not support conversion, fallback to migration. + // Migration will move the image to the server and import it as + // LXD instance. This means that images of different formats, + // such as VMDK and QCow2, will not work. 
+ if !server.HasExtension("instance_import_conversion") { + config.InstanceArgs.Source.Type = "migration" + } + config.InstanceArgs.Config = map[string]string{} config.InstanceArgs.Devices = map[string]map[string]string{} @@ -455,11 +468,24 @@ func (c *cmdMigrate) run(cmd *cobra.Command, args []string) error { return fmt.Errorf("This tool must be run as root") } + // Check conversion options. + supportedConversionOptions := []string{"format"} + for _, opt := range c.flagConversionOpts { + if !slices.Contains(supportedConversionOptions, opt) { + return fmt.Errorf("Unsupported conversion option %q, supported conversion options are %v", opt, supportedConversionOptions) + } + } + _, err := exec.LookPath("rsync") if err != nil { return err } + _, err = exec.LookPath("file") + if err != nil { + return err + } + // Server server, clientFingerprint, err := c.askServer() if err != nil { @@ -498,6 +524,11 @@ func (c *cmdMigrate) run(cmd *cobra.Command, args []string) error { server = server.UseProject(config.Project) } + if config.InstanceArgs.Type != api.InstanceTypeVM && len(config.InstanceArgs.Source.ConversionOptions) > 0 { + fmt.Printf("Instance type %q does not support conversion options. Ignored conversion options: %v\n", config.InstanceArgs.Type, config.InstanceArgs.Source.ConversionOptions) + config.InstanceArgs.Source.ConversionOptions = []string{} + } + config.Mounts = append(config.Mounts, config.SourcePath) // Get and sort the mounts @@ -548,10 +579,21 @@ func (c *cmdMigrate) run(cmd *cobra.Command, args []string) error { return fmt.Errorf("Failed to setup the source: %w", err) } } else { + isImageTypeRaw, err := isImageTypeRaw(config.SourcePath) + if err != nil { + return err + } + + // If image type is raw, formatting is not required. + if isImageTypeRaw && slices.Contains(config.InstanceArgs.Source.ConversionOptions, "format") { + fmt.Println(`Formatting is not required for images of type raw. 
Ignoring conversion option "format".`) + config.InstanceArgs.Source.ConversionOptions = shared.RemoveElementsFromSlice(config.InstanceArgs.Source.ConversionOptions, "format") + } + fullPath = path target := filepath.Join(path, "root.img") - err := os.WriteFile(target, nil, 0644) + err = os.WriteFile(target, nil, 0644) if err != nil { return fmt.Errorf("Failed to create %q: %w", target, err) } @@ -567,6 +609,16 @@ func (c *cmdMigrate) run(cmd *cobra.Command, args []string) error { if err != nil { return fmt.Errorf("Failed to make %s read-only: %w", config.SourcePath, err) } + + // In conversion mode, server expects the volume size hint in the request. + if config.InstanceArgs.Source.Type == "conversion" { + size, err := block.DiskSizeBytes(target) + if err != nil { + return err + } + + config.InstanceArgs.Source.SourceDiskSize = size + } } // System architecture @@ -597,7 +649,12 @@ func (c *cmdMigrate) run(cmd *cobra.Command, args []string) error { return err } - err = transferRootDiskForMigration(ctx, op, fullPath, c.flagRsyncArgs, config.InstanceArgs.Type) + if config.InstanceArgs.Source.Type == "conversion" { + err = transferRootDiskForConversion(ctx, op, fullPath, c.flagRsyncArgs, config.InstanceArgs.Type) + } else { + err = transferRootDiskForMigration(ctx, op, fullPath, c.flagRsyncArgs, config.InstanceArgs.Type) + } + if err != nil { return err } diff --git a/lxd-migrate/utils.go b/lxd-migrate/utils.go index 6c74ed4913cb..d1da2e20c836 100644 --- a/lxd-migrate/utils.go +++ b/lxd-migrate/utils.go @@ -122,6 +122,32 @@ func transferRootDiskForMigration(ctx context.Context, op lxd.Operation, rootfs return nil } +func transferRootDiskForConversion(ctx context.Context, op lxd.Operation, rootfs string, rsyncArgs string, instanceType api.InstanceType) error { + opAPI := op.Get() + + // Establish websocket connection. 
+ wsFs, err := op.GetWebsocket(opAPI.Metadata[api.SecretNameFilesystem].(string)) + if err != nil { + return err + } + + if instanceType == api.InstanceTypeContainer { + // Send container filesystem. + err = rsyncSend(ctx, wsFs, rootfs, rsyncArgs, instanceType) + if err != nil { + return fmt.Errorf("Failed sending filesystem volume: %w", err) + } + } else { + // Send VM block volume (image / partition). + err := sendBlockVol(ctx, ws.NewWrapper(wsFs), filepath.Join(rootfs, "root.img")) + if err != nil { + return fmt.Errorf("Failed sending block volume: %w", err) + } + } + + return op.Wait() +} + func (c *cmdMigrate) connectLocal() (lxd.InstanceServer, error) { args := lxd.ConnectionArgs{} args.UserAgent = fmt.Sprintf("LXD-MIGRATE %s", version.Version) From e0638b16487e49c63792257461737f9dcdf87958 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 25 Apr 2024 14:22:43 +0000 Subject: [PATCH 021/106] doc: Add image formats to the wordlist Signed-off-by: Din Music --- doc/.wordlist.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/.wordlist.txt b/doc/.wordlist.txt index e205c6feb2e7..cd6ce56b3c18 100644 --- a/doc/.wordlist.txt +++ b/doc/.wordlist.txt @@ -39,6 +39,7 @@ OEM OLM Permalink pre +QCow Quickstart ReadMe reST @@ -50,5 +51,8 @@ subtree Ubuntu UI UUID +VDI +VHDX VM +VMDK YAML From 46f8298a4ffe4d4a6a07f3353268946a1ea05eec Mon Sep 17 00:00:00 2001 From: Din Music Date: Wed, 8 May 2024 14:53:54 +0000 Subject: [PATCH 022/106] doc: Update api definition Signed-off-by: Din Music --- doc/rest-api.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/rest-api.yaml b/doc/rest-api.yaml index e596449cd5a9..c4abca298976 100644 --- a/doc/rest-api.yaml +++ b/doc/rest-api.yaml @@ -2296,6 +2296,14 @@ definitions: example: false type: boolean x-go-name: ContainerOnly + conversion_options: + description: Optional list of options that are used during image conversion (for conversion). 
+ example: + - format + items: + type: string + type: array + x-go-name: ConversionOptions fingerprint: description: Image fingerprint (for image source) example: ed56997f7c5b48e8d78986d2467a26109be6fb9f2d92e8c7b08eb8b6cec7629a @@ -2370,6 +2378,14 @@ definitions: example: foo/snap0 type: string x-go-name: Source + sourceDiskSize: + description: |- + Source disk size in bytes used to set the instance's volume size to accommodate the transferred root + disk. This value is ignored if the root disk device has a size explicitly configured (for conversion). + example: 12345 + format: int64 + type: integer + x-go-name: SourceDiskSize type: description: Source type example: image From c67ff809cc77b6e3f1e506dfa82f0ba8f79dc0b9 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 13 Jun 2024 15:43:40 +0000 Subject: [PATCH 023/106] doc: Add new supported image formats to the docs Signed-off-by: Din Music --- doc/howto/import_machines_to_instances.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/howto/import_machines_to_instances.md b/doc/howto/import_machines_to_instances.md index 39046e25bc95..41862b3827b3 100644 --- a/doc/howto/import_machines_to_instances.md +++ b/doc/howto/import_machines_to_instances.md @@ -27,7 +27,7 @@ The tool can create both containers and virtual machines: * When creating a container, you must provide a disk or partition that contains the root file system for the container. For example, this could be the `/` root disk of the machine or container where you are running the tool. -* When creating a virtual machine, you must provide a bootable disk, partition or image. +* When creating a virtual machine, you must provide a bootable disk, partition, or an image in raw, QCOW, QCOW2, VDI, VHDX, or VMDK format. This means that just providing a file system is not sufficient, and you cannot create a virtual machine from a container that you are running. 
It is also not possible to create a virtual machine from the physical machine that you are using to do the migration, because the migration tool would be using the disk that it is copying. Instead, you could provide a bootable image, or a bootable partition or disk that is currently not in use. From fb838216454232a5af1b9c572498351f3b2b5596 Mon Sep 17 00:00:00 2001 From: Din Music Date: Fri, 19 Jul 2024 13:27:30 +0000 Subject: [PATCH 024/106] lxd/storage/backend_lxd: Lower qemu-img convert priority instead of limiting CPU time Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index 36e2e8dfa99e..9557d0b9e852 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -1826,8 +1826,8 @@ func (b *lxdBackend) imageConversionFiller(imgPath string, imgFormat string) fun // Convert uploaded image from backups directory into RAW format on the instance volume. cmd := []string{ - // Use prlimit to limit QEMU to 1 GiB address space and 120 seconds of CPU time. - "prlimit", "--cpu=120", "--as=1073741824", + // Run with low priority to reduce CPU impact on other processes. 
+ "nice", "-n19", "qemu-img", "convert", "-f", imgFormat, "-O", "raw", imgPath, diskPath, } From 398d641a60c666486d6dd58c65d55a5f0f3e83e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20Peliz=C3=A4us?= Date: Mon, 22 Jul 2024 12:03:40 +0200 Subject: [PATCH 025/106] doc: Add PowerFlex storage pool creation how-to MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Julian Pelizäus --- doc/howto/storage_pools.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/doc/howto/storage_pools.md b/doc/howto/storage_pools.md index 5fada4ffa017..c78d7a3879cd 100644 --- a/doc/howto/storage_pools.md +++ b/doc/howto/storage_pools.md @@ -154,6 +154,29 @@ Use the existing Ceph Object Gateway `https://www.example.com/radosgw` to create lxc storage create pool1 cephobject cephobject.radosgw.endpoint=https://www.example.com/radosgw ```` +````{group-tab} Dell PowerFlex + +Create a storage pool named `pool1` using the PowerFlex pool `sp1` in the protection domain `pd1`: + + lxc storage create pool1 powerflex powerflex.pool=sp1 powerflex.domain=pd1 powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo + +Create a storage pool named `pool2` using the id of PowerFlex pool `sp1`: + + lxc storage create pool2 powerflex powerflex.pool= powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo + +Create a storage pool named `pool3` that uses PowerFlex volume snapshots (see {ref}`storage-powerflex-limitations`) when creating volume copies: + + lxc storage create pool3 powerflex powerflex.clone_copy=false powerflex.pool= powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo + +Create a storage pool named `pool4` that uses a PowerFlex gateway with a certificate that is not trusted: + + lxc storage create pool4 powerflex powerflex.gateway.verify=false powerflex.pool= powerflex.gateway=https://powerflex 
powerflex.user.name=lxd powerflex.user.password=foo + +Create a storage pool named `pool5` that explicitly uses the PowerFlex SDC: + + lxc storage create pool5 powerflex powerflex.mode=sdc powerflex.pool= powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo + +```` ````` (storage-pools-cluster)= From 1bf03c9e504bbb883e81dccd250fff741ce48288 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20Peliz=C3=A4us?= Date: Mon, 22 Jul 2024 12:09:41 +0200 Subject: [PATCH 026/106] doc: Add missing PowerFlex details to explanation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Julian Pelizäus --- doc/explanation/storage.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/explanation/storage.md b/doc/explanation/storage.md index 51b6eca493a3..ec12863fee3c 100644 --- a/doc/explanation/storage.md +++ b/doc/explanation/storage.md @@ -23,6 +23,7 @@ The following storage drivers are supported: - [Ceph RBD - `ceph`](storage-ceph) - [CephFS - `cephfs`](storage-cephfs) - [Ceph Object - `cephobject`](storage-cephobject) +- [Dell PowerFlex - `powerflex`](storage-powerflex) See the following how-to guides for additional information: @@ -35,12 +36,12 @@ See the following how-to guides for additional information: Where the LXD data is stored depends on the configuration and the selected storage driver. Depending on the storage driver that is used, LXD can either share the file system with its host or keep its data separate. 
-Storage location | Directory | Btrfs | LVM | ZFS | Ceph (all) | -:--- | :-: | :-: | :-: | :-: | :-: | -Shared with the host | ✓ | ✓ | - | ✓ | - | -Dedicated disk/partition | - | ✓ | ✓ | ✓ | - | -Loop disk | - | ✓ | ✓ | ✓ | - | -Remote storage | - | - | - | - | ✓ | +Storage location | Directory | Btrfs | LVM | ZFS | Ceph (all) | Dell PowerFlex | +:--- | :-: | :-: | :-: | :-: | :-: | :-: | +Shared with the host | ✓ | ✓ | - | ✓ | - | - | +Dedicated disk/partition | - | ✓ | ✓ | ✓ | - | - | +Loop disk | - | ✓ | ✓ | ✓ | - | - | +Remote storage | - | - | - | - | ✓ | ✓ | #### Shared with the host @@ -70,6 +71,7 @@ You can increase their size though; see {ref}`storage-resize-pool`. #### Remote storage The `ceph`, `cephfs` and `cephobject` drivers store the data in a completely independent Ceph storage cluster that must be set up separately. +The same applies to the `powerflex` driver. (storage-default-pool)= ### Default storage pool From f74bdd2a71e2424979e8af412c84c07472bc6a59 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Tue, 16 Jul 2024 13:42:09 +0100 Subject: [PATCH 027/106] doc: Add permissions reference page. Signed-off-by: Mark Laing --- doc/reference/permissions.md | 141 +++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 doc/reference/permissions.md diff --git a/doc/reference/permissions.md b/doc/reference/permissions.md new file mode 100644 index 000000000000..46d9c78bfb3c --- /dev/null +++ b/doc/reference/permissions.md @@ -0,0 +1,141 @@ +(permissions-reference)= +# Permissions + +When managing user access via {ref}`fine-grained-authorization`, you add identities to groups and then grant entitlements against specific LXD API resources to these groups. + +Each LXD API resource has a particular entity type, and each entity type has a set of entitlements that can be granted against API resources of that type. 
+ +Below is a description of each entity type, and a list of entitlements that can be granted against entities of that type. + +## Server +> Entity type name: `server` + +The `server` entity type is the top-level entity type for the LXD system. +Entitlements that are granted at this level might cascade to projects and other resources: + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Project +> Entity type name: `project` + +Entitlements that are granted at the `project` level might cascade to project specific resources (such as instances): + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Storage pool +> Entity type name: `storage_pool` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Identity +> Entity type name: `identity` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Group +> Entity type name: `group` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Identity provider group +> Entity type name: `identity_provider_group` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Certificate +> Entity type name: `certificate` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Instance +> Entity type name: `instance` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Image +> Entity type name: `image` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Image alias +> Entity type name: `image_alias` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Network +> Entity type name: `network` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Network ACL +> Entity type name: `network_acl` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Network zone +> Entity type name: `network_zone` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Profile +> Entity type name: `profile` 
+ +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Storage volume +> Entity type name: `storage_volume` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` + +## Storage bucket +> Entity type name: `storage_bucket` + +```{include} ../metadata.txt + :start-after: + :end-before: +``` From ad228eb4b1e8e5a574ca06eedd41b8fcbeda6eed Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Tue, 16 Jul 2024 13:43:08 +0100 Subject: [PATCH 028/106] doc/explanation: Link to full permission list in fine-grained auth section. Signed-off-by: Mark Laing --- doc/explanation/authorization.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/explanation/authorization.md b/doc/explanation/authorization.md index 87b4a3d7a17f..d01dfeb1cf3c 100644 --- a/doc/explanation/authorization.md +++ b/doc/explanation/authorization.md @@ -67,6 +67,8 @@ Some useful permissions at a glance: - The `user` entitlement on entity type `instance` grants access to view an instance, pull/push files, get a console, and begin a terminal session. Members of a group with this entitlement cannot edit the instance configuration. +For a full list, see {ref}`permissions-reference`. + ```{note} Due to a limitation in the LXD client, if `can_exec` is granted to a group for a particular instance, members of the group will not be able to start a terminal session unless `can_view_events` is additionally granted for the parent project of the instance. We are working to resolve this. From 190a640f9083fae9c6f5a9abf422d2241595ae50 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Tue, 16 Jul 2024 13:43:31 +0100 Subject: [PATCH 029/106] doc/reference: Add permissions page to index. 
Signed-off-by: Mark Laing --- doc/reference/index.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/reference/index.md b/doc/reference/index.md index 915de193cfed..1a3fdb4c2746 100644 --- a/doc/reference/index.md +++ b/doc/reference/index.md @@ -55,6 +55,17 @@ Production server settings /reference/provided_metrics ``` +## Fine-grained permissions + +If you are managing user access via {ref}`fine-grained-authorization`, check which {ref}`permissions <permissions-reference>` can be assigned to groups. + +```{toctree} +:titlesonly: +:maxdepth: 1 + +/reference/permissions +``` + (reference-api)= ## REST API From 5a4a158128064a352c61cde1638cfc493e3dadfc Mon Sep 17 00:00:00 2001 From: Ruth Fuchss Date: Tue, 23 Jul 2024 11:24:45 +0200 Subject: [PATCH 030/106] doc/storage: change examples to sections instead of tabs We have too many storage drivers now, so using tabs for them doesn't scale. ;-) Also, it will cause problems when adding API/UI instructions anyway, so converting it into sections. Signed-off-by: Ruth Fuchss --- doc/howto/storage_pools.md | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/doc/howto/storage_pools.md b/doc/howto/storage_pools.md index c78d7a3879cd..4c38520f850d 100644 --- a/doc/howto/storage_pools.md +++ b/doc/howto/storage_pools.md @@ -25,9 +25,7 @@ See the {ref}`storage-drivers` documentation for a list of available configurati See the following examples for how to create a storage pool using different storage drivers.
-`````{tabs} - -````{group-tab} Directory +#### Create a directory pool Create a directory pool named `pool1`: @@ -36,8 +34,8 @@ Create a directory pool named `pool1`: Use the existing directory `/data/lxd` for `pool2`: lxc storage create pool2 dir source=/data/lxd -```` -````{group-tab} Btrfs + +#### Create a Btrfs pool Create a loop-backed pool named `pool1`: @@ -50,8 +48,8 @@ Use the existing Btrfs file system at `/some/path` for `pool2`: Create a pool named `pool3` on `/dev/sdX`: lxc storage create pool3 btrfs source=/dev/sdX -```` -````{group-tab} LVM + +#### Create an LVM pool Create a loop-backed pool named `pool1` (the LVM volume group will also be called `pool1`): @@ -72,8 +70,8 @@ Create a pool named `pool4` on `/dev/sdX` (the LVM volume group will also be cal Create a pool named `pool5` on `/dev/sdX` with the LVM volume group name `my-pool`: lxc storage create pool5 lvm source=/dev/sdX lvm.vg_name=my-pool -```` -````{group-tab} ZFS + +#### Create a ZFS pool Create a loop-backed pool named `pool1` (the ZFS zpool will also be called `pool1`): @@ -102,8 +100,8 @@ Create a pool named `pool6` on `/dev/sdX` (the ZFS zpool will also be called `po Create a pool named `pool7` on `/dev/sdX` with the ZFS zpool name `my-tank`: lxc storage create pool7 zfs source=/dev/sdX zfs.pool_name=my-tank -```` -````{group-tab} Ceph RBD + +#### Create a Ceph RBD pool Create an OSD storage pool named `pool1` in the default Ceph cluster (named `ceph`): @@ -124,8 +122,8 @@ Use the existing OSD storage pool `my-already-existing-osd` for `pool4`: Use the existing OSD erasure-coded pool `ecpool` and the OSD replicated pool `rpl-pool` for `pool5`: lxc storage create pool5 ceph source=rpl-pool ceph.osd.data_pool_name=ecpool -```` -````{group-tab} CephFS + +#### Create a CephFS pool ```{note} Each CephFS file system consists of two OSD storage pools, one for the actual data and one for the file metadata. 
@@ -143,8 +141,7 @@ Create a CephFS file system `my-filesystem` with a data pool called `my-data` an lxc storage create pool3 cephfs source=my-filesystem cephfs.create_missing=true cephfs.data_pool=my-data cephfs.meta_pool=my-metadata -```` -````{group-tab} Ceph Object +#### Create a Ceph Object pool ```{note} When using the Ceph Object driver, you must have a running Ceph Object Gateway [`radosgw`](https://docs.ceph.com/en/latest/radosgw/) URL available beforehand. @@ -153,16 +150,16 @@ When using the Ceph Object driver, you must have a running Ceph Object Gateway [ Use the existing Ceph Object Gateway `https://www.example.com/radosgw` to create `pool1`: lxc storage create pool1 cephobject cephobject.radosgw.endpoint=https://www.example.com/radosgw -```` -````{group-tab} Dell PowerFlex + +#### Create a Dell PowerFlex pool Create a storage pool named `pool1` using the PowerFlex pool `sp1` in the protection domain `pd1`: lxc storage create pool1 powerflex powerflex.pool=sp1 powerflex.domain=pd1 powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo -Create a storage pool named `pool2` using the id of PowerFlex pool `sp1`: +Create a storage pool named `pool2` using the ID of PowerFlex pool `sp1`: - lxc storage create pool2 powerflex powerflex.pool= powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo + lxc storage create pool2 powerflex powerflex.pool= powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo Create a storage pool named `pool3` that uses PowerFlex volume snapshots (see {ref}`storage-powerflex-limitations`) when creating volume copies: @@ -176,9 +173,6 @@ Create a storage pool named `pool5` that explicitly uses the PowerFlex SDC: lxc storage create pool5 powerflex powerflex.mode=sdc powerflex.pool= powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo -```` -````` - (storage-pools-cluster)= ### Create a storage pool in a 
cluster From 6fd36c3245edf2225534fea7e19d2f99fb82eed8 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Wed, 24 Jul 2024 13:47:39 +0100 Subject: [PATCH 031/106] lxd/storage/drivers/driver/dir/utils: Improve errors in setQuota Signed-off-by: Thomas Parrott --- lxd/storage/drivers/driver_dir_utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/storage/drivers/driver_dir_utils.go b/lxd/storage/drivers/driver_dir_utils.go index faf119a54aab..90242bd3a574 100644 --- a/lxd/storage/drivers/driver_dir_utils.go +++ b/lxd/storage/drivers/driver_dir_utils.go @@ -129,7 +129,7 @@ func (d *dir) setQuota(path string, volID int64, sizeBytes int64) error { // Initialise the project. err = quota.SetProject(path, projectID) if err != nil { - return err + return fmt.Errorf("Failed setting project: %w", err) } // Set the project quota size. From 05a83818c7782b7a1014d185408be878601af40f Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Wed, 24 Jul 2024 13:47:54 +0100 Subject: [PATCH 032/106] lxd/storage/quota/projectquota: Don't fail on missing file in SetProject Signed-off-by: Thomas Parrott --- lxd/storage/quota/projectquota.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lxd/storage/quota/projectquota.go b/lxd/storage/quota/projectquota.go index b0e4aac83703..5461cef2f40e 100644 --- a/lxd/storage/quota/projectquota.go +++ b/lxd/storage/quota/projectquota.go @@ -154,7 +154,9 @@ import "C" import ( "bufio" + "errors" "fmt" + "io/fs" "os" "path/filepath" "strings" @@ -238,6 +240,11 @@ func GetProject(path string) (uint32, error) { func SetProject(path string, id uint32) error { err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error { if err != nil { + // If file has disappeared during walk we cannot set project on it. 
+ if errors.Is(err, fs.ErrNotExist) { + return nil + } + return err } From 29ebc776ba9b3b7205cceb8e0daf6ff3372da5f4 Mon Sep 17 00:00:00 2001 From: Din Music Date: Wed, 24 Jul 2024 13:56:58 +0000 Subject: [PATCH 033/106] lxd-migrate: Fix path provided to the raw disk check Signed-off-by: Din Music --- lxd-migrate/main_migrate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd-migrate/main_migrate.go b/lxd-migrate/main_migrate.go index 3e0cf34e5af4..23d8ccf58a1f 100644 --- a/lxd-migrate/main_migrate.go +++ b/lxd-migrate/main_migrate.go @@ -347,7 +347,7 @@ func (c *cmdMigrate) runInteractive(server lxd.InstanceServer) (cmdMigrateData, } if config.InstanceArgs.Type == api.InstanceTypeVM && config.InstanceArgs.Source.Type == "migration" { - isImageTypeRaw, err := isImageTypeRaw(config.SourcePath) + isImageTypeRaw, err := isImageTypeRaw(s) if err != nil { return err } From 2c1a0f16b7e625eaefd697ba743b63e2c9f1a10c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Fri, 19 Jan 2024 18:12:01 -0500 Subject: [PATCH 034/106] api: instance_create_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber Sponsored-by: Lanes & Planes GmbH (https://www.lanes-planes.com) (cherry picked from commit a00b1aa4a713c7eb1460701ac6127c151fe72548) Signed-off-by: Mark Bolton License: Apache-2.0 --- doc/api-extensions.md | 7 +++++++ shared/version/api.go | 1 + 2 files changed, 8 insertions(+) diff --git a/doc/api-extensions.md b/doc/api-extensions.md index bfe703418e79..d6655d4e27bb 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -2428,3 +2428,10 @@ and joining an existing cluster. ## `instance_import_conversion` Adds the ability to convert images from different formats (e.g. VMDK or QCow2) into RAW image format and import them as LXD instances. 
+ +## `instance_create_start` + +Adds a `start` field to the `POST /1.0/instances` API which, when set +to `true`, will have the instance automatically start upon creation. + +In this scenario, the creation and startup are part of a single background operation. diff --git a/shared/version/api.go b/shared/version/api.go index 9f20873381bc..c4cac713c97e 100644 --- a/shared/version/api.go +++ b/shared/version/api.go @@ -409,6 +409,7 @@ var APIExtensions = []string{ "network_allocate_external_ips", "explicit_trust_token", "instance_import_conversion", + "instance_create_start", } // APIExtensionsCount returns the number of available API extensions. From 45f2a34752c414a6cb701902da1f937f4c4f321c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Fri, 19 Jan 2024 18:12:14 -0500 Subject: [PATCH 035/106] shared/api: Add Start to InstancesPost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber Sponsored-by: Lanes & Planes GmbH (https://www.lanes-planes.com) (cherry picked from commit d85d6843ed5631660ca2375e797597174715c83f) Signed-off-by: Mark Bolton License: Apache-2.0 --- shared/api/instance.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/shared/api/instance.go b/shared/api/instance.go index 839f0c69724e..2b3429373c63 100644 --- a/shared/api/instance.go +++ b/shared/api/instance.go @@ -49,6 +49,12 @@ type InstancesPost struct { // Type (container or virtual-machine) // Example: container Type InstanceType `json:"type" yaml:"type"` + + // Whether to start the instance after creation + // Example: true + // + // API extension: instance_create_start + Start bool `json:"start" yaml:"start"` } // InstancesPut represents the fields available for a mass update.
From 7126f3c485242d360904785ff0578bedadab620b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Fri, 19 Jan 2024 18:13:29 -0500 Subject: [PATCH 036/106] doc/rest-api: Refresh swagger YAML MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber Sponsored-by: Lanes & Planes GmbH (https://www.lanes-planes.com) (cherry picked from commit b8c293da5a3399b8b16dcd2b5875dc5b4dd5323f) Signed-off-by: Mark Bolton License: Apache-2.0 --- doc/rest-api.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/rest-api.yaml b/doc/rest-api.yaml index c4abca298976..9858c0846645 100644 --- a/doc/rest-api.yaml +++ b/doc/rest-api.yaml @@ -2752,6 +2752,11 @@ definitions: x-go-name: Restore source: $ref: '#/definitions/InstanceSource' + start: + description: Whether to start the instance after creation + example: true + type: boolean + x-go-name: Start stateful: description: Whether the instance currently has saved state on disk example: false From 4e9969d48b5f4cac912740c1c656278789666173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Fri, 19 Jan 2024 18:39:40 -0500 Subject: [PATCH 037/106] lxd/instance: Add support for Start property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber Sponsored-by: Lanes & Planes GmbH (https://www.lanes-planes.com) (cherry picked from commit 2405063803f5cc79198910014c48f4497b2ac091) Signed-off-by: Mark Bolton License: Apache-2.0 --- lxd/instances_post.go | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/lxd/instances_post.go b/lxd/instances_post.go index e2509b4dfa2e..e4cce80a081f 100644 --- a/lxd/instances_post.go +++ b/lxd/instances_post.go @@ -124,7 +124,13 @@ func createFromImage(s *state.State, r *http.Request, p api.Project, profiles [] return err } - return instanceCreateFromImage(s, img, args, op) + // 
Actually create the instance. + err = instanceCreateFromImage(s, img, args, op) + if err != nil { + return err + } + + return instanceCreateFinish(s, req, args) } resources := map[string][]api.URL{} @@ -175,8 +181,13 @@ func createFromNone(s *state.State, r *http.Request, projectName string, profile } run := func(op *operations.Operation) error { + // Actually create the instance. _, err := instanceCreateAsEmpty(s, args) - return err + if err != nil { + return err + } + + return instanceCreateFinish(s, req, args) } resources := map[string][]api.URL{} @@ -602,6 +613,7 @@ func createFromCopy(s *state.State, r *http.Request, projectName string, profile } run := func(op *operations.Operation) error { + // Actually create the instance. _, err := instanceCreateAsCopy(s, instanceCreateAsCopyOpts{ sourceInstance: source, targetInstance: args, @@ -614,7 +626,7 @@ func createFromCopy(s *state.State, r *http.Request, projectName string, profile return err } - return nil + return instanceCreateFinish(s, req, args) } resources := map[string][]api.URL{} @@ -701,8 +713,9 @@ func createFromBackup(s *state.State, r *http.Request, projectName string, data } // Check project permissions. + var req api.InstancesPost err = s.DB.Cluster.Transaction(s.ShutdownCtx, func(ctx context.Context, tx *db.ClusterTx) error { - req := api.InstancesPost{ + req = api.InstancesPost{ InstancePut: bInfo.Config.Container.Writable(), Name: bInfo.Name, Source: api.InstanceSource{}, // Only relevant for "copy" or "migration", but may not be nil. 
@@ -838,7 +851,8 @@ func createFromBackup(s *state.State, r *http.Request, projectName string, data } runRevert.Success() - return nil + + return instanceCreateFinish(s, &req, db.InstanceArgs{Name: bInfo.Name, Project: bInfo.Project}) } resources := map[string][]api.URL{} @@ -1488,3 +1502,19 @@ func clusterCopyContainerInternal(s *state.State, r *http.Request, source instan // Run the migration return createFromMigration(s, nil, projectName, profiles, req) } + +// instanceCreateFinish finalizes the creation process of an instance by starting it based on +// the Start field of the request. +func instanceCreateFinish(s *state.State, req *api.InstancesPost, args db.InstanceArgs) error { + if req == nil || !req.Start { + return nil + } + + // Start the instance. + inst, err := instance.LoadByProjectAndName(s, args.Project, args.Name) + if err != nil { + return fmt.Errorf("Failed to load the instance: %w", err) + } + + return inst.Start(false) +} From 40b09fdc55fff71dec5afc45b26ea5ac768732ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Fri, 19 Jan 2024 18:49:25 -0500 Subject: [PATCH 038/106] lxc/launch: Use the Start property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber Sponsored-by: Lanes & Planes GmbH (https://www.lanes-planes.com) (cherry picked from commit aa15dd4d73dd3fee6d7abf19df307051d4ebfee7) Signed-off-by: Mark Bolton License: Apache-2.0 --- lxc/init.go | 19 ++++++++++++++----- lxc/launch.go | 7 ++++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/lxc/init.go b/lxc/init.go index d9f416d8da09..18f34277603c 100644 --- a/lxc/init.go +++ b/lxc/init.go @@ -81,11 +81,11 @@ func (c *cmdInit) run(cmd *cobra.Command, args []string) error { return nil } - _, _, err = c.create(c.global.conf, args) + _, _, err = c.create(c.global.conf, args, false) return err } -func (c *cmdInit) create(conf *config.Config, args []string) (lxd.InstanceServer, string, 
error) { +func (c *cmdInit) create(conf *config.Config, args []string, launch bool) (lxd.InstanceServer, string, error) { var name string var image string var remote string @@ -164,10 +164,18 @@ func (c *cmdInit) create(conf *config.Config, args []string) (lxd.InstanceServer } if !c.global.flagQuiet { - if name == "" { - fmt.Printf(i18n.G("Creating the instance") + "\n") + if d.HasExtension("instance_create_start") && launch { + if name == "" { + fmt.Printf(i18n.G("Launching the instance") + "\n") + } else { + fmt.Printf(i18n.G("Launching %s")+"\n", name) + } } else { - fmt.Printf(i18n.G("Creating %s")+"\n", name) + if name == "" { + fmt.Printf(i18n.G("Creating the instance") + "\n") + } else { + fmt.Printf(i18n.G("Creating %s")+"\n", name) + } } } @@ -252,6 +260,7 @@ func (c *cmdInit) create(conf *config.Config, args []string) (lxd.InstanceServer Name: name, InstanceType: c.flagType, Type: instanceDBType, + Start: launch, } req.Config = configMap diff --git a/lxc/launch.go b/lxc/launch.go index fa7e57d84859..50cc5ec4e337 100644 --- a/lxc/launch.go +++ b/lxc/launch.go @@ -58,11 +58,16 @@ func (c *cmdLaunch) run(cmd *cobra.Command, args []string) error { } // Call the matching code from init - d, name, err := c.init.create(conf, args) + d, name, err := c.init.create(conf, args, true) if err != nil { return err } + // Check if the instance was started by the server. 
+ if d.HasExtension("instance_create_start") { + return nil + } + // Get the remote var remote string if len(args) == 2 { From 358941e7e8531fd851a8c1ff7bb27f0a1598b3e0 Mon Sep 17 00:00:00 2001 From: Mark Bolton Date: Wed, 24 Jul 2024 14:26:22 -0700 Subject: [PATCH 039/106] doc/howto: Update documentation on start field Signed-off-by: Mark Bolton --- doc/howto/instances_create.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/howto/instances_create.md b/doc/howto/instances_create.md index 95610a85e0c9..4643b9fb1b8d 100644 --- a/doc/howto/instances_create.md +++ b/doc/howto/instances_create.md @@ -74,6 +74,19 @@ To start an instance, send a PUT request to change the instance state: See {ref}`instances-manage-start` for more information. +If you would like to start the instance upon creation, set the `start` property to true. The following example will create the container, then start it: + + lxc query --request POST /1.0/instances --data '{ + "name": "", + "source": { + "alias": "", + "protocol": "simplestreams", + "server": "", + "type": "image" + }, + "start": true + }' + ```` ````{group-tab} UI From b84a4fd4201c44a1a16d13318e67c2d4cf6a323c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20Peliz=C3=A4us?= Date: Thu, 25 Jul 2024 12:01:22 +0200 Subject: [PATCH 040/106] doc: Add remote storage driver cluster pool examples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides additional examples when to define (non-)member specific config keys and how to create cluster storage pools backed by remote storage drivers. 
Signed-off-by: Julian Pelizäus --- doc/howto/storage_pools.md | 49 ++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/doc/howto/storage_pools.md b/doc/howto/storage_pools.md index 4c38520f850d..97af5fa1a132 100644 --- a/doc/howto/storage_pools.md +++ b/doc/howto/storage_pools.md @@ -174,7 +174,7 @@ Create a storage pool named `pool5` that explicitly uses the PowerFlex SDC: lxc storage create pool5 powerflex powerflex.mode=sdc powerflex.pool= powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo (storage-pools-cluster)= -### Create a storage pool in a cluster +## Create a storage pool in a cluster If you are running a LXD cluster and want to add a storage pool, you must create the storage pool for each cluster member separately. The reason for this is that the configuration, for example, the storage location or the size of the pool, might be different between cluster members. @@ -183,7 +183,22 @@ Therefore, you must first create a pending storage pool on each member with the Make sure to use the same storage pool name for all members. Then create the storage pool without specifying the `--target` flag to actually set it up. -For example, the following series of commands sets up a storage pool with the name `my-pool` at different locations and with different sizes on three cluster members: +Also see {ref}`cluster-config-storage`. + +```{note} +For most storage drivers, the storage pools exist locally on each cluster member. +That means that if you create a storage volume in a storage pool on one member, it will not be available on other cluster members. + +This behavior is different for Ceph-based storage pools (`ceph`, `cephfs` and `cephobject`) where each storage pool exists in one central location and therefore, all cluster members access the same storage pool with the same storage volumes. 
+``` + +### Examples + +See the following examples for different storage drivers for instructions on how to create local or remote storage pools in a cluster. + +#### Create a local storage pool + +The following series of commands sets up a ZFS storage pool with the name `my-pool` at different locations and with different sizes on three cluster members: ```{terminal} :input: lxc storage create my-pool zfs source=/dev/sdX size=10GiB --target=vm01 @@ -196,13 +211,33 @@ Storage pool my-pool pending on member vm03 Storage pool my-pool created ``` -Also see {ref}`cluster-config-storage`. +#### Create a remote storage pool -```{note} -For most storage drivers, the storage pools exist locally on each cluster member. -That means that if you create a storage volume in a storage pool on one member, it will not be available on other cluster members. +The following series of commands sets up a Ceph RBD storage pool with the name `my-remote-pool` and the on-disk name `my-osd` on three cluster members. +Because the {config:option}`storage-ceph-pool-conf:ceph.osd.pool_name` configuration setting isn't member-specific, it must be set when creating the actual storage pool: -This behavior is different for Ceph-based storage pools (`ceph`, `cephfs` and `cephobject`) where each storage pool exists in one central location and therefore, all cluster members access the same storage pool with the same storage volumes. 
+```{terminal} +:input: lxc storage create my-remote-pool ceph --target=vm01 +Storage pool my-remote-pool pending on member vm01 +:input: lxc storage create my-remote-pool ceph --target=vm02 +Storage pool my-remote-pool pending on member vm02 +:input: lxc storage create my-remote-pool ceph --target=vm03 +Storage pool my-remote-pool pending on member vm03 +:input: lxc storage create my-remote-pool ceph ceph.osd.pool_name=my-osd +Storage pool my-remote-pool created +``` + +The following commands create a second storage pool `my-remote-pool2` using the Dell PowerFlex driver in SDC mode using the pool `sp1` in protection domain `pd1`: + +```{terminal} +:input: lxc storage create my-remote-pool2 powerflex --target=vm01 +Storage pool my-remote-pool2 pending on member vm01 +:input: lxc storage create my-remote-pool2 powerflex --target=vm02 +Storage pool my-remote-pool2 pending on member vm02 +:input: lxc storage create my-remote-pool2 powerflex --target=vm03 +Storage pool my-remote-pool2 pending on member vm03 +:input: lxc storage create my-remote-pool2 powerflex powerflex.mode=sdc powerflex.pool=sp1 powerflex.domain=pd1 powerflex.gateway=https://powerflex powerflex.user.name=lxd powerflex.user.password=foo +Storage pool my-remote-pool2 created ``` ## Configure storage pool settings From 83d0d7bf4d952c76a7beb1ddad00d824c04d10ee Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 25 Jul 2024 14:22:43 +0200 Subject: [PATCH 041/106] lxd/apparmor/instance_lxc: allow nosymfollow mount flag in more cases It turns out, that a ruleset: {{- if .feature_mount_nosymfollow }} # see https://github.com/canonical/lxd/pull/12698 mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /[^spd]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /d[^e]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /de[^v]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/.[^l]*{,/**}, mount 
options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/.l[^x]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/.lx[^c]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/.lxc?*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/[^.]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev?*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /p[^r]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /pr[^o]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /pro[^c]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /proc?*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /s[^y]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /sy[^s]*{,/**}, mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /sys?*{,/**}, {{- end }} is not enough to allow nosymfollow. We are still getting AppArmor denials like this: [110841.647871] audit: type=1400 audit(1721910063.197:1611): apparmor="DENIED" operation="mount" class="mount" info="failed flags match" error=-13 profile="lxd-secure-oriole_" name="/dev/shm/" pid=712867 comm="(sd-mkdcreds)" flags="ro, nosuid, nodev, noexec, remount, bind" First of all, there is no "nosymfollow" in the kernel log. Which is a bug and should be fixed by: https://lore.kernel.org/all/20240628153712.288166-1-aleksandr.mikhalitsyn@canonical.com/ Secondly, it looks like these rules in the form of mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /some/path, just do not work at all. At least in AppArmor 4.0+ (have not yet tested with older ones).
During my local experiments, I found that a working variant of it might be: mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /some/path, or wider: mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow), Let's just add a wider variant of the rule in addition to what we already have for unprivileged containers. But keep in mind that something is wrong with these rules in their more restrictive form (with path specifier). This is a matter for further investigation, because it's important for the privileged containers case. See also: canonical#12698 Closes #12698 May close #13810 Signed-off-by: Alexander Mikhalitsyn --- lxd/apparmor/instance_lxc.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lxd/apparmor/instance_lxc.go b/lxd/apparmor/instance_lxc.go index e61f306bf575..963c22bfb438 100644 --- a/lxd/apparmor/instance_lxc.go +++ b/lxd/apparmor/instance_lxc.go @@ -543,6 +543,16 @@ profile "{{ .name }}" flags=(attach_disconnected,mediate_deleted) { mount options=(ro,remount,bind,noatime,nosuid,noexec,nodev), mount options=(ro,remount,bind,nosuid,noexec,strictatime), mount options=(ro,remount,nosuid,noexec,strictatime), +{{- if .feature_mount_nosymfollow }} + mount options=(ro,remount,bind,nosymfollow), + mount options=(ro,remount,bind,nosymfollow,nodev), + mount options=(ro,remount,bind,nosymfollow,noexec), + mount options=(ro,remount,bind,nosymfollow,nosuid), + mount options=(ro,remount,bind,nosymfollow,noexec,nodev), + mount options=(ro,remount,bind,nosymfollow,nosuid,nodev), + mount options=(ro,remount,bind,nosymfollow,nosuid,noexec), + mount options=(ro,remount,bind,nosymfollow,nosuid,noexec,nodev), +{{- end }} # Allow remounting things read-only mount options=(ro,remount) /, From 38d7843af56a32179106e9584dee13b691db4270 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Apr 2024 16:26:45 +0100 Subject: [PATCH 042/106] shared/entity: Return the project name when parsing the project URL.
Previously this was just returned in the path arguments. Signed-off-by: Mark Laing --- shared/entity/type.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/shared/entity/type.go b/shared/entity/type.go index eea8f3ddc53c..d78b7c059a46 100644 --- a/shared/entity/type.go +++ b/shared/entity/type.go @@ -356,6 +356,11 @@ entityTypeLoop: } } + // If it's a project URL the project name is not a query parameter, it's in the path. + if entityType == TypeProject { + projectName = pathArguments[0] + } + return entityType, projectName, u.Query().Get("target"), pathArguments, nil } From f099f6f5eeac955ee28351db99c72480da60976a Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Apr 2024 16:28:50 +0100 Subject: [PATCH 043/106] shared/entity: Skip project query parameter on project entities. So that `URL` is the inverse of `ParseURL`, we should explicitly ignore the `projectName` argument when creating the URL of a project. Signed-off-by: Mark Laing --- shared/entity/type.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/shared/entity/type.go b/shared/entity/type.go index d78b7c059a46..50ca18bb7a6a 100644 --- a/shared/entity/type.go +++ b/shared/entity/type.go @@ -216,8 +216,9 @@ func (t Type) URL(projectName string, location string, pathArguments ...string) u := api.NewURL().Path(path...) - // Always set project parameter if provided (operations and warnings may be project specific but it is not a requirement). - if projectName != "" { + // Set project parameter if provided and the entity type is not TypeProject (operations and warnings may be project + // specific but it is not a requirement). + if projectName != "" && t != TypeProject { u = u.WithQuery("project", projectName) } From 96c25af72c5c62636f0189d343ca27c5957426af Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Apr 2024 16:29:46 +0100 Subject: [PATCH 044/106] shared/entity: Update unit tests for URL and Parse URL functions. 
We now expect the project name to be returned from ParseURL. Additionally, we now test that the normalised URL is what we expect. Signed-off-by: Mark Laing --- shared/entity/type_test.go | 355 ++++++++++++++++++++----------------- 1 file changed, 189 insertions(+), 166 deletions(-) diff --git a/shared/entity/type_test.go b/shared/entity/type_test.go index e33863a72663..7f0d027bb118 100644 --- a/shared/entity/type_test.go +++ b/shared/entity/type_test.go @@ -1,6 +1,7 @@ package entity import ( + "fmt" "net/url" "testing" @@ -12,158 +13,191 @@ import ( func TestURL(t *testing.T) { tests := []struct { - name string - rawURL string - expectedEntityType Type - expectedProject string - expectedLocation string - expectedPathArgs []string - expectedErr error + name string + rawURL string + expectedNormalisedURL string + expectedEntityType Type + expectedProject string + expectedLocation string + expectedPathArgs []string + expectedErr error }{ { - name: "containers", - rawURL: "/1.0/containers/my-container?project=my-project", - expectedEntityType: TypeContainer, - expectedProject: "my-project", - expectedPathArgs: []string{"my-container"}, - expectedErr: nil, - }, - { - name: "images", - rawURL: "/1.0/images/fwirnoaiwnerfoiawnef", - expectedEntityType: TypeImage, - expectedProject: "default", - expectedPathArgs: []string{"fwirnoaiwnerfoiawnef"}, - expectedErr: nil, - }, - { - name: "profiles", - rawURL: "/1.0/profiles/my-profile?project=my-project", - expectedEntityType: TypeProfile, - expectedProject: "my-project", - expectedPathArgs: []string{"my-profile"}, - expectedErr: nil, - }, - { - name: "projects", - rawURL: "/1.0/projects/my-project", - expectedEntityType: TypeProject, - expectedProject: "", - expectedPathArgs: []string{"my-project"}, - expectedErr: nil, - }, - { - name: "certificates", - rawURL: "/1.0/certificates/foawienfoawnefkanwelfknsfl", - expectedEntityType: TypeCertificate, - expectedProject: "", - expectedPathArgs: 
[]string{"foawienfoawnefkanwelfknsfl"}, - expectedErr: nil, - }, - { - name: "instances", - rawURL: "/1.0/instances/my-instance", - expectedEntityType: TypeInstance, - expectedProject: "default", - expectedPathArgs: []string{"my-instance"}, - expectedErr: nil, - }, - { - name: "instance backup", - rawURL: "/1.0/instances/my-instance/backups/my-backup?project=my-project", - expectedEntityType: TypeInstanceBackup, - expectedProject: "my-project", - expectedPathArgs: []string{"my-instance", "my-backup"}, - expectedErr: nil, - }, - { - name: "instance snapshot", - rawURL: "/1.0/instances/my-instance/snapshots/my-snapshot", - expectedEntityType: TypeInstanceSnapshot, - expectedProject: "default", - expectedPathArgs: []string{"my-instance", "my-snapshot"}, - expectedErr: nil, - }, - { - name: "networks", - rawURL: "/1.0/networks/my-network?project=my-project", - expectedEntityType: TypeNetwork, - expectedProject: "my-project", - expectedPathArgs: []string{"my-network"}, - expectedErr: nil, - }, - { - name: "network acls", - rawURL: "/1.0/network-acls/my-network-acl", - expectedEntityType: TypeNetworkACL, - expectedProject: "default", - expectedPathArgs: []string{"my-network-acl"}, - expectedErr: nil, - }, - { - name: "cluster members", - rawURL: "/1.0/cluster/members/node01", - expectedEntityType: TypeNode, - expectedProject: "", - expectedPathArgs: []string{"node01"}, - expectedErr: nil, - }, - { - name: "operation", - rawURL: "/1.0/operations/3e75d1bf-30ed-45ce-9e02-267fa7338eb4", - expectedEntityType: TypeOperation, - expectedProject: "", - expectedPathArgs: []string{"3e75d1bf-30ed-45ce-9e02-267fa7338eb4"}, - expectedErr: nil, - }, - { - name: "storage pools", - rawURL: "/1.0/storage-pools/my-storage-pool", - expectedEntityType: TypeStoragePool, - expectedProject: "", - expectedPathArgs: []string{"my-storage-pool"}, - expectedErr: nil, - }, - { - name: "storage volumes", - rawURL: 
"/1.0/storage-pools/my-storage-pool/volumes/custom/my%2Fstorage-volume?project=my-project&target=node01", - expectedEntityType: TypeStorageVolume, - expectedProject: "my-project", - expectedLocation: "node01", - expectedPathArgs: []string{"my-storage-pool", "custom", "my/storage-volume"}, - expectedErr: nil, - }, - { - name: "storage volume backups", - rawURL: "/1.0/storage-pools/my-storage-pool/volumes/custom/my-storage-volume/backups/my-backup?project=my-project", - expectedEntityType: TypeStorageVolumeBackup, - expectedProject: "my-project", - expectedPathArgs: []string{"my-storage-pool", "custom", "my-storage-volume", "my-backup"}, - expectedErr: nil, - }, - { - name: "storage volume snapshots", - rawURL: "/1.0/storage-pools/my-storage-pool/volumes/custom/my-storage-volume/snapshots/my-snapshot?project=my-project", - expectedEntityType: TypeStorageVolumeSnapshot, - expectedProject: "my-project", - expectedPathArgs: []string{"my-storage-pool", "custom", "my-storage-volume", "my-snapshot"}, - expectedErr: nil, - }, - { - name: "warnings", - rawURL: "/1.0/warnings/3e75d1bf-30ed-45ce-9e02-267fa7338eb4", - expectedEntityType: TypeWarning, - expectedProject: "", - expectedPathArgs: []string{"3e75d1bf-30ed-45ce-9e02-267fa7338eb4"}, - expectedErr: nil, - }, - { - name: "cluster groups", - rawURL: "/1.0/cluster/groups/my-cluster-group", - expectedEntityType: TypeClusterGroup, - expectedProject: "", - expectedPathArgs: []string{"my-cluster-group"}, - expectedErr: nil, + name: "not a LXD URL", + rawURL: "/1.0/not/a/url", + expectedErr: fmt.Errorf("Failed to match entity URL %q", "/1.0/not/a/url"), + }, + { + name: "containers", + rawURL: "/1.0/containers/my-container?project=my-project", + expectedNormalisedURL: "/1.0/containers/my-container?project=my-project", + expectedEntityType: TypeContainer, + expectedProject: "my-project", + expectedPathArgs: []string{"my-container"}, + expectedErr: nil, + }, + { + name: "images", + rawURL: "/1.0/images/fwirnoaiwnerfoiawnef", + 
expectedNormalisedURL: "/1.0/images/fwirnoaiwnerfoiawnef?project=default", + expectedEntityType: TypeImage, + expectedProject: api.ProjectDefaultName, + expectedPathArgs: []string{"fwirnoaiwnerfoiawnef"}, + expectedErr: nil, + }, + { + name: "profiles", + rawURL: "/1.0/profiles/my-profile?project=my-project", + expectedNormalisedURL: "/1.0/profiles/my-profile?project=my-project", + expectedEntityType: TypeProfile, + expectedProject: "my-project", + expectedPathArgs: []string{"my-profile"}, + expectedErr: nil, + }, + { + name: "projects", + rawURL: "/1.0/projects/my-project", + expectedNormalisedURL: "/1.0/projects/my-project", + expectedEntityType: TypeProject, + expectedProject: "my-project", + expectedPathArgs: []string{"my-project"}, + expectedErr: nil, + }, + { + name: "certificates", + rawURL: "/1.0/certificates/foawienfoawnefkanwelfknsfl", + expectedNormalisedURL: "/1.0/certificates/foawienfoawnefkanwelfknsfl", + expectedEntityType: TypeCertificate, + expectedProject: "", + expectedPathArgs: []string{"foawienfoawnefkanwelfknsfl"}, + expectedErr: nil, + }, + { + name: "instances", + rawURL: "/1.0/instances/my-instance", + expectedNormalisedURL: "/1.0/instances/my-instance?project=default", + expectedEntityType: TypeInstance, + expectedProject: api.ProjectDefaultName, + expectedPathArgs: []string{"my-instance"}, + expectedErr: nil, + }, + { + name: "instance backup", + rawURL: "/1.0/instances/my-instance/backups/my-backup?project=my-project", + expectedNormalisedURL: "/1.0/instances/my-instance/backups/my-backup?project=my-project", + expectedEntityType: TypeInstanceBackup, + expectedProject: "my-project", + expectedPathArgs: []string{"my-instance", "my-backup"}, + expectedErr: nil, + }, + { + name: "instance snapshot", + rawURL: "/1.0/instances/my-instance/snapshots/my-snapshot", + expectedNormalisedURL: "/1.0/instances/my-instance/snapshots/my-snapshot?project=default", + expectedEntityType: TypeInstanceSnapshot, + expectedProject: api.ProjectDefaultName, + 
expectedPathArgs: []string{"my-instance", "my-snapshot"}, + expectedErr: nil, + }, + { + name: "networks", + rawURL: "/1.0/networks/my-network?project=my-project", + expectedNormalisedURL: "/1.0/networks/my-network?project=my-project", + expectedEntityType: TypeNetwork, + expectedProject: "my-project", + expectedPathArgs: []string{"my-network"}, + expectedErr: nil, + }, + { + name: "network acls", + rawURL: "/1.0/network-acls/my-network-acl", + expectedNormalisedURL: "/1.0/network-acls/my-network-acl?project=default", + expectedEntityType: TypeNetworkACL, + expectedProject: api.ProjectDefaultName, + expectedPathArgs: []string{"my-network-acl"}, + expectedErr: nil, + }, + { + name: "cluster members", + rawURL: "/1.0/cluster/members/node01", + expectedNormalisedURL: "/1.0/cluster/members/node01", + expectedEntityType: TypeNode, + expectedProject: "", + expectedPathArgs: []string{"node01"}, + expectedErr: nil, + }, + { + name: "operation", + rawURL: "/1.0/operations/3e75d1bf-30ed-45ce-9e02-267fa7338eb4", + expectedNormalisedURL: "/1.0/operations/3e75d1bf-30ed-45ce-9e02-267fa7338eb4", + expectedEntityType: TypeOperation, + expectedProject: "", + expectedPathArgs: []string{"3e75d1bf-30ed-45ce-9e02-267fa7338eb4"}, + expectedErr: nil, + }, + { + name: "storage pools", + rawURL: "/1.0/storage-pools/my-storage-pool", + expectedNormalisedURL: "/1.0/storage-pools/my-storage-pool", + expectedEntityType: TypeStoragePool, + expectedProject: "", + expectedPathArgs: []string{"my-storage-pool"}, + expectedErr: nil, + }, + { + name: "storage volumes", + rawURL: "/1.0/storage-pools/my-storage-pool/volumes/custom/my%2Fstorage-volume?project=my-project&target=node01", + expectedNormalisedURL: "/1.0/storage-pools/my-storage-pool/volumes/custom/my%2Fstorage-volume?project=my-project&target=node01", + expectedEntityType: TypeStorageVolume, + expectedProject: "my-project", + expectedLocation: "node01", + expectedPathArgs: []string{"my-storage-pool", "custom", "my/storage-volume"}, + 
expectedErr: nil, + }, + { + name: "storage volume backups", + rawURL: "/1.0/storage-pools/my-storage-pool/volumes/custom/my-storage-volume/backups/my-backup?project=my-project", + expectedNormalisedURL: "/1.0/storage-pools/my-storage-pool/volumes/custom/my-storage-volume/backups/my-backup?project=my-project", + expectedEntityType: TypeStorageVolumeBackup, + expectedProject: "my-project", + expectedPathArgs: []string{"my-storage-pool", "custom", "my-storage-volume", "my-backup"}, + expectedErr: nil, + }, + { + name: "storage volume snapshots", + rawURL: "/1.0/storage-pools/my-storage-pool/volumes/custom/my-storage-volume/snapshots/my-snapshot?project=my-project", + expectedNormalisedURL: "/1.0/storage-pools/my-storage-pool/volumes/custom/my-storage-volume/snapshots/my-snapshot?project=my-project", + expectedEntityType: TypeStorageVolumeSnapshot, + expectedProject: "my-project", + expectedPathArgs: []string{"my-storage-pool", "custom", "my-storage-volume", "my-snapshot"}, + expectedErr: nil, + }, + { + name: "storage buckets", + rawURL: "/1.0/storage-pools/my-storage-pool/buckets/my-bucket", + expectedNormalisedURL: "/1.0/storage-pools/my-storage-pool/buckets/my-bucket?project=default", + expectedEntityType: TypeStorageBucket, + expectedProject: api.ProjectDefaultName, + expectedPathArgs: []string{"my-storage-pool", "my-bucket"}, + expectedErr: nil, + }, + { + name: "warnings", + rawURL: "/1.0/warnings/3e75d1bf-30ed-45ce-9e02-267fa7338eb4", + expectedNormalisedURL: "/1.0/warnings/3e75d1bf-30ed-45ce-9e02-267fa7338eb4", + expectedEntityType: TypeWarning, + expectedProject: "", + expectedPathArgs: []string{"3e75d1bf-30ed-45ce-9e02-267fa7338eb4"}, + expectedErr: nil, + }, + { + name: "cluster groups", + rawURL: "/1.0/cluster/groups/my-cluster-group", + expectedNormalisedURL: "/1.0/cluster/groups/my-cluster-group", + expectedEntityType: TypeClusterGroup, + expectedProject: "", + expectedPathArgs: []string{"my-cluster-group"}, + expectedErr: nil, }, } @@ -181,24 +215,13 
@@ func TestURL(t *testing.T) { } assert.Equal(t, tt.expectedErr, actualErr) + if tt.expectedErr != nil { + return + } - requiresProject, err := actualEntityType.RequiresProject() + normalisedURL, err := actualEntityType.URL(actualProject, actualLocation, actualPathArgs...) + assert.Equal(t, normalisedURL.String(), tt.expectedNormalisedURL) assert.NoError(t, err) - if u.Query().Get("project") != "" || !requiresProject { - // Assert that we can convert back to the same value. - actualURL, err := actualEntityType.URL(actualProject, actualLocation, actualPathArgs...) - assert.NoError(t, err) - assert.Equal(t, tt.rawURL, actualURL.String()) - } else { - // If the entity type requires a project but one wasn't set, assert that (entity.Type).URL sets the - // default project. - q := u.Query() - q.Set("project", api.ProjectDefaultName) - u.RawQuery = q.Encode() - actualURL, err := actualEntityType.URL(actualProject, actualLocation, actualPathArgs...) - assert.NoError(t, err) - assert.Equal(t, u.String(), actualURL.String()) - } }) } } From 44c324241235f8007692b51d400044554c722c58 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Apr 2024 16:33:10 +0100 Subject: [PATCH 045/106] lxd/db/openfga: Remove logic for getting project name from path arguments. We can now use the value of `projectName` returned from ParseURL. 
Signed-off-by: Mark Laing --- lxd/db/openfga/openfga.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lxd/db/openfga/openfga.go b/lxd/db/openfga/openfga.go index 5227a8cca03b..c1a15d73cd74 100644 --- a/lxd/db/openfga/openfga.go +++ b/lxd/db/openfga/openfga.go @@ -407,7 +407,7 @@ func (o *openfgaStore) ReadStartingWithUser(ctx context.Context, store string, f return nil, fmt.Errorf("ReadStartingWithUser: Failed to parse user entity URL %q: %w", userURL, err) } - _, _, _, userURLPathArguments, err := entity.ParseURL(*u) + _, projectName, _, userURLPathArguments, err := entity.ParseURL(*u) if err != nil { return nil, fmt.Errorf("ReadStartingWithUser: Unexpected user entity URL %q: %w", userURL, err) } @@ -421,13 +421,6 @@ func (o *openfgaStore) ReadStartingWithUser(ctx context.Context, store string, f return nil, fmt.Errorf("ReadStartingWithUser: Cannot list server relations for non-server entities") } - // If the filter is by project, we want to filter entity URLs by the project name. - var projectName string - if filter.Relation == "project" { - // The project name is the first path argument of a project URL. - projectName = userURLPathArguments[0] - } - // Get the entity URLs with the given type and project (if set). var entityURLs map[entity.Type]map[int]*api.URL err = o.clusterDB.Transaction(ctx, func(ctx context.Context, tx *db.ClusterTx) error { From 7f877b4e613866bb3cc65163f6cb344bb38473b4 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Apr 2024 16:33:39 +0100 Subject: [PATCH 046/106] lxd/auth/drivers: Remove logic for getting project name from path arguments. We can now use the value of `projectName` returned from ParseURL. 
Signed-off-by: Mark Laing --- lxd/auth/drivers/tls.go | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/lxd/auth/drivers/tls.go b/lxd/auth/drivers/tls.go index 574e5918cc45..93d74edfae3a 100644 --- a/lxd/auth/drivers/tls.go +++ b/lxd/auth/drivers/tls.go @@ -65,15 +65,11 @@ func (t *tls) CheckPermission(ctx context.Context, entityURL *api.URL, entitleme return nil } - entityType, projectName, _, pathArgs, err := entity.ParseURL(entityURL.URL) + entityType, projectName, _, _, err := entity.ParseURL(entityURL.URL) if err != nil { return fmt.Errorf("Failed to parse entity URL: %w", err) } - if entityType == entity.TypeProject { - projectName = pathArgs[0] - } - // Check server level object types switch entityType { case entity.TypeServer: @@ -156,7 +152,7 @@ func (t *tls) GetPermissionChecker(ctx context.Context, entitlement auth.Entitle // Filter objects by project. return func(entityURL *api.URL) bool { - eType, project, _, pathArgs, err := entity.ParseURL(entityURL.URL) + eType, project, _, _, err := entity.ParseURL(entityURL.URL) if err != nil { logger.Warn("Permission checker failed to parse entity URL", logger.Ctx{"entity_url": entityURL, "err": err}) return false @@ -168,11 +164,6 @@ func (t *tls) GetPermissionChecker(ctx context.Context, entitlement auth.Entitle return false } - // If it's a project URL, the project name is in the path, not the query parameter. - if eType == entity.TypeProject { - project = pathArgs[0] - } - // If an effective project has been set in the request context. We expect all entities to be in that project. if effectiveProject != "" { return project == effectiveProject From 2b061f206a8107dc21831685e4cc9eb70e9ba873 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Apr 2024 16:37:23 +0100 Subject: [PATCH 047/106] lxd/auth/drivers: Standardise URLs before performing OpenFGA requests. 
This commit regenerates the entity URLs that are passed into `CheckPermission` and the `PermissionChecker` returned by `GetPermissionChecker` to enforce that the project parameter is added to the URL even if it is "default". This is expected by the underlying `storage.OpenFGADatastore` implementation. Signed-off-by: Mark Laing --- lxd/auth/drivers/openfga.go | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/lxd/auth/drivers/openfga.go b/lxd/auth/drivers/openfga.go index b19b0e3dc981..4002e83f25b4 100644 --- a/lxd/auth/drivers/openfga.go +++ b/lxd/auth/drivers/openfga.go @@ -169,12 +169,18 @@ func (e *embeddedOpenFGA) CheckPermission(ctx context.Context, entityURL *api.UR } } - // Construct OpenFGA objects for the user (identity) and the entity. - entityType, _, _, _, err := entity.ParseURL(entityURL.URL) + // Deconstruct the given URL. + entityType, projectName, location, pathArguments, err := entity.ParseURL(entityURL.URL) if err != nil { return fmt.Errorf("Authorization driver failed to parse entity URL %q: %w", entityURL.String(), err) } + // Construct the URL in a standardised form (adding the project parameter if it was not present). + entityURL, err = entityType.URL(projectName, location, pathArguments...) + if err != nil { + return fmt.Errorf("Failed to standardize entity URL: %w", err) + } + userObject := fmt.Sprintf("%s:%s", entity.TypeIdentity, entity.IdentityURL(id.AuthenticationMethod, id.Identifier).String()) entityObject := fmt.Sprintf("%s:%s", entityType, entityURL.String()) @@ -387,7 +393,24 @@ func (e *embeddedOpenFGA) GetPermissionChecker(ctx context.Context, entitlement // Return a permission checker that constructs an OpenFGA object from the given URL and returns true if the object is // found in the list of objects in the response. 
return func(entityURL *api.URL) bool { - object := fmt.Sprintf("%s:%s", entityType, entityURL.String()) + parsedEntityType, projectName, location, pathArguments, err := entity.ParseURL(entityURL.URL) + if err != nil { + l.Error("Failed to parse permission checker entity URL", logger.Ctx{"url": entityURL.String(), "err": err}) + return false + } + + if parsedEntityType != entityType { + l.Error("Unexpected permission checker input URL", logger.Ctx{"expected_entity_type": entityType, "actual_entity_type": parsedEntityType, "url": entityURL.String()}) + return false + } + + standardisedEntityURL, err := entityType.URL(projectName, location, pathArguments...) + if err != nil { + l.Error("Failed to standardise permission checker entity URL", logger.Ctx{"url": entityURL.String(), "err": err}) + return false + } + + object := fmt.Sprintf("%s:%s", entityType, standardisedEntityURL.String()) return shared.ValueInSlice(object, objects) }, nil } From 197ab06127def3120a5a0307b77062d6bb9f39fc Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Apr 2024 16:38:47 +0100 Subject: [PATCH 048/106] lxd: Fix `projectUsedBy` function. The entity URLs returned by `projectUsedBy` included the project query parameter even when the project was "default". Signed-off-by: Mark Laing --- lxd/api_project.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lxd/api_project.go b/lxd/api_project.go index 693ec8954844..2b043a35edeb 100644 --- a/lxd/api_project.go +++ b/lxd/api_project.go @@ -219,6 +219,13 @@ func projectUsedBy(ctx context.Context, tx *db.ClusterTx, project *cluster.Proje var usedBy []string for _, entityIDToURL := range entityURLs { for _, u := range entityIDToURL { + // Omit the project query parameter if it is the default project. 
+ if u.Query().Get("project") == api.ProjectDefaultName { + q := u.Query() + q.Del("project") + u.RawQuery = q.Encode() + } + usedBy = append(usedBy, u.String()) } } From 4db25ae8ec4e13cb7ee3e91fba0008a11710d9e6 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 11 Apr 2024 16:40:40 +0100 Subject: [PATCH 049/106] lxd/project: Simplify `FilterUsedBy` function. We no longer need to enforce that the project query parameter is set in the URLs that are passed into Authorizer methods, nor do we need to strip the project query parameter from the URL. Signed-off-by: Mark Laing --- lxd/project/permissions.go | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/lxd/project/permissions.go b/lxd/project/permissions.go index c10969c7c768..6f6fbec8f1d2 100644 --- a/lxd/project/permissions.go +++ b/lxd/project/permissions.go @@ -1478,35 +1478,18 @@ func FilterUsedBy(authorizer auth.Authorizer, r *http.Request, entries []string) continue } - entityType, projectName, location, pathArguments, err := entity.ParseURL(*u) + entityType, _, _, _, err := entity.ParseURL(*u) if err != nil { logger.Warn("Failed to parse project used-by entity URL", logger.Ctx{"url": entry, "err": err}) continue } - entityURL, err := entityType.URL(projectName, location, pathArguments...) - if err != nil { - logger.Warn("Failed to create canonical entity URL for project used-by filtering", logger.Ctx{"url": entry, "err": err}) - continue - } - - urlsByEntityType[entityType] = append(urlsByEntityType[entityType], entityURL) + urlsByEntityType[entityType] = append(urlsByEntityType[entityType], &api.URL{URL: *u}) } // Filter the entries. usedBy := make([]string, 0, len(entries)) - // Used-by lists do not include the project query parameter if it is the default project. 
- appendUsedBy := func(u *api.URL) { - if u.Query().Get("project") == api.ProjectDefaultName { - q := u.Query() - q.Del("project") - u.RawQuery = q.Encode() - } - - usedBy = append(usedBy, u.String()) - } - for entityType, urls := range urlsByEntityType { // If only one entry of this type, check directly. if len(urls) == 1 { @@ -1515,7 +1498,7 @@ func FilterUsedBy(authorizer auth.Authorizer, r *http.Request, entries []string) continue } - appendUsedBy(urls[0]) + usedBy = append(usedBy, urls[0].String()) continue } @@ -1529,7 +1512,7 @@ func FilterUsedBy(authorizer auth.Authorizer, r *http.Request, entries []string) // Check each url and append. for _, u := range urls { if canViewEntity(u) { - appendUsedBy(u) + usedBy = append(usedBy, u.String()) } } } From 75b1d0913eab3fd37aace675cd4b4da75ecc8787 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Fri, 5 Jul 2024 16:24:47 +0100 Subject: [PATCH 050/106] lxd/db/cluster: Add project name to project entity queries. All LXD entity URLs are parsed into a project, a location, and a slice of path arguments. Previously when parsing a project URL, the project name was empty because it is present in the path arguments. Now that the project name is being returned, we need our entity SQL queries to match. Signed-off-by: Mark Laing --- lxd/db/cluster/entities.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd/db/cluster/entities.go b/lxd/db/cluster/entities.go index bb5762ae5535..e368ed85513f 100644 --- a/lxd/db/cluster/entities.go +++ b/lxd/db/cluster/entities.go @@ -233,7 +233,7 @@ var profileEntityByID = fmt.Sprintf(`%s WHERE profiles.id = ?`, profileEntities) var profileEntitiesByProjectName = fmt.Sprintf(`%s WHERE projects.name = ?`, profileEntities) // projectEntities returns all entities of type entity.TypeProject. 
-var projectEntities = fmt.Sprintf(`SELECT %d, projects.id, '', '', json_array(projects.name) FROM projects`, entityTypeProject) +var projectEntities = fmt.Sprintf(`SELECT %d, projects.id, projects.name, '', json_array(projects.name) FROM projects`, entityTypeProject) // projectEntities gets the entity of type entity.TypeProject with a particular ID. var projectEntityByID = fmt.Sprintf(`%s WHERE id = ?`, projectEntities) @@ -779,7 +779,7 @@ WHERE projects.name = ? var projectIDFromURL = ` SELECT ?, projects.id FROM projects -WHERE '' = ? +WHERE projects.name = ? AND '' = ? AND projects.name = ?` From d8eaba3515c12168d96b76c4fecf47d26871f8cc Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 13:22:04 +0100 Subject: [PATCH 051/106] shared/entity: Ignore "none" locations when constructing URLs. Signed-off-by: Mark Laing --- shared/entity/type.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/shared/entity/type.go b/shared/entity/type.go index 50ca18bb7a6a..16a899f9668c 100644 --- a/shared/entity/type.go +++ b/shared/entity/type.go @@ -222,11 +222,8 @@ func (t Type) URL(projectName string, location string, pathArguments ...string) u = u.WithQuery("project", projectName) } - // Always set location if provided. - if location != "" { - u = u.WithQuery("target", location) - } - + // Always set location if provided (empty or "none" locations are ignored). 
+ u = u.Target(location) return u, nil } From 6232b215717838222a04ed1462f0eac20b0774ab Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Wed, 24 Jul 2024 23:32:21 +0000 Subject: [PATCH 052/106] shared: Pass CertOptions to KeyPairAndCA Signed-off-by: Max Asnaashari --- shared/cert.go | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/shared/cert.go b/shared/cert.go index f9fc3254a1f8..93c2721bf014 100644 --- a/shared/cert.go +++ b/shared/cert.go @@ -29,6 +29,12 @@ import ( "github.com/canonical/lxd/shared/api" ) +// CertOptions holds configuration for creating a new CertInfo. +type CertOptions struct { + // AddHosts determines whether to populate the Subject Alternative Name DNS Names and IP Addresses fields. + AddHosts bool +} + // KeyPairAndCA returns a CertInfo object with a reference to the key pair and // (optionally) CA certificate located in the given directory and having the // given name prefix @@ -45,13 +51,13 @@ import ( // // If a CA certificate is found, it will be returned as well as second return // value (otherwise it will be nil). -func KeyPairAndCA(dir, prefix string, kind CertKind, addHosts bool) (*CertInfo, error) { +func KeyPairAndCA(dir, prefix string, kind CertKind, options CertOptions) (*CertInfo, error) { certFilename := filepath.Join(dir, prefix+".crt") keyFilename := filepath.Join(dir, prefix+".key") // Ensure that the certificate exists, or create a new one if it does // not. - err := FindOrGenCert(certFilename, keyFilename, kind == CertClient, addHosts) + err := FindOrGenCert(certFilename, keyFilename, kind == CertClient, options) if err != nil { return nil, err } @@ -252,14 +258,14 @@ func mynames() ([]string, error) { // FindOrGenCert generates a keypair if needed. // The type argument is false for server, true for client. 
-func FindOrGenCert(certf string, keyf string, certtype bool, addHosts bool) error { +func FindOrGenCert(certf string, keyf string, certtype bool, options CertOptions) error { if PathExists(certf) && PathExists(keyf) { return nil } /* If neither stat succeeded, then this is our first run and we * need to generate cert and privkey */ - err := GenCert(certf, keyf, certtype, addHosts) + err := GenCert(certf, keyf, certtype, options) if err != nil { return err } @@ -268,7 +274,7 @@ func FindOrGenCert(certf string, keyf string, certtype bool, addHosts bool) erro } // GenCert will create and populate a certificate file and a key file. -func GenCert(certf string, keyf string, certtype bool, addHosts bool) error { +func GenCert(certf string, keyf string, certtype bool, options CertOptions) error { /* Create the basenames if needed */ dir := filepath.Dir(certf) err := os.MkdirAll(dir, 0750) @@ -282,7 +288,7 @@ func GenCert(certf string, keyf string, certtype bool, addHosts bool) error { return err } - certBytes, keyBytes, err := GenerateMemCert(certtype, addHosts) + certBytes, keyBytes, err := GenerateMemCert(certtype, options) if err != nil { return err } @@ -322,7 +328,7 @@ func GenCert(certf string, keyf string, certtype bool, addHosts bool) error { // GenerateMemCert creates client or server certificate and key pair, // returning them as byte arrays in memory. 
-func GenerateMemCert(client bool, addHosts bool) ([]byte, []byte, error) { +func GenerateMemCert(client bool, options CertOptions) ([]byte, []byte, error) { privk, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) if err != nil { return nil, nil, fmt.Errorf("Failed to generate key: %w", err) @@ -372,7 +378,7 @@ func GenerateMemCert(client bool, addHosts bool) ([]byte, []byte, error) { template.ExtKeyUsage = []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth} } - if addHosts { + if options.AddHosts { hosts, err := mynames() if err != nil { return nil, nil, fmt.Errorf("Failed to get my hostname: %w", err) From 3e41a790c1cb245b2a3acc01c6692a36454c7ade Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Wed, 24 Jul 2024 23:32:44 +0000 Subject: [PATCH 053/106] shared: Pass SubjectName to CertOptions Signed-off-by: Max Asnaashari --- shared/cert.go | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/shared/cert.go b/shared/cert.go index 93c2721bf014..1627793b0070 100644 --- a/shared/cert.go +++ b/shared/cert.go @@ -33,6 +33,9 @@ import ( type CertOptions struct { // AddHosts determines whether to populate the Subject Alternative Name DNS Names and IP Addresses fields. AddHosts bool + + // SubjectName will be used in place of the system hostname for the SAN DNS Name and Issuer Common Name. + SubjectName string } // KeyPairAndCA returns a CertInfo object with a reference to the key pair and @@ -245,14 +248,19 @@ func TestingAltKeyPair() *CertInfo { /* * Generate a list of names for which the certificate will be valid. * This will include the hostname and ip address. + * If the `name` argument is non-empty, it will be used in place of the system hostname. 
*/ -func mynames() ([]string, error) { - h, err := os.Hostname() - if err != nil { - return nil, err +func mynames(name string) ([]string, error) { + if name == "" { + h, err := os.Hostname() + if err != nil { + return nil, err + } + + name = h } - ret := []string{h, "127.0.0.1/8", "::1/128"} + ret := []string{name, "127.0.0.1/8", "::1/128"} return ret, nil } @@ -354,9 +362,12 @@ func GenerateMemCert(client bool, options CertOptions) ([]byte, []byte, error) { username = "UNKNOWN" } - hostname, err := os.Hostname() - if err != nil { - hostname = "UNKNOWN" + hostname := options.SubjectName + if hostname == "" { + hostname, err = os.Hostname() + if err != nil { + hostname = "UNKNOWN" + } } template := x509.Certificate{ @@ -379,7 +390,7 @@ func GenerateMemCert(client bool, options CertOptions) ([]byte, []byte, error) { } if options.AddHosts { - hosts, err := mynames() + hosts, err := mynames(hostname) if err != nil { return nil, nil, fmt.Errorf("Failed to get my hostname: %w", err) } From 7224c0d781aa0e974815597b44da482fbd98557b Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Wed, 24 Jul 2024 23:32:57 +0000 Subject: [PATCH 054/106] shared: Update usages Signed-off-by: Max Asnaashari --- lxc/config/cert.go | 2 +- lxc/file.go | 2 +- lxd-agent/network.go | 2 +- lxd-migrate/utils.go | 2 +- lxd-user/lxd.go | 2 +- lxd/instance/drivers/driver_qemu.go | 4 ++-- lxd/util/encryption.go | 6 +++--- shared/cert_test.go | 4 ++-- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lxc/config/cert.go b/lxc/config/cert.go index 132670061ce9..70457a7c43bf 100644 --- a/lxc/config/cert.go +++ b/lxc/config/cert.go @@ -28,7 +28,7 @@ func (c *Config) GenerateClientCertificate() error { certf := c.ConfigPath("client.crt") keyf := c.ConfigPath("client.key") - return shared.FindOrGenCert(certf, keyf, true, false) + return shared.FindOrGenCert(certf, keyf, true, shared.CertOptions{}) } // CopyGlobalCert will copy global (system-wide) certificate to the user config path. 
diff --git a/lxc/file.go b/lxc/file.go index de97e12bfa0a..87bad723228b 100644 --- a/lxc/file.go +++ b/lxc/file.go @@ -1188,7 +1188,7 @@ func (c *cmdFileMount) sshSFTPServer(ctx context.Context, instName string, resou } // Generate random host key. - _, privKey, err := shared.GenerateMemCert(false, false) + _, privKey, err := shared.GenerateMemCert(false, shared.CertOptions{}) if err != nil { return fmt.Errorf(i18n.G("Failed generating SSH host key: %w"), err) } diff --git a/lxd-agent/network.go b/lxd-agent/network.go index 0a6eeac8cf4c..da9e720da86e 100644 --- a/lxd-agent/network.go +++ b/lxd-agent/network.go @@ -49,7 +49,7 @@ func (l *networkListener) Accept() (net.Conn, error) { } func serverTLSConfig() (*tls.Config, error) { - certInfo, err := shared.KeyPairAndCA(".", "agent", shared.CertServer, false) + certInfo, err := shared.KeyPairAndCA(".", "agent", shared.CertServer, shared.CertOptions{}) if err != nil { return nil, err } diff --git a/lxd-migrate/utils.go b/lxd-migrate/utils.go index d1da2e20c836..dbad7c462d63 100644 --- a/lxd-migrate/utils.go +++ b/lxd-migrate/utils.go @@ -170,7 +170,7 @@ func (c *cmdMigrate) connectTarget(url string, certPath string, keyPath string, if certPath == "" || keyPath == "" { var err error - clientCrt, clientKey, err = shared.GenerateMemCert(true, false) + clientCrt, clientKey, err = shared.GenerateMemCert(true, shared.CertOptions{}) if err != nil { return nil, "", err } diff --git a/lxd-user/lxd.go b/lxd-user/lxd.go index 9b1bcfd2b457..1b8286b12342 100644 --- a/lxd-user/lxd.go +++ b/lxd-user/lxd.go @@ -166,7 +166,7 @@ func lxdSetupUser(uid uint32) error { revert.Add(func() { _ = os.RemoveAll(userPath) }) // Generate certificate. 
- err = shared.FindOrGenCert(filepath.Join(userPath, "client.crt"), filepath.Join(userPath, "client.key"), true, false) + err = shared.FindOrGenCert(filepath.Join(userPath, "client.crt"), filepath.Join(userPath, "client.key"), true, shared.CertOptions{}) if err != nil { return fmt.Errorf("Failed to generate user certificate: %w", err) } diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 8214422e9efa..738c12366bc9 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -514,13 +514,13 @@ func (d *qemu) generateAgentCert() (agentCert string, agentKey string, clientCer clientKeyFile := filepath.Join(d.Path(), "agent-client.key") // Create server certificate. - err = shared.FindOrGenCert(agentCertFile, agentKeyFile, false, false) + err = shared.FindOrGenCert(agentCertFile, agentKeyFile, false, shared.CertOptions{}) if err != nil { return "", "", "", "", err } // Create client certificate. - err = shared.FindOrGenCert(clientCertFile, clientKeyFile, true, false) + err = shared.FindOrGenCert(clientCertFile, clientKeyFile, true, shared.CertOptions{}) if err != nil { return "", "", "", "", err } diff --git a/lxd/util/encryption.go b/lxd/util/encryption.go index 29c13ab34bb7..85e5c2a68a98 100644 --- a/lxd/util/encryption.go +++ b/lxd/util/encryption.go @@ -48,7 +48,7 @@ func LoadCert(dir string) (*shared.CertInfo, error) { prefix = "cluster" } - cert, err := shared.KeyPairAndCA(dir, prefix, shared.CertServer, true) + cert, err := shared.KeyPairAndCA(dir, prefix, shared.CertServer, shared.CertOptions{AddHosts: true}) if err != nil { return nil, fmt.Errorf("failed to load TLS certificate: %w", err) } @@ -62,7 +62,7 @@ func LoadCert(dir string) (*shared.CertInfo, error) { func LoadClusterCert(dir string) (*shared.CertInfo, error) { prefix := "cluster" - cert, err := shared.KeyPairAndCA(dir, prefix, shared.CertServer, true) + cert, err := shared.KeyPairAndCA(dir, prefix, shared.CertServer, 
shared.CertOptions{AddHosts: true}) if err != nil { return nil, fmt.Errorf("failed to load cluster TLS certificate: %w", err) } @@ -73,7 +73,7 @@ func LoadClusterCert(dir string) (*shared.CertInfo, error) { // LoadServerCert reads the LXD server certificate from the given var dir. func LoadServerCert(dir string) (*shared.CertInfo, error) { prefix := "server" - cert, err := shared.KeyPairAndCA(dir, prefix, shared.CertServer, true) + cert, err := shared.KeyPairAndCA(dir, prefix, shared.CertServer, shared.CertOptions{AddHosts: true}) if err != nil { return nil, fmt.Errorf("failed to load TLS certificate: %w", err) } diff --git a/shared/cert_test.go b/shared/cert_test.go index 7085ff1ded84..eabdc9b19bf1 100644 --- a/shared/cert_test.go +++ b/shared/cert_test.go @@ -20,7 +20,7 @@ func TestKeyPairAndCA(t *testing.T) { defer func() { _ = os.RemoveAll(dir) }() - info, err := shared.KeyPairAndCA(dir, "test", shared.CertServer, true) + info, err := shared.KeyPairAndCA(dir, "test", shared.CertServer, shared.CertOptions{AddHosts: true}) if err != nil { t.Errorf("initial call to KeyPairAndCA failed: %v", err) } @@ -67,7 +67,7 @@ func TestGenerateMemCert(t *testing.T) { t.Skip("skipping cert generation in short mode") } - cert, key, err := shared.GenerateMemCert(false, true) + cert, key, err := shared.GenerateMemCert(false, shared.CertOptions{AddHosts: true}) if err != nil { t.Error(err) return From 15cd5adc6c915916dd6bb49dac6e56003cbe4082 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 13:26:42 +0100 Subject: [PATCH 055/106] lxd: Add method to determine location of storage volume. Contents are copied and slightly modified from `cluster.ConnectIfVolumeIsRemote`. 
Signed-off-by: Mark Laing --- lxd/storage_volumes.go | 81 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/lxd/storage_volumes.go b/lxd/storage_volumes.go index 57cfd91705a2..cbd3ccf87795 100644 --- a/lxd/storage_volumes.go +++ b/lxd/storage_volumes.go @@ -6,6 +6,7 @@ import ( "crypto/x509" "encoding/json" "encoding/pem" + "errors" "fmt" "io" "net/http" @@ -2727,3 +2728,83 @@ func createStoragePoolVolumeFromBackup(s *state.State, r *http.Request, requestP revert.Success() return operations.OperationResponse(op) } + +// getRemoteVolumeNodeInfo figures out the cluster member on which the volume with the given name is defined. If it is +// the local cluster member it returns nil and no error. If it is another cluster member it returns a db.NodeInfo containing +// the name and address of the remote member. If there is more than one cluster member with a matching volume name, an +// error is returned. +func getRemoteVolumeNodeInfo(ctx context.Context, s *state.State, poolName string, projectName string, volumeName string, volumeType int) (*db.NodeInfo, error) { + localNodeID := s.DB.Cluster.GetNodeID() + var err error + var nodes []db.NodeInfo + var poolID int64 + var dbVolume *db.StorageVolume + err = s.DB.Cluster.Transaction(ctx, func(ctx context.Context, tx *db.ClusterTx) error { + poolID, err = tx.GetStoragePoolID(ctx, poolName) + if err != nil { + return err + } + + nodes, err = tx.GetStorageVolumeNodes(ctx, poolID, projectName, volumeName, volumeType) + if err != nil && !errors.Is(err, db.ErrNoClusterMember) { + return err + } else if err == nil { + return nil + } + + // If we couldn't get the nodes directly, get the volume for a subsequent check. 
+ dbVolume, err = tx.GetStoragePoolVolume(ctx, poolID, projectName, volumeType, volumeName, true) + if err != nil { + return err + } + + return nil + }) + if err != nil { + return nil, err + } + + // If volume uses a remote storage driver and so has no explicit cluster member, then we need to check + // whether it is exclusively attached to remote instance, and if so then we need to forward the request to + // the node where it is currently used. This avoids conflicting with another member when using it locally. + if dbVolume != nil { + remoteInstance, err := storagePools.VolumeUsedByExclusiveRemoteInstancesWithProfiles(s, poolName, projectName, &dbVolume.StorageVolume) + if err != nil { + return nil, fmt.Errorf("Failed checking if volume %q is available: %w", volumeName, err) + } + + if remoteInstance == nil { + // Volume isn't exclusively attached to an instance. Use local cluster member. + return nil, nil + } + + var instNode db.NodeInfo + err = s.DB.Cluster.Transaction(ctx, func(ctx context.Context, tx *db.ClusterTx) error { + instNode, err = tx.GetNodeByName(ctx, remoteInstance.Node) + return err + }) + if err != nil { + return nil, fmt.Errorf("Failed getting cluster member info for %q: %w", remoteInstance.Node, err) + } + + // Replace node list with instance's cluster member node (which might be local member). + nodes = []db.NodeInfo{instNode} + } + + nodeCount := len(nodes) + if nodeCount > 1 { + return nil, fmt.Errorf("More than one cluster member has a volume named %q. Please target a specific member", volumeName) + } else if nodeCount < 1 { + // Should never get here. + return nil, fmt.Errorf("Volume %q has empty cluster member list", volumeName) + } + + node := nodes[0] + if node.ID == localNodeID { + // Use local cluster member if volume belongs to this local member. + return nil, nil + } + + // Connect to remote cluster member. 
+ return &node, nil +} From 734bcfb5764174b2f85e5a4729e59787739679c5 Mon Sep 17 00:00:00 2001 From: Din Music Date: Tue, 2 Jul 2024 13:29:10 +0000 Subject: [PATCH 056/106] lxd-migrate: Add conversion option 'virtio' Signed-off-by: Din Music --- lxd-migrate/main_migrate.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd-migrate/main_migrate.go b/lxd-migrate/main_migrate.go index 23d8ccf58a1f..20fda359eb03 100644 --- a/lxd-migrate/main_migrate.go +++ b/lxd-migrate/main_migrate.go @@ -54,7 +54,7 @@ func (c *cmdMigrate) command() *cobra.Command { ` cmd.RunE = c.run cmd.Flags().StringVar(&c.flagRsyncArgs, "rsync-args", "", "Extra arguments to pass to rsync"+"``") - cmd.Flags().StringSliceVar(&c.flagConversionOpts, "conversion", []string{"format"}, "List of conversion opts. Allowed values are: [format]") + cmd.Flags().StringSliceVar(&c.flagConversionOpts, "conversion", []string{"format"}, "Comma-separated list of conversion options to apply. Allowed values are: [format, virtio]") return cmd } @@ -469,7 +469,7 @@ func (c *cmdMigrate) run(cmd *cobra.Command, args []string) error { } // Check conversion options. 
- supportedConversionOptions := []string{"format"} + supportedConversionOptions := []string{"format", "virtio"} for _, opt := range c.flagConversionOpts { if !slices.Contains(supportedConversionOptions, opt) { return fmt.Errorf("Unsupported conversion option %q, supported conversion options are %v", opt, supportedConversionOptions) From ae733289d9d2352a02ff54c19cb9256d483b8933 Mon Sep 17 00:00:00 2001 From: Din Music Date: Tue, 2 Jul 2024 13:30:14 +0000 Subject: [PATCH 057/106] lxd/instances_post: Allow conversion option virtio Signed-off-by: Din Music --- lxd/instances_post.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/instances_post.go b/lxd/instances_post.go index e4cce80a081f..0640ce6904f9 100644 --- a/lxd/instances_post.go +++ b/lxd/instances_post.go @@ -400,7 +400,7 @@ func createFromConversion(s *state.State, r *http.Request, projectName string, p // Validate conversion options. for _, opt := range req.Source.ConversionOptions { - if !slices.Contains([]string{"format"}, opt) { + if !slices.Contains([]string{"format", "virtio"}, opt) { return response.BadRequest(fmt.Errorf("Invalid conversion option %q", opt)) } } From 6e7fa19e29ace8c903435af3fbcab7c8d0eafa84 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 18 Jul 2024 15:07:47 +0000 Subject: [PATCH 058/106] lxd/storage/util: Add qemu-img info helper function Signed-off-by: Din Music --- lxd/storage/utils.go | 70 ++++++++++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/lxd/storage/utils.go b/lxd/storage/utils.go index 5b264e178a9c..8ba543581b78 100644 --- a/lxd/storage/utils.go +++ b/lxd/storage/utils.go @@ -518,7 +518,7 @@ func poolAndVolumeCommonRules(vol *drivers.Volume) map[string]func(string) error } // security.shifted and security.unmapped are only relevant for custom filesystem volumes. 
- if (vol == nil) || (vol != nil && vol.Type() == drivers.VolumeTypeCustom && vol.ContentType() == drivers.ContentTypeFS) { + if vol == nil || (vol.Type() == drivers.VolumeTypeCustom && vol.ContentType() == drivers.ContentTypeFS) { // lxdmeta:generate(entities=storage-btrfs,storage-cephfs,storage-ceph,storage-dir,storage-lvm,storage-zfs,storage-powerflex; group=volume-conf; key=security.shifted) // Enabling this option allows attaching the volume to multiple isolated instances. // --- @@ -721,37 +721,21 @@ func ImageUnpack(imageFile string, vol drivers.Volume, destBlockFile string, sys // convertBlockImage converts the qcow2 block image file into a raw block device. If needed it will attempt // to enlarge the destination volume to accommodate the unpacked qcow2 image file. - convertBlockImage := func(v drivers.Volume, imgPath string, dstPath string) (int64, error) { - // Get info about qcow2 file. Force input format to qcow2 so we don't rely on qemu-img's detection - // logic as that has been known to have vulnerabilities and we only support qcow2 images anyway. - // Use prlimit because qemu-img can consume considerable RAM & CPU time if fed a maliciously - // crafted disk image. Since cloud tenants are not to be trusted, ensure QEMU is limits to 1 GiB - // address space and 2 seconds CPU time, which ought to be more than enough for real world images. 
- cmd := []string{"prlimit", "--cpu=2", "--as=1073741824", "qemu-img", "info", "-f", "qcow2", "--output=json", imgPath} - imgJSON, err := apparmor.QemuImg(sysOS, cmd, imgPath, dstPath) + convertBlockImage := func(imgPath string, dstPath string) (int64, error) { + imgFormat, imgVirtualSize, err := qemuImageInfo(sysOS, imgPath) if err != nil { - return -1, fmt.Errorf("Failed reading image info %q: %w", imgPath, err) - } - - imgInfo := struct { - Format string `json:"format"` - VirtualSize int64 `json:"virtual-size"` - }{} - - err = json.Unmarshal([]byte(imgJSON), &imgInfo) - if err != nil { - return -1, fmt.Errorf("Failed unmarshalling image info %q: %w (%q)", imgPath, err, imgJSON) + return -1, err } // Belt and braces qcow2 check. - if imgInfo.Format != "qcow2" { - return -1, fmt.Errorf("Unexpected image format %q", imgInfo.Format) + if imgFormat != "qcow2" { + return -1, fmt.Errorf("Unexpected image format %q", imgFormat) } // Check whether image is allowed to be unpacked into pool volume. Create a partial image volume // struct and then use it to check that target volume size can be set as needed. imgVolConfig := map[string]string{ - "volatile.rootfs.size": fmt.Sprintf("%d", imgInfo.VirtualSize), + "volatile.rootfs.size": fmt.Sprintf("%d", imgVirtualSize), } imgVol := drivers.NewVolume(nil, "", drivers.VolumeTypeImage, drivers.ContentTypeBlock, "", imgVolConfig, nil) @@ -770,7 +754,7 @@ func ImageUnpack(imageFile string, vol drivers.Volume, destBlockFile string, sys // If the target volume's size is smaller than the image unpack size, then we need to // increase the target volume's size. 
- if volSizeBytes < imgInfo.VirtualSize { + if volSizeBytes < imgVirtualSize { l.Debug("Increasing volume size", logger.Ctx{"imgPath": imgPath, "dstPath": dstPath, "oldSize": volSizeBytes, "newSize": newVolSize, "allowUnsafeResize": allowUnsafeResize}) err = vol.SetQuota(newVolSize, allowUnsafeResize, nil) if err != nil { @@ -782,7 +766,7 @@ func ImageUnpack(imageFile string, vol drivers.Volume, destBlockFile string, sys // Convert the qcow2 format to a raw block device. l.Debug("Converting qcow2 image to raw disk", logger.Ctx{"imgPath": imgPath, "dstPath": dstPath}) - cmd = []string{ + cmd := []string{ "nice", "-n19", // Run with low priority to reduce CPU impact on other processes. "qemu-img", "convert", "-f", "qcow2", "-O", "raw", } @@ -813,7 +797,7 @@ func ImageUnpack(imageFile string, vol drivers.Volume, destBlockFile string, sys return -1, fmt.Errorf("Failed converting image to raw at %q: %w", dstPath, err) } - return imgInfo.VirtualSize, nil + return imgVirtualSize, nil } var imgSize int64 @@ -826,7 +810,7 @@ func ImageUnpack(imageFile string, vol drivers.Volume, destBlockFile string, sys } // Convert the qcow2 format to a raw block device. - imgSize, err = convertBlockImage(vol, imageRootfsFile, destBlockFile) + imgSize, err = convertBlockImage(imageRootfsFile, destBlockFile) if err != nil { return -1, err } @@ -848,7 +832,7 @@ func ImageUnpack(imageFile string, vol drivers.Volume, destBlockFile string, sys imgPath := filepath.Join(tempDir, "rootfs.img") // Convert the qcow2 format to a raw block device. - imgSize, err = convertBlockImage(vol, imgPath, destBlockFile) + imgSize, err = convertBlockImage(imgPath, destBlockFile) if err != nil { return -1, err } @@ -870,6 +854,36 @@ func ImageUnpack(imageFile string, vol drivers.Volume, destBlockFile string, sys return imgSize, nil } +// qemuImageInfo retrieves the format and virtual size of an image (size after unpacking the image) +// on the given path. 
+func qemuImageInfo(sysOS *sys.OS, imagePath string) (format string, bytes int64, err error) { + cmd := []string{ + // Use prlimit because qemu-img can consume considerable RAM & CPU time if fed + // a maliciously crafted disk image. Since cloud tenants are not to be trusted, + // ensure QEMU is limited to 1 GiB address space and 2 seconds of CPU time. + // This should be more than enough for real world images. + "prlimit", "--cpu=2", "--as=1073741824", + "qemu-img", "info", imagePath, "--output", "json", + } + + out, err := apparmor.QemuImg(sysOS, cmd, imagePath, "") + if err != nil { + return "", -1, fmt.Errorf("qemu-img info: %v", err) + } + + imgInfo := struct { + Format string `json:"format"` + VirtualSize int64 `json:"virtual-size"` // Image size after unpacking. + }{} + + err = json.Unmarshal([]byte(out), &imgInfo) + if err != nil { + return "", -1, fmt.Errorf("Failed unmarshalling image info: %v", err) + } + + return imgInfo.Format, imgInfo.VirtualSize, nil +} + // InstanceContentType returns the instance's content type. func InstanceContentType(inst instance.Instance) drivers.ContentType { contentType := drivers.ContentTypeFS From aa40077544067fe2612be55b109eacc34331b743 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 26 Jul 2024 11:23:25 +0200 Subject: [PATCH 059/106] lxd/apparmor/instance_lxc: fix all ro+remount rules While investigating #13810 I found that all ro+remount rules in the form: mount options=(ro,remount,bind,A,B,C) /some_pattern{,/**}, simply do not work at all. This remount+bind case is a very special one, and we should rewrite all rules in this form: mount options=(ro,remount,bind,A,B,C) -> /some_pattern{,/**}, This syntax is not new. This change should be compatible with very old AppArmor versions including 2.11. The reason it was not noticed for years is that for the unprivileged container case we have an analogous rule, but in a wider form: mount options=(ro,remount,bind,nodev,A,B,C), which masks the issue. 
But for privileged containers it's not. So, let's fix this for correctness. Signed-off-by: Alexander Mikhalitsyn --- lxd/apparmor/instance_lxc.go | 370 +++++++++++++++++------------------ 1 file changed, 185 insertions(+), 185 deletions(-) diff --git a/lxd/apparmor/instance_lxc.go b/lxd/apparmor/instance_lxc.go index 963c22bfb438..c158e650d582 100644 --- a/lxd/apparmor/instance_lxc.go +++ b/lxd/apparmor/instance_lxc.go @@ -85,194 +85,194 @@ profile "{{ .name }}" flags=(attach_disconnected,mediate_deleted) { mount fstype=tmpfs, # Allow various ro-bind-*re*-mounts of anything except /proc, /sys and /dev/.lxc - mount options=(ro,remount,bind) /[^spd]*{,/**}, - mount options=(ro,remount,bind) /d[^e]*{,/**}, - mount options=(ro,remount,bind) /de[^v]*{,/**}, - mount options=(ro,remount,bind) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind) /dev?*{,/**}, - mount options=(ro,remount,bind) /p[^r]*{,/**}, - mount options=(ro,remount,bind) /pr[^o]*{,/**}, - mount options=(ro,remount,bind) /pro[^c]*{,/**}, - mount options=(ro,remount,bind) /proc?*{,/**}, - mount options=(ro,remount,bind) /s[^y]*{,/**}, - mount options=(ro,remount,bind) /sy[^s]*{,/**}, - mount options=(ro,remount,bind) /sys?*{,/**}, - - mount options=(ro,remount,bind,nodev) /[^spd]*{,/**}, - mount options=(ro,remount,bind,nodev) /d[^e]*{,/**}, - mount options=(ro,remount,bind,nodev) /de[^v]*{,/**}, - mount options=(ro,remount,bind,nodev) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,nodev) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,nodev) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,nodev) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,nodev) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,nodev) /dev?*{,/**}, - mount options=(ro,remount,bind,nodev) /p[^r]*{,/**}, 
- mount options=(ro,remount,bind,nodev) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,nodev) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,nodev) /proc?*{,/**}, - mount options=(ro,remount,bind,nodev) /s[^y]*{,/**}, - mount options=(ro,remount,bind,nodev) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,nodev) /sys?*{,/**}, - - mount options=(ro,remount,bind,noexec) /[^spd]*{,/**}, - mount options=(ro,remount,bind,noexec) /d[^e]*{,/**}, - mount options=(ro,remount,bind,noexec) /de[^v]*{,/**}, - mount options=(ro,remount,bind,noexec) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,noexec) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,noexec) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,noexec) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,noexec) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,noexec) /dev?*{,/**}, - mount options=(ro,remount,bind,noexec) /p[^r]*{,/**}, - mount options=(ro,remount,bind,noexec) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,noexec) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,noexec) /proc?*{,/**}, - mount options=(ro,remount,bind,noexec) /s[^y]*{,/**}, - mount options=(ro,remount,bind,noexec) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,noexec) /sys?*{,/**}, - - mount options=(ro,remount,bind,noexec,nodev) /[^spd]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /d[^e]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /de[^v]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /dev?*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /p[^r]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /pr[^o]*{,/**}, - mount 
options=(ro,remount,bind,noexec,nodev) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /proc?*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /s[^y]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,noexec,nodev) /sys?*{,/**}, - - mount options=(ro,remount,bind,noatime) /[^spd]*{,/**}, - mount options=(ro,remount,bind,noatime) /d[^e]*{,/**}, - mount options=(ro,remount,bind,noatime) /de[^v]*{,/**}, - mount options=(ro,remount,bind,noatime) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,noatime) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,noatime) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,noatime) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,noatime) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,noatime) /dev?*{,/**}, - mount options=(ro,remount,bind,noatime) /p[^r]*{,/**}, - mount options=(ro,remount,bind,noatime) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,noatime) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,noatime) /proc?*{,/**}, - mount options=(ro,remount,bind,noatime) /s[^y]*{,/**}, - mount options=(ro,remount,bind,noatime) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,noatime) /sys?*{,/**}, - - mount options=(ro,remount,bind,nosuid) /[^spd]*{,/**}, - mount options=(ro,remount,bind,nosuid) /d[^e]*{,/**}, - mount options=(ro,remount,bind,nosuid) /de[^v]*{,/**}, - mount options=(ro,remount,bind,nosuid) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,nosuid) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,nosuid) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,nosuid) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,nosuid) /dev?*{,/**}, - mount options=(ro,remount,bind,nosuid) /p[^r]*{,/**}, - mount options=(ro,remount,bind,nosuid) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,nosuid) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid) /proc?*{,/**}, 
- mount options=(ro,remount,bind,nosuid) /s[^y]*{,/**}, - mount options=(ro,remount,bind,nosuid) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,nosuid) /sys?*{,/**}, - - mount options=(ro,remount,bind,nosuid,nodev) /[^spd]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /d[^e]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /de[^v]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /dev?*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /p[^r]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /proc?*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /s[^y]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,nosuid,nodev) /sys?*{,/**}, - - mount options=(ro,remount,bind,nosuid,noexec) /[^spd]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /d[^e]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /de[^v]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /dev?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /p[^r]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /pro[^c]*{,/**}, - mount 
options=(ro,remount,bind,nosuid,noexec) /proc?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /s[^y]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec) /sys?*{,/**}, - - mount options=(ro,remount,bind,nosuid,noexec,nodev) /[^spd]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /d[^e]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /de[^v]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /dev?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /p[^r]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /proc?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /s[^y]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev) /sys?*{,/**}, - - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /[^spd]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /d[^e]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /de[^v]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) 
/dev/[^.]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /dev?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /p[^r]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /proc?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /s[^y]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,strictatime) /sys?*{,/**}, + mount options=(ro,remount,bind) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind) -> /dev/[^.]*{,/**}, + mount options=(ro,remount,bind) -> /dev?*{,/**}, + mount options=(ro,remount,bind) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind) -> /proc?*{,/**}, + mount options=(ro,remount,bind) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,nodev) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,nodev) -> /dev/[^.]*{,/**}, + mount 
options=(ro,remount,bind,nodev) -> /dev?*{,/**}, + mount options=(ro,remount,bind,nodev) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /proc?*{,/**}, + mount options=(ro,remount,bind,nodev) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,nodev) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,noexec) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,noexec) -> /dev/[^.]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /dev?*{,/**}, + mount options=(ro,remount,bind,noexec) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /proc?*{,/**}, + mount options=(ro,remount,bind,noexec) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,noexec) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,noexec,nodev) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /dev/[^.]*{,/**}, + mount 
options=(ro,remount,bind,noexec,nodev) -> /dev?*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /proc?*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,noexec,nodev) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,noatime) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,noatime) -> /dev/[^.]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /dev?*{,/**}, + mount options=(ro,remount,bind,noatime) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /proc?*{,/**}, + mount options=(ro,remount,bind,noatime) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,noatime) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,nosuid) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /dev/[^.]*{,/**}, 
+ mount options=(ro,remount,bind,nosuid) -> /dev?*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /proc?*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,nosuid) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,nosuid,nodev) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /dev/[^.]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /dev?*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /proc?*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,nosuid,nodev) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,nosuid,noexec) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /dev/.lx[^c]*{,/**}, + mount 
options=(ro,remount,bind,nosuid,noexec) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /dev/[^.]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /dev?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /proc?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /dev/[^.]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /dev?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /proc?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev) -> /sys?*{,/**}, + + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /d[^e]*{,/**}, 
+ mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /dev/[^.]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /dev?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /proc?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,strictatime) -> /sys?*{,/**}, {{- if .feature_mount_nosymfollow }} # see https://github.com/canonical/lxd/pull/12698 - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /[^spd]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /d[^e]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /de[^v]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/.[^l]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/.l[^x]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/.lx[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/.lxc?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev/[^.]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /dev?*{,/**}, - mount 
options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /p[^r]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /pr[^o]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /pro[^c]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /proc?*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /s[^y]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /sy[^s]*{,/**}, - mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) /sys?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /[^spd]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /d[^e]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /de[^v]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /dev/.[^l]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /dev/.l[^x]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /dev/.lx[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /dev/.lxc?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /dev/[^.]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /dev?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /p[^r]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /pr[^o]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /pro[^c]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /proc?*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /s[^y]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /sy[^s]*{,/**}, + mount options=(ro,remount,bind,nosuid,noexec,nodev,nosymfollow) -> /sys?*{,/**}, {{- end }} # Allow bind-mounts of anything except /proc, /sys and 
/dev/.lxc From 948149d2cd0fd2a5f702236247aa078dcaacd740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Thu, 18 Jul 2024 17:40:40 -0400 Subject: [PATCH 060/106] lxd/storage: Use writeback mode for qemu-img convert MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit bbc1ea8cd0cb01b4830fd960007f4c5b6d05edf2) Signed-off-by: Din Music License: Apache-2.0 --- lxd/storage/backend_lxd.go | 2 +- lxd/storage/utils.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index 9557d0b9e852..f4c88505c8ee 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -1828,7 +1828,7 @@ func (b *lxdBackend) imageConversionFiller(imgPath string, imgFormat string) fun cmd := []string{ // Run with low priority to reduce CPU impact on other processes. "nice", "-n19", - "qemu-img", "convert", "-f", imgFormat, "-O", "raw", imgPath, diskPath, + "qemu-img", "convert", "-f", imgFormat, "-O", "raw", imgPath, diskPath, "-t", "writeback", } b.logger.Debug("Image conversion started") diff --git a/lxd/storage/utils.go b/lxd/storage/utils.go index 8ba543581b78..b438563701ab 100644 --- a/lxd/storage/utils.go +++ b/lxd/storage/utils.go @@ -768,7 +768,7 @@ func ImageUnpack(imageFile string, vol drivers.Volume, destBlockFile string, sys cmd := []string{ "nice", "-n19", // Run with low priority to reduce CPU impact on other processes. - "qemu-img", "convert", "-f", "qcow2", "-O", "raw", + "qemu-img", "convert", "-f", "qcow2", "-O", "raw", "-t", "writeback", } // Check for Direct I/O support. 
From 430005b9076cb3e2a2e36696de882c432f739934 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Thu, 18 Jul 2024 19:28:53 -0400 Subject: [PATCH 061/106] lxd/storage/backend_lxd: Improve image unpacking message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit f57c3e41beb406da488888493ddae953aa71f9f3) Signed-off-by: Din Music License: Apache-2.0 --- lxd/storage/backend_lxd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index f4c88505c8ee..87f38fa75182 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -1783,7 +1783,7 @@ func (b *lxdBackend) imageFiller(fingerprint string, op *operations.Operation) f metadata := make(map[string]any) tracker = &ioprogress.ProgressTracker{ Handler: func(percent, speed int64) { - shared.SetProgressMetadata(metadata, "create_instance_from_image_unpack", "Unpack", percent, 0, speed) + shared.SetProgressMetadata(metadata, "create_instance_from_image_unpack", "Unpacking image", percent, 0, speed) _ = op.UpdateMetadata(metadata) }} } From 2d462fe3ec64597670f53202b71e8f3d5fdcca50 Mon Sep 17 00:00:00 2001 From: Din Music Date: Fri, 26 Jul 2024 07:02:41 +0000 Subject: [PATCH 062/106] lxd/storage/backend_lxd: Reuse qemu-img info helper to inspect image Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index 87f38fa75182..0a1167ad1fa7 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -2470,40 +2470,21 @@ func (b *lxdBackend) CreateInstanceFromConversion(inst instance.Instance, conn i } // Extract image format and size. 
- cmd := []string{ - // Use prlimit because qemu-img can consume considerable RAM & CPU time if fed - // a maliciously crafted disk image. Since cloud tenants are not to be trusted, - // ensure QEMU is limited to 1 GiB address space and 2 seconds of CPU time. - // This should be more than enough for real world images. - "prlimit", "--cpu=2", "--as=1073741824", - "qemu-img", "info", imgPath, "--output", "json", - } - - out, err := apparmor.QemuImg(b.state.OS, cmd, imgPath, "") - if err != nil { - return fmt.Errorf("qemu-img info: %v", err) - } - - imgInfo := struct { - Format string `json:"format"` - Bytes int64 `json:"virtual-size"` - }{} - - err = json.Unmarshal([]byte(out), &imgInfo) + imgFormat, imgBytes, err := qemuImageInfo(b.state.OS, imgPath) if err != nil { - return fmt.Errorf("Failed to parse image information: %v", err) + return err } - srcDiskSize = imgInfo.Bytes + srcDiskSize = imgBytes if canResizeRootDiskSize { // Set size of the volume to the uncompressed image size. - l.Debug("Setting volume size to uncompressed image size", logger.Ctx{"size": fmt.Sprintf("%d", imgInfo.Bytes)}) - args.Config["size"] = fmt.Sprintf("%d", imgInfo.Bytes) + l.Debug("Setting volume size to uncompressed image size", logger.Ctx{"size": fmt.Sprintf("%d", imgBytes)}) + args.Config["size"] = fmt.Sprintf("%d", imgBytes) } // Convert received image into intance volume. - volFiller.Fill = b.imageConversionFiller(imgPath, imgInfo.Format) + volFiller.Fill = b.imageConversionFiller(imgPath, imgFormat) } else { // If volume size is provided, then use that as block volume size instead of pool default. 
// This way if the volume being received is larger than the pool default size, the created From 096f2c6799e10b8bd501fdec4e73f4f4ffc719a5 Mon Sep 17 00:00:00 2001 From: Din Music Date: Wed, 24 Jul 2024 11:49:55 +0000 Subject: [PATCH 063/106] lxd/storage/backend_lxd: Remove image after format conversion Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index 0a1167ad1fa7..d4734d2576a1 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -1840,6 +1840,12 @@ func (b *lxdBackend) imageConversionFiller(imgPath string, imgFormat string) fun return -1, fmt.Errorf("qemu-img convert: failed to convert image from %q to %q format: %v", imgFormat, "raw", err) } + // Remove the image after the conversion to free up the space as soon as possible. + err = os.Remove(imgPath) + if err != nil { + return -1, err + } + // Convert volume size to bytes. volSizeBytes, err := units.ParseByteSizeString(vol.ConfigSize()) if err != nil { From 0de65c4aae4bff4e7a577247e6a8fbd876bb3115 Mon Sep 17 00:00:00 2001 From: Din Music Date: Wed, 24 Jul 2024 11:51:33 +0000 Subject: [PATCH 064/106] lxd/storage/backend_lxd: Indicate conversion format in log Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index d4734d2576a1..d24871f155a6 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -1831,8 +1831,8 @@ func (b *lxdBackend) imageConversionFiller(imgPath string, imgFormat string) fun "qemu-img", "convert", "-f", imgFormat, "-O", "raw", imgPath, diskPath, "-t", "writeback", } - b.logger.Debug("Image conversion started") - defer b.logger.Debug("Image conversion finished") + b.logger.Debug("Image conversion started", logger.Ctx{"from": imgFormat, "to": "raw"}) + defer b.logger.Debug("Image conversion finished", 
logger.Ctx{"from": imgFormat, "to": "raw"}) out, err := apparmor.QemuImg(b.state.OS, cmd, imgPath, diskPath) if err != nil { From 58c86db7ee2a237aa20c6973d5aec6979c33810c Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 4 Jul 2024 12:44:23 +0000 Subject: [PATCH 065/106] lxd/storage/backend_lxd: Inject virtio drivers into imported image Use virt-v2v-in-place for injecting virtio drivers into the image. Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 67 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index d24871f155a6..441f08928c2d 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -11,6 +11,7 @@ import ( "net/http" "net/url" "os" + "os/exec" "path/filepath" "slices" "strings" @@ -54,6 +55,7 @@ import ( "github.com/canonical/lxd/shared/logger" "github.com/canonical/lxd/shared/revert" "github.com/canonical/lxd/shared/units" + "github.com/canonical/lxd/shared/version" ) var unavailablePools = make(map[string]struct{}) @@ -2347,9 +2349,9 @@ func (b *lxdBackend) CreateInstanceFromMigration(inst instance.Instance, conn io return nil } -// CreateInstanceFromConversion receives an image and creates and instance from it. -// Depending on provided conversionOptions, the image is also converted into the -// raw format. +// CreateInstanceFromConversion receives a disk or filesystem and creates an instance from it. +// Based on the provided conversion options, the received disk is converted into the raw format +// and/or the virtio drivers are injected into it. 
func (b *lxdBackend) CreateInstanceFromConversion(inst instance.Instance, conn io.ReadWriteCloser, args migration.VolumeTargetArgs, op *operations.Operation) error { l := b.logger.AddContext(logger.Ctx{"project": inst.Project().Name, "instance": inst.Name(), "args": fmt.Sprintf("%+v", args)}) l.Debug("CreateInstanceFromConversion started") @@ -2536,6 +2538,65 @@ func (b *lxdBackend) CreateInstanceFromConversion(inst instance.Instance, conn i revert.Add(func() { _ = b.driver.DeleteVolume(volCopy.Volume, op) }) + // At this point, the instance's volume is populated. If "virtio" option is enabled, + // inject the virtio drivers. + if slices.Contains(args.ConversionOptions, "virtio") { + b.logger.Debug("Inject virtio drivers started") + defer b.logger.Debug("Inject virtio drivers finished") + + err = b.driver.MountVolume(vol, op) + if err != nil { + return err + } + + defer func() { _, _ = b.driver.UnmountVolume(vol, true, op) }() + + diskPath, err := b.driver.GetVolumeDiskPath(vol) + if err != nil { + return err + } + + out, err := exec.Command("virt-v2v-in-place", "--version").CombinedOutput() + if err != nil { + return fmt.Errorf("Failed to get virt-v2v-in-place version: %w (%s)", err, string(out)) + } + + // Extract virt-v2v-in-place version (format is "virt-v2v-in-place 1.2.3"). + v2vVersionParts := strings.Split(strings.TrimSpace(string(out)), " ") + v2vVersion, err := version.NewDottedVersion(v2vVersionParts[len(v2vVersionParts)-1]) + if err != nil { + return err + } + + minVersion, err := version.NewDottedVersion("2.3.4") + if err != nil { + return err + } + + // Ensure virt-v2v-in-place version is higher than or equal to the minimum required version. + if v2vVersion.Compare(minVersion) < 0 { + return fmt.Errorf("The virt-v2v-in-place version %q does not match the minimum required version %q", v2vVersion, minVersion) + } + + // Run virt-v2v-in-place to inject virtio drivers. 
+ cmd := exec.Command( + // Run with low priority to reduce the CPU impact on other processes. + "nice", "-n19", + "virt-v2v-in-place", "-i", "disk", "-if", "raw", "--block-driver", "virtio-scsi", diskPath, + ) + + // Instruct virt-v2v-in-place where to search for windows drivers. + cmd.Env = append(os.Environ(), + "VIRTIO_WIN=/usr/share/virtio-win/virtio-win.iso", + "VIRT_TOOLS_DATA_DIR=/usr/share/virt-tools", + ) + + out, err = cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("Failed to inject virtio drivers: %w (%s)", err, string(out)) + } + } + err = b.ensureInstanceSymlink(inst.Type(), inst.Project().Name, inst.Name(), vol.MountPath()) if err != nil { return err From 27f696d80bebfa844cf78ba266c20cdade0b3dd7 Mon Sep 17 00:00:00 2001 From: Din Music Date: Fri, 12 Jul 2024 08:11:01 +0000 Subject: [PATCH 066/106] doc: Document usage of conversion option Signed-off-by: Din Music --- doc/howto/import_machines_to_instances.md | 29 ++++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/doc/howto/import_machines_to_instances.md b/doc/howto/import_machines_to_instances.md index 41862b3827b3..2f20fee6ee93 100644 --- a/doc/howto/import_machines_to_instances.md +++ b/doc/howto/import_machines_to_instances.md @@ -32,18 +32,28 @@ The tool can create both containers and virtual machines: It is also not possible to create a virtual machine from the physical machine that you are using to do the migration, because the migration tool would be using the disk that it is copying. Instead, you could provide a bootable image, or a bootable partition or disk that is currently not in use. 
+The tool can also inject the required VIRTIO drivers into the image: + +* To convert the image into raw format and inject the VIRTIO drivers during the conversion, use the following command: + + lxd-migrate --conversion=format,virtio + + ```{note} + The conversion option `virtio` requires `virt-v2v-in-place` to be installed on the host where the LXD server runs. + ``` + +* For converting Windows images from a foreign hypervisor (not from QEMU/KVM with Q35/`virtio-scsi`), you must install additional drivers on the host: + * Install the `virtio-win` package or download the [`virtio-win.iso`](https://fedorapeople.org/groups/virt/virtio-win/direct-downloads/stable-virtio/virtio-win.iso) file and place it in the `/usr/share/virtio-win` directory. + * Download [`rhsrvany.exe` and `pnp_wait.exe`](https://github.com/rwmjones/rhsrvany), and place them in the `/usr/share/virt-tools/` directory. + ````{tip} - If you want to convert a Windows VM from a foreign hypervisor (not from QEMU/KVM with Q35/`virtio-scsi`), - you must install the `virtio-win` drivers to your Windows. Otherwise, your VM won't boot. -
- Expand to see how to integrate the required drivers to your Windows VM - Install the required tools on the host: + If you want to convert a Windows VM from a foreign hypervisor manually, + you must install both the required Windows drivers (as described above) and `virt-v2v` (>= 2.3.4). - 1. Install `virt-v2v` version >= 2.3.4 (this is the minimal version that supports the `--block-driver` option). - 1. Install the `virtio-win` package, or download the [`virtio-win.iso`](https://fedorapeople.org/groups/virt/virtio-win/direct-downloads/stable-virtio/virtio-win.iso) image and put it into the `/usr/share/virtio-win` folder. - 1. You might also need to install [`rhsrvany`](https://github.com/rwmjones/rhsrvany). +
+ Expand to see how to convert your Windows VM using virt-v2v - Now you can use `virt-v2v` to convert images from a foreign hypervisor to `raw` images for LXD and include the required drivers: + Use `virt-v2v` to convert a Windows image into `raw` format and include the required drivers. ``` # Example 1. Convert a vmdk disk image to a raw image suitable for lxd-migrate @@ -53,6 +63,7 @@ The tool can create both containers and virtual machines: ``` You can find the resulting image in the `os` directory and use it with `lxd-migrate` on the next steps. + In addition, when migrating already converted images, `lxd-migrate` conversion options are not necessary.
```` From b45e0e526289ae957f94e719494c11b1f6bd2352 Mon Sep 17 00:00:00 2001 From: Din Music Date: Fri, 12 Jul 2024 08:37:54 +0000 Subject: [PATCH 067/106] doc: Fix exception rules for base URL Signed-off-by: Din Music --- doc/.sphinx/.markdownlint/exceptions.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/.sphinx/.markdownlint/exceptions.txt b/doc/.sphinx/.markdownlint/exceptions.txt index 63fd175bbcc9..a717484f5bc2 100644 --- a/doc/.sphinx/.markdownlint/exceptions.txt +++ b/doc/.sphinx/.markdownlint/exceptions.txt @@ -1,5 +1,5 @@ -.tmp/doc/howto/import_machines_to_instances.md:103: MD034 Bare URL used -.tmp/doc/howto/import_machines_to_instances.md:207: MD034 Bare URL used +.tmp/doc/howto/import_machines_to_instances.md:114: MD034 Bare URL used +.tmp/doc/howto/import_machines_to_instances.md:218: MD034 Bare URL used .tmp/doc/howto/network_forwards.md:66: MD004 Unordered list style .tmp/doc/howto/network_forwards.md:71: MD004 Unordered list style .tmp/doc/howto/network_forwards.md:67: MD005 Inconsistent indentation for list items at the same level From 93a096df11860d902a8ce686d533dc6b294a70eb Mon Sep 17 00:00:00 2001 From: Din Music Date: Fri, 12 Jul 2024 10:14:22 +0000 Subject: [PATCH 068/106] doc: Add virtio to the wordlist Signed-off-by: Din Music --- doc/.wordlist.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/.wordlist.txt b/doc/.wordlist.txt index cd6ce56b3c18..0dc12d3d3e72 100644 --- a/doc/.wordlist.txt +++ b/doc/.wordlist.txt @@ -53,6 +53,7 @@ UI UUID VDI VHDX +VIRTIO VM VMDK YAML From 9c7c23fbfe0b7d898dbd18907cdfc3fda07b184b Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:31:28 +0100 Subject: [PATCH 069/106] lxd: Add method to add storage volume details to request context. 
Signed-off-by: Mark Laing --- lxd/storage_volumes.go | 98 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/lxd/storage_volumes.go b/lxd/storage_volumes.go index cbd3ccf87795..b5aae33a750c 100644 --- a/lxd/storage_volumes.go +++ b/lxd/storage_volumes.go @@ -2729,6 +2729,104 @@ func createStoragePoolVolumeFromBackup(s *state.State, r *http.Request, requestP return operations.OperationResponse(op) } +// ctxStorageVolumeDetails is the request.CtxKey corresponding to storageVolumeDetails, which is added to the request +// context in addStoragePoolVolumeDetailsToRequestContext. +const ctxStorageVolumeDetails request.CtxKey = "storage-volume-details" + +// storageVolumeDetails contains details common to all storage volume requests. A value of this type is added to the +// request context when addStoragePoolVolumeDetailsToRequestContext is called. We do this to avoid repeated logic when +// parsing the request details and/or making database calls to get the storage pool or effective project. These fields +// are required for the storage volume access check, and are subsequently available in the storage volume handlers. +type storageVolumeDetails struct { + volumeName string + volumeTypeName string + volumeType int + pool storagePools.Pool + forwardingNodeInfo *db.NodeInfo +} + +// addStoragePoolVolumeDetailsToRequestContext extracts storageVolumeDetails from the http.Request and adds it to the +// request context with the ctxStorageVolumeDetails request.CtxKey. Additionally, the effective project of the storage +// volume is added to the request context under request.CtxEffectiveProjectName. 
+func addStoragePoolVolumeDetailsToRequestContext(s *state.State, r *http.Request) error { + var details storageVolumeDetails + defer func() { + request.SetCtxValue(r, ctxStorageVolumeDetails, details) + }() + + volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + if err != nil { + return err + } + + details.volumeName = volumeName + + if shared.IsSnapshot(volumeName) { + return api.StatusErrorf(http.StatusBadRequest, "Invalid storage volume %q", volumeName) + } + + volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + if err != nil { + return err + } + + details.volumeTypeName = volumeTypeName + + // Convert the volume type name to our internal integer representation. + volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) + if err != nil { + return api.StatusErrorf(http.StatusBadRequest, err.Error()) + } + + details.volumeType = volumeType + + // Get the name of the storage pool the volume is supposed to be attached to. + poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + if err != nil { + return err + } + + // Load the storage pool containing the volume. This is required by the access handler as all remote volumes + // do not have a location (regardless of whether the caller used a target parameter to send the request to a + // particular member). + storagePool, err := storagePools.LoadByName(s, poolName) + if err != nil { + return err + } + + details.pool = storagePool + + // Get the effective project. + effectiveProject, err := project.StorageVolumeProject(s.DB.Cluster, request.ProjectParam(r), volumeType) + if err != nil { + return fmt.Errorf("Failed to get effective project name: %w", err) + } + + request.SetCtxValue(r, request.CtxEffectiveProjectName, effectiveProject) + + // If the target is set, we have all the information we need to perform the access check. 
+ if request.QueryParam(r, "target") != "" { + return nil + } + + // If the request has already been forwarded, no reason to perform further logic to determine the location of the + // volume. + _, err = request.GetCtxValue[string](r.Context(), request.CtxForwardedProtocol) + if err == nil { + return nil + } + + // Get information about the cluster member containing the volume. + remoteNodeInfo, err := getRemoteVolumeNodeInfo(r.Context(), s, poolName, effectiveProject, volumeName, volumeType) + if err != nil { + return err + } + + details.forwardingNodeInfo = remoteNodeInfo + + return nil +} + // getRemoteVolumeNodeInfo figures out the cluster member on which the volume with the given name is defined. If it is // the local cluster member it returns nil and no error. If it is another cluster member it returns a db.NodeInfo containing // the name and address of the remote member. If there is more than one cluster member with a matching volume name, an From c0480a8ac57b3c51d1cd304c36d0986da0781610 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:35:57 +0100 Subject: [PATCH 070/106] lxd: Add an access handler specific to storage volumes. Signed-off-by: Mark Laing --- lxd/storage_volumes.go | 47 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/lxd/storage_volumes.go b/lxd/storage_volumes.go index b5aae33a750c..03226c7fdbc3 100644 --- a/lxd/storage_volumes.go +++ b/lxd/storage_volumes.go @@ -81,6 +81,53 @@ var storagePoolVolumeTypeCmd = APIEndpoint{ Put: APIEndpointAction{Handler: storagePoolVolumePut, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanEdit, "poolName", "type", "volumeName")}, } +// storagePoolVolumeTypeAccessHandler returns an access handler which checks the given entitlement on a storage volume. 
+func storagePoolVolumeTypeAccessHandler(entitlement auth.Entitlement) func(d *Daemon, r *http.Request) response.Response { + return func(d *Daemon, r *http.Request) response.Response { + s := d.State() + err := addStoragePoolVolumeDetailsToRequestContext(s, r) + if err != nil { + return response.SmartError(err) + } + + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) + if err != nil { + return response.SmartError(err) + } + + var target string + + // Regardless of whether the caller specified a target parameter, we do not add it to the authorization check if + // the storage pool is remote. This is because the volume in the database has a NULL `node_id`, so the URL uniquely + // identifies the volume without the target parameter. + if !details.pool.Driver().Info().Remote { + // If the storage pool is local, we need to add a target parameter to the authorization check URL for the + // auth subsystem to consider it unique. + + // If the target parameter was specified, use that. + target = request.QueryParam(r, "target") + + if target == "" { + // Otherwise, check if the volume is located on another member. + if details.forwardingNodeInfo != nil { + // Use the name of the forwarding member as the location of the volume. + target = details.forwardingNodeInfo.Name + } else { + // If we're not forwarding the request, use the name of this member as the location of the volume. 
+ target = s.ServerName + } + } + } + + err = s.Authorizer.CheckPermission(r.Context(), entity.StorageVolumeURL(request.ProjectParam(r), target, details.pool.Name(), details.volumeTypeName, details.volumeName), entitlement) + if err != nil { + return response.SmartError(err) + } + + return response.EmptySyncResponse + } +} + // swagger:operation GET /1.0/storage-volumes storage storage_volumes_get // // Get the storage volumes From f89a6eb11ffaa1a65e0ed02dc50d05b0b4bf33e2 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:38:39 +0100 Subject: [PATCH 071/106] lxd: Use the storage volume access handler for calls to specific volume. Signed-off-by: Mark Laing --- lxd/storage_volumes.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lxd/storage_volumes.go b/lxd/storage_volumes.go index 03226c7fdbc3..3106be7ea12d 100644 --- a/lxd/storage_volumes.go +++ b/lxd/storage_volumes.go @@ -74,11 +74,11 @@ var storagePoolVolumesTypeCmd = APIEndpoint{ var storagePoolVolumeTypeCmd = APIEndpoint{ Path: "storage-pools/{poolName}/volumes/{type}/{volumeName}", - Delete: APIEndpointAction{Handler: storagePoolVolumeDelete, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanDelete, "poolName", "type", "volumeName")}, - Get: APIEndpointAction{Handler: storagePoolVolumeGet, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanView, "poolName", "type", "volumeName")}, - Patch: APIEndpointAction{Handler: storagePoolVolumePatch, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanEdit, "poolName", "type", "volumeName")}, - Post: APIEndpointAction{Handler: storagePoolVolumePost, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanEdit, "poolName", "type", "volumeName")}, - Put: APIEndpointAction{Handler: storagePoolVolumePut, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanEdit, "poolName", "type", "volumeName")}, + Delete: 
APIEndpointAction{Handler: storagePoolVolumeDelete, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanDelete)}, + Get: APIEndpointAction{Handler: storagePoolVolumeGet, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanView)}, + Patch: APIEndpointAction{Handler: storagePoolVolumePatch, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanEdit)}, + Post: APIEndpointAction{Handler: storagePoolVolumePost, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanEdit)}, + Put: APIEndpointAction{Handler: storagePoolVolumePut, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanEdit)}, } // storagePoolVolumeTypeAccessHandler returns an access handler which checks the given entitlement on a storage volume. From de7de4c9c6b8ca6657fd3aec682d0f282828b9af Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 4 Jul 2024 15:16:15 +0100 Subject: [PATCH 072/106] lxd: Refactor forwardedResponseIfVolumeIsRemote. This function will now only forward the request to another member if a db.NodeInfo is found in the request context under `ctxStorageVolumeRemoteNodeInfo`. Signed-off-by: Mark Laing --- lxd/response.go | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/lxd/response.go b/lxd/response.go index 68e843b8812c..8f056f21bbc8 100644 --- a/lxd/response.go +++ b/lxd/response.go @@ -57,26 +57,21 @@ func forwardedResponseIfInstanceIsRemote(s *state.State, r *http.Request, projec return response.ForwardedResponse(client, r), nil } -// forwardedResponseIfVolumeIsRemote redirects a request to the node hosting -// the volume with the given pool ID, name and type. If the container is local, -// nothing gets done and nil is returned. If more than one node has a matching -// volume, an error is returned. -// -// This is used when no targetNode is specified, and saves users some typing -// when the volume name/type is unique to a node. 
-func forwardedResponseIfVolumeIsRemote(s *state.State, r *http.Request, poolName string, projectName string, volumeName string, volumeType int) response.Response { - if request.QueryParam(r, "target") != "" { +// forwardedResponseIfVolumeIsRemote looks up the storageVolumeDetails stored in the request context under the ctxStorageVolumeDetails key. +// If the details contain a non-nil forwardingNodeInfo, its address is used to set up a client for the indicated member and forward the request. +// Otherwise, a nil response is returned to indicate that the request was not forwarded, and should continue within this member. +func forwardedResponseIfVolumeIsRemote(s *state.State, r *http.Request) response.Response { + storageVolumeDetails, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) + if err != nil { + return nil + } else if storageVolumeDetails.forwardingNodeInfo == nil { return nil } - client, err := cluster.ConnectIfVolumeIsRemote(s, poolName, projectName, volumeName, volumeType, s.Endpoints.NetworkCert(), s.ServerCert(), r) + client, err := cluster.Connect(storageVolumeDetails.forwardingNodeInfo.Address, s.Endpoints.NetworkCert(), s.ServerCert(), r, false) if err != nil { return response.SmartError(err) } - if client == nil { - return nil - } - return response.ForwardedResponse(client, r) } From c96bf8ed2e558b5eabe9e64b1ea551a1cf7660c4 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:40:08 +0100 Subject: [PATCH 073/106] lxd: Refactor storage volume handlers to use values from context. 
Signed-off-by: Mark Laing --- lxd/storage_volumes.go | 281 +++++++++++------------------------------ 1 file changed, 75 insertions(+), 206 deletions(-) diff --git a/lxd/storage_volumes.go b/lxd/storage_volumes.go index 3106be7ea12d..7970d36df814 100644 --- a/lxd/storage_volumes.go +++ b/lxd/storage_volumes.go @@ -1361,27 +1361,15 @@ func doVolumeMigration(s *state.State, r *http.Request, requestProjectName strin func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - if shared.IsSnapshot(volumeName) { + if shared.IsSnapshot(details.volumeName) { return response.BadRequest(fmt.Errorf("Invalid volume name")) } - // Get the name of the storage pool the volume is supposed to be attached to. - srcPoolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - req := api.StorageVolumePost{} // Parse the request. @@ -1398,17 +1386,17 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { // We currently only allow to create storage volumes of type storagePoolVolumeTypeCustom. // So check, that nothing else was requested. 
- if volumeTypeName != cluster.StoragePoolVolumeTypeNameCustom { - return response.BadRequest(fmt.Errorf("Renaming storage volumes of type %q is not allowed", volumeTypeName)) + if details.volumeTypeName != cluster.StoragePoolVolumeTypeNameCustom { + return response.BadRequest(fmt.Errorf("Renaming storage volumes of type %q is not allowed", details.volumeTypeName)) } requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, cluster.StoragePoolVolumeTypeCustom) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - targetProjectName := projectName + targetProjectName := effectiveProjectName if req.Project != "" { targetProjectName, err = project.StorageVolumeProject(s.DB.Cluster, req.Project, cluster.StoragePoolVolumeTypeCustom) if err != nil { @@ -1423,11 +1411,11 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { return response.BadRequest(fmt.Errorf("Target project does not have features.storage.volumes enabled")) } - if projectName == targetProjectName { + if effectiveProjectName == targetProjectName { return response.BadRequest(fmt.Errorf("Project and target project are the same")) } - // Check if user has access to effective storage target project + // Check if user has permission to copy/move the volume into the effective project corresponding to the target. 
err := s.Authorizer.CheckPermission(r.Context(), entity.ProjectURL(targetProjectName), auth.EntitlementCanCreateStorageVolumes) if err != nil { return response.SmartError(err) @@ -1487,24 +1475,13 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { return resp } - srcPool, err := storagePools.LoadByName(s, srcPoolName) - if err != nil { - return response.SmartError(err) - } - - if srcPool.Driver().Info().Name == "ceph" { + if details.pool.Driver().Info().Name == "ceph" { var dbVolume *db.StorageVolume var volumeNotFound bool var targetIsSet bool err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - // Load source volume. - srcPoolID, err := tx.GetStoragePoolID(ctx, srcPoolName) - if err != nil { - return err - } - - dbVolume, err = tx.GetStoragePoolVolume(ctx, srcPoolID, projectName, cluster.StoragePoolVolumeTypeCustom, volumeName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, cluster.StoragePoolVolumeTypeCustom, details.volumeName, true) if err != nil { // Check if the user provided an incorrect target query parameter and return a helpful error message. 
_, volumeNotFound = api.StatusErrorMatch(err, http.StatusNotFound) @@ -1527,7 +1504,7 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { Name: req.Name, } - return storagePoolVolumeTypePostRename(s, r, srcPool.Name(), projectName, &dbVolume.StorageVolume, req) + return storagePoolVolumeTypePostRename(s, r, details.pool.Name(), effectiveProjectName, &dbVolume.StorageVolume, req) } } else { resp := forwardedResponseToNode(s, r, req.Source.Location) @@ -1537,7 +1514,7 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { } err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - p, err := cluster.GetProject(ctx, tx.Tx(), projectName) + p, err := cluster.GetProject(ctx, tx.Tx(), effectiveProjectName) if err != nil { return err } @@ -1572,13 +1549,13 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { } run := func(op *operations.Operation) error { - return migrateStorageVolume(s, r, volumeName, srcPoolName, targetMemberInfo.Name, targetProjectName, req, op) + return migrateStorageVolume(s, r, details.volumeName, details.pool.Name(), targetMemberInfo.Name, targetProjectName, req, op) } resources := map[string][]api.URL{} - resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", srcPoolName, "volumes", "custom", volumeName)} + resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", "custom", details.volumeName)} - op, err := operations.OperationCreate(s, projectName, operations.OperationClassTask, operationtype.VolumeMigrate, resources, nil, run, nil, nil, r) + op, err := operations.OperationCreate(s, effectiveProjectName, operations.OperationClassTask, operationtype.VolumeMigrate, resources, nil, run, nil, nil, r) if err != nil { return response.InternalError(err) } @@ -1591,15 +1568,9 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) 
response.Response { return resp } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // If source is set, we know the source and the target, and therefore don't need this function to figure out where to forward the request to. if req.Source.Location == "" { - resp = forwardedResponseIfVolumeIsRemote(s, r, srcPoolName, projectName, volumeName, volumeType) + resp := forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } @@ -1607,7 +1578,7 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { // This is a migration request so send back requested secrets. if req.Migration { - return storagePoolVolumeTypePostMigration(s, r, requestProjectName, projectName, srcPoolName, volumeName, req) + return storagePoolVolumeTypePostMigration(s, r, requestProjectName, effectiveProjectName, details.pool.Name(), details.volumeName, req) } // Retrieve ID of the storage pool (and check if the storage pool exists). @@ -1617,7 +1588,7 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { if req.Pool != "" { targetPoolName = req.Pool } else { - targetPoolName = srcPoolName + targetPoolName = details.pool.Name() } err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { @@ -1631,7 +1602,7 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { // Check that the name isn't already in use. 
- _, err = tx.GetStoragePoolNodeVolumeID(ctx, targetProjectName, req.Name, volumeType, targetPoolID) + _, err = tx.GetStoragePoolNodeVolumeID(ctx, targetProjectName, req.Name, details.volumeType, targetPoolID) return err }) @@ -1644,7 +1615,7 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { } // Check if the daemon itself is using it. - used, err := storagePools.VolumeUsedByDaemon(s, srcPoolName, volumeName) + used, err := storagePools.VolumeUsedByDaemon(s, details.pool.Name(), details.volumeName) if err != nil { return response.SmartError(err) } @@ -1658,13 +1629,7 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { var targetIsSet bool err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - // Load source volume. - srcPoolID, err := tx.GetStoragePoolID(ctx, srcPoolName) - if err != nil { - return err - } - - dbVolume, err = tx.GetStoragePoolVolume(ctx, srcPoolID, projectName, volumeType, volumeName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, details.volumeName, true) if err != nil { // Check if the user provided an incorrect target query parameter and return a helpful error message. _, volumeNotFound = api.StatusErrorMatch(err, http.StatusNotFound) @@ -1684,7 +1649,7 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { } // Check if a running instance is using it. 
- err = storagePools.VolumeUsedByInstanceDevices(s, srcPoolName, projectName, &dbVolume.StorageVolume, true, func(dbInst db.InstanceArgs, project api.Project, usedByDevices []string) error { + err = storagePools.VolumeUsedByInstanceDevices(s, details.pool.Name(), effectiveProjectName, &dbVolume.StorageVolume, true, func(dbInst db.InstanceArgs, project api.Project, usedByDevices []string) error { inst, err := instance.Load(s, dbInst, project) if err != nil { return err @@ -1701,12 +1666,12 @@ func storagePoolVolumePost(d *Daemon, r *http.Request) response.Response { } // Detect a rename request. - if (req.Pool == "" || req.Pool == srcPoolName) && (projectName == targetProjectName) { - return storagePoolVolumeTypePostRename(s, r, srcPoolName, projectName, &dbVolume.StorageVolume, req) + if (req.Pool == "" || req.Pool == details.pool.Name()) && (effectiveProjectName == targetProjectName) { + return storagePoolVolumeTypePostRename(s, r, details.pool.Name(), effectiveProjectName, &dbVolume.StorageVolume, req) } // Otherwise this is a move request. - return storagePoolVolumeTypePostMove(s, r, srcPoolName, projectName, targetProjectName, &dbVolume.StorageVolume, req) + return storagePoolVolumeTypePostMove(s, r, details.pool.Name(), effectiveProjectName, targetProjectName, &dbVolume.StorageVolume, req) } func migrateStorageVolume(s *state.State, r *http.Request, sourceVolumeName string, sourcePoolName string, targetNode string, projectName string, req api.StorageVolumePost, op *operations.Operation) error { @@ -2016,36 +1981,18 @@ func storagePoolVolumeTypePostMove(s *state.State, r *http.Request, poolName str func storagePoolVolumeGet(d *Daemon, r *http.Request) response.Response { s := d.State() - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } - // Get the name of the storage volume. 
- volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if !shared.ValueInSlice(volumeType, supportedVolumeTypes) { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if !shared.ValueInSlice(details.volumeType, supportedVolumeTypes) { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -2055,7 +2002,7 @@ func storagePoolVolumeGet(d *Daemon, r *http.Request) response.Response { return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } @@ -2063,14 +2010,8 @@ func storagePoolVolumeGet(d *Daemon, r *http.Request) response.Response { var dbVolume *db.StorageVolume err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - // Get the ID of the storage pool the storage volume is supposed to be attached to. - poolID, err := tx.GetStoragePoolID(ctx, poolName) - if err != nil { - return err - } - // Get the storage volume. 
- dbVolume, err = tx.GetStoragePoolVolume(ctx, poolID, projectName, volumeType, volumeName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, details.volumeName, true) return err }) if err != nil { @@ -2084,7 +2025,7 @@ func storagePoolVolumeGet(d *Daemon, r *http.Request) response.Response { dbVolume.UsedBy = project.FilterUsedBy(s.Authorizer, r, volumeUsedBy) - etag := []any{volumeName, dbVolume.Type, dbVolume.Config} + etag := []any{details.volumeName, dbVolume.Type, dbVolume.Config} return response.SyncResponseETag(true, dbVolume.StorageVolume, etag) } @@ -2131,43 +2072,19 @@ func storagePoolVolumeGet(d *Daemon, r *http.Request) response.Response { func storagePoolVolumePut(d *Daemon, r *http.Request) response.Response { s := d.State() - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } // Check that the storage volume type is valid. 
- if !shared.ValueInSlice(volumeType, supportedVolumeTypes) { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) - } - - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(err) + if !shared.ValueInSlice(details.volumeType, supportedVolumeTypes) { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } resp := forwardedResponseIfTargetIsRemote(s, r) @@ -2175,7 +2092,7 @@ func storagePoolVolumePut(d *Daemon, r *http.Request) response.Response { return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, pool.Name(), projectName, volumeName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } @@ -2183,7 +2100,7 @@ func storagePoolVolumePut(d *Daemon, r *http.Request) response.Response { // Get the existing storage volume. var dbVolume *db.StorageVolume err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - dbVolume, err = tx.GetStoragePoolVolume(ctx, pool.ID(), projectName, volumeType, volumeName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, details.volumeName, true) return err }) if err != nil { @@ -2191,7 +2108,7 @@ func storagePoolVolumePut(d *Daemon, r *http.Request) response.Response { } // Validate the ETag - etag := []any{volumeName, dbVolume.Type, dbVolume.Config} + etag := []any{details.volumeName, dbVolume.Type, dbVolume.Config} err = util.EtagCheck(r, etag) if err != nil { @@ -2208,12 +2125,12 @@ func storagePoolVolumePut(d *Daemon, r *http.Request) response.Response { op := &operations.Operation{} op.SetRequestor(r) - if volumeType == cluster.StoragePoolVolumeTypeCustom { + if details.volumeType == cluster.StoragePoolVolumeTypeCustom { // Restore custom volume from snapshot if requested. 
This should occur first // before applying config changes so that changes are applied to the // restored volume. if req.Restore != "" { - err = pool.RestoreCustomVolume(projectName, dbVolume.Name, req.Restore, op) + err = details.pool.RestoreCustomVolume(effectiveProjectName, dbVolume.Name, req.Restore, op) if err != nil { return response.SmartError(err) } @@ -2225,31 +2142,31 @@ func storagePoolVolumePut(d *Daemon, r *http.Request) response.Response { if req.Config != nil || req.Restore == "" { // Possibly check if project limits are honored. err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - return project.AllowVolumeUpdate(s.GlobalConfig, tx, projectName, volumeName, req, dbVolume.Config) + return project.AllowVolumeUpdate(s.GlobalConfig, tx, effectiveProjectName, details.volumeName, req, dbVolume.Config) }) if err != nil { return response.SmartError(err) } - err = pool.UpdateCustomVolume(projectName, dbVolume.Name, req.Description, req.Config, op) + err = details.pool.UpdateCustomVolume(effectiveProjectName, dbVolume.Name, req.Description, req.Config, op) if err != nil { return response.SmartError(err) } } - } else if volumeType == cluster.StoragePoolVolumeTypeContainer || volumeType == cluster.StoragePoolVolumeTypeVM { - inst, err := instance.LoadByProjectAndName(s, projectName, dbVolume.Name) + } else if details.volumeType == cluster.StoragePoolVolumeTypeContainer || details.volumeType == cluster.StoragePoolVolumeTypeVM { + inst, err := instance.LoadByProjectAndName(s, effectiveProjectName, dbVolume.Name) if err != nil { return response.SmartError(err) } // Handle instance volume update requests. 
- err = pool.UpdateInstance(inst, req.Description, req.Config, op) + err = details.pool.UpdateInstance(inst, req.Description, req.Config, op) if err != nil { return response.SmartError(err) } - } else if volumeType == cluster.StoragePoolVolumeTypeImage { + } else if details.volumeType == cluster.StoragePoolVolumeTypeImage { // Handle image update requests. - err = pool.UpdateImage(dbVolume.Name, req.Description, req.Config, op) + err = details.pool.UpdateImage(dbVolume.Name, req.Description, req.Config, op) if err != nil { return response.SmartError(err) } @@ -2302,45 +2219,21 @@ func storagePoolVolumePut(d *Daemon, r *http.Request) response.Response { func storagePoolVolumePatch(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } - if shared.IsSnapshot(volumeName) { + if shared.IsSnapshot(details.volumeName) { return response.BadRequest(fmt.Errorf("Invalid volume name")) } - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is custom. 
- if volumeType != cluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) - } - - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) - if err != nil { - return response.SmartError(err) + if details.volumeType != cluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } - pool, err := storagePools.LoadByName(s, poolName) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -2350,7 +2243,7 @@ func storagePoolVolumePatch(d *Daemon, r *http.Request) response.Response { return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, pool.Name(), projectName, volumeName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } @@ -2358,7 +2251,7 @@ func storagePoolVolumePatch(d *Daemon, r *http.Request) response.Response { // Get the existing storage volume. var dbVolume *db.StorageVolume err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - dbVolume, err = tx.GetStoragePoolVolume(ctx, pool.ID(), projectName, volumeType, volumeName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, details.volumeName, true) return err }) if err != nil { @@ -2366,7 +2259,7 @@ func storagePoolVolumePatch(d *Daemon, r *http.Request) response.Response { } // Validate the ETag. 
- etag := []any{volumeName, dbVolume.Type, dbVolume.Config} + etag := []any{details.volumeName, dbVolume.Type, dbVolume.Config} err = util.EtagCheck(r, etag) if err != nil { @@ -2395,7 +2288,7 @@ func storagePoolVolumePatch(d *Daemon, r *http.Request) response.Response { op := &operations.Operation{} op.SetRequestor(r) - err = pool.UpdateCustomVolume(projectName, dbVolume.Name, req.Description, req.Config, op) + err = details.pool.UpdateCustomVolume(effectiveProjectName, dbVolume.Name, req.Description, req.Config, op) if err != nil { return response.SmartError(err) } @@ -2435,42 +2328,18 @@ func storagePoolVolumePatch(d *Daemon, r *http.Request) response.Response { func storagePoolVolumeDelete(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - if shared.IsSnapshot(volumeName) { - return response.BadRequest(fmt.Errorf("Invalid storage volume %q", volumeName)) - } - - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Convert the volume type name to our internal integer representation. 
- volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) - if err != nil { - return response.SmartError(err) + if shared.IsSnapshot(details.volumeName) { + return response.BadRequest(fmt.Errorf("Invalid storage volume %q", details.volumeName)) } // Check that the storage volume type is valid. - if !shared.ValueInSlice(volumeType, supportedVolumeTypes) { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if !shared.ValueInSlice(details.volumeType, supportedVolumeTypes) { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } resp := forwardedResponseIfTargetIsRemote(s, r) @@ -2478,25 +2347,25 @@ func storagePoolVolumeDelete(d *Daemon, r *http.Request) response.Response { return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } - if volumeType != cluster.StoragePoolVolumeTypeCustom && volumeType != cluster.StoragePoolVolumeTypeImage { - return response.BadRequest(fmt.Errorf("Storage volumes of type %q cannot be deleted with the storage API", volumeTypeName)) - } - - // Get the storage pool the storage volume is supposed to be attached to. 
- pool, err := storagePools.LoadByName(s, poolName) + requestProjectName := request.ProjectParam(r) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } + if details.volumeType != cluster.StoragePoolVolumeTypeCustom && details.volumeType != cluster.StoragePoolVolumeTypeImage { + return response.BadRequest(fmt.Errorf("Storage volumes of type %q cannot be deleted with the storage API", details.volumeTypeName)) + } + // Get the storage volume. var dbVolume *db.StorageVolume err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - dbVolume, err = tx.GetStoragePoolVolume(ctx, pool.ID(), projectName, volumeType, volumeName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, details.volumeName, true) return err }) if err != nil { @@ -2520,7 +2389,7 @@ func storagePoolVolumeDelete(d *Daemon, r *http.Request) response.Response { } if len(volumeUsedBy) > 0 { - if len(volumeUsedBy) != 1 || volumeType != cluster.StoragePoolVolumeTypeImage || !isImageURL(volumeUsedBy[0], dbVolume.Name) { + if len(volumeUsedBy) != 1 || details.volumeType != cluster.StoragePoolVolumeTypeImage || !isImageURL(volumeUsedBy[0], dbVolume.Name) { return response.BadRequest(fmt.Errorf("The storage volume is still in use")) } } @@ -2529,13 +2398,13 @@ func storagePoolVolumeDelete(d *Daemon, r *http.Request) response.Response { op := &operations.Operation{} op.SetRequestor(r) - switch volumeType { + switch details.volumeType { case cluster.StoragePoolVolumeTypeCustom: - err = pool.DeleteCustomVolume(projectName, volumeName, op) + err = details.pool.DeleteCustomVolume(effectiveProjectName, details.volumeName, op) case cluster.StoragePoolVolumeTypeImage: - err = pool.DeleteImage(volumeName, op) + err = details.pool.DeleteImage(details.volumeName, op) default: - return 
response.BadRequest(fmt.Errorf(`Storage volumes of type %q cannot be deleted with the storage API`, volumeTypeName)) + return response.BadRequest(fmt.Errorf(`Storage volumes of type %q cannot be deleted with the storage API`, details.volumeTypeName)) } if err != nil { From 16d432d705e79d41de525f878d253e3a5e25c6aa Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:40:38 +0100 Subject: [PATCH 074/106] lxd: Use storage volume location in access check when listing. Signed-off-by: Mark Laing --- lxd/storage_volumes.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd/storage_volumes.go b/lxd/storage_volumes.go index 7970d36df814..4f947bbd0ee0 100644 --- a/lxd/storage_volumes.go +++ b/lxd/storage_volumes.go @@ -804,7 +804,7 @@ func storagePoolVolumesGet(d *Daemon, r *http.Request) response.Response { vol := &dbVol.StorageVolume volumeName, _, _ := api.GetParentAndSnapshotName(vol.Name) - if !userHasPermission(entity.StorageVolumeURL(vol.Project, "", dbVol.Pool, dbVol.Type, volumeName)) { + if !userHasPermission(entity.StorageVolumeURL(vol.Project, vol.Location, dbVol.Pool, dbVol.Type, volumeName)) { continue } @@ -828,7 +828,7 @@ func storagePoolVolumesGet(d *Daemon, r *http.Request) response.Response { for _, dbVol := range dbVolumes { volumeName, _, _ := api.GetParentAndSnapshotName(dbVol.Name) - if !userHasPermission(entity.StorageVolumeURL(dbVol.Project, "", dbVol.Pool, dbVol.Type, volumeName)) { + if !userHasPermission(entity.StorageVolumeURL(dbVol.Project, dbVol.Location, dbVol.Pool, dbVol.Type, volumeName)) { continue } From 0f50bc3a4474415221b9434e9bfa63a963bd69de Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:42:23 +0100 Subject: [PATCH 075/106] lxd: Refactor volume backup handlers to use volume access handler. 
Signed-off-by: Mark Laing --- lxd/storage_volumes_backup.go | 275 ++++++++-------------------------- 1 file changed, 65 insertions(+), 210 deletions(-) diff --git a/lxd/storage_volumes_backup.go b/lxd/storage_volumes_backup.go index 6fc361e339da..d84a4e96e72b 100644 --- a/lxd/storage_volumes_backup.go +++ b/lxd/storage_volumes_backup.go @@ -21,11 +21,9 @@ import ( "github.com/canonical/lxd/lxd/project" "github.com/canonical/lxd/lxd/request" "github.com/canonical/lxd/lxd/response" - storagePools "github.com/canonical/lxd/lxd/storage" "github.com/canonical/lxd/lxd/util" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" - "github.com/canonical/lxd/shared/entity" "github.com/canonical/lxd/shared/logger" "github.com/canonical/lxd/shared/version" ) @@ -33,22 +31,22 @@ import ( var storagePoolVolumeTypeCustomBackupsCmd = APIEndpoint{ Path: "storage-pools/{poolName}/volumes/{type}/{volumeName}/backups", - Get: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupsGet, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanView, "poolName", "type", "volumeName")}, - Post: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupsPost, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanManageBackups, "poolName", "type", "volumeName")}, + Get: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupsGet, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanView)}, + Post: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupsPost, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanManageBackups)}, } var storagePoolVolumeTypeCustomBackupCmd = APIEndpoint{ Path: "storage-pools/{poolName}/volumes/{type}/{volumeName}/backups/{backupName}", - Get: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupGet, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanView, "poolName", "type", "volumeName")}, - Post: 
APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupPost, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanManageBackups, "poolName", "type", "volumeName")}, - Delete: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupDelete, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanManageBackups, "poolName", "type", "volumeName")}, + Get: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupGet, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanView)}, + Post: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupPost, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanManageBackups)}, + Delete: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupDelete, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanManageBackups)}, } var storagePoolVolumeTypeCustomBackupExportCmd = APIEndpoint{ Path: "storage-pools/{poolName}/volumes/{type}/{volumeName}/backups/{backupName}/export", - Get: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupExportGet, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanView, "poolName", "type", "volumeName")}, + Get: APIEndpointAction{Handler: storagePoolVolumeTypeCustomBackupExportGet, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanView)}, } // swagger:operation GET /1.0/storage-pools/{poolName}/volumes/{type}/{volumeName}/backups storage storage_pool_volumes_type_backups_get @@ -156,56 +154,23 @@ var storagePoolVolumeTypeCustomBackupExportCmd = APIEndpoint{ func storagePoolVolumeTypeCustomBackupsGet(d *Daemon, r *http.Request) response.Response { s := d.State() - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, cluster.StoragePoolVolumeTypeCustom) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage volume. 
- volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - // Get the volume type. - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if volumeType != cluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) - } - - var poolID int64 - - err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - var err error - - poolID, _, _, err = tx.GetStoragePool(ctx, poolName) - - return err - }) - if err != nil { - return response.SmartError(err) + if details.volumeType != cluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } // Handle requests targeted to a volume on a different node - resp := forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, cluster.StoragePoolVolumeTypeCustom) + resp := forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } @@ -215,7 +180,7 @@ func storagePoolVolumeTypeCustomBackupsGet(d *Daemon, r *http.Request) response. 
var volumeBackups []db.StoragePoolVolumeBackup err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - volumeBackups, err = tx.GetStoragePoolVolumeBackups(ctx, projectName, volumeName, poolID) + volumeBackups, err = tx.GetStoragePoolVolumeBackups(ctx, effectiveProjectName, details.volumeName, details.pool.ID()) return err }) if err != nil { @@ -225,7 +190,7 @@ func storagePoolVolumeTypeCustomBackupsGet(d *Daemon, r *http.Request) response. backups := make([]*backup.VolumeBackup, len(volumeBackups)) for i, b := range volumeBackups { - backups[i] = backup.NewVolumeBackup(s, projectName, poolName, volumeName, b.ID, b.Name, b.CreationDate, b.ExpiryDate, b.VolumeOnly, b.OptimizedStorage) + backups[i] = backup.NewVolumeBackup(s, effectiveProjectName, details.pool.Name(), details.volumeName, b.ID, b.Name, b.CreationDate, b.ExpiryDate, b.VolumeOnly, b.OptimizedStorage) } resultString := []string{} @@ -233,7 +198,7 @@ func storagePoolVolumeTypeCustomBackupsGet(d *Daemon, r *http.Request) response. for _, backup := range backups { if !recursion { - url := api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", "custom", volumeName, "backups", strings.Split(backup.Name(), "/")[1]).String() + url := api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", "custom", details.volumeName, "backups", strings.Split(backup.Name(), "/")[1]).String() resultString = append(resultString, url) } else { render := backup.Render() @@ -288,43 +253,24 @@ func storagePoolVolumeTypeCustomBackupsGet(d *Daemon, r *http.Request) response. func storagePoolVolumeTypeCustomBackupsPost(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage pool the volume is supposed to be attached to. 
- poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the volume type. - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if volumeType != cluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if details.volumeType != cluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, cluster.StoragePoolVolumeTypeCustom) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - err := project.AllowBackupCreation(tx, projectName) + err := project.AllowBackupCreation(tx, effectiveProjectName) return err }) if err != nil { @@ -336,27 +282,14 @@ func storagePoolVolumeTypeCustomBackupsPost(d *Daemon, r *http.Request) response return resp } - var poolID int64 - - err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - var err error - - poolID, _, _, err = tx.GetStoragePool(ctx, poolName) - - return err - }) - if err != nil { - return response.SmartError(err) - } - - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, cluster.StoragePoolVolumeTypeCustom) + 
resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } var dbVolume *db.StorageVolume err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - dbVolume, err = tx.GetStoragePoolVolume(ctx, poolID, projectName, volumeType, volumeName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, details.volumeName, true) return err }) if err != nil { @@ -393,14 +326,14 @@ func storagePoolVolumeTypeCustomBackupsPost(d *Daemon, r *http.Request) response // come up with a name. err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - backups, err = tx.GetStoragePoolVolumeBackupsNames(ctx, projectName, volumeName, poolID) + backups, err = tx.GetStoragePoolVolumeBackupsNames(ctx, effectiveProjectName, details.volumeName, details.pool.ID()) return err }) if err != nil { return response.BadRequest(err) } - base := volumeName + shared.SnapshotDelimiter + "backup" + base := details.volumeName + shared.SnapshotDelimiter + "backup" length := len(base) max := 0 @@ -430,7 +363,7 @@ func storagePoolVolumeTypeCustomBackupsPost(d *Daemon, r *http.Request) response return response.BadRequest(fmt.Errorf("Backup names may not contain slashes")) } - fullName := volumeName + shared.SnapshotDelimiter + req.Name + fullName := details.volumeName + shared.SnapshotDelimiter + req.Name volumeOnly := req.VolumeOnly backup := func(op *operations.Operation) error { @@ -444,19 +377,19 @@ func storagePoolVolumeTypeCustomBackupsPost(d *Daemon, r *http.Request) response CompressionAlgorithm: req.CompressionAlgorithm, } - err := volumeBackupCreate(s, args, projectName, poolName, volumeName) + err := volumeBackupCreate(s, args, effectiveProjectName, details.pool.Name(), details.volumeName) if err != nil { return fmt.Errorf("Create volume backup: %w", err) } - s.Events.SendLifecycle(projectName, lifecycle.StorageVolumeBackupCreated.Event(poolName, 
volumeTypeName, args.Name, projectName, op.Requestor(), logger.Ctx{"type": volumeTypeName})) + s.Events.SendLifecycle(effectiveProjectName, lifecycle.StorageVolumeBackupCreated.Event(details.pool.Name(), details.volumeTypeName, args.Name, effectiveProjectName, op.Requestor(), logger.Ctx{"type": details.volumeTypeName})) return nil } resources := map[string][]api.URL{} - resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName)} - resources["backups"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName, "backups", req.Name)} + resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName)} + resources["backups"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName, "backups", req.Name)} op, err := operations.OperationCreate(s, requestProjectName, operations.OperationClassTask, operationtype.CustomVolumeBackupCreate, resources, nil, backup, nil, nil, r) if err != nil { @@ -514,20 +447,7 @@ func storagePoolVolumeTypeCustomBackupsPost(d *Daemon, r *http.Request) response func storagePoolVolumeTypeCustomBackupGet(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the volume type. 
- volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -538,19 +458,12 @@ func storagePoolVolumeTypeCustomBackupGet(d *Daemon, r *http.Request) response.R return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if volumeType != cluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if details.volumeType != cluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, cluster.StoragePoolVolumeTypeCustom) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -560,14 +473,14 @@ func storagePoolVolumeTypeCustomBackupGet(d *Daemon, r *http.Request) response.R return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, cluster.StoragePoolVolumeTypeCustom) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } - fullName := volumeName + shared.SnapshotDelimiter + backupName + fullName := details.volumeName + shared.SnapshotDelimiter + backupName - backup, err := storagePoolVolumeBackupLoadByName(s, projectName, poolName, fullName) + backup, err := storagePoolVolumeBackupLoadByName(s, effectiveProjectName, details.pool.Name(), fullName) if err != nil { return response.SmartError(err) } @@ -615,20 +528,7 @@ func 
storagePoolVolumeTypeCustomBackupGet(d *Daemon, r *http.Request) response.R func storagePoolVolumeTypeCustomBackupPost(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the volume type. - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -639,19 +539,13 @@ func storagePoolVolumeTypeCustomBackupPost(d *Daemon, r *http.Request) response. return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if volumeType != cluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if details.volumeType != cluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, cluster.StoragePoolVolumeTypeCustom) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -661,7 +555,7 @@ func storagePoolVolumeTypeCustomBackupPost(d *Daemon, r *http.Request) response. 
return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, cluster.StoragePoolVolumeTypeCustom) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } @@ -677,14 +571,14 @@ func storagePoolVolumeTypeCustomBackupPost(d *Daemon, r *http.Request) response. return response.BadRequest(fmt.Errorf("Backup names may not contain slashes")) } - oldName := volumeName + shared.SnapshotDelimiter + backupName + oldName := details.volumeName + shared.SnapshotDelimiter + backupName - backup, err := storagePoolVolumeBackupLoadByName(s, projectName, poolName, oldName) + backup, err := storagePoolVolumeBackupLoadByName(s, effectiveProjectName, details.pool.Name(), oldName) if err != nil { return response.SmartError(err) } - newName := volumeName + shared.SnapshotDelimiter + req.Name + newName := details.volumeName + shared.SnapshotDelimiter + req.Name rename := func(op *operations.Operation) error { err := backup.Rename(newName) @@ -692,14 +586,14 @@ func storagePoolVolumeTypeCustomBackupPost(d *Daemon, r *http.Request) response. 
return err } - s.Events.SendLifecycle(projectName, lifecycle.StorageVolumeBackupRenamed.Event(poolName, volumeTypeName, newName, projectName, op.Requestor(), logger.Ctx{"old_name": oldName})) + s.Events.SendLifecycle(effectiveProjectName, lifecycle.StorageVolumeBackupRenamed.Event(details.pool.Name(), details.volumeTypeName, newName, effectiveProjectName, op.Requestor(), logger.Ctx{"old_name": oldName})) return nil } resources := map[string][]api.URL{} - resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName)} - resources["backups"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName, "backups", oldName)} + resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName)} + resources["backups"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName, "backups", oldName)} op, err := operations.OperationCreate(s, requestProjectName, operations.OperationClassTask, operationtype.CustomVolumeBackupRename, resources, nil, rename, nil, nil, r) if err != nil { @@ -743,20 +637,7 @@ func storagePoolVolumeTypeCustomBackupPost(d *Daemon, r *http.Request) response. func storagePoolVolumeTypeCustomBackupDelete(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the volume type. 
- volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -767,19 +648,13 @@ func storagePoolVolumeTypeCustomBackupDelete(d *Daemon, r *http.Request) respons return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if volumeType != cluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if details.volumeType != cluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, cluster.StoragePoolVolumeTypeCustom) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -789,14 +664,14 @@ func storagePoolVolumeTypeCustomBackupDelete(d *Daemon, r *http.Request) respons return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, cluster.StoragePoolVolumeTypeCustom) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } - fullName := volumeName + shared.SnapshotDelimiter + backupName + fullName := details.volumeName + shared.SnapshotDelimiter + backupName - backup, err := storagePoolVolumeBackupLoadByName(s, projectName, poolName, fullName) + backup, err := storagePoolVolumeBackupLoadByName(s, effectiveProjectName, details.pool.Name(), fullName) if err != nil { return response.SmartError(err) } @@ -807,14 +682,14 @@ func 
storagePoolVolumeTypeCustomBackupDelete(d *Daemon, r *http.Request) respons return err } - s.Events.SendLifecycle(projectName, lifecycle.StorageVolumeBackupDeleted.Event(poolName, volumeTypeName, fullName, projectName, op.Requestor(), nil)) + s.Events.SendLifecycle(effectiveProjectName, lifecycle.StorageVolumeBackupDeleted.Event(details.pool.Name(), details.volumeTypeName, fullName, effectiveProjectName, op.Requestor(), nil)) return nil } resources := map[string][]api.URL{} - resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName)} - resources["backups"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName, "backups", backupName)} + resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName)} + resources["backups"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName, "backups", backupName)} op, err := operations.OperationCreate(s, requestProjectName, operations.OperationClassTask, operationtype.CustomVolumeBackupRemove, resources, nil, remove, nil, nil, r) if err != nil { @@ -854,20 +729,7 @@ func storagePoolVolumeTypeCustomBackupDelete(d *Daemon, r *http.Request) respons func storagePoolVolumeTypeCustomBackupExportGet(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the volume type. 
- volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -878,19 +740,12 @@ func storagePoolVolumeTypeCustomBackupExportGet(d *Daemon, r *http.Request) resp return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if volumeType != cluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if details.volumeType != cluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, cluster.StoragePoolVolumeTypeCustom) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -900,24 +755,24 @@ func storagePoolVolumeTypeCustomBackupExportGet(d *Daemon, r *http.Request) resp return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, cluster.StoragePoolVolumeTypeCustom) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } - fullName := volumeName + shared.SnapshotDelimiter + backupName + fullName := details.volumeName + shared.SnapshotDelimiter + backupName // Ensure the volume exists - _, err = storagePoolVolumeBackupLoadByName(s, projectName, poolName, fullName) + _, err = storagePoolVolumeBackupLoadByName(s, effectiveProjectName, details.pool.Name(), fullName) if err != nil { return response.SmartError(err) } ent := response.FileResponseEntry{ - Path: 
shared.VarPath("backups", "custom", poolName, project.StorageVolume(projectName, fullName)), + Path: shared.VarPath("backups", "custom", details.pool.Name(), project.StorageVolume(effectiveProjectName, fullName)), } - s.Events.SendLifecycle(projectName, lifecycle.StorageVolumeBackupRetrieved.Event(poolName, volumeTypeName, fullName, projectName, request.CreateRequestor(r), nil)) + s.Events.SendLifecycle(effectiveProjectName, lifecycle.StorageVolumeBackupRetrieved.Event(details.pool.Name(), details.volumeTypeName, fullName, effectiveProjectName, request.CreateRequestor(r), nil)) return response.FileResponse(r, []response.FileResponseEntry{ent}, nil) } From 80ccc3a2b23e3a5c1dea51cba6dd99a940e0f2b8 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:42:32 +0100 Subject: [PATCH 076/106] lxd: Refactor volume snapshot handlers to use volume access handler. Signed-off-by: Mark Laing --- lxd/storage_volumes_snapshot.go | 328 +++++++------------------------- 1 file changed, 71 insertions(+), 257 deletions(-) diff --git a/lxd/storage_volumes_snapshot.go b/lxd/storage_volumes_snapshot.go index 4bd14f4edd54..292e0daadd65 100644 --- a/lxd/storage_volumes_snapshot.go +++ b/lxd/storage_volumes_snapshot.go @@ -29,7 +29,6 @@ import ( "github.com/canonical/lxd/lxd/util" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" - "github.com/canonical/lxd/shared/entity" "github.com/canonical/lxd/shared/logger" "github.com/canonical/lxd/shared/version" ) @@ -37,18 +36,18 @@ import ( var storagePoolVolumeSnapshotsTypeCmd = APIEndpoint{ Path: "storage-pools/{poolName}/volumes/{type}/{volumeName}/snapshots", - Get: APIEndpointAction{Handler: storagePoolVolumeSnapshotsTypeGet, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanView, "poolName", "type", "volumeName")}, - Post: APIEndpointAction{Handler: storagePoolVolumeSnapshotsTypePost, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanManageSnapshots, 
"poolName", "type", "volumeName")}, + Get: APIEndpointAction{Handler: storagePoolVolumeSnapshotsTypeGet, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanView)}, + Post: APIEndpointAction{Handler: storagePoolVolumeSnapshotsTypePost, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanManageSnapshots)}, } var storagePoolVolumeSnapshotTypeCmd = APIEndpoint{ Path: "storage-pools/{poolName}/volumes/{type}/{volumeName}/snapshots/{snapshotName}", - Delete: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypeDelete, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanManageSnapshots, "poolName", "type", "volumeName")}, - Get: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypeGet, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanView, "poolName", "type", "volumeName")}, - Post: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypePost, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanManageSnapshots, "poolName", "type", "volumeName")}, - Patch: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypePatch, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanManageSnapshots, "poolName", "type", "volumeName")}, - Put: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypePut, AccessHandler: allowPermission(entity.TypeStorageVolume, auth.EntitlementCanManageSnapshots, "poolName", "type", "volumeName")}, + Delete: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypeDelete, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanManageSnapshots)}, + Get: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypeGet, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanView)}, + Post: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypePost, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanManageSnapshots)}, + Patch: APIEndpointAction{Handler: 
storagePoolVolumeSnapshotTypePatch, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanManageSnapshots)}, + Put: APIEndpointAction{Handler: storagePoolVolumeSnapshotTypePut, AccessHandler: storagePoolVolumeTypeAccessHandler(auth.EntitlementCanManageSnapshots)}, } // swagger:operation POST /1.0/storage-pools/{poolName}/volumes/{type}/{volumeName}/snapshots storage storage_pool_volumes_type_snapshots_post @@ -91,44 +90,24 @@ var storagePoolVolumeSnapshotTypeCmd = APIEndpoint{ func storagePoolVolumeSnapshotsTypePost(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the pool. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } - // Get the name of the volume type. - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if volumeType != dbCluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if details.volumeType != dbCluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } - // Get the project name. 
requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - dbProject, err := dbCluster.GetProject(context.Background(), tx.Tx(), projectName) + dbProject, err := dbCluster.GetProject(context.Background(), tx.Tx(), effectiveProjectName) if err != nil { return err } @@ -155,7 +134,7 @@ func storagePoolVolumeSnapshotsTypePost(d *Daemon, r *http.Request) response.Res return resp } - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } @@ -168,7 +147,7 @@ func storagePoolVolumeSnapshotsTypePost(d *Daemon, r *http.Request) response.Res } // Check that this isn't a restricted volume - used, err := storagePools.VolumeUsedByDaemon(s, poolName, volumeName) + used, err := storagePools.VolumeUsedByDaemon(s, details.pool.Name(), details.volumeName) if err != nil { return response.InternalError(err) } @@ -177,17 +156,11 @@ func storagePoolVolumeSnapshotsTypePost(d *Daemon, r *http.Request) response.Res return response.BadRequest(fmt.Errorf("Volumes used by LXD itself cannot have snapshots")) } - // Retrieve the storage pool (and check if the storage pool exists). - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(err) - } - var parentDBVolume *db.StorageVolume var parentVolumeArgs db.StorageVolumeArgs err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { // Get the parent volume so we can get the config. 
- parentDBVolume, err = tx.GetStoragePoolVolume(ctx, pool.ID(), projectName, volumeType, volumeName, true) + parentDBVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, details.volumeName, true) if err != nil { return err } @@ -216,14 +189,14 @@ func storagePoolVolumeSnapshotsTypePost(d *Daemon, r *http.Request) response.Res } // Validate the snapshot name using same rule as pool name. - err = pool.ValidateName(req.Name) + err = details.pool.ValidateName(req.Name) if err != nil { return response.BadRequest(err) } err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { // Ensure that the snapshot doesn't already exist. - snapDBVolume, err := tx.GetStoragePoolVolume(ctx, pool.ID(), projectName, volumeType, fmt.Sprintf("%s/%s", volumeName, req.Name), true) + snapDBVolume, err := tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, fmt.Sprintf("%s/%s", details.volumeName, req.Name), true) if err != nil && !response.IsNotFoundError(err) { return err } else if snapDBVolume != nil { @@ -249,12 +222,12 @@ func storagePoolVolumeSnapshotsTypePost(d *Daemon, r *http.Request) response.Res // Create the snapshot. 
snapshot := func(op *operations.Operation) error { - return pool.CreateCustomVolumeSnapshot(projectName, volumeName, req.Name, expiry, op) + return details.pool.CreateCustomVolumeSnapshot(effectiveProjectName, details.volumeName, req.Name, expiry, op) } resources := map[string][]api.URL{} - resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName)} - resources["storage_volume_snapshots"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName, "snapshots", req.Name)} + resources["storage_volumes"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName)} + resources["storage_volume_snapshots"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName, "snapshots", req.Name)} op, err := operations.OperationCreate(s, requestProjectName, operations.OperationClassTask, operationtype.VolumeSnapshotCreate, resources, nil, snapshot, nil, nil, r) if err != nil { @@ -369,57 +342,30 @@ func storagePoolVolumeSnapshotsTypePost(d *Daemon, r *http.Request) response.Res func storagePoolVolumeSnapshotsTypeGet(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the pool the storage volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } recursion := util.IsRecursionRequest(r) - // Get the name of the volume type. - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the volume type. 
- volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) - if err != nil { - return response.SmartError(err) - } - - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if !shared.ValueInSlice(volumeType, supportedVolumeTypes) { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if !shared.ValueInSlice(details.volumeType, supportedVolumeTypes) { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - var poolID int64 var volumes []db.StorageVolumeArgs err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { var err error - // Retrieve ID of the storage pool (and check if the storage pool exists). - poolID, err = tx.GetStoragePoolID(ctx, poolName) - if err != nil { - return err - } - // Get the names of all storage volume snapshots of a given volume. 
- volumes, err = tx.GetLocalStoragePoolVolumeSnapshotsWithType(ctx, projectName, volumeName, volumeType, poolID) + volumes, err = tx.GetLocalStoragePoolVolumeSnapshotsWithType(ctx, effectiveProjectName, details.volumeName, details.volumeType, details.pool.ID()) if err != nil { return err } @@ -437,18 +383,18 @@ func storagePoolVolumeSnapshotsTypeGet(d *Daemon, r *http.Request) response.Resp _, snapshotName, _ := api.GetParentAndSnapshotName(volume.Name) if !recursion { - resultString = append(resultString, fmt.Sprintf("/%s/storage-pools/%s/volumes/%s/%s/snapshots/%s", version.APIVersion, poolName, volumeTypeName, volumeName, snapshotName)) + resultString = append(resultString, fmt.Sprintf("/%s/storage-pools/%s/volumes/%s/%s/snapshots/%s", version.APIVersion, details.pool.Name(), details.volumeTypeName, details.volumeName, snapshotName)) } else { var vol *db.StorageVolume err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - vol, err = tx.GetStoragePoolVolume(ctx, poolID, projectName, volumeType, volume.Name, true) + vol, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, volume.Name, true) return err }) if err != nil { return response.SmartError(err) } - volumeUsedBy, err := storagePoolVolumeUsedByGet(s, projectName, vol) + volumeUsedBy, err := storagePoolVolumeUsedByGet(s, effectiveProjectName, vol) if err != nil { return response.SmartError(err) } @@ -517,20 +463,7 @@ func storagePoolVolumeSnapshotsTypeGet(d *Daemon, r *http.Request) response.Resp func storagePoolVolumeSnapshotTypePost(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the volume type. 
- volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -541,20 +474,13 @@ func storagePoolVolumeSnapshotTypePost(d *Daemon, r *http.Request) response.Resp return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. - if volumeType != dbCluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if details.volumeType != dbCluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } - // Get the project name. requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -565,12 +491,13 @@ func storagePoolVolumeSnapshotTypePost(d *Daemon, r *http.Request) response.Resp return resp } - fullSnapshotName := fmt.Sprintf("%s/%s", volumeName, snapshotName) - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, volumeName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } + fullSnapshotName := fmt.Sprintf("%s/%s", details.volumeName, snapshotName) + // Parse the request. 
req := api.StorageVolumeSnapshotPost{} err = json.NewDecoder(r.Body).Decode(&req) @@ -591,21 +518,16 @@ func storagePoolVolumeSnapshotTypePost(d *Daemon, r *http.Request) response.Resp Target: req.Target, } - return storagePoolVolumeTypePostMigration(s, r, requestProjectName, projectName, poolName, fullSnapshotName, req) + return storagePoolVolumeTypePostMigration(s, r, requestProjectName, effectiveProjectName, details.pool.Name(), fullSnapshotName, req) } // Rename the snapshot. snapshotRename := func(op *operations.Operation) error { - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return err - } - - return pool.RenameCustomVolumeSnapshot(projectName, fullSnapshotName, req.Name, op) + return details.pool.RenameCustomVolumeSnapshot(effectiveProjectName, fullSnapshotName, req.Name, op) } resources := map[string][]api.URL{} - resources["storage_volume_snapshots"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName, "snapshots", snapshotName)} + resources["storage_volume_snapshots"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName, "snapshots", snapshotName)} op, err := operations.OperationCreate(s, requestProjectName, operations.OperationClassTask, operationtype.VolumeSnapshotRename, resources, nil, snapshotRename, nil, nil, r) if err != nil { @@ -663,21 +585,7 @@ func storagePoolVolumeSnapshotTypePost(d *Daemon, r *http.Request) response.Resp func storagePoolVolumeSnapshotTypeGet(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage pool the volume is supposed to be - // attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the volume type. 
- volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -688,15 +596,7 @@ func storagePoolVolumeSnapshotTypeGet(d *Daemon, r *http.Request) response.Respo return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - - // Get the project name. - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -707,24 +607,18 @@ func storagePoolVolumeSnapshotTypeGet(d *Daemon, r *http.Request) response.Respo return resp } - fullSnapshotName := fmt.Sprintf("%s/%s", volumeName, snapshotName) - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, fullSnapshotName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } - var poolID int64 + fullSnapshotName := fmt.Sprintf("%s/%s", details.volumeName, snapshotName) + var dbVolume *db.StorageVolume var expiry time.Time err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - // Get the snapshot. 
- poolID, _, _, err = tx.GetStoragePool(ctx, poolName) - if err != nil { - return err - } - - dbVolume, err = tx.GetStoragePoolVolume(ctx, poolID, projectName, volumeType, fullSnapshotName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, fullSnapshotName, true) if err != nil { return err } @@ -794,21 +688,7 @@ func storagePoolVolumeSnapshotTypeGet(d *Daemon, r *http.Request) response.Respo func storagePoolVolumeSnapshotTypePut(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage pool the volume is supposed to be - // attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the volume type. - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -819,15 +699,7 @@ func storagePoolVolumeSnapshotTypePut(d *Daemon, r *http.Request) response.Respo return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - - // Get the project name. 
- requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -838,24 +710,18 @@ func storagePoolVolumeSnapshotTypePut(d *Daemon, r *http.Request) response.Respo return resp } - fullSnapshotName := fmt.Sprintf("%s/%s", volumeName, snapshotName) - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, fullSnapshotName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } - var poolID int64 + fullSnapshotName := fmt.Sprintf("%s/%s", details.volumeName, snapshotName) + var dbVolume *db.StorageVolume var expiry time.Time err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - // Get the snapshot. - poolID, _, _, err = tx.GetStoragePool(ctx, poolName) - if err != nil { - return err - } - - dbVolume, err = tx.GetStoragePoolVolume(ctx, poolID, projectName, volumeType, fullSnapshotName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, fullSnapshotName, true) if err != nil { return err } @@ -885,7 +751,7 @@ func storagePoolVolumeSnapshotTypePut(d *Daemon, r *http.Request) response.Respo return response.BadRequest(err) } - return doStoragePoolVolumeSnapshotUpdate(s, r, poolName, projectName, dbVolume.Name, volumeType, req) + return doStoragePoolVolumeSnapshotUpdate(s, r, effectiveProjectName, dbVolume.Name, details.volumeType, req) } // swagger:operation PATCH /1.0/storage-pools/{poolName}/volumes/{type}/{volumeName}/snapshots/{snapshotName} storage storage_pool_volumes_type_snapshot_patch @@ -930,21 +796,7 @@ func storagePoolVolumeSnapshotTypePut(d *Daemon, r *http.Request) response.Respo func storagePoolVolumeSnapshotTypePatch(d *Daemon, r *http.Request) response.Response { s := 
d.State() - // Get the name of the storage pool the volume is supposed to be - // attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the volume type. - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -955,15 +807,7 @@ func storagePoolVolumeSnapshotTypePatch(d *Daemon, r *http.Request) response.Res return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - - // Get the project name. - requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -974,24 +818,18 @@ func storagePoolVolumeSnapshotTypePatch(d *Daemon, r *http.Request) response.Res return resp } - fullSnapshotName := fmt.Sprintf("%s/%s", volumeName, snapshotName) - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, fullSnapshotName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } - var poolID int64 + fullSnapshotName := fmt.Sprintf("%s/%s", details.volumeName, snapshotName) + var dbVolume *db.StorageVolume var expiry time.Time err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - // Get the snapshot. 
- poolID, _, _, err = tx.GetStoragePool(ctx, poolName) - if err != nil { - return err - } - - dbVolume, err = tx.GetStoragePoolVolume(ctx, poolID, projectName, volumeType, fullSnapshotName, true) + dbVolume, err = tx.GetStoragePoolVolume(ctx, details.pool.ID(), effectiveProjectName, details.volumeType, fullSnapshotName, true) if err != nil { return err } @@ -1024,16 +862,16 @@ func storagePoolVolumeSnapshotTypePatch(d *Daemon, r *http.Request) response.Res return response.BadRequest(err) } - return doStoragePoolVolumeSnapshotUpdate(s, r, poolName, projectName, dbVolume.Name, volumeType, req) + return doStoragePoolVolumeSnapshotUpdate(s, r, effectiveProjectName, dbVolume.Name, details.volumeType, req) } -func doStoragePoolVolumeSnapshotUpdate(s *state.State, r *http.Request, poolName string, projectName string, volName string, volumeType int, req api.StorageVolumeSnapshotPut) response.Response { +func doStoragePoolVolumeSnapshotUpdate(s *state.State, r *http.Request, projectName string, volName string, volumeType int, req api.StorageVolumeSnapshotPut) response.Response { expiry := time.Time{} if req.ExpiresAt != nil { expiry = *req.ExpiresAt } - pool, err := storagePools.LoadByName(s, poolName) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -1044,7 +882,7 @@ func doStoragePoolVolumeSnapshotUpdate(s *state.State, r *http.Request, poolName // Update the database. 
if volumeType == dbCluster.StoragePoolVolumeTypeCustom { - err = pool.UpdateCustomVolumeSnapshot(projectName, volName, req.Description, nil, expiry, op) + err = details.pool.UpdateCustomVolumeSnapshot(projectName, volName, req.Description, nil, expiry, op) if err != nil { return response.SmartError(err) } @@ -1054,7 +892,7 @@ func doStoragePoolVolumeSnapshotUpdate(s *state.State, r *http.Request, poolName return response.SmartError(err) } - err = pool.UpdateInstanceSnapshot(inst, req.Description, nil, op) + err = details.pool.UpdateInstanceSnapshot(inst, req.Description, nil, op) if err != nil { return response.SmartError(err) } @@ -1097,20 +935,7 @@ func doStoragePoolVolumeSnapshotUpdate(s *state.State, r *http.Request, poolName func storagePoolVolumeSnapshotTypeDelete(d *Daemon, r *http.Request) response.Response { s := d.State() - // Get the name of the storage pool the volume is supposed to be attached to. - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the volume type. - volumeTypeName, err := url.PathUnescape(mux.Vars(r)["type"]) - if err != nil { - return response.SmartError(err) - } - - // Get the name of the storage volume. - volumeName, err := url.PathUnescape(mux.Vars(r)["volumeName"]) + details, err := request.GetCtxValue[storageVolumeDetails](r.Context(), ctxStorageVolumeDetails) if err != nil { return response.SmartError(err) } @@ -1121,20 +946,13 @@ func storagePoolVolumeSnapshotTypeDelete(d *Daemon, r *http.Request) response.Re return response.SmartError(err) } - // Convert the volume type name to our internal integer representation. - volumeType, err := storagePools.VolumeTypeNameToDBType(volumeTypeName) - if err != nil { - return response.BadRequest(err) - } - // Check that the storage volume type is valid. 
- if volumeType != dbCluster.StoragePoolVolumeTypeCustom { - return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", volumeTypeName)) + if details.volumeType != dbCluster.StoragePoolVolumeTypeCustom { + return response.BadRequest(fmt.Errorf("Invalid storage volume type %q", details.volumeTypeName)) } - // Get the project name. requestProjectName := request.ProjectParam(r) - projectName, err := project.StorageVolumeProject(s.DB.Cluster, requestProjectName, volumeType) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } @@ -1145,23 +963,19 @@ func storagePoolVolumeSnapshotTypeDelete(d *Daemon, r *http.Request) response.Re return resp } - fullSnapshotName := fmt.Sprintf("%s/%s", volumeName, snapshotName) - resp = forwardedResponseIfVolumeIsRemote(s, r, poolName, projectName, fullSnapshotName, volumeType) + resp = forwardedResponseIfVolumeIsRemote(s, r) if resp != nil { return resp } - snapshotDelete := func(op *operations.Operation) error { - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return err - } + fullSnapshotName := fmt.Sprintf("%s/%s", details.volumeName, snapshotName) - return pool.DeleteCustomVolumeSnapshot(projectName, fullSnapshotName, op) + snapshotDelete := func(op *operations.Operation) error { + return details.pool.DeleteCustomVolumeSnapshot(effectiveProjectName, fullSnapshotName, op) } resources := map[string][]api.URL{} - resources["storage_volume_snapshots"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", poolName, "volumes", volumeTypeName, volumeName, "snapshots", snapshotName)} + resources["storage_volume_snapshots"] = []api.URL{*api.NewURL().Path(version.APIVersion, "storage-pools", details.pool.Name(), "volumes", details.volumeTypeName, details.volumeName, "snapshots", snapshotName)} op, err := operations.OperationCreate(s, requestProjectName, operations.OperationClassTask, 
operationtype.VolumeSnapshotDelete, resources, nil, snapshotDelete, nil, nil, r) if err != nil { From 6d291949c60353adab12d29301fb717ef6356664 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 25 Jul 2024 17:05:58 +0100 Subject: [PATCH 077/106] lxd: Add function to add storage volume details to context. Signed-off-by: Mark Laing --- lxd/storage_buckets.go | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/lxd/storage_buckets.go b/lxd/storage_buckets.go index 19c10ca0f46f..3528d6402323 100644 --- a/lxd/storage_buckets.go +++ b/lxd/storage_buckets.go @@ -1147,3 +1147,57 @@ func storagePoolBucketKeyPut(d *Daemon, r *http.Request) response.Response { return response.EmptySyncResponse } + +// ctxStorageBucketDetails is the request.CtxKey corresponding to storageBucketDetails, which is added to the request +// context in addStorageBucketDetailsToContext. +const ctxStorageBucketDetails request.CtxKey = "storage-bucket-details" + +// storageBucketDetails contains details common to all storage bucket requests. A value of this type is added to the +// request context when addStorageBucketDetailsToContext is called. We do this to avoid repeated logic when +// parsing the request details and/or making database calls to get the storage pool or effective project. These fields +// are required for the storage bucket access check, and are subsequently available in the storage bucket handlers. +type storageBucketDetails struct { + bucketName string + pool storagePools.Pool +} + +// addStorageBucketDetailsToContext extracts storageBucketDetails from the http.Request and adds it to the +// request context with the ctxStorageBucketDetails request.CtxKey. Additionally, the effective project of the storage +// bucket is added to the request context under request.CtxEffectiveProjectName.
+func addStorageBucketDetailsToContext(d *Daemon, r *http.Request) error { + var details storageBucketDetails + defer func() { + request.SetCtxValue(r, ctxStorageBucketDetails, details) + }() + + s := d.State() + + projectName := request.ProjectParam(r) + + effectiveProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, projectName) + if err != nil { + return err + } + + request.SetCtxValue(r, request.CtxEffectiveProjectName, effectiveProjectName) + + poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + if err != nil { + return err + } + + pool, err := storagePools.LoadByName(s, poolName) + if err != nil { + return fmt.Errorf("Failed loading storage pool: %w", err) + } + + details.pool = pool + + bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) + if err != nil { + return err + } + + details.bucketName = bucketName + return nil +} From 40b2fd0f10acf979e86d556f395014da8f095e01 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 25 Jul 2024 17:06:17 +0100 Subject: [PATCH 078/106] lxd: Add storage bucket specific access handler. Signed-off-by: Mark Laing --- lxd/storage_buckets.go | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/lxd/storage_buckets.go b/lxd/storage_buckets.go index 3528d6402323..0f64df6c873d 100644 --- a/lxd/storage_buckets.go +++ b/lxd/storage_buckets.go @@ -55,6 +55,40 @@ var storagePoolBucketKeyCmd = APIEndpoint{ Put: APIEndpointAction{Handler: storagePoolBucketKeyPut, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanEdit, "poolName", "bucketName")}, } +// storageBucketAccessHandler returns an access handler that checks for the given entitlement against a storage bucket. +// The storage pool containing the bucket and the effective project of the bucket are added to the request context for +// later use. 
+func storageBucketAccessHandler(entitlement auth.Entitlement) func(d *Daemon, r *http.Request) response.Response { + return func(d *Daemon, r *http.Request) response.Response { + s := d.State() + + err := addStorageBucketDetailsToContext(d, r) + if err != nil { + return response.SmartError(err) + } + + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) + if err != nil { + return response.SmartError(err) + } + + // If the storage pool is a remote driver, the auth subsystem does not require a target parameter to create a + // unique URL for the storage bucket. So even if the caller supplied a target parameter, we don't use it in the + // access check if the pool is remote. + target := "" + if !details.pool.Driver().Info().Remote { + target = request.QueryParam(r, "target") + } + + err = s.Authorizer.CheckPermission(r.Context(), entity.StorageBucketURL(request.ProjectParam(r), target, details.pool.Name(), details.bucketName), entitlement) + if err != nil { + return response.SmartError(err) + } + + return response.EmptySyncResponse + } +} + // API endpoints // swagger:operation GET /1.0/storage-pools/{poolName}/buckets storage storage_pool_buckets_get From 17db769f6c4edce7b2ae480b9873b25f9a9c0204 Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:47:53 +0100 Subject: [PATCH 079/106] lxd: Update storage bucket endpoint actions to use the bucket access handler.
Signed-off-by: Mark Laing --- lxd/storage_buckets.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lxd/storage_buckets.go b/lxd/storage_buckets.go index 0f64df6c873d..8fe9fb6ecd8f 100644 --- a/lxd/storage_buckets.go +++ b/lxd/storage_buckets.go @@ -34,25 +34,25 @@ var storagePoolBucketsCmd = APIEndpoint{ var storagePoolBucketCmd = APIEndpoint{ Path: "storage-pools/{poolName}/buckets/{bucketName}", - Delete: APIEndpointAction{Handler: storagePoolBucketDelete, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanDelete, "poolName", "bucketName")}, - Get: APIEndpointAction{Handler: storagePoolBucketGet, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanView, "poolName", "bucketName")}, - Patch: APIEndpointAction{Handler: storagePoolBucketPut, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanEdit, "poolName", "bucketName")}, - Put: APIEndpointAction{Handler: storagePoolBucketPut, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanEdit, "poolName", "bucketName")}, + Delete: APIEndpointAction{Handler: storagePoolBucketDelete, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanDelete)}, + Get: APIEndpointAction{Handler: storagePoolBucketGet, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanView)}, + Patch: APIEndpointAction{Handler: storagePoolBucketPut, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanEdit)}, + Put: APIEndpointAction{Handler: storagePoolBucketPut, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanEdit)}, } var storagePoolBucketKeysCmd = APIEndpoint{ Path: "storage-pools/{poolName}/buckets/{bucketName}/keys", - Get: APIEndpointAction{Handler: storagePoolBucketKeysGet, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanView, "poolName", "bucketName")}, - Post: APIEndpointAction{Handler: storagePoolBucketKeysPost, AccessHandler: 
allowPermission(entity.TypeStorageBucket, auth.EntitlementCanEdit, "poolName", "bucketName")}, + Get: APIEndpointAction{Handler: storagePoolBucketKeysGet, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanView)}, + Post: APIEndpointAction{Handler: storagePoolBucketKeysPost, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanEdit)}, } var storagePoolBucketKeyCmd = APIEndpoint{ Path: "storage-pools/{poolName}/buckets/{bucketName}/keys/{keyName}", - Delete: APIEndpointAction{Handler: storagePoolBucketKeyDelete, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanEdit, "poolName", "bucketName")}, - Get: APIEndpointAction{Handler: storagePoolBucketKeyGet, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanView, "poolName", "bucketName")}, - Put: APIEndpointAction{Handler: storagePoolBucketKeyPut, AccessHandler: allowPermission(entity.TypeStorageBucket, auth.EntitlementCanEdit, "poolName", "bucketName")}, + Delete: APIEndpointAction{Handler: storagePoolBucketKeyDelete, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanEdit)}, + Get: APIEndpointAction{Handler: storagePoolBucketKeyGet, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanView)}, + Put: APIEndpointAction{Handler: storagePoolBucketKeyPut, AccessHandler: storageBucketAccessHandler(auth.EntitlementCanEdit)}, } // storageBucketAccessHandler returns an access handler that checks for the given entitlement against a storage bucket. From 69a47b87debdf5bd13809f6ca52428c24042576e Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Thu, 30 May 2024 14:48:16 +0100 Subject: [PATCH 080/106] lxd: Refactor storage bucket handlers to use values from context. 
Signed-off-by: Mark Laing --- lxd/storage_buckets.go | 154 ++++++++++------------------------------- 1 file changed, 37 insertions(+), 117 deletions(-) diff --git a/lxd/storage_buckets.go b/lxd/storage_buckets.go index 8fe9fb6ecd8f..53f74c43dead 100644 --- a/lxd/storage_buckets.go +++ b/lxd/storage_buckets.go @@ -238,7 +238,7 @@ func storagePoolBucketsGet(d *Daemon, r *http.Request) response.Response { var filteredDBBuckets []*db.StorageBucket for _, bucket := range dbBuckets { - if !userHasPermission(entity.StorageBucketURL(requestProjectName, "", poolName, bucket.Name)) { + if !userHasPermission(entity.StorageBucketURL(requestProjectName, bucket.Location, poolName, bucket.Name)) { continue } @@ -323,43 +323,33 @@ func storagePoolBucketGet(d *Daemon, r *http.Request) response.Response { return resp } - bucketProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, request.ProjectParam(r)) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) if err != nil { return response.SmartError(err) } - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(fmt.Errorf("Failed loading storage pool: %w", err)) - } - - if !pool.Driver().Info().Buckets { + if !details.pool.Driver().Info().Buckets { return response.BadRequest(fmt.Errorf("Storage pool does not support buckets")) } - bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) - if err != nil { - return response.SmartError(err) - } - targetMember := request.QueryParam(r, "target") memberSpecific := targetMember != "" var bucket *db.StorageBucket err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - bucket, err = tx.GetStoragePoolBucket(ctx, pool.ID(), 
bucketProjectName, memberSpecific, bucketName) + bucket, err = tx.GetStoragePoolBucket(ctx, details.pool.ID(), effectiveProjectName, memberSpecific, details.bucketName) return err }) if err != nil { return response.SmartError(err) } - u := pool.GetBucketURL(bucket.Name) + u := details.pool.GetBucketURL(bucket.Name) if u != nil { bucket.S3URL = u.String() } @@ -548,22 +538,12 @@ func storagePoolBucketPut(d *Daemon, r *http.Request) response.Response { return resp } - bucketProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, request.ProjectParam(r)) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(fmt.Errorf("Failed loading storage pool: %w", err)) - } - - bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) if err != nil { return response.SmartError(err) } @@ -581,7 +561,7 @@ func storagePoolBucketPut(d *Daemon, r *http.Request) response.Response { var bucket *db.StorageBucket err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - bucket, err = tx.GetStoragePoolBucket(ctx, pool.ID(), bucketProjectName, memberSpecific, bucketName) + bucket, err = tx.GetStoragePoolBucket(ctx, details.pool.ID(), effectiveProjectName, memberSpecific, details.bucketName) return err }) if err != nil { @@ -598,12 +578,12 @@ func storagePoolBucketPut(d *Daemon, r *http.Request) response.Response { } } - err = pool.UpdateBucket(bucketProjectName, bucketName, req, nil) + err = details.pool.UpdateBucket(effectiveProjectName, details.bucketName, req, nil) if err != nil { return 
response.SmartError(fmt.Errorf("Failed updating storage bucket: %w", err)) } - s.Events.SendLifecycle(bucketProjectName, lifecycle.StorageBucketUpdated.Event(pool, bucketProjectName, bucketName, request.CreateRequestor(r), nil)) + s.Events.SendLifecycle(effectiveProjectName, lifecycle.StorageBucketUpdated.Event(details.pool, effectiveProjectName, details.bucketName, request.CreateRequestor(r), nil)) return response.EmptySyncResponse } @@ -645,32 +625,22 @@ func storagePoolBucketDelete(d *Daemon, r *http.Request) response.Response { return resp } - bucketProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, request.ProjectParam(r)) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) - if err != nil { - return response.SmartError(err) - } - - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(fmt.Errorf("Failed loading storage pool: %w", err)) - } - - bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) if err != nil { return response.SmartError(err) } - err = pool.DeleteBucket(bucketProjectName, bucketName, nil) + err = details.pool.DeleteBucket(effectiveProjectName, details.bucketName, nil) if err != nil { return response.SmartError(fmt.Errorf("Failed deleting storage bucket: %w", err)) } - s.Events.SendLifecycle(bucketProjectName, lifecycle.StorageBucketDeleted.Event(pool, bucketProjectName, bucketName, request.CreateRequestor(r), nil)) + s.Events.SendLifecycle(effectiveProjectName, lifecycle.StorageBucketDeleted.Event(details.pool, effectiveProjectName, details.bucketName, request.CreateRequestor(r), nil)) return response.EmptySyncResponse } @@ -777,31 +747,21 @@ func storagePoolBucketKeysGet(d *Daemon, r *http.Request) 
response.Response { return resp } - bucketProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, request.ProjectParam(r)) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) if err != nil { return response.SmartError(err) } - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(fmt.Errorf("Failed loading storage pool: %w", err)) - } - - driverInfo := pool.Driver().Info() + driverInfo := details.pool.Driver().Info() if !driverInfo.Buckets { return response.BadRequest(fmt.Errorf("Storage pool driver %q does not support buckets", driverInfo.Name)) } - bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) - if err != nil { - return response.SmartError(err) - } - // If target is set, get buckets only for this cluster members. 
targetMember := request.QueryParam(r, "target") memberSpecific := targetMember != "" @@ -809,7 +769,7 @@ func storagePoolBucketKeysGet(d *Daemon, r *http.Request) response.Response { var dbBucket *db.StorageBucket var dbBucketKeys []*db.StorageBucketKey err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - dbBucket, err = tx.GetStoragePoolBucket(ctx, pool.ID(), bucketProjectName, memberSpecific, bucketName) + dbBucket, err = tx.GetStoragePoolBucket(ctx, details.pool.ID(), effectiveProjectName, memberSpecific, details.bucketName) if err != nil { return fmt.Errorf("Failed loading storage bucket: %w", err) } @@ -836,7 +796,7 @@ func storagePoolBucketKeysGet(d *Daemon, r *http.Request) response.Response { bucketKeyURLs := make([]string, 0, len(dbBucketKeys)) for _, dbBucketKey := range dbBucketKeys { - bucketKeyURLs = append(bucketKeyURLs, dbBucketKey.URL(version.APIVersion, poolName, bucketProjectName, bucketName).String()) + bucketKeyURLs = append(bucketKeyURLs, dbBucketKey.URL(version.APIVersion, details.pool.Name(), effectiveProjectName, details.bucketName).String()) } return response.SyncResponse(true, bucketKeyURLs) @@ -882,17 +842,12 @@ func storagePoolBucketKeysPost(d *Daemon, r *http.Request) response.Response { return resp } - bucketProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, request.ProjectParam(r)) - if err != nil { - return response.SmartError(err) - } - - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) if err != nil { return response.SmartError(err) } @@ -904,18 +859,13 @@ func storagePoolBucketKeysPost(d *Daemon, r *http.Request) response.Response { return 
response.BadRequest(err) } - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(fmt.Errorf("Failed loading storage pool: %w", err)) - } - - key, err := pool.CreateBucketKey(bucketProjectName, bucketName, req, nil) + key, err := details.pool.CreateBucketKey(effectiveProjectName, details.bucketName, req, nil) if err != nil { return response.SmartError(fmt.Errorf("Failed creating storage bucket key: %w", err)) } - lc := lifecycle.StorageBucketKeyCreated.Event(pool, bucketProjectName, pool.Name(), req.Name, request.CreateRequestor(r), nil) - s.Events.SendLifecycle(bucketProjectName, lc) + lc := lifecycle.StorageBucketKeyCreated.Event(details.pool, effectiveProjectName, details.pool.Name(), req.Name, request.CreateRequestor(r), nil) + s.Events.SendLifecycle(effectiveProjectName, lc) return response.SyncResponseLocation(true, key, lc.Source) } @@ -957,22 +907,12 @@ func storagePoolBucketKeyDelete(d *Daemon, r *http.Request) response.Response { return resp } - bucketProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, request.ProjectParam(r)) - if err != nil { - return response.SmartError(err) - } - - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(fmt.Errorf("Failed loading storage pool: %w", err)) - } - - bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) if err != nil { return response.SmartError(err) } @@ -982,12 +922,12 @@ func storagePoolBucketKeyDelete(d *Daemon, r *http.Request) response.Response { return response.SmartError(err) } - err = pool.DeleteBucketKey(bucketProjectName, bucketName, keyName, nil) + err = 
details.pool.DeleteBucketKey(effectiveProjectName, details.bucketName, keyName, nil) if err != nil { return response.SmartError(fmt.Errorf("Failed deleting storage bucket key: %w", err)) } - s.Events.SendLifecycle(bucketProjectName, lifecycle.StorageBucketKeyDeleted.Event(pool, bucketProjectName, pool.Name(), bucketName, request.CreateRequestor(r), nil)) + s.Events.SendLifecycle(effectiveProjectName, lifecycle.StorageBucketKeyDeleted.Event(details.pool, effectiveProjectName, details.pool.Name(), details.bucketName, request.CreateRequestor(r), nil)) return response.EmptySyncResponse } @@ -1040,30 +980,20 @@ func storagePoolBucketKeyGet(d *Daemon, r *http.Request) response.Response { return resp } - bucketProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, request.ProjectParam(r)) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) if err != nil { return response.SmartError(err) } - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(fmt.Errorf("Failed loading storage pool: %w", err)) - } - - if !pool.Driver().Info().Buckets { + if !details.pool.Driver().Info().Buckets { return response.BadRequest(fmt.Errorf("Storage pool does not support buckets")) } - bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) - if err != nil { - return response.SmartError(err) - } - keyName, err := url.PathUnescape(mux.Vars(r)["keyName"]) if err != nil { return response.SmartError(err) @@ -1074,7 +1004,7 @@ func storagePoolBucketKeyGet(d *Daemon, r *http.Request) response.Response { var bucketKey *db.StorageBucketKey err = s.DB.Cluster.Transaction(r.Context(), func(ctx context.Context, tx *db.ClusterTx) error { - bucket, err := 
tx.GetStoragePoolBucket(ctx, pool.ID(), bucketProjectName, memberSpecific, bucketName) + bucket, err := tx.GetStoragePoolBucket(ctx, details.pool.ID(), effectiveProjectName, memberSpecific, details.bucketName) if err != nil { return err } @@ -1140,22 +1070,12 @@ func storagePoolBucketKeyPut(d *Daemon, r *http.Request) response.Response { return resp } - bucketProjectName, err := project.StorageBucketProject(r.Context(), s.DB.Cluster, request.ProjectParam(r)) - if err != nil { - return response.SmartError(err) - } - - poolName, err := url.PathUnescape(mux.Vars(r)["poolName"]) + effectiveProjectName, err := request.GetCtxValue[string](r.Context(), request.CtxEffectiveProjectName) if err != nil { return response.SmartError(err) } - pool, err := storagePools.LoadByName(s, poolName) - if err != nil { - return response.SmartError(fmt.Errorf("Failed loading storage pool: %w", err)) - } - - bucketName, err := url.PathUnescape(mux.Vars(r)["bucketName"]) + details, err := request.GetCtxValue[storageBucketDetails](r.Context(), ctxStorageBucketDetails) if err != nil { return response.SmartError(err) } @@ -1172,12 +1092,12 @@ func storagePoolBucketKeyPut(d *Daemon, r *http.Request) response.Response { return response.BadRequest(err) } - err = pool.UpdateBucketKey(bucketProjectName, bucketName, keyName, req, nil) + err = details.pool.UpdateBucketKey(effectiveProjectName, details.bucketName, keyName, req, nil) if err != nil { return response.SmartError(fmt.Errorf("Failed updating storage bucket key: %w", err)) } - s.Events.SendLifecycle(bucketProjectName, lifecycle.StorageBucketKeyUpdated.Event(pool, bucketProjectName, pool.Name(), bucketName, request.CreateRequestor(r), nil)) + s.Events.SendLifecycle(effectiveProjectName, lifecycle.StorageBucketKeyUpdated.Event(details.pool, effectiveProjectName, details.pool.Name(), details.bucketName, request.CreateRequestor(r), nil)) return response.EmptySyncResponse } From 279f3aa8760f8c6741d99c3f66bc6a3a0ca4bef8 Mon Sep 17 00:00:00 2001 
From: Mark Laing Date: Thu, 30 May 2024 14:51:43 +0100 Subject: [PATCH 081/106] lxd/cluster: Remove `ConnectIfVolumeIsRemote`. This is no longer used. Signed-off-by: Mark Laing --- lxd/cluster/connect.go | 81 ------------------------------------------ 1 file changed, 81 deletions(-) diff --git a/lxd/cluster/connect.go b/lxd/cluster/connect.go index 795aa63d5696..004dc4bcfdea 100644 --- a/lxd/cluster/connect.go +++ b/lxd/cluster/connect.go @@ -16,7 +16,6 @@ import ( "github.com/canonical/lxd/lxd/instance/instancetype" "github.com/canonical/lxd/lxd/request" "github.com/canonical/lxd/lxd/state" - storagePools "github.com/canonical/lxd/lxd/storage" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/version" @@ -121,86 +120,6 @@ func ConnectIfInstanceIsRemote(s *state.State, projectName string, instName stri return client, nil } -// ConnectIfVolumeIsRemote figures out the address of the cluster member on which the volume with the given name is -// defined. If it's not the local cluster member it will connect to it and return the connected client, otherwise -// it just returns nil. If there is more than one cluster member with a matching volume name, an error is returned. 
-func ConnectIfVolumeIsRemote(s *state.State, poolName string, projectName string, volumeName string, volumeType int, networkCert *shared.CertInfo, serverCert *shared.CertInfo, r *http.Request) (lxd.InstanceServer, error) { - localNodeID := s.DB.Cluster.GetNodeID() - var err error - var nodes []db.NodeInfo - var poolID int64 - err = s.DB.Cluster.Transaction(context.TODO(), func(ctx context.Context, tx *db.ClusterTx) error { - poolID, err = tx.GetStoragePoolID(ctx, poolName) - if err != nil { - return err - } - - nodes, err = tx.GetStorageVolumeNodes(ctx, poolID, projectName, volumeName, volumeType) - if err != nil { - return err - } - - return nil - }) - if err != nil && err != db.ErrNoClusterMember { - return nil, err - } - - // If volume uses a remote storage driver and so has no explicit cluster member, then we need to check - // whether it is exclusively attached to remote instance, and if so then we need to forward the request to - // the node whereit is currently used. This avoids conflicting with another member when using it locally. - if err == db.ErrNoClusterMember { - // GetStoragePoolVolume returns a volume with an empty Location field for remote drivers. - var dbVolume *db.StorageVolume - err = s.DB.Cluster.Transaction(context.TODO(), func(ctx context.Context, tx *db.ClusterTx) error { - dbVolume, err = tx.GetStoragePoolVolume(ctx, poolID, projectName, volumeType, volumeName, true) - return err - }) - if err != nil { - return nil, err - } - - remoteInstance, err := storagePools.VolumeUsedByExclusiveRemoteInstancesWithProfiles(s, poolName, projectName, &dbVolume.StorageVolume) - if err != nil { - return nil, fmt.Errorf("Failed checking if volume %q is available: %w", volumeName, err) - } - - if remoteInstance == nil { - // Volume isn't exclusively attached to an instance. Use local cluster member. 
- return nil, nil - } - - var instNode db.NodeInfo - err = s.DB.Cluster.Transaction(s.ShutdownCtx, func(ctx context.Context, tx *db.ClusterTx) error { - instNode, err = tx.GetNodeByName(ctx, remoteInstance.Node) - return err - }) - if err != nil { - return nil, fmt.Errorf("Failed getting cluster member info for %q: %w", remoteInstance.Node, err) - } - - // Replace node list with instance's cluster member node (which might be local member). - nodes = []db.NodeInfo{instNode} - } - - nodeCount := len(nodes) - if nodeCount > 1 { - return nil, fmt.Errorf("More than one cluster member has a volume named %q. Please target a specific member", volumeName) - } else if nodeCount < 1 { - // Should never get here. - return nil, fmt.Errorf("Volume %q has empty cluster member list", volumeName) - } - - node := nodes[0] - if node.ID == localNodeID { - // Use local cluster member if volume belongs to this local member. - return nil, nil - } - - // Connect to remote cluster member. - return Connect(node.Address, networkCert, serverCert, r, false) -} - // SetupTrust is a convenience around InstanceServer.CreateCertificate that adds the given server certificate to // the trusted pool of the cluster at the given address, using the given password. The certificate is added as // type CertificateTypeServer to allow intra-member communication. 
If a certificate with the same fingerprint From b89cab4d5bae9b77a1481569aa930e312fd425b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20Peliz=C3=A4us?= Date: Fri, 26 Jul 2024 15:57:51 +0200 Subject: [PATCH 082/106] doc: Use consistent naming for remote pool creation examples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Julian Pelizäus --- doc/howto/storage_pools.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/howto/storage_pools.md b/doc/howto/storage_pools.md index 97af5fa1a132..6ecf5596cddd 100644 --- a/doc/howto/storage_pools.md +++ b/doc/howto/storage_pools.md @@ -198,7 +198,7 @@ See the following examples for different storage drivers for instructions on how #### Create a local storage pool -The following series of commands sets up a ZFS storage pool with the name `my-pool` at different locations and with different sizes on three cluster members: +Create a storage pool named `my-pool` using the ZFS driver at different locations and with different sizes on three cluster members: ```{terminal} :input: lxc storage create my-pool zfs source=/dev/sdX size=10GiB --target=vm01 @@ -213,7 +213,7 @@ Storage pool my-pool created #### Create a remote storage pool -The following series of commands sets up a Ceph RBD storage pool with the name `my-remote-pool` and the on-disk name `my-osd` on three cluster members. +Create a storage pool named `my-remote-pool` using the Ceph RBD driver and the on-disk name `my-osd` on three cluster members. 
Because the {config:option}`storage-ceph-pool-conf:ceph.osd.pool_name` configuration setting isn't member-specific, it must be set when creating the actual storage pool: ```{terminal} @@ -227,7 +227,7 @@ Storage pool my-remote-pool pending on member vm03 Storage pool my-remote-pool created ``` -The following commands create a second storage pool `my-remote-pool2` using the Dell PowerFlex driver in SDC mode using the pool `sp1` in protection domain `pd1`: +Create a second storage pool named `my-remote-pool2` using the Dell PowerFlex driver in SDC mode and the pool `sp1` in protection domain `pd1`: ```{terminal} :input: lxc storage create my-remote-pool2 powerflex --target=vm01 From fc3f07f4fdfcab40c9f307e2012566dcb94bdf79 Mon Sep 17 00:00:00 2001 From: Din Music Date: Mon, 29 Jul 2024 08:09:37 +0000 Subject: [PATCH 083/106] doc: Update rhsrvany link and fix code blocks Signed-off-by: Din Music --- doc/howto/import_machines_to_instances.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/howto/import_machines_to_instances.md b/doc/howto/import_machines_to_instances.md index 2f20fee6ee93..75e63ad42d3a 100644 --- a/doc/howto/import_machines_to_instances.md +++ b/doc/howto/import_machines_to_instances.md @@ -44,14 +44,14 @@ The tool can also inject the required VIRTIO drivers into the image: * For converting Windows images from a foreign hypervisor (not from QEMU/KVM with Q35/`virtio-scsi`), you must install additional drivers on the host: * Install the `virtio-win` package or download the [`virtio-win.iso`](https://fedorapeople.org/groups/virt/virtio-win/direct-downloads/stable-virtio/virtio-win.iso) file and place it in the `/usr/share/virtio-win` directory. - * Download [`rhsrvany.exe` and `pnp_wait.exe`](https://github.com/rwmjones/rhsrvany), and place them in the `/usr/share/virt-tools/` directory. 
+ * Download [`rhsrvany.exe` and `pnp_wait.exe`](https://github.com/rwmjones/rhsrvany?tab=readme-ov-file#binary-releases), and place them in the `/usr/share/virt-tools/` directory. ````{tip} If you want to convert a Windows VM from a foreign hypervisor manually, you must install both the required Windows drivers (as described above) and `virt-v2v` (>= 2.3.4).
- Expand to see how to convert your Windows VM using virt-v2v + Expand to see how to convert your Windows VM using virt-v2v Use `virt-v2v` to convert Windows image into `raw` format and include the required drivers. From 688caaf3077b66538a34b947375d92388697f493 Mon Sep 17 00:00:00 2001 From: Ardelean Calin Date: Sun, 28 Jul 2024 13:43:34 +0300 Subject: [PATCH 084/106] lxd/device: added support for extended attributes Signed-off-by: Ardelean Calin --- lxd/device/device_utils_disk.go | 1 + 1 file changed, 1 insertion(+) diff --git a/lxd/device/device_utils_disk.go b/lxd/device/device_utils_disk.go index c9bdd53712d7..68d51cb46e94 100644 --- a/lxd/device/device_utils_disk.go +++ b/lxd/device/device_utils_disk.go @@ -505,6 +505,7 @@ func DiskVMVirtiofsdStart(kernelVersion version.DottedVersion, inst instance.Ins // Start the virtiofsd process in non-daemon mode. args := []string{ "--fd=3", + "--xattr", "-o", fmt.Sprintf("source=%s", sharePath), } From 783bd3b0ad8e310ad243eda0e7031e4363b154dc Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 29 Jul 2024 14:32:46 +0200 Subject: [PATCH 085/106] lxd/seccomp: fix bpf syscall number for arm64 A right bpf() syscall number on arm64 is 280: https://github.com/torvalds/linux/blob/dc1c8034e31b14a2e5e212104ec508aec44ce1b9/tools/lib/bpf/bpf.c#L49 Signed-off-by: Alexander Mikhalitsyn --- lxd/seccomp/seccomp.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/seccomp/seccomp.go b/lxd/seccomp/seccomp.go index 543a755362ab..c0b04054417b 100644 --- a/lxd/seccomp/seccomp.go +++ b/lxd/seccomp/seccomp.go @@ -122,7 +122,7 @@ static const struct lxd_seccomp_data_arch seccomp_notify_syscall_table[] = { { AUDIT_ARCH_I386, 14, 297, 226, 21, 357, 156, 116, 350 }, #endif #ifdef AUDIT_ARCH_AARCH64 - { AUDIT_ARCH_AARCH64, -1, 33, 5, 21, 386, 156, 179, 273 }, + { AUDIT_ARCH_AARCH64, -1, 33, 5, 21, 280, 156, 179, 273 }, #endif #ifdef AUDIT_ARCH_ARM { AUDIT_ARCH_ARM, 14, 324, 226, 21, 386, 156, 116, 379 }, From 
39414f9b6ace426214de183e551aa32d084b806a Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 29 Jul 2024 15:56:37 +0200 Subject: [PATCH 086/106] lxd/include: update bpf header files Our bpf UAPI header files are obsolete, which leads to errors like: level=debug msg="Handling bpf syscall" audit_architecture=3221225655 bpf_attach_type=-22 bpf_cmd=-22 bpf_prog_type=-22 container=t1 project=default seccomp_notify_fd=35 seccomp_notify_flags=0 seccomp_notify_id=8290735664846847091 seccomp_notify_mem_fd=34 seccomp_notify_pid=1942874 syscall_continue=true syscall_handler_error="file too large - Failed to handle bpf syscall" syscall_number=280 Let's update our headers to ones from Linux kernel version 6.10. Signed-off-by: Alexander Mikhalitsyn --- lxd/include/lxd_bpf.h | 4130 ++++++++++++++++++++++++++++++---- lxd/include/lxd_bpf_common.h | 6 +- 2 files changed, 3690 insertions(+), 446 deletions(-) diff --git a/lxd/include/lxd_bpf.h b/lxd/include/lxd_bpf.h index 1884d2091dc9..8cf67ff06988 100644 --- a/lxd/include/lxd_bpf.h +++ b/lxd/include/lxd_bpf.h @@ -5,8 +5,8 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. 
*/ -#ifndef __LINUX_BPF_H__ -#define __LINUX_BPF_H__ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ #include #include "lxd_bpf_common.h" @@ -19,7 +19,9 @@ /* ld/ldx fields */ #define BPF_DW 0x18 /* double word (64-bit) */ -#define BPF_XADD 0xc0 /* exclusive add */ +#define BPF_MEMSX 0x80 /* load with sign extension */ +#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */ +#define BPF_XADD 0xc0 /* exclusive add - legacy name */ /* alu/jmp fields */ #define BPF_MOV 0xb0 /* mov reg to reg */ @@ -40,9 +42,19 @@ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ #define BPF_JSLT 0xc0 /* SLT is signed, '<' */ #define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */ +#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ +#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ + +enum bpf_cond_pseudo_jmp { + BPF_MAY_GOTO = 0, +}; + /* Register numbers */ enum { BPF_REG_0 = 0, @@ -70,18 +82,843 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for + * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for + * the trailing flexible array member) instead. + */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ __u8 data[0]; /* Arbitrary size */ }; +/* Header for bpf_lpm_trie_key structs */ +struct bpf_lpm_trie_key_hdr { + __u32 prefixlen; +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. 
*/ +struct bpf_lpm_trie_key_u8 { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + __u8 data[]; /* Arbitrary size */ +}; + struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ - __u32 attach_type; /* program attach type */ + __u32 attach_type; /* program attach type (enum bpf_attach_type) */ +}; + +enum bpf_cgroup_iter_order { + BPF_CGROUP_ITER_ORDER_UNSPEC = 0, + BPF_CGROUP_ITER_SELF_ONLY, /* process only a single object. */ + BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ + BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ + BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ +}; + +union bpf_iter_link_info { + struct { + __u32 map_fd; + } map; + struct { + enum bpf_cgroup_iter_order order; + + /* At most one of cgroup_fd and cgroup_id can be non-zero. If + * both are zero, the walk starts from the default cgroup v2 + * root. For walking v1 hierarchy, one should always explicitly + * specify cgroup_fd. + */ + __u32 cgroup_fd; + __u64 cgroup_id; + } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; -/* BPF syscall commands, see bpf(2) man-page for details. */ +/* BPF syscall commands, see bpf(2) man-page for more details. */ +/** + * DOC: eBPF Syscall Preamble + * + * The operation to be performed by the **bpf**\ () system call is determined + * by the *cmd* argument. Each operation takes an accompanying argument, + * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see + * below). The size argument is the size of the union pointed to by *attr*. + */ +/** + * DOC: eBPF Syscall Commands + * + * BPF_MAP_CREATE + * Description + * Create a map and return a file descriptor that refers to the + * map. The close-on-exec file descriptor flag (see **fcntl**\ (2)) + * is automatically enabled for the new file descriptor. 
+ * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_MAP_CREATE** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_LOOKUP_ELEM + * Description + * Look up an element with a given *key* in the map referred to + * by the file descriptor *map_fd*. + * + * The *flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_ELEM + * Description + * Create or update an element (key/value pair) in a specified map. + * + * The *flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create a new element or update an existing element. + * **BPF_NOEXIST** + * Create a new element only if it did not exist. + * **BPF_EXIST** + * Update an existing element. + * **BPF_F_LOCK** + * Update a spin_lock-ed map element. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, + * **E2BIG**, **EEXIST**, or **ENOENT**. + * + * **E2BIG** + * The number of elements in the map reached the + * *max_entries* limit specified at map creation time. + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_ELEM + * Description + * Look up and delete an element by key in a specified map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. 
+ * + * BPF_MAP_GET_NEXT_KEY + * Description + * Look up an element by key in a specified map and return the key + * of the next element. Can be used to iterate over all elements + * in the map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * The following cases can be used to iterate over all elements of + * the map: + * + * * If *key* is not found, the operation returns zero and sets + * the *next_key* pointer to the key of the first element. + * * If *key* is found, the operation returns zero and sets the + * *next_key* pointer to the key of the next element. + * * If *key* is the last element, returns -1 and *errno* is set + * to **ENOENT**. + * + * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or + * **EINVAL** on error. + * + * BPF_PROG_LOAD + * Description + * Verify and load an eBPF program, returning a new file + * descriptor associated with the program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES). + * + * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is + * automatically enabled for the new file descriptor. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_PIN + * Description + * Pin an eBPF program or map referred by the specified *bpf_fd* + * to the provided *pathname* on the filesystem. + * + * The *pathname* argument must not contain a dot ("."). + * + * On success, *pathname* retains a reference to the eBPF object, + * preventing deallocation of the object when the original + * *bpf_fd* is closed. This allows the eBPF object to live beyond + * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent + * process. + * + * Applying **unlink**\ (2) or similar calls to the *pathname* + * unpins the object from the filesystem, removing the reference. 
+ * If no other file descriptors or filesystem nodes refer to the + * same object, it will be deallocated (see NOTES). + * + * The filesystem type for the parent directory of *pathname* must + * be **BPF_FS_MAGIC**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_OBJ_GET + * Description + * Open a file descriptor for the eBPF object pinned to the + * specified *pathname*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_PROG_ATTACH + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook. + * + * The *attach_type* specifies the eBPF attachment point to + * attach the program to, and must be one of *bpf_attach_type* + * (see below). + * + * The *attach_bpf_fd* must be a valid file descriptor for a + * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap + * or sock_ops type corresponding to the specified *attach_type*. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_TYPE_SK_SKB**, + * **BPF_PROG_TYPE_SK_MSG** + * + * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**). 
+ * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_DETACH + * Description + * Detach the eBPF program associated with the *target_fd* at the + * hook specified by *attach_type*. The program must have been + * previously attached using **BPF_PROG_ATTACH**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_TEST_RUN + * Description + * Run the eBPF program associated with the *prog_fd* a *repeat* + * number of times against a provided program context *ctx_in* and + * data *data_in*, and return the modified program context + * *ctx_out*, *data_out* (for example, packet data), result of the + * execution *retval*, and *duration* of the test run. + * + * The sizes of the buffers provided as input and output + * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must + * be provided in the corresponding variables *ctx_size_in*, + * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any + * of these parameters are not provided (ie set to NULL), the + * corresponding size field must be zero. + * + * Some program types have particular requirements: + * + * **BPF_PROG_TYPE_SK_LOOKUP** + * *data_in* and *data_out* must be NULL. + * + * **BPF_PROG_TYPE_RAW_TRACEPOINT**, + * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE** + * + * *ctx_out*, *data_in* and *data_out* must be NULL. + * *repeat* must be zero. + * + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * **ENOSPC** + * Either *data_size_out* or *ctx_size_out* is too small. + * **ENOTSUPP** + * This command is not supported by the program type of + * the program referred to by *prog_fd*. + * + * BPF_PROG_GET_NEXT_ID + * Description + * Fetch the next eBPF program currently loaded into the kernel. 
+ * + * Looks for the eBPF program with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF programs + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_GET_NEXT_ID + * Description + * Fetch the next eBPF map currently loaded into the kernel. + * + * Looks for the eBPF map with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF maps + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_PROG_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF program corresponding to + * *prog_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF map corresponding to + * *map_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_GET_INFO_BY_FD + * Description + * Obtain information about the eBPF object corresponding to + * *bpf_fd*. + * + * Populates up to *info_len* bytes of *info*, which will be in + * one of the following formats depending on the eBPF object type + * of *bpf_fd*: + * + * * **struct bpf_prog_info** + * * **struct bpf_map_info** + * * **struct bpf_btf_info** + * * **struct bpf_link_info** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_QUERY + * Description + * Obtain information about eBPF programs associated with the + * specified *attach_type* hook. 
+ * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_QUERY** always fetches the number of programs + * attached and the *attach_flags* which were used to attach those + * programs. Additionally, if *prog_ids* is nonzero and the number + * of attached programs is less than *prog_cnt*, populates + * *prog_ids* with the eBPF program ids of the programs attached + * at *target_fd*. + * + * The following flags may alter the result: + * + * **BPF_F_QUERY_EFFECTIVE** + * Only return information regarding programs which are + * currently effective at the specified *target_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_RAW_TRACEPOINT_OPEN + * Description + * Attach an eBPF program to a tracepoint *name* to access kernel + * internal arguments of the tracepoint in their raw form. + * + * The *prog_fd* must be a valid file descriptor associated with + * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**. + * + * No ABI guarantees are made about the content of tracepoint + * arguments exposed to the corresponding eBPF program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES). 
+ * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_LOAD + * Description + * Verify and load BPF Type Format (BTF) metadata into the kernel, + * returning a new file descriptor associated with the metadata. + * BTF is described in more detail at + * https://www.kernel.org/doc/html/latest/bpf/btf.html. + * + * The *btf* parameter must point to valid memory providing + * *btf_size* bytes of BTF binary metadata. + * + * The returned file descriptor can be passed to other **bpf**\ () + * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to + * associate the BTF with those objects. + * + * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional + * parameters to specify a *btf_log_buf*, *btf_log_size* and + * *btf_log_level* which allow the kernel to return freeform log + * output regarding the BTF verification process. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_GET_FD_BY_ID + * Description + * Open a file descriptor for the BPF Type Format (BTF) + * corresponding to *btf_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_TASK_FD_QUERY + * Description + * Obtain information about eBPF programs associated with the + * target process identified by *pid* and *fd*. + * + * If the *pid* and *fd* are associated with a tracepoint, kprobe + * or uprobe perf event, then the *prog_id* and *fd_type* will + * be populated with the eBPF program id and file descriptor type + * of type **bpf_task_fd_type**. If associated with a kprobe or + * uprobe, the *probe_offset* and *probe_addr* will also be + * populated. 
Optionally, if *buf* is provided, then up to + * *buf_len* bytes of *buf* will be populated with the name of + * the tracepoint, kprobe or uprobe. + * + * The resulting *prog_id* may be introspected in deeper detail + * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_LOOKUP_AND_DELETE_ELEM + * Description + * Look up an element with the given *key* in the map referred to + * by the file descriptor *fd*, and if found, delete the element. + * + * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map + * types, the *flags* argument needs to be set to 0, but for other + * map types, it may be specified as: + * + * **BPF_F_LOCK** + * Look up and delete the value of a spin-locked map + * without returning the lock. This must be specified if + * the elements contain a spinlock. + * + * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types + * implement this command as a "pop" operation, deleting the top + * element rather than one corresponding to *key*. + * The *key* and *key_len* parameters should be zeroed when + * issuing this operation for these map types. + * + * This command is only valid for the following map types: + * * **BPF_MAP_TYPE_QUEUE** + * * **BPF_MAP_TYPE_STACK** + * * **BPF_MAP_TYPE_HASH** + * * **BPF_MAP_TYPE_PERCPU_HASH** + * * **BPF_MAP_TYPE_LRU_HASH** + * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_FREEZE + * Description + * Freeze the permissions of the specified map. + * + * Write permissions may be frozen by passing zero *flags*. + * Upon success, no future syscall invocations may alter the + * map state of *map_fd*. Write operations from eBPF programs + * are still possible for a frozen map. + * + * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**. 
+ * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_BTF_GET_NEXT_ID + * Description + * Fetch the next BPF Type Format (BTF) object currently loaded + * into the kernel. + * + * Looks for the BTF object with an id greater than *start_id* + * and updates *next_id* on success. If no other BTF objects + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_LOOKUP_BATCH + * Description + * Iterate and fetch multiple elements in a map. + * + * Two opaque values are used to manage batch operations, + * *in_batch* and *out_batch*. Initially, *in_batch* must be set + * to NULL to begin the batched operation. After each subsequent + * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant + * *out_batch* as the *in_batch* for the next operation to + * continue iteration from the current point. Both *in_batch* and + * *out_batch* must point to memory large enough to hold a key, + * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters + * must be at least 4 bytes wide regardless of key size. + * + * The *keys* and *values* are output parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. 
+ * + * On success, *count* elements from the map are copied into the + * user buffer, with the keys copied into *keys* and the values + * copied into the corresponding indices in *values*. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **ENOSPC** to indicate that *keys* or + * *values* is too small to dump an entire bucket during + * iteration of a hash-based map type. + * + * BPF_MAP_LOOKUP_AND_DELETE_BATCH + * Description + * Iterate and delete all elements in a map. + * + * This operation has the same behavior as + * **BPF_MAP_LOOKUP_BATCH** with two exceptions: + * + * * Every element that is successfully returned is also deleted + * from the map. This is at least *count* elements. Note that + * *count* is both an input and an output parameter. + * * Upon returning with *errno* set to **EFAULT**, up to + * *count* elements may be deleted without returning the keys + * and values of the deleted elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_BATCH + * Description + * Update multiple elements in a map by *key*. + * + * The *keys* and *values* are input parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * Each element specified in *keys* is sequentially updated to the + * value in the corresponding index in *values*. The *in_batch* + * and *out_batch* parameters are ignored and should be zeroed. + * + * The *elem_flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create new elements or update existing elements. 
+ * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * **BPF_EXIST** + * Update existing elements. + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or + * **E2BIG**. **E2BIG** indicates that the number of elements in + * the map reached the *max_entries* limit specified at map + * creation time. + * + * May set *errno* to one of the following error codes under + * specific circumstances: + * + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_BATCH + * Description + * Delete multiple elements in a map by *key*. + * + * The *keys* parameter is an input parameter which must point + * to memory large enough to hold *count* items based on the key + * size of the map *map_fd*, that is, *key_size* * *count*. + * + * Each element specified in *keys* is sequentially deleted. The + * *in_batch*, *out_batch*, and *values* parameters are ignored + * and should be zeroed. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. 
If + * *errno* is **EFAULT**, up to *count* elements may have been + * deleted. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_CREATE + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook and return a file descriptor handle for + * managing the link. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_UPDATE + * Description + * Update the eBPF program in the specified *link_fd* to + * *new_prog_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF Link corresponding to + * *link_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_GET_NEXT_ID + * Description + * Fetch the next eBPF link currently loaded into the kernel. + * + * Looks for the eBPF link with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF links + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_ENABLE_STATS + * Description + * Enable eBPF runtime statistics gathering. + * + * Runtime statistics gathering for the eBPF runtime is disabled + * by default to minimize the corresponding performance overhead. + * This command enables statistics globally. + * + * Multiple programs may independently enable statistics. + * After gathering the desired statistics, eBPF runtime statistics + * may be disabled again by calling **close**\ (2) for the file + * descriptor returned by this function. 
Statistics will only be + * disabled system-wide when all outstanding file descriptors + * returned by prior calls for this subcommand are closed. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_ITER_CREATE + * Description + * Create an iterator on top of the specified *link_fd* (as + * previously created using **BPF_LINK_CREATE**) and return a + * file descriptor that can be used to trigger the iteration. + * + * If the resulting file descriptor is pinned to the filesystem + * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls + * for that path will trigger the iterator to read kernel state + * using the eBPF program attached to *link_fd*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_DETACH + * Description + * Forcefully detach the specified *link_fd* from its + * corresponding attachment point. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_BIND_MAP + * Description + * Bind a map to the lifetime of an eBPF program. + * + * The map identified by *map_fd* is bound to the program + * identified by *prog_fd* and only released when *prog_fd* is + * released. This may be used in cases where metadata should be + * associated with a program which otherwise does not contain any + * references to the map (for example, embedded in the eBPF + * program instructions). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. 
+ * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * NOTES + * eBPF objects (maps and programs) can be shared between processes. + * + * * After **fork**\ (2), the child inherits file descriptors + * referring to the same eBPF objects. + * * File descriptors referring to eBPF objects can be transferred over + * **unix**\ (7) domain sockets. + * * File descriptors referring to eBPF objects can be duplicated in the + * usual way, using **dup**\ (2) and similar calls. + * * File descriptors referring to eBPF objects can be pinned to the + * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2). 
+ * + * An eBPF object is deallocated only after all file descriptors referring + * to the object have been closed and no references remain pinned to the + * filesystem or attached (for example, bound to a program or device). + */ enum bpf_cmd { BPF_MAP_CREATE, BPF_MAP_LOOKUP_ELEM, @@ -94,6 +931,7 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_RUN = BPF_PROG_TEST_RUN, BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, @@ -117,6 +955,10 @@ enum bpf_cmd { BPF_LINK_GET_NEXT_ID, BPF_ENABLE_STATS, BPF_ITER_CREATE, + BPF_LINK_DETACH, + BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -139,15 +981,36 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs + * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE + + * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE + * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE + * deprecated. 
+ */ + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, BPF_MAP_TYPE_DEVMAP_HASH, BPF_MAP_TYPE_STRUCT_OPS, BPF_MAP_TYPE_RINGBUF, + BPF_MAP_TYPE_INODE_STORAGE, + BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, + BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, + BPF_MAP_TYPE_ARENA, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -189,6 +1052,10 @@ enum bpf_prog_type { BPF_PROG_TYPE_STRUCT_OPS, BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, + BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ + BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -226,6 +1093,29 @@ enum bpf_attach_type { BPF_CGROUP_INET4_GETSOCKNAME, BPF_CGROUP_INET6_GETSOCKNAME, BPF_XDP_DEVMAP, + BPF_CGROUP_INET_SOCK_RELEASE, + BPF_XDP_CPUMAP, + BPF_SK_LOOKUP, + BPF_XDP, + BPF_SK_SKB_VERDICT, + BPF_SK_REUSEPORT_SELECT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, + BPF_PERF_EVENT, + BPF_TRACE_KPROBE_MULTI, + BPF_LSM_CGROUP, + BPF_STRUCT_OPS, + BPF_NETFILTER, + BPF_TCX_INGRESS, + BPF_TCX_EGRESS, + BPF_TRACE_UPROBE_MULTI, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, + BPF_NETKIT_PRIMARY, + BPF_NETKIT_PEER, + BPF_TRACE_KPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; @@ -238,8 +1128,28 @@ enum bpf_link_type { BPF_LINK_TYPE_CGROUP = 3, BPF_LINK_TYPE_ITER = 4, BPF_LINK_TYPE_NETNS = 5, + BPF_LINK_TYPE_XDP = 6, + BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, + BPF_LINK_TYPE_NETFILTER = 10, + BPF_LINK_TYPE_TCX = 11, + BPF_LINK_TYPE_UPROBE_MULTI = 12, + BPF_LINK_TYPE_NETKIT = 13, + BPF_LINK_TYPE_SOCKMAP = 14, + __MAX_BPF_LINK_TYPE, +}; - MAX_BPF_LINK_TYPE, +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + +enum bpf_perf_event_type { + BPF_PERF_EVENT_UNSPEC = 0, + BPF_PERF_EVENT_UPROBE = 
1, + BPF_PERF_EVENT_URETPROBE = 2, + BPF_PERF_EVENT_KPROBE = 3, + BPF_PERF_EVENT_KRETPROBE = 4, + BPF_PERF_EVENT_TRACEPOINT = 5, + BPF_PERF_EVENT_EVENT = 6, }; /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command @@ -288,7 +1198,12 @@ enum bpf_link_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) #define BPF_F_ALLOW_MULTI (1U << 1) +/* Generic attachment flags. */ #define BPF_F_REPLACE (1U << 2) +#define BPF_F_BEFORE (1U << 3) +#define BPF_F_AFTER (1U << 4) +#define BPF_F_ID (1U << 5) +#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -297,7 +1212,7 @@ enum bpf_link_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) -/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will allow any alignment whatsoever. On platforms * with strict alignment requirements for loads ands stores (such * as sparc and mips) the verifier validates that all loads and @@ -332,24 +1247,103 @@ enum bpf_link_type { /* The verifier internal test flag. Behavior is undefined */ #define BPF_F_TEST_STATE_FREQ (1U << 3) +/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will + * restrict map and helper usage for such programs. Sleepable BPF programs can + * only be attached to hooks where kernel execution context allows sleeping. + * Such programs are allowed to use helpers that may sleep like + * bpf_copy_from_user(). + */ +#define BPF_F_SLEEPABLE (1U << 4) + +/* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program + * fully support xdp frags. + */ +#define BPF_F_XDP_HAS_FRAGS (1U << 5) + +/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded + * program becomes device-bound but can access XDP metadata. + */ +#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) + +/* The verifier internal test flag. 
Behavior is undefined */ +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) + +/* link_create.kprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_KPROBE_MULTI attach type to create return probe. + */ +enum { + BPF_F_KPROBE_MULTI_RETURN = (1U << 0) +}; + +/* link_create.uprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. + */ +enum { + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) +}; + +/* link_create.netfilter.flags used in LINK_CREATE command for + * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. + */ +#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have - * two extensions: - * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd map fd - * insn[1].imm: 0 offset into value - * insn[0].off: 0 0 - * insn[1].off: 0 0 - * ldimm64 rewrite: address of map address of map[0]+offset - * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE + * the following extensions: + * + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] + * insn[0].imm: map fd or fd_idx + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map + * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 -#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_IDX 5 + +/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE + * insn[0].imm: map fd or fd_idx + * insn[1].imm: offset into value + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of map[0]+offset + * verifier type: PTR_TO_MAP_VALUE + */ +#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_IDX_VALUE 6 + +/* insn[0].src_reg: BPF_PSEUDO_BTF_ID + * insn[0].imm: kernel btf id of VAR + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the kernel variable + * verifier type: PTR_TO_BTF_ID or PTR_TO_MEM, depending on whether the var + * is struct/union.
+ */ +#define BPF_PSEUDO_BTF_ID 3 +/* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ +#define BPF_PSEUDO_FUNC 4 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function */ #define BPF_PSEUDO_CALL 1 +/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL, + * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel + */ +#define BPF_PSEUDO_KFUNC_CALL 2 + +enum bpf_addr_space_cast { + BPF_ADDR_SPACE_CAST = 1, +}; /* flags for BPF_MAP_UPDATE_ELEM command */ enum { @@ -391,16 +1385,47 @@ enum { /* Enable memory-mapping BPF map */ BPF_F_MMAPABLE = (1U << 10), + +/* Share perf_event among processes */ + BPF_F_PRESERVE_ELEMS = (1U << 11), + +/* Create a map that is suitable to be an inner map with dynamic max entries */ + BPF_F_INNER_MAP = (1U << 12), + +/* Create a map that will be registered/unregistered by the backed bpf_link */ + BPF_F_LINK = (1U << 13), + +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ + BPF_F_PATH_FD = (1U << 14), + +/* Flag for value_type_btf_obj_fd, the fd is available */ + BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), + +/* BPF token FD is passed in a corresponding command's token_fd field */ + BPF_F_TOKEN_FD = (1U << 16), + +/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */ + BPF_F_SEGV_ON_FAULT = (1U << 17), + +/* Do not translate kernel bpf_arena pointers to user pointers */ + BPF_F_NO_USER_CONV = (1U << 18), }; /* Flags for BPF_PROG_QUERY. */ /* Query effective (directly attached + inherited from ancestor cgroups) * programs that will be executed for events within a cgroup. - * attach_flags with this flag are returned only for directly attached programs. + * attach_flags with this flag are always returned 0.
*/ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* Flags for BPF_PROG_TEST_RUN */ + +/* If set, run the test on the cpu specified by bpf_attr.test.cpu */ +#define BPF_F_TEST_RUN_ON_CPU (1U << 0) +/* If set, XDP frames will be transmitted after processing */ +#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) + /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { /* enabled run_time_ns and run_cnt */ @@ -450,6 +1475,25 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + * + * BPF_MAP_TYPE_ARENA - contains the address where user space + * is going to mmap() the arena. It has to be page aligned. + */ + __u64 map_extra; + + __s32 value_type_btf_obj_fd; /* fd pointing to a BTF + * type data for + * btf_vmlinux_value_type_id. + */ + /* BPF token FD to use with BPF_MAP_CREATE operation. + * If provided, map_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -504,24 +1548,54 @@ union bpf_attr { __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ - __u32 attach_prog_fd; /* 0 to attach to vmlinux */ + union { + /* valid prog_fd to attach to bpf prog */ + __u32 attach_prog_fd; + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; + __u32 core_relo_cnt; /* number of bpf_core_relo */ + __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + /* output: actual total log contents size (including terminating zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely).
+ */ + __u32 log_true_size; + /* BPF token FD to use with BPF_PROG_LOAD operation. + * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should be accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ - __u32 target_fd; /* container object to attach to */ - __u32 attach_bpf_fd; /* eBPF program to attach */ + union { + __u32 target_fd; /* target object to attach to or ... */ + __u32 target_ifindex; /* target ifindex */ + }; + __u32 attach_bpf_fd; __u32 attach_type; __u32 attach_flags; - __u32 replace_bpf_fd; /* previously attached eBPF - * program to replace if - * BPF_F_REPLACE is used - */ + __u32 replace_bpf_fd; + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -543,6 +1617,9 @@ union bpf_attr { */ __aligned_u64 ctx_in; __aligned_u64 ctx_out; + __u32 flags; + __u32 cpu; + __u32 batch_size; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ @@ -564,17 +1641,33 @@ union bpf_attr { } info; struct { /* anonymous struct used by BPF_PROG_QUERY command */ - __u32 target_fd; /* container object to query */ + union { + __u32 target_fd; /* target object to query or ... */ + __u32 target_ifindex; /* target ifindex */ + }; __u32 attach_type; __u32 query_flags; __u32 attach_flags; __aligned_u64 prog_ids; - __u32 prog_cnt; + union { + __u32 prog_cnt; + __u32 count; + }; + __u32 :32; + /* output: per-program attach_flags. + * not allowed to be set during effective query.
+ */ + __aligned_u64 prog_attach_flags; + __aligned_u64 link_ids; + __aligned_u64 link_attach_flags; + __u64 revision; } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ - __u64 name; - __u32 prog_fd; + __u64 name; + __u32 prog_fd; + __u32 :32; + __aligned_u64 cookie; } raw_tracepoint; struct { /* anonymous struct for BPF_BTF_LOAD */ @@ -583,6 +1676,16 @@ union bpf_attr { __u32 btf_size; __u32 btf_log_size; __u32 btf_log_level; + /* output: actual total log contents size (including terminating zero). + * It could be both larger than original log_size (if log was + * truncated), or smaller (if log buffer wasn't filled completely). + */ + __u32 btf_log_true_size; + __u32 btf_flags; + /* BPF token FD to use with BPF_BTF_LOAD operation. + * If provided, btf_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 btf_token_fd; }; struct { @@ -602,22 +1705,102 @@ union bpf_attr { } task_fd_query; struct { /* struct used by BPF_LINK_CREATE command */ - __u32 prog_fd; /* eBPF program to attach */ - __u32 target_fd; /* object to attach to */ + union { + __u32 prog_fd; /* eBPF program to attach */ + __u32 map_fd; /* struct_ops to attach */ + }; + union { + __u32 target_fd; /* target object to attach to or ... */ + __u32 target_ifindex; /* target ifindex */ + }; __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ + union { + __u32 target_btf_id; /* btf_id of target to attach to */ + struct { + __aligned_u64 iter_info; /* extra bpf_iter_link_info */ + __u32 iter_info_len; /* iter_info length */ + }; + struct { + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 bpf_cookie; + } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + __aligned_u64 cookies; + } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above.
*/ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } tcx; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 cnt; + __u32 flags; + __u32 pid; + } uprobe_multi; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } netkit; + }; } link_create; struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. 
+ */ + __u32 old_map_fd; + }; } link_update; + struct { + __u32 link_fd; + } link_detach; + struct { /* struct used by BPF_ENABLE_STATS command */ __u32 type; } enable_stats; @@ -627,6 +1810,17 @@ union bpf_attr { __u32 flags; } iter_create; + struct { /* struct used by BPF_PROG_BIND_MAP command */ + __u32 prog_fd; + __u32 map_fd; + __u32 flags; /* extra flags */ + } prog_bind_map; + + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -634,7 +1828,7 @@ union bpf_attr { * parsed and used to produce a manual page. The workflow is the following, * and requires the rst2man utility: * - * $ ./scripts/bpf_helpers_doc.py \ + * $ ./scripts/bpf_doc.py \ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 * $ man /tmp/bpf-helpers.7 @@ -653,7 +1847,7 @@ union bpf_attr { * Map value associated to *key*, or **NULL** if no entry was * found. * - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) * Description * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: @@ -671,13 +1865,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * long bpf_map_delete_elem(struct bpf_map *map, const void *key) * Description * Delete entry with *key* from *map*. * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) * Description * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. @@ -695,21 +1889,21 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) - * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if + * to file *\/sys/kernel/tracing/trace* from TraceFS, if * available. It can take up to three additional **u64** * arguments (as an eBPF helpers, the total number of arguments is * limited to five). * * Each time the helper is called, it appends a line to the trace. - * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is - * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this. + * Lines are discarded while *\/sys/kernel/tracing/trace* is + * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this. * The format of the trace is customizable, and the exact output * one will get depends on the options set in - * *\/sys/kernel/debug/tracing/trace_options* (see also the + * *\/sys/kernel/tracing/trace_options* (see also the * *README* file under the same directory). However, it usually * defaults to something like: * @@ -745,7 +1939,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. 
For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -769,13 +1963,13 @@ union bpf_attr { * u32 bpf_get_smp_processor_id(void) * Description * Get the SMP (symmetric multiprocessing) processor id. Note that - * all programs run with preemption disabled, which means that the + * all programs run with migration disabled, which means that the * SMP processor id is stable during all the execution of the * program. * Return * The SMP id of the processor running the program. * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of @@ -792,7 +1986,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) * Description * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper @@ -817,7 +2011,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) * Description * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. 
Computation is incremental, so the @@ -849,7 +2043,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) * Description * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack @@ -876,11 +2070,11 @@ union bpf_attr { * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 32. + * which is currently set to 33. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) * Description * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress @@ -902,9 +2096,13 @@ union bpf_attr { * performed again, if the helper is used in combination with * direct packet access. * Return - * 0 on success, or a negative error in case of failure. + * 0 on success, or a negative error in case of failure. Positive + * error indicates a potential drop or congestion in the target + * device. The particular positive error codes are not defined. * * u64 bpf_get_current_pid_tgid(void) + * Description + * Get the current pid and tgid. * Return * A 64-bit integer containing the current tgid and pid, and * created as such: @@ -912,11 +2110,13 @@ union bpf_attr { * *current_task*\ **->pid**. * * u64 bpf_get_current_uid_gid(void) + * Description + * Get the current uid and gid. * Return * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. 
* - * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * long bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -953,7 +2153,7 @@ union bpf_attr { * Return * The classid, or 0 for the default unconfigured classid. * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) * Description * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update @@ -969,7 +2169,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_vlan_pop(struct sk_buff *skb) + * long bpf_skb_vlan_pop(struct sk_buff *skb) * Description * Pop a VLAN header from the packet associated to *skb*. * @@ -981,7 +2181,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be @@ -1032,7 +2232,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The @@ -1054,6 +2254,9 @@ union bpf_attr { * sending the packet. 
This flag was added for GRE * encapsulation, but might be used with other protocols * as well in the future. + * **BPF_F_NO_TUNNEL_KEY** + * Add a flag to tunnel metadata indicating that no tunnel + * key should be set in the resulting tunnel header. * * Here is a typical usage on the transmit path: * @@ -1098,7 +2301,7 @@ union bpf_attr { * The value of the perf event counter read from the map, or a * negative error code in case of failure. * - * int bpf_redirect(u32 ifindex, u64 flags) + * long bpf_redirect(u32 ifindex, u64 flags) * Description * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_clone_redirect**\ @@ -1125,7 +2328,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. @@ -1145,7 +2348,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1190,7 +2393,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. 
It can be used to load *len* bytes from *offset* from @@ -1207,7 +2410,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1276,7 +2479,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1294,7 +2497,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1304,7 +2507,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) * Description * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to @@ -1331,7 +2534,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * long bpf_skb_change_type(struct sk_buff *skb, u32 type) * Description * Change the packet type for the packet associated to *skb*. 
This * comes down to setting *skb*\ **->pkt_type** to *type*, except @@ -1358,7 +2561,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) * Description * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. @@ -1386,10 +2589,12 @@ union bpf_attr { * The 32-bit hash. * * u64 bpf_get_current_task(void) + * Description + * Get the current task. * Return * A pointer to the current task struct. * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * long bpf_probe_write_user(void *dst, const void *src, u32 len) * Description * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in @@ -1408,7 +2613,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) * Description * Check whether the probe is being run is the context of a given * subset of the cgroup2 hierarchy. The cgroup2 to test is held by @@ -1416,11 +2621,11 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 1, if current task belongs to the cgroup2. + * * 0, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) * Description * Resize (trim or grow) the packet associated to *skb* to the * new *len*. 
The *flags* are reserved for future usage, and must @@ -1444,12 +2649,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) * Description * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes * from *skb* readable and writable. If a zero value is passed for - * *len*, then the whole length of the *skb* is pulled. + * *len*, then all bytes in the linear part of *skb* will be made + * readable and writable. * * This helper is only needed for reading and writing with direct * packet access. @@ -1499,8 +2705,10 @@ union bpf_attr { * indicate that the hash is outdated and to trigger a * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. + * Return + * void. * - * int bpf_get_numa_node_id(void) + * long bpf_get_numa_node_id(void) * Description * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA @@ -1511,7 +2719,7 @@ union bpf_attr { * Return * The id of current NUMA node. * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) * Description * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of @@ -1532,7 +2740,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that * it is possible to use a negative value for *delta*. 
This helper @@ -1547,7 +2755,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for @@ -1570,24 +2778,34 @@ union bpf_attr { * networking traffic statistics as it provides a global socket * identifier that can be assumed unique. * Return - * A 8-byte long non-decreasing number on success, or 0 if the - * socket field is missing inside *skb*. + * An 8-byte long unique number on success, or 0 if the socket + * field is missing inside *skb*. * * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return - * A 8-byte long non-decreasing number. + * An 8-byte long unique number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return - * A 8-byte long non-decreasing number. + * An 8-byte long unique number. + * + * u64 bpf_get_socket_cookie(struct sock *sk) + * Description + * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts + * *sk*, but gets socket from a BTF **struct sock**. This helper + * also works for sleepable programs. + * Return + * An 8-byte long unique number or 0 if *sk* is NULL. * * u32 bpf_get_socket_uid(struct sk_buff *skb) + * Description + * Get the owner UID of the socket associated to *skb*. * Return * The owner UID of the socket associated to *skb*. If the socket * is **NULL**, or if it is not a full socket (i.e.
if it is a @@ -1595,14 +2813,14 @@ union bpf_attr { * is returned (note that **overflowuid** might also be the actual * UID value for the socket). * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * long bpf_set_hash(struct sk_buff *skb, u32 hash) * Description * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. * Return * 0 * - * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1613,24 +2831,32 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: * * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, - * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, - * **TCP_BPF_SNDCWND_CLAMP**. + * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, + * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. 
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -1645,10 +2871,12 @@ union bpf_attr { * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). + * (room space is added or removed between the layer 2 and + * layer 3 headers). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). + * (room space is added or removed between the layer 3 and + * layer 4 headers). * * The following flags are supported at this time: * @@ -1668,6 +2896,15 @@ union bpf_attr { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**: + * Indicate the new IP header version after decapsulating the outer + * IP header. Used when the inner and outer IP versions are different. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -1676,7 +2913,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1688,8 +2925,12 @@ union bpf_attr { * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be * one of the XDP program return codes up to **XDP_TX**, as chosen - * by the caller. Any higher bits in the *flags* argument must be - * unset. + * by the caller. The higher bits of *flags* can be set to + * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. + * + * With BPF_F_BROADCAST the packet will be broadcast to all the + * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress + * interface will be excluded when broadcasting. * * See also **bpf_redirect**\ (), which only supports redirecting * to an ifindex, but doesn't require a map to do so. @@ -1697,7 +2938,7 @@ union bpf_attr { * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. * - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1708,7 +2949,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a *map* referencing sockets.
The * *skops* is used as a new value for the entry associated to @@ -1727,7 +2968,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) * Description * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this @@ -1756,7 +2997,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) * Description * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. This helper relies on a *map* of type @@ -1806,9 +3047,9 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description - * For en eBPF program attached to a perf event, retrieve the + * For an eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in * the structure pointed by *buf* and of size *buf_size*. Enabled * and running times are also stored in the structure (see @@ -1817,7 +3058,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. 
The *level* at @@ -1829,20 +3070,18 @@ union bpf_attr { * *bpf_socket* should be one of the following: * * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. - * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** - * and **BPF_CGROUP_INET6_CONNECT**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**, + * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_regs *regs, u64 rc) + * long bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. @@ -1867,7 +3106,7 @@ union bpf_attr { * Return * 0 * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) * Description * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to @@ -1911,7 +3150,7 @@ union bpf_attr { * be set is returned (which comes down to 0 if all bits were set * as required). 
* - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -1925,7 +3164,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. @@ -1959,7 +3198,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been @@ -1977,7 +3216,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) * Description * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ @@ -2008,7 +3247,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) * Description * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing @@ -2026,7 +3265,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. @@ -2040,7 +3279,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) * Description * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. @@ -2056,7 +3295,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -2072,8 +3311,18 @@ union bpf_attr { * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. + * Collect (build_id, file_offset) instead of ips for user + * stack, only valid if **BPF_F_USER_STACK** is also + * specified. + * + * *file_offset* is an offset relative to the beginning + * of the executable or shared object file backing the vma + * which the *ip* falls in. It is *not* an offset relative + * to that object's base address. Accordingly, it must be + * adjusted by adding (sh_addr - sh_offset), where + * sh_{addr,offset} correspond to the executable section + * containing *file_offset* in the object, for comparisons + * to symbols' st_value to be valid. 
* * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject @@ -2086,10 +3335,10 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack= * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2111,7 +3360,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. * If lookup is successful and result shows packet is to be @@ -2130,9 +3379,27 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). + * **BPF_FIB_LOOKUP_SKIP_NEIGH** + * Skip the neighbour table lookup. *params*->dmac + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). + * **BPF_FIB_LOOKUP_SRC** + * Derive and set source IP addr in *params*->ipv{4,6}_src + * for the nexthop. If the src addr cannot be derived, + * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. 
In this + * case, *params*->dmac and *params*->smac are not set either. + * **BPF_FIB_LOOKUP_MARK** + * Use the mark present in *params*->mark for the fib lookup. + * This option should not be used with BPF_FIB_LOOKUP_DIRECT, + * as it only has meaning for full lookups. * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -2142,7 +3409,10 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU + * was exceeded and output params->mtu_result contains the MTU. + * + * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2161,7 +3431,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -2175,11 +3445,11 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. 
- * if the verdeict eBPF program returns **SK_PASS**), redirect it + * if the verdict eBPF program returns **SK_PASS**), redirect it * to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and * egress interfaces can be used for redirection. The @@ -2189,7 +3459,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) * Description * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at @@ -2226,7 +3496,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs @@ -2241,7 +3511,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) * Description * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to @@ -2257,7 +3527,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) * Description * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. 
Each action takes a parameter @@ -2286,7 +3556,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_repeat(void *ctx) + * long bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays @@ -2305,7 +3575,7 @@ union bpf_attr { * Return * 0 * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, @@ -2348,6 +3618,9 @@ union bpf_attr { * The id is returned or 0 in case the id could not be retrieved. * * u64 bpf_get_current_cgroup_id(void) + * Description + * Get the current cgroup id based on the cgroup within which + * the current task is running. * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. @@ -2365,15 +3638,15 @@ union bpf_attr { * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the **BPF_STX_XADD** instruction to alter + * For example, by using the **BPF_ATOMIC** instructions to alter * the shared data. * Return * A pointer to the local storage area. * - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a - * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*. * It checks the selected socket is matching the incoming * request in the socket buffer. * Return @@ -2415,7 +3688,7 @@ union bpf_attr { * Look for an IPv6 socket. 
* * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will + * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or @@ -2452,7 +3725,7 @@ union bpf_attr { * Look for an IPv6 socket. * * If the *netns* is a negative signed 32-bit integer, then the - * socket lookup table in the netns associated with the *ctx* will + * socket lookup table in the netns associated with the *ctx* * will be used. For the TC hooks, this is the netns of the device * in the skb. For socket hooks, this is the netns of the socket. * If *netns* is any other signed 32-bit value greater than or @@ -2471,7 +3744,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(void *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2479,7 +3752,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) * Description * Push an element *value* in *map*. *flags* is one of: * @@ -2489,19 +3762,19 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * long bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * long bpf_map_peek_elem(struct bpf_map *map, void *value) * Description * Get an element from *map* without removing it. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2517,7 +3790,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if @@ -2529,7 +3802,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement. @@ -2543,7 +3816,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_lock(struct bpf_spin_lock *lock) + * long bpf_spin_lock(struct bpf_spin_lock *lock) * Description * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. Taking the lock allows to @@ -2591,7 +3864,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * long bpf_spin_unlock(struct bpf_spin_lock *lock) * Description * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). @@ -2614,7 +3887,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. 
* - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * long bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** @@ -2651,22 +3924,23 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). * * *th* points to the start of the TCP header, while *th_len* - * contains **sizeof**\ (**struct tcphdr**). + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). * Return * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description * Get name of sysctl in /proc/sys/ and copy it into provided by * program buffer *buf* of size *buf_len*. @@ -2682,7 +3956,7 @@ union bpf_attr { * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get current value of sysctl as it is presented in /proc/sys * (incl. 
newline, etc), and copy it as a string into provided @@ -2701,7 +3975,7 @@ union bpf_attr { * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into @@ -2718,7 +3992,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) * Description * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. @@ -2735,7 +4009,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base @@ -2759,7 +4033,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the @@ -2782,7 +4056,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. 
* - * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags) + * void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags) * Description * Get a bpf-local-storage from a *sk*. * @@ -2798,6 +4072,9 @@ union bpf_attr { * "type". The bpf-local-storage "type" (i.e. the *map*) is * searched against all bpf-local-storages residing at *sk*. * + * *sk* is a kernel **struct sock** pointer for LSM program. + * *sk* is a **struct bpf_sock** pointer for other program types. + * * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be * used such that a new bpf-local-storage will be * created if one does not exist. *value* can be used @@ -2810,15 +4087,16 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, void *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return * 0 on success. * * **-ENOENT** if the bpf-local-storage cannot be found. + * **-EINVAL** if sk is not a fullsock (e.g. a request_sock). * - * int bpf_send_signal(u32 sig) + * long bpf_send_signal(u32 sig) * Description * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. @@ -2833,17 +4111,18 @@ union bpf_attr { * * **-EAGAIN** if bpf program can try again. * - * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Try to issue a SYN cookie for the packet with corresponding * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. * * *iph* points to the start of the IPv4 or IPv6 header, while * *iph_len* contains **sizeof**\ (**struct iphdr**) or - * **sizeof**\ (**struct ip6hdr**). + * **sizeof**\ (**struct ipv6hdr**). 
* * *th* points to the start of the TCP header, while *th_len* - * contains the length of the TCP header. + * contains the length of the TCP header with options (at least + * **sizeof**\ (**struct tcphdr**)). * Return * On success, lower 32 bits hold the generated SYN cookie in * followed by 16 bits which hold the MSS value for that cookie, @@ -2859,7 +4138,7 @@ union bpf_attr { * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -2883,21 +4162,21 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. The *size* should include the @@ -2906,10 +4185,10 @@ union bpf_attr { * string length is larger than *size*, just *size*-1 bytes are * copied and the last byte is set to NUL. 
* - * On success, the length of the copied string is returned. This - * makes this helper useful in tracing programs for reading - * strings, and more importantly to get its length at runtime. See - * the following snippet: + * On success, returns the number of bytes that were written, + * including the terminal NUL. This makes this helper useful in + * tracing programs for reading strings, and more importantly to + * get its length at runtime. See the following snippet: * * :: * @@ -2937,11 +4216,11 @@ union bpf_attr { * **->mm->env_start**: using this helper and the return value, * one can quickly iterate at the right offset of the memory area. * Return - * On success, the strictly positive length of the string, + * On success, the strictly positive length of the output string, * including the trailing NUL character. On error, a negative * value. * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. @@ -2949,14 +4228,14 @@ union bpf_attr { * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * - * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_send_signal_thread(u32 sig) + * long bpf_send_signal_thread(u32 sig) * Description * Send signal *sig* to the thread corresponding to the current task. 
* Return @@ -2976,7 +4255,7 @@ union bpf_attr { * Return * The 64 bit jiffies * - * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* @@ -2995,7 +4274,7 @@ union bpf_attr { * * **-ENOENT** if architecture does not support branch records. * - * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. @@ -3007,7 +4286,7 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * - * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -3062,8 +4341,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags) * Description + * Helper is overloaded depending on BPF program type. This + * description applies to **BPF_PROG_TYPE_SCHED_CLS** and + * **BPF_PROG_TYPE_SCHED_ACT** programs. + * * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, * will cause *skb* to be delivered to the specified socket. 
@@ -3086,8 +4369,55 @@ union bpf_attr { * **-EOPNOTSUPP** if the operation is not supported, for example * a call from outside of TC ingress. * - * **-ESOCKTNOSUPPORT** if the socket type is not supported - * (reuseport). + * long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags) + * Description + * Helper is overloaded depending on BPF program type. This + * description applies to **BPF_PROG_TYPE_SK_LOOKUP** programs. + * + * Select the *sk* as a result of a socket lookup. + * + * For the operation to succeed passed socket must be compatible + * with the packet description provided by the *ctx* object. + * + * L4 protocol (**IPPROTO_TCP** or **IPPROTO_UDP**) must + * be an exact match. While IP family (**AF_INET** or + * **AF_INET6**) must be compatible, that is IPv6 sockets + * that are not v6-only can be selected for IPv4 packets. + * + * Only TCP listeners and UDP unconnected sockets can be + * selected. *sk* can also be NULL to reset any previous + * selection. + * + * *flags* argument can be a combination of the following values: + * + * * **BPF_SK_LOOKUP_F_REPLACE** to override the previous + * socket selection, potentially done by a BPF program + * that ran before us. + * + * * **BPF_SK_LOOKUP_F_NO_REUSEPORT** to skip + * load-balancing within reuseport group for the socket + * being selected. + * + * On success *ctx->sk* will point to the selected socket. + * + * Return + * 0 on success, or a negative errno in case of failure. + * + * * **-EAFNOSUPPORT** if socket family (*sk->family*) is + * not compatible with packet family (*ctx->family*). + * + * * **-EEXIST** if socket has been already selected, + * potentially by another program, and + * **BPF_SK_LOOKUP_F_REPLACE** flag was not specified. + * + * * **-EINVAL** if unsupported flags were specified. + * + * * **-EPROTOTYPE** if socket L4 protocol + * (*sk->protocol*) doesn't match packet protocol + * (*ctx->protocol*). 
+ * + * * **-ESOCKTNOSUPPORT** if socket is not in allowed + * state (TCP listening or UDP unconnected). * * u64 bpf_ktime_get_boot_ns(void) * Description @@ -3097,7 +4427,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. @@ -3106,7 +4436,7 @@ union bpf_attr { * arguments. The *data* are a **u64** array and corresponding format string * values are stored in the array. For strings and pointers where pointees * are accessed, only the pointer values are stored in the *data* array. - * The *data_len* is the size of *data* in bytes. + * The *data_len* is the size of *data* in bytes - must be a multiple of 8. * * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory. * Reading kernel memory may fail due to either invalid address or @@ -3126,7 +4456,7 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the @@ -3134,270 +4464,1560 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure: * - * **-EOVERFLOW** if an overflow happened: The same object will be tried again. + * **-EOVERFLOW** if an overflow happened: The same object will be tried again. + * + * u64 bpf_sk_cgroup_id(void *sk) + * Description + * Return the cgroup v2 id of the socket *sk*. + * + * *sk* must be a non-**NULL** pointer to a socket, e.g. 
one + * returned from **bpf_sk_lookup_xxx**\ (), + * **bpf_sk_fullsock**\ (), etc. The format of returned id is + * same as in **bpf_skb_cgroup_id**\ (). + * + * This helper is available only if the kernel was compiled with + * the **CONFIG_SOCK_CGROUP_DATA** configuration option. + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *sk* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *sk*, then return value will be same as that + * of **bpf_sk_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *sk*. + * + * The format of returned id and helper limitations are same as in + * **bpf_sk_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) + * Description + * Copy *size* bytes from *data* into a ring buffer *ringbuf*. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * An adaptive notification is a notification sent whenever the user-space + * process has caught up and consumed all available payloads. In case the user-space + * process is still processing a previous payload, then no notification is needed + * as it will process the newly added payload automatically. + * Return + * 0 on success, or a negative error in case of failure. 
+ * + * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf*. + * *flags* must be 0. + * Return + * Valid pointer with *size* bytes of memory available; NULL, + * otherwise. + * + * void bpf_ringbuf_submit(void *data, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * See 'bpf_ringbuf_output()' for the definition of adaptive notification. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard(void *data, u64 flags) + * Description + * Discard reserved ring buffer sample, pointed to by *data*. + * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification + * of new data availability is sent. + * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification + * of new data availability is sent unconditionally. + * If **0** is specified in *flags*, an adaptive notification + * of new data availability is sent. + * + * See 'bpf_ringbuf_output()' for the definition of adaptive notification. + * Return + * Nothing. Always succeeds. + * + * u64 bpf_ringbuf_query(void *ringbuf, u64 flags) + * Description + * Query various characteristics of provided ring buffer. What + * exactly is queried is determined by *flags*: + * + * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. + * * **BPF_RB_RING_SIZE**: The size of ring buffer. + * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). + * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). 
+ * + * Data returned is just a momentary snapshot of actual values + * and could be inaccurate, so this facility should be used to + * power heuristics and for reporting, not to make 100% correct + * calculation. + * Return + * Requested value, or 0, if *flags* are not recognized. + * + * long bpf_csum_level(struct sk_buff *skb, u64 level) + * Description + * Change the skbs checksum level by one layer up or down, or + * reset it entirely to none in order to have the stack perform + * checksum validation. The level is applicable to the following + * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of + * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | + * through **bpf_skb_adjust_room**\ () helper with passing in + * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call + * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since + * the UDP header is removed. Similarly, an encap of the latter + * into the former could be accompanied by a helper call to + * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the + * skb is still intended to be processed in higher layers of the + * stack instead of just egressing at tc. + * + * There are three supported level settings at this time: + * + * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs + * with CHECKSUM_UNNECESSARY. + * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs + * with CHECKSUM_UNNECESSARY. + * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and + * sets CHECKSUM_NONE to force checksum validation by the stack. + * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current + * skb->csum_level. + * Return + * 0 on success, or a negative error in case of failure. In the + * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level + * is returned or the error code -EACCES in case the skb is not + * subject to CHECKSUM_UNNECESSARY. 
+ * + * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. + * To achieve this, the helper needs *task*, which is a valid + * pointer to **struct task_struct**. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. 
+ * + * **bpf_get_task_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack= + * Return + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. + * + * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) + * Description + * Load header option. Support reading a particular TCP header + * option for bpf program (**BPF_PROG_TYPE_SOCK_OPS**). + * + * If *flags* is 0, it will search the option from the + * *skops*\ **->skb_data**. The comment in **struct bpf_sock_ops** + * has details on what skb_data contains under different + * *skops*\ **->op**. + * + * The first byte of the *searchby_res* specifies the + * kind that it wants to search. + * + * If the searching kind is an experimental kind + * (i.e. 253 or 254 according to RFC6994). It also + * needs to specify the "magic" which is either + * 2 bytes or 4 bytes. It then also needs to + * specify the size of the magic by using + * the 2nd byte which is "kind-length" of a TCP + * header option and the "kind-length" also + * includes the first 2 bytes "kind" and "kind-length" + * itself as a normal TCP header option also does. + * + * For example, to search experimental kind 254 with + * 2 byte magic 0xeB9F, the searchby_res should be + * [ 254, 4, 0xeB, 0x9F, 0, 0, .... 0 ]. + * + * To search for the standard window scale option (3), + * the *searchby_res* should be [ 3, 0, 0, .... 0 ]. + * Note, kind-length must be 0 for regular option. + * + * Searching for No-Op (0) and End-of-Option-List (1) are + * not supported. 
+ * + * *len* must be at least 2 bytes which is the minimal size + * of a header option. + * + * Supported flags: + * + * * **BPF_LOAD_HDR_OPT_TCP_SYN** to search from the + * saved_syn packet or the just-received syn packet. + * + * Return + * > 0 when found, the header option is copied to *searchby_res*. + * The return value is the total length copied. On failure, a + * negative error code is returned: + * + * **-EINVAL** if a parameter is invalid. + * + * **-ENOMSG** if the option is not found. + * + * **-ENOENT** if no syn packet is available when + * **BPF_LOAD_HDR_OPT_TCP_SYN** is used. + * + * **-ENOSPC** if there is not enough space. Only *len* number of + * bytes are copied. + * + * **-EFAULT** on failure to parse the header options in the + * packet. + * + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. + * + * long bpf_store_hdr_opt(struct bpf_sock_ops *skops, const void *from, u32 len, u64 flags) + * Description + * Store header option. The data will be copied + * from buffer *from* with length *len* to the TCP header. + * + * The buffer *from* should have the whole option that + * includes the kind, kind-length, and the actual + * option data. The *len* must be at least kind-length + * long. The kind-length does not have to be 4 byte + * aligned. The kernel will take care of the padding + * and setting the 4 bytes aligned value to th->doff. + * + * This helper will check for duplicated option + * by searching the same option in the outgoing skb. + * + * This helper can only be called during + * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** If param is invalid. + * + * **-ENOSPC** if there is not enough space in the header. + * Nothing has been written + * + * **-EEXIST** if the option already exists. + * + * **-EFAULT** on failure to parse the existing header options. 
+ * + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. + * + * long bpf_reserve_hdr_opt(struct bpf_sock_ops *skops, u32 len, u64 flags) + * Description + * Reserve *len* bytes for the bpf header option. The + * space will be used by **bpf_store_hdr_opt**\ () later in + * **BPF_SOCK_OPS_WRITE_HDR_OPT_CB**. + * + * If **bpf_reserve_hdr_opt**\ () is called multiple times, + * the total number of bytes will be reserved. + * + * This helper can only be called during + * **BPF_SOCK_OPS_HDR_OPT_LEN_CB**. + * + * Return + * 0 on success, or negative error in case of failure: + * + * **-EINVAL** if a parameter is invalid. + * + * **-ENOSPC** if there is not enough space in the header. + * + * **-EPERM** if the helper cannot be used under the current + * *skops*\ **->op**. + * + * void *bpf_inode_storage_get(struct bpf_map *map, void *inode, void *value, u64 flags) + * Description + * Get a bpf_local_storage from an *inode*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *inode* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *inode*) except this + * helper enforces the key must be an inode and the map must also + * be a **BPF_MAP_TYPE_INODE_STORAGE**. + * + * Underneath, the value is stored locally at *inode* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *inode*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. 
+ * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * int bpf_inode_storage_delete(struct bpf_map *map, void *inode) + * Description + * Delete a bpf_local_storage from an *inode*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. + * + * long bpf_d_path(struct path *path, char *buf, u32 sz) + * Description + * Return full path for given **struct path** object, which + * needs to be the kernel BTF *path* object. The path is + * returned in the provided buffer *buf* of size *sz* and + * is zero terminated. + * + * Return + * On success, the strictly positive length of the string, + * including the trailing NUL character. On error, a negative + * value. + * + * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr) + * Description + * Read *size* bytes from user space address *user_ptr* and store + * the data in *dst*. This is a wrapper of **copy_from_user**\ (). + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags) + * Description + * Use BTF to store a string representation of *ptr*->ptr in *str*, + * using *ptr*->type_id. This value should specify the type + * that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1) + * can be used to look up vmlinux BTF type ids. Traversing the + * data structure using BTF, the type information and values are + * stored in the first *str_size* - 1 bytes of *str*. Safe copy of + * the pointer data is carried out to avoid kernel crashes during + * operation. Smaller types can use string space on the stack; + * larger programs can use map data to store the string + * representation. + * + * The string can be subsequently shared with userspace via + * bpf_perf_event_output() or ring buffer interfaces. + * bpf_trace_printk() is to be avoided as it places too small + * a limit on string size to be useful. 
+ * + * *flags* is a combination of + * + * **BTF_F_COMPACT** + * no formatting around type information + * **BTF_F_NONAME** + * no struct/union member names/types + * **BTF_F_PTR_RAW** + * show raw (unobfuscated) pointer values; + * equivalent to printk specifier %px. + * **BTF_F_ZERO** + * show zero-valued struct/union members; they + * are not displayed by default + * + * Return + * The number of bytes that were written (or would have been + * written if output had to be truncated due to string size), + * or a negative error in cases of failure. + * + * long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags) + * Description + * Use BTF to write to seq_write a string representation of + * *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf(). + * *flags* are identical to those used for bpf_snprintf_btf. + * Return + * 0 on success or a negative error in case of failure. + * + * u64 bpf_skb_cgroup_classid(struct sk_buff *skb) + * Description + * See **bpf_get_cgroup_classid**\ () for the main description. + * This helper differs from **bpf_get_cgroup_classid**\ () in that + * the cgroup v1 net_cls class is retrieved only from the *skb*'s + * associated socket instead of the current process. + * Return + * The id is returned or 0 in case the id could not be retrieved. + * + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex* + * and fill in L2 addresses from neighboring subsystem. This helper + * is somewhat similar to **bpf_redirect**\ (), except that it + * populates L2 addresses as well, meaning, internally, the helper + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. 
The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types, and enabled + * for IPv4 and IPv6 protocols. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. + * + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on *cpu*. A ksym is an + * extern variable decorated with '__ksym'. For ksym, there is a + * global var (either static or global) defined of the same name + * in the kernel. The ksym is percpu if the global var is percpu. + * The returned pointer points to the global percpu var on *cpu*. + * + * bpf_per_cpu_ptr() has the same semantic as per_cpu_ptr() in the + * kernel, except that bpf_per_cpu_ptr() may return NULL. This + * happens if *cpu* is larger than nr_cpu_ids. The caller of + * bpf_per_cpu_ptr() must check the returned value. + * Return + * A pointer pointing to the kernel percpu variable on *cpu*, or + * NULL, if *cpu* is invalid. + * + * void *bpf_this_cpu_ptr(const void *percpu_ptr) + * Description + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a + * pointer to the percpu kernel variable on this cpu. See the + * description of 'ksym' in **bpf_per_cpu_ptr**\ (). + * + * bpf_this_cpu_ptr() has the same semantic as this_cpu_ptr() in + * the kernel. Different from **bpf_per_cpu_ptr**\ (), it would + * never return NULL. + * Return + * A pointer pointing to the kernel percpu variable on this cpu. + * + * long bpf_redirect_peer(u32 ifindex, u64 flags) + * Description + * Redirect the packet to another net device of index *ifindex*. 
+ * This helper is somewhat similar to **bpf_redirect**\ (), except + * that the redirection happens to the *ifindex*' peer device and + * the netns switch takes place from ingress to ingress without + * going through the CPU's backlog queue. + * + * The *flags* argument is reserved and must be 0. The helper is + * currently only supported for tc BPF program types at the + * ingress hook and for veth and netkit target device types. The + * peer device must reside in a different network namespace. + * Return + * The helper returns **TC_ACT_REDIRECT** on success or + * **TC_ACT_SHOT** on error. + * + * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *task*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *task* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this + * helper enforces the key must be a task_struct and the map must also + * be a **BPF_MAP_TYPE_TASK_STORAGE**. + * + * Underneath, the value is stored locally at *task* instead of + * the *map*. The *map* is used as the bpf-local-storage + * "type". The bpf-local-storage "type" (i.e. the *map*) is + * searched against all bpf_local_storage residing at *task*. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. 
+ * + * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task) + * Description + * Delete a bpf_local_storage from a *task*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. + * + * struct task_struct *bpf_get_current_task_btf(void) + * Description + * Return a BTF pointer to the "current" task. + * This pointer can also be used in helpers that accept an + * *ARG_PTR_TO_BTF_ID* of type *task_struct*. + * Return + * Pointer to the current task. + * + * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags) + * Description + * Set or clear certain options on *bprm*: + * + * **BPF_F_BPRM_SECUREEXEC** Set the secureexec bit + * which sets the **AT_SECURE** auxv for glibc. The bit + * is cleared if the flag is not specified. + * Return + * **-EINVAL** if invalid *flags* are passed, zero otherwise. + * + * u64 bpf_ktime_get_coarse_ns(void) + * Description + * Return a coarse-grained version of the time elapsed since + * system boot, in nanoseconds. Does not include time the system + * was suspended. + * + * See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**) + * Return + * Current *ktime*. + * + * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) + * Description + * Returns the stored IMA hash of the *inode* (if it's available). + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if + * invalid arguments are passed. + * + * struct socket *bpf_sock_from_file(struct file *file) + * Description + * If the given file represents a socket, returns the associated + * socket. + * Return + * A pointer to a struct socket on success or NULL if the file is + * not a socket. 
+ * + * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) + * Description + * Check packet size against exceeding MTU of net device (based + * on *ifindex*). This helper will likely be used in combination + * with helpers that adjust/change the packet size. + * + * The argument *len_diff* can be used for querying with a planned + * size change. This allows checking MTU prior to changing packet + * ctx. Providing a *len_diff* adjustment that is larger than the + * actual packet size (resulting in negative packet size) will in + * principle not exceed the MTU, which is why it is not considered + * a failure. Other BPF helpers are needed for performing the + * planned size change; therefore the responsibility for catching + * a negative packet size belongs in those helpers. + * + * Specifying *ifindex* zero means the MTU check is performed + * against the current net device. This is practical if this isn't + * used prior to redirect. + * + * On input *mtu_len* must be a valid pointer, else verifier will + * reject BPF program. If the value *mtu_len* is initialized to + * zero then the ctx packet size is used. When value *mtu_len* is + * provided as input this specifies the L3 length that the MTU check + * is done against. Remember XDP and TC length operate at L2, but + * this value is L3 as this correlates to MTU and IP-header tot_len + * values which are L3 (similar behavior as bpf_fib_lookup). + * + * The Linux kernel route table can configure MTUs on a more + * specific per route level, which is not provided by this helper. + * For route level MTU checks use the **bpf_fib_lookup**\ () + * helper. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** for tc cls_act programs. + * + * The *flags* argument can be a combination of one or more of the + * following values: + * + * **BPF_MTU_CHK_SEGS** + * This flag will only work for *ctx* **struct sk_buff**.
+ * If packet context contains extra packet segment buffers + * (often known as GSO skb), then MTU check is harder to + * check at this point, because in transmit path it is + * possible for the skb packet to get re-segmented + * (depending on net device features). This could still be + * a MTU violation, so this flag enables performing MTU + * check against segments, with a different violation + * return code to tell it apart. Check cannot use len_diff. + * + * On return *mtu_len* pointer contains the MTU value of the net + * device. Remember the net device configured MTU is the L3 size, + * which is returned here and XDP and TC length operate at L2. + * The helper takes this into account for you, but remember this when using + * the MTU value in your BPF-code. + * + * Return + * * 0 on success, and populate MTU value in *mtu_len* pointer. + * + * * < 0 if any input argument is invalid (*mtu_len* not updated) + * + * MTU violations return positive values, but also populate MTU + * value in *mtu_len* pointer, as this can be needed for + * implementing PMTU handling: + * + * * **BPF_MTU_CHK_RET_FRAG_NEEDED** + * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** + * + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0.
+ * + * The following is a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** returns 0, the helper will continue to the next + * element. If return value is 1, the helper will skip the rest of + * the elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. + * + * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len) + * Description + * Outputs a string into the **str** buffer of size **str_size** + * based on a format string stored in a read-only map pointed to by + * **fmt**. + * + * Each format specifier in **fmt** corresponds to one u64 element + * in the **data** array. For strings and pointers where pointees + * are accessed, only the pointer values are stored in the *data* + * array. The *data_len* is the size of *data* in bytes - must be + * a multiple of 8. + * + * Formats **%s** and **%p{i,I}{4,6}** require reading kernel + * memory. Reading kernel memory may fail due to either invalid + * address or valid address but requiring a major memory fault. If + * reading kernel memory fails, the string for **%s** will be an + * empty string, and the ip address for **%p{i,I}{4,6}** will be 0. + * Not returning error to bpf program is consistent with what + * **bpf_trace_printk**\ () does for now. + * + * Return + * The strictly positive length of the formatted string, including + * the trailing zero character.
If the return value is greater than + * **str_size**, **str** contains a truncated string, guaranteed to + * be zero-terminated except when **str_size** is 0. + * + * Or **-EBUSY** if the per-CPU memory copy buffer is busy. + * + * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) + * Description + * Execute bpf syscall with given arguments. + * Return + * A syscall result. + * + * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) + * Description + * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. + * Return + * Returns btf_id and btf_obj_fd in lower and upper 32 bits. + * + * long bpf_sys_close(u32 fd) + * Description + * Execute close syscall for given FD. + * Return + * A syscall result. + * + * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags) + * Description + * Initialize the timer. + * First 4 bits of *flags* specify clockid. + * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed. + * All other bits of *flags* are reserved. + * The verifier will reject the program if *timer* is not from + * the same *map*. + * Return + * 0 on success. + * **-EBUSY** if *timer* is already initialized. + * **-EINVAL** if invalid *flags* are passed. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn) + * Description + * Configure the timer to call *callback_fn* static function. + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EPERM** if *timer* is in a map that doesn't have any user references. + * The user space should either hold a file descriptor to a map with timers + * or pin such map in bpffs. 
When map is unpinned or file descriptor is + * closed all timers in the map will be cancelled and freed. + * + * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags) + * Description + * Set timer expiration N nanoseconds from the current time. The + * configured callback will be invoked in soft irq context on some cpu + * and will not repeat unless another bpf_timer_start() is made. + * In such case the next invocation can migrate to a different cpu. + * Since struct bpf_timer is a field inside map element the map + * owns the timer. The bpf_timer_set_callback() will increment refcnt + * of BPF program to make sure that callback_fn code stays valid. + * When user space reference to a map reaches zero all timers + * in a map are cancelled and corresponding program's refcnts are + * decremented. This is done to make sure that Ctrl-C of a user + * process doesn't leave any timers running. If map is pinned in + * bpffs the callback_fn can re-arm itself indefinitely. + * bpf_map_update/delete_elem() helpers and user space sys_bpf commands + * cancel and free the timer in the given map element. + * The map can contain timers that invoke callback_fn-s from different + * programs. The same callback_fn can serve different timers from + * different maps if key/value layout matches across maps. + * Every bpf_timer_set_callback() can have different callback_fn. + * + * *flags* can be one of: + * + * **BPF_F_TIMER_ABS** + * Start the timer in absolute expire value instead of the + * default relative one. + * **BPF_F_TIMER_CPU_PIN** + * Timer will be pinned to the CPU of the caller. + * + * Return + * 0 on success. + * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier + * or invalid *flags* are passed. + * + * long bpf_timer_cancel(struct bpf_timer *timer) + * Description + * Cancel the timer and wait for callback_fn to finish if it was running. + * Return + * 0 if the timer was not active. + * 1 if the timer was active. 
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier. + * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its + * own timer which would have led to a deadlock otherwise. + * + * u64 bpf_get_func_ip(void *ctx) + * Description + * Get address of the traced function (for tracing and kprobe programs). + * + * When called for kprobe program attached as uprobe it returns + * probe address for both entry and return uprobe. + * + * Return + * Address of the traced function for kprobe. + * 0 for kprobes placed within the function (not at the entry). + * Address of the probe for uprobe and return uprobe. + * + * u64 bpf_get_attach_cookie(void *ctx) + * Description + * Get bpf_cookie value provided (optionally) during the program + * attachment. It might be different for each individual + * attachment, even if BPF program itself is the same. + * Expects BPF program context *ctx* as a first argument. + * + * Supported for the following program types: + * - kprobe/uprobe; + * - tracepoint; + * - perf_event. + * Return + * Value specified by user at BPF link creation/attachment time + * or 0, if it was not specified. + * + * long bpf_task_pt_regs(struct task_struct *task) + * Description + * Get the struct pt_regs associated with **task**. + * Return + * A pointer to struct pt_regs. + * + * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags) + * Description + * Get branch trace from hardware engines like Intel LBR. The + * hardware engine is stopped shortly after the helper is + * called. Therefore, the user needs to filter branch entries + * based on the actual use case. To capture branch trace + * before the trigger point of the BPF program, the helper + * should be called at the beginning of the BPF program. + * + * The data is stored as struct perf_branch_entry into output + * buffer *entries*. *size* is the size of *entries* in bytes. + * *flags* is reserved for now and must be zero.
+ * + * Return + * On success, number of bytes written to *entries*. On error, a + * negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-ENOENT** if architecture does not support branch records. + * + * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * Description + * Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64 + * to format and can handle more format args as a result. + * + * Arguments are to be used as in **bpf_seq_printf**\ () helper. + * Return + * The number of bytes written to the buffer, or a negative error + * in case of failure. + * + * struct unix_sock *bpf_skc_to_unix_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *unix_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) + * Description + * Get the address of a kernel symbol, returned in *res*. *res* is + * set to 0 if the symbol is not found. + * Return + * On success, zero. On error, a negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-EINVAL** if string *name* is not the same size as *name_sz*. + * + * **-ENOENT** if symbol is not found. + * + * **-EPERM** if caller does not have permission to obtain kernel address. + * + * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * Find vma of *task* that contains *addr*, call *callback_fn* + * function with *task*, *vma*, and *callback_ctx*. + * The *callback_fn* should be a static function and + * the *callback_ctx* should be a pointer to the stack. + * The *flags* is used to control certain aspects of the helper. + * Currently, the *flags* must be 0. + * + * The expected callback signature is + * + * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); + * + * Return + * 0 on success.
+ * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. + * **-EBUSY** if failed to try lock mmap_lock. + * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. + * + * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) + * Description + * Do strncmp() between **s1** and **s2**. **s1** doesn't need + * to be null-terminated and **s1_sz** is the maximum storage + * size of **s1**. **s2** must be a read-only string. + * Return + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. + * + * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) + * Description + * Get **n**-th argument register (zero based) of the traced function (for tracing programs) + * returned in **value**. + * + * Return + * 0 on success. + * **-EINVAL** if n >= argument register count of traced function. 
+ * + * long bpf_get_func_ret(void *ctx, u64 *value) + * Description + * Get return value of the traced function (for tracing programs) + * in **value**. + * + * Return + * 0 on success. + * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. + * + * long bpf_get_func_arg_cnt(void *ctx) + * Description + * Get number of registers of the traced function (for tracing programs) where + * function arguments are stored in these registers. + * + * Return + * The number of argument registers of the traced function. + * + * int bpf_get_retval(void) + * Description + * Get the BPF program's return value that will be returned to the upper layers. + * + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. + * Return + * The BPF program's return value. + * + * int bpf_set_retval(int retval) + * Description + * Set the BPF program's return value that will be returned to the upper layers. + * + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. + * + * Note that there is the following corner case where the program exports an error + * via bpf_set_retval but signals success via 'return 1': + * + * bpf_set_retval(-EPERM); + * return 1; + * + * In this case, the BPF program's return value will use helper's -EPERM. This + * still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case. + * + * Return + * 0 on success, or a negative error in case of failure. + * + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) + * Description + * Get the total size of a given xdp buff (linear and paged area) + * Return + * The total size of a given xdp buffer. 
+ * + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * This helper is provided as an easy way to load data from a + * xdp buffer. It can be used to load *len* bytes from *offset* from + * the frame associated to *xdp_md*, into the buffer pointed by + * *buf*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) + * Description + * Store *len* bytes from buffer *buf* into the frame + * associated to *xdp_md*, at *offset*. + * Return + * 0 on success, or a negative error in case of failure. + * + * long bpf_copy_from_user_task(void *dst, u32 size, const void *user_ptr, struct task_struct *tsk, u64 flags) + * Description + * Read *size* bytes from user space address *user_ptr* in *tsk*'s + * address space, and stores the data in *dst*. *flags* is not + * used yet and is provided for future extensibility. This helper + * can only be used by sleepable programs. + * Return + * 0 on success, or a negative error in case of failure. On error + * *dst* buffer is zeroed out. + * + * long bpf_skb_set_tstamp(struct sk_buff *skb, u64 tstamp, u32 tstamp_type) + * Description + * Change the __sk_buff->tstamp_type to *tstamp_type* + * and set *tstamp* to the __sk_buff->tstamp together. + * + * If there is no need to change the __sk_buff->tstamp_type, + * the tstamp value can be directly written to __sk_buff->tstamp + * instead. + * + * BPF_SKB_TSTAMP_DELIVERY_MONO is the only tstamp that + * will be kept during bpf_redirect_*(). A non zero + * *tstamp* must be used with the BPF_SKB_TSTAMP_DELIVERY_MONO + * *tstamp_type*. + * + * A BPF_SKB_TSTAMP_UNSPEC *tstamp_type* can only be used + * with a zero *tstamp*. + * + * Only IPv4 and IPv6 skb->protocol are supported. + * + * This function is most useful when it needs to set a + * mono delivery time to __sk_buff->tstamp and then + * bpf_redirect_*() to the egress of an iface. 
For example, + * changing the (rcv) timestamp in __sk_buff->tstamp at + * ingress to a mono delivery time and then bpf_redirect_*() + * to sch_fq@phy-dev. + * Return + * 0 on success. + * **-EINVAL** for invalid input + * **-EOPNOTSUPP** for unsupported protocol + * + * long bpf_ima_file_hash(struct file *file, void *dst, u32 size) + * Description + * Returns a calculated IMA hash of the *file*. + * If the hash is larger than *size*, then only *size* + * bytes will be copied to *dst* + * Return + * The **hash_algo** is returned on success, + * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if + * invalid arguments are passed. + * + * void *bpf_kptr_xchg(void *map_value, void *ptr) + * Description + * Exchange kptr at pointer *map_value* with *ptr*, and return the + * old value. *ptr* can be NULL, otherwise it must be a referenced + * pointer which will be released when this helper is called. + * Return + * The old value of kptr (which can be NULL). The returned pointer + * if not NULL, is a reference which must be released using its + * corresponding release function, or moved into a BPF map before + * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. 
+ * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *ptr*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if + * *flags* is not 0. + * + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * + * *flags* must be 0 except for skb-type dynptrs.
+ * + * For skb-type dynptrs: + * * All data slices of the dynptr are automatically + * invalidated after **bpf_dynptr_write**\ (). This is + * because writing may pull the skb and change the + * underlying packet buffer. + * + * * For *flags*, please see the flags accepted by + * **bpf_skb_store_bytes**\ (). + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, + * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). + * + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * + * skb and xdp type dynptrs may not use bpf_dynptr_data. They should + * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * are out of bounds. + * + * s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv4/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie, + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid.
+ * + * s64 bpf_tcp_raw_gen_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IPv6/TCP headers, *iph* and *th*, without depending on a + * listening socket. + * + * *iph* points to the IPv6 header. + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header (at least + * **sizeof**\ (**struct tcphdr**)). + * Return + * On success, lower 32 bits hold the generated SYN cookie, + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if *th_len* is invalid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * long bpf_tcp_raw_check_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th) + * Description + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. + * + * *iph* points to the IPv4 header. + * + * *th* points to the TCP header. + * Return + * 0 if *iph* and *th* are a valid SYN cookie ACK. + * + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. * - * u64 bpf_sk_cgroup_id(struct bpf_sock *sk) + * long bpf_tcp_raw_check_syncookie_ipv6(struct ipv6hdr *iph, struct tcphdr *th) * Description - * Return the cgroup v2 id of the socket *sk*. + * Check whether *iph* and *th* contain a valid SYN cookie ACK + * without depending on a listening socket. * - * *sk* must be a non-**NULL** pointer to a full socket, e.g. one - * returned from **bpf_sk_lookup_xxx**\ (), - * **bpf_sk_fullsock**\ (), etc. The format of returned id is - * same as in **bpf_skb_cgroup_id**\ (). + * *iph* points to the IPv6 header. * - * This helper is available only if the kernel was compiled with - * the **CONFIG_SOCK_CGROUP_DATA** configuration option. + * *th* points to the TCP header.
* Return - * The id is returned or 0 in case the id could not be retrieved. + * 0 if *iph* and *th* are a valid SYN cookie ACK. * - * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level) + * On failure, the returned value is one of the following: + * + * **-EACCES** if the SYN cookie is not valid. + * + * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * u64 bpf_ktime_get_tai_ns(void) * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *sk* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *sk*, then return value will be same as that - * of **bpf_sk_cgroup_id**\ (). + * A nonsettable system-wide clock derived from wall-clock time but + * ignoring leap seconds. This clock does not experience + * discontinuities and backwards jumps caused by NTP inserting leap + * seconds as CLOCK_REALTIME does. * - * The helper is useful to implement policies based on cgroups - * that are upper in hierarchy than immediate cgroup associated - * with *sk*. + * See: **clock_gettime**\ (**CLOCK_TAI**) + * Return + * Current *ktime*. * - * The format of returned id and helper limitations are same as in - * **bpf_sk_cgroup_id**\ (). + * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) + * Description + * Drain samples from the specified user ring buffer, and invoke + * the provided callback for each such sample: + * + * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx); + * + * If **callback_fn** returns 0, the helper will continue to try + * and drain the next sample, up to a maximum of + * BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1, + * the helper will skip the rest of the samples and return. Other + * return values are not used now, and will be rejected by the + * verifier. 
* Return - * The id is returned or 0 in case the id could not be retrieved. + * The number of drained samples if no error was encountered while + * draining samples, or 0 if no samples were present in the ring + * buffer. If a user-space producer was epoll-waiting on this map, + * and at least one sample was drained, they will receive an event + * notification notifying them of available space in the ring + * buffer. If the BPF_RB_NO_WAKEUP flag is passed to this + * function, no wakeup notification will be sent. If the + * BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will + * be sent even if no sample was drained. * - * long bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) - * Description - * Copy *size* bytes from *data* into a ring buffer *ringbuf*. - * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification - * of new data availability is sent. - * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification - * of new data availability is sent unconditionally. - * Return - * 0 on success, or a negative error in case of failure. + * On failure, the returned value is one of the following: * - * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags) - * Description - * Reserve *size* bytes of payload in a ring buffer *ringbuf*. - * Return - * Valid pointer with *size* bytes of memory available; NULL, - * otherwise. + * **-EBUSY** if the ring buffer is contended, and another calling + * context was concurrently draining the ring buffer. * - * void bpf_ringbuf_submit(void *data, u64 flags) - * Description - * Submit reserved ring buffer sample, pointed to by *data*. - * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification - * of new data availability is sent. - * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification - * of new data availability is sent unconditionally. - * Return - * Nothing. Always succeeds. 
+ * **-EINVAL** if user-space is not properly tracking the ring + * buffer due to the producer position not being aligned to 8 + * bytes, a sample not being aligned to 8 bytes, or the producer + * position not matching the advertised length of a sample. * - * void bpf_ringbuf_discard(void *data, u64 flags) - * Description - * Discard reserved ring buffer sample, pointed to by *data*. - * If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification - * of new data availability is sent. - * If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification - * of new data availability is sent unconditionally. - * Return - * Nothing. Always succeeds. + * **-E2BIG** if user-space has tried to publish a sample which is + * larger than the size of the ring buffer, or which cannot fit + * within a struct bpf_dynptr. * - * u64 bpf_ringbuf_query(void *ringbuf, u64 flags) + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) * Description - * Query various characteristics of provided ring buffer. What - * exactly is queries is determined by *flags*: - * - * * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed. - * * **BPF_RB_RING_SIZE**: The size of ring buffer. - * * **BPF_RB_CONS_POS**: Consumer position (can wrap around). - * * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around). + * Get a bpf_local_storage from the *cgroup*. * - * Data returned is just a momentary snapshot of actual values - * and could be inaccurate, so this facility should be used to - * power heuristics and for reporting, not to make 100% correct - * calculation. + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. 
+ * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. * Return - * Requested value, or 0, if *flags* are not recognized. + * A bpf_local_storage pointer is returned on success. * - * int bpf_csum_level(struct sk_buff *skb, u64 level) - * Description - * Change the skbs checksum level by one layer up or down, or - * reset it entirely to none in order to have the stack perform - * checksum validation. The level is applicable to the following - * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of - * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP | - * through **bpf_skb_adjust_room**\ () helper with passing in - * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call - * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since - * the UDP header is removed. Similarly, an encap of the latter - * into the former could be accompanied by a helper call to - * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the - * skb is still intended to be processed in higher layers of the - * stack instead of just egressing at tc. + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. 
* - * There are three supported level settings at this time: + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. * - * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs - * with CHECKSUM_UNNECESSARY. - * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs - * with CHECKSUM_UNNECESSARY. - * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and - * sets CHECKSUM_NONE to force checksum validation by the stack. - * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current - * skb->csum_level. - * Return - * 0 on success, or a negative error in case of failure. In the - * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level - * is returned or the error code -EACCES in case the skb is not - * subject to CHECKSUM_UNNECESSARY. + * **-ENOENT** if the bpf_local_storage cannot be found. + */ +#define ___BPF_FUNC_MAPPER(FN, ctx...) \ + FN(unspec, 0, ##ctx) \ + FN(map_lookup_elem, 1, ##ctx) \ + FN(map_update_elem, 2, ##ctx) \ + FN(map_delete_elem, 3, ##ctx) \ + FN(probe_read, 4, ##ctx) \ + FN(ktime_get_ns, 5, ##ctx) \ + FN(trace_printk, 6, ##ctx) \ + FN(get_prandom_u32, 7, ##ctx) \ + FN(get_smp_processor_id, 8, ##ctx) \ + FN(skb_store_bytes, 9, ##ctx) \ + FN(l3_csum_replace, 10, ##ctx) \ + FN(l4_csum_replace, 11, ##ctx) \ + FN(tail_call, 12, ##ctx) \ + FN(clone_redirect, 13, ##ctx) \ + FN(get_current_pid_tgid, 14, ##ctx) \ + FN(get_current_uid_gid, 15, ##ctx) \ + FN(get_current_comm, 16, ##ctx) \ + FN(get_cgroup_classid, 17, ##ctx) \ + FN(skb_vlan_push, 18, ##ctx) \ + FN(skb_vlan_pop, 19, ##ctx) \ + FN(skb_get_tunnel_key, 20, ##ctx) \ + FN(skb_set_tunnel_key, 21, ##ctx) \ + FN(perf_event_read, 22, ##ctx) \ + FN(redirect, 23, ##ctx) \ + FN(get_route_realm, 24, ##ctx) \ + FN(perf_event_output, 25, ##ctx) \ + FN(skb_load_bytes, 26, ##ctx) \ + FN(get_stackid, 27, ##ctx) \ + FN(csum_diff, 28, ##ctx) \ + FN(skb_get_tunnel_opt, 29, 
##ctx) \ + FN(skb_set_tunnel_opt, 30, ##ctx) \ + FN(skb_change_proto, 31, ##ctx) \ + FN(skb_change_type, 32, ##ctx) \ + FN(skb_under_cgroup, 33, ##ctx) \ + FN(get_hash_recalc, 34, ##ctx) \ + FN(get_current_task, 35, ##ctx) \ + FN(probe_write_user, 36, ##ctx) \ + FN(current_task_under_cgroup, 37, ##ctx) \ + FN(skb_change_tail, 38, ##ctx) \ + FN(skb_pull_data, 39, ##ctx) \ + FN(csum_update, 40, ##ctx) \ + FN(set_hash_invalid, 41, ##ctx) \ + FN(get_numa_node_id, 42, ##ctx) \ + FN(skb_change_head, 43, ##ctx) \ + FN(xdp_adjust_head, 44, ##ctx) \ + FN(probe_read_str, 45, ##ctx) \ + FN(get_socket_cookie, 46, ##ctx) \ + FN(get_socket_uid, 47, ##ctx) \ + FN(set_hash, 48, ##ctx) \ + FN(setsockopt, 49, ##ctx) \ + FN(skb_adjust_room, 50, ##ctx) \ + FN(redirect_map, 51, ##ctx) \ + FN(sk_redirect_map, 52, ##ctx) \ + FN(sock_map_update, 53, ##ctx) \ + FN(xdp_adjust_meta, 54, ##ctx) \ + FN(perf_event_read_value, 55, ##ctx) \ + FN(perf_prog_read_value, 56, ##ctx) \ + FN(getsockopt, 57, ##ctx) \ + FN(override_return, 58, ##ctx) \ + FN(sock_ops_cb_flags_set, 59, ##ctx) \ + FN(msg_redirect_map, 60, ##ctx) \ + FN(msg_apply_bytes, 61, ##ctx) \ + FN(msg_cork_bytes, 62, ##ctx) \ + FN(msg_pull_data, 63, ##ctx) \ + FN(bind, 64, ##ctx) \ + FN(xdp_adjust_tail, 65, ##ctx) \ + FN(skb_get_xfrm_state, 66, ##ctx) \ + FN(get_stack, 67, ##ctx) \ + FN(skb_load_bytes_relative, 68, ##ctx) \ + FN(fib_lookup, 69, ##ctx) \ + FN(sock_hash_update, 70, ##ctx) \ + FN(msg_redirect_hash, 71, ##ctx) \ + FN(sk_redirect_hash, 72, ##ctx) \ + FN(lwt_push_encap, 73, ##ctx) \ + FN(lwt_seg6_store_bytes, 74, ##ctx) \ + FN(lwt_seg6_adjust_srh, 75, ##ctx) \ + FN(lwt_seg6_action, 76, ##ctx) \ + FN(rc_repeat, 77, ##ctx) \ + FN(rc_keydown, 78, ##ctx) \ + FN(skb_cgroup_id, 79, ##ctx) \ + FN(get_current_cgroup_id, 80, ##ctx) \ + FN(get_local_storage, 81, ##ctx) \ + FN(sk_select_reuseport, 82, ##ctx) \ + FN(skb_ancestor_cgroup_id, 83, ##ctx) \ + FN(sk_lookup_tcp, 84, ##ctx) \ + FN(sk_lookup_udp, 85, ##ctx) \ + FN(sk_release, 
86, ##ctx) \ + FN(map_push_elem, 87, ##ctx) \ + FN(map_pop_elem, 88, ##ctx) \ + FN(map_peek_elem, 89, ##ctx) \ + FN(msg_push_data, 90, ##ctx) \ + FN(msg_pop_data, 91, ##ctx) \ + FN(rc_pointer_rel, 92, ##ctx) \ + FN(spin_lock, 93, ##ctx) \ + FN(spin_unlock, 94, ##ctx) \ + FN(sk_fullsock, 95, ##ctx) \ + FN(tcp_sock, 96, ##ctx) \ + FN(skb_ecn_set_ce, 97, ##ctx) \ + FN(get_listener_sock, 98, ##ctx) \ + FN(skc_lookup_tcp, 99, ##ctx) \ + FN(tcp_check_syncookie, 100, ##ctx) \ + FN(sysctl_get_name, 101, ##ctx) \ + FN(sysctl_get_current_value, 102, ##ctx) \ + FN(sysctl_get_new_value, 103, ##ctx) \ + FN(sysctl_set_new_value, 104, ##ctx) \ + FN(strtol, 105, ##ctx) \ + FN(strtoul, 106, ##ctx) \ + FN(sk_storage_get, 107, ##ctx) \ + FN(sk_storage_delete, 108, ##ctx) \ + FN(send_signal, 109, ##ctx) \ + FN(tcp_gen_syncookie, 110, ##ctx) \ + FN(skb_output, 111, ##ctx) \ + FN(probe_read_user, 112, ##ctx) \ + FN(probe_read_kernel, 113, ##ctx) \ + FN(probe_read_user_str, 114, ##ctx) \ + FN(probe_read_kernel_str, 115, ##ctx) \ + FN(tcp_send_ack, 116, ##ctx) \ + FN(send_signal_thread, 117, ##ctx) \ + FN(jiffies64, 118, ##ctx) \ + FN(read_branch_records, 119, ##ctx) \ + FN(get_ns_current_pid_tgid, 120, ##ctx) \ + FN(xdp_output, 121, ##ctx) \ + FN(get_netns_cookie, 122, ##ctx) \ + FN(get_current_ancestor_cgroup_id, 123, ##ctx) \ + FN(sk_assign, 124, ##ctx) \ + FN(ktime_get_boot_ns, 125, ##ctx) \ + FN(seq_printf, 126, ##ctx) \ + FN(seq_write, 127, ##ctx) \ + FN(sk_cgroup_id, 128, ##ctx) \ + FN(sk_ancestor_cgroup_id, 129, ##ctx) \ + FN(ringbuf_output, 130, ##ctx) \ + FN(ringbuf_reserve, 131, ##ctx) \ + FN(ringbuf_submit, 132, ##ctx) \ + FN(ringbuf_discard, 133, ##ctx) \ + FN(ringbuf_query, 134, ##ctx) \ + FN(csum_level, 135, ##ctx) \ + FN(skc_to_tcp6_sock, 136, ##ctx) \ + FN(skc_to_tcp_sock, 137, ##ctx) \ + FN(skc_to_tcp_timewait_sock, 138, ##ctx) \ + FN(skc_to_tcp_request_sock, 139, ##ctx) \ + FN(skc_to_udp6_sock, 140, ##ctx) \ + FN(get_task_stack, 141, ##ctx) \ + FN(load_hdr_opt, 142, 
##ctx) \ + FN(store_hdr_opt, 143, ##ctx) \ + FN(reserve_hdr_opt, 144, ##ctx) \ + FN(inode_storage_get, 145, ##ctx) \ + FN(inode_storage_delete, 146, ##ctx) \ + FN(d_path, 147, ##ctx) \ + FN(copy_from_user, 148, ##ctx) \ + FN(snprintf_btf, 149, ##ctx) \ + FN(seq_printf_btf, 150, ##ctx) \ + FN(skb_cgroup_classid, 151, ##ctx) \ + FN(redirect_neigh, 152, ##ctx) \ + FN(per_cpu_ptr, 153, ##ctx) \ + FN(this_cpu_ptr, 154, ##ctx) \ + FN(redirect_peer, 155, ##ctx) \ + FN(task_storage_get, 156, ##ctx) \ + FN(task_storage_delete, 157, ##ctx) \ + FN(get_current_task_btf, 158, ##ctx) \ + FN(bprm_opts_set, 159, ##ctx) \ + FN(ktime_get_coarse_ns, 160, ##ctx) \ + FN(ima_inode_hash, 161, ##ctx) \ + FN(sock_from_file, 162, ##ctx) \ + FN(check_mtu, 163, ##ctx) \ + FN(for_each_map_elem, 164, ##ctx) \ + FN(snprintf, 165, ##ctx) \ + FN(sys_bpf, 166, ##ctx) \ + FN(btf_find_by_name_kind, 167, ##ctx) \ + FN(sys_close, 168, ##ctx) \ + FN(timer_init, 169, ##ctx) \ + FN(timer_set_callback, 170, ##ctx) \ + FN(timer_start, 171, ##ctx) \ + FN(timer_cancel, 172, ##ctx) \ + FN(get_func_ip, 173, ##ctx) \ + FN(get_attach_cookie, 174, ##ctx) \ + FN(task_pt_regs, 175, ##ctx) \ + FN(get_branch_snapshot, 176, ##ctx) \ + FN(trace_vprintk, 177, ##ctx) \ + FN(skc_to_unix_sock, 178, ##ctx) \ + FN(kallsyms_lookup_name, 179, ##ctx) \ + FN(find_vma, 180, ##ctx) \ + FN(loop, 181, ##ctx) \ + FN(strncmp, 182, ##ctx) \ + FN(get_func_arg, 183, ##ctx) \ + FN(get_func_ret, 184, ##ctx) \ + FN(get_func_arg_cnt, 185, ##ctx) \ + FN(get_retval, 186, ##ctx) \ + FN(set_retval, 187, ##ctx) \ + FN(xdp_get_buff_len, 188, ##ctx) \ + FN(xdp_load_bytes, 189, ##ctx) \ + FN(xdp_store_bytes, 190, ##ctx) \ + FN(copy_from_user_task, 191, ##ctx) \ + FN(skb_set_tstamp, 192, ##ctx) \ + FN(ima_file_hash, 193, ##ctx) \ + FN(kptr_xchg, 194, ##ctx) \ + FN(map_lookup_percpu_elem, 195, ##ctx) \ + FN(skc_to_mptcp_sock, 196, ##ctx) \ + FN(dynptr_from_mem, 197, ##ctx) \ + FN(ringbuf_reserve_dynptr, 198, ##ctx) \ + FN(ringbuf_submit_dynptr, 199, 
##ctx) \ + FN(ringbuf_discard_dynptr, 200, ##ctx) \ + FN(dynptr_read, 201, ##ctx) \ + FN(dynptr_write, 202, ##ctx) \ + FN(dynptr_data, 203, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ + FN(ktime_get_tai_ns, 208, ##ctx) \ + FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ + /* */ + +/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't + * know or care about integer value that is now passed as second argument */ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - 
FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), \ - FN(tcp_send_ack), \ - FN(send_signal_thread), \ - FN(jiffies64), \ - FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), \ - FN(xdp_output), \ - FN(get_netns_cookie), \ - FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), \ - FN(ktime_get_boot_ns), \ - FN(seq_printf), \ - FN(seq_write), \ - FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), \ - FN(ringbuf_output), \ - FN(ringbuf_reserve), \ - FN(ringbuf_submit), \ - 
FN(ringbuf_discard), \ - FN(ringbuf_query), \ - FN(csum_level), +#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name), +#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN) /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +#define __BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y, enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + ___BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN @@ -3450,6 +6070,12 @@ enum { BPF_F_ZERO_CSUM_TX = (1ULL << 1), BPF_F_DONT_FRAGMENT = (1ULL << 2), BPF_F_SEQ_NUMBER = (1ULL << 3), + BPF_F_NO_TUNNEL_KEY = (1ULL << 4), +}; + +/* BPF_FUNC_skb_get_tunnel_key flags. */ +enum { + BPF_F_TUNINFO_FLAGS = (1ULL << 4), }; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and @@ -3483,6 +6109,9 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), + BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7), + BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8), }; enum { @@ -3499,9 +6128,13 @@ enum { BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), }; -/* BPF_FUNC_sk_storage_get flags */ +/* BPF_FUNC__storage_get flags */ enum { - BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), + BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0), + /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility + * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead. + */ + BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE, }; /* BPF_FUNC_read_branch_records flags. */ @@ -3532,6 +6165,12 @@ enum { BPF_RINGBUF_HDR_SZ = 8, }; +/* BPF_FUNC_sk_assign flags in bpf_sk_lookup context. */ +enum { + BPF_SK_LOOKUP_F_REPLACE = (1ULL << 0), + BPF_SK_LOOKUP_F_NO_REUSEPORT = (1ULL << 1), +}; + /* Mode for BPF_FUNC_skb_adjust_room helper. 
*/ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -3551,12 +6190,32 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_IP, }; +/* Flags for bpf_bprm_opts_set helper */ +enum { + BPF_F_BPRM_SECUREEXEC = (1ULL << 0), +}; + +/* Flags for bpf_redirect_map helper */ +enum { + BPF_F_BROADCAST = (1ULL << 3), + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ __u64 :64; \ } __attribute__((aligned(8))) +enum { + BPF_SKB_TSTAMP_UNSPEC, + BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ + /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, + * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC + * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + */ +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -3597,6 +6256,9 @@ struct __sk_buff { __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); __u32 gso_size; + __u8 tstamp_type; + __u32 :24; /* Padding, future use. */ + __u64 hwtstamp; }; struct bpf_tunnel_key { @@ -3607,8 +6269,15 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; - __u16 tunnel_ext; /* Padding, future use. */ + union { + __u16 tunnel_ext; /* compat */ + __be16 tunnel_flags; + }; __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. @@ -3647,6 +6316,11 @@ enum bpf_ret_code { * represented by BPF_REDIRECT above). */ BPF_LWT_REROUTE = 128, + /* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR + * to indicate that no custom dissection was performed, and + * fallback to standard dissector is requested. 
+ */ + BPF_FLOW_DISSECTOR_CONTINUE = 129, }; struct bpf_sock { @@ -3660,7 +6334,8 @@ struct bpf_sock { __u32 src_ip4; __u32 src_ip6[4]; __u32 src_port; /* host byte order */ - __u32 dst_port; /* network byte order */ + __be16 dst_port; /* network byte order */ + __u16 :16; /* zero padding */ __u32 dst_ip4; __u32 dst_ip6[4]; __u32 state; @@ -3729,6 +6404,19 @@ struct bpf_sock_tuple { }; }; +/* (Simplified) user return codes for tcx prog type. + * A valid tcx program must return one of these defined values. All other + * return codes are reserved for future use. Must remain compatible with + * their TC_ACT_* counter-parts. For compatibility in behavior, unknown + * return codes are mapped to TCX_NEXT. + */ +enum tcx_action_base { + TCX_NEXT = -1, + TCX_PASS = 0, + TCX_DROP = 2, + TCX_REDIRECT = 7, +}; + struct bpf_xdp_sock { __u32 queue_id; }; @@ -3775,6 +6463,19 @@ struct bpf_devmap_val { } bpf_prog; }; +/* CPUMAP map-value layout + * + * The struct data-layout of map-value is a configuration interface. + * New members can only be added to the end of this structure. + */ +struct bpf_cpumap_val { + __u32 qsize; /* queue size to remote target CPU */ + union { + int fd; /* prog fd on map write */ + __u32 id; /* prog id on map read */ + } bpf_prog; +}; + enum sk_action { SK_DROP = 0, SK_PASS, @@ -3822,6 +6523,20 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + /* When reuse->migrating_sk is NULL, it is selecting a sk for the + * new incoming connection request (e.g. selecting a listen sk for + * the received SYN in the TCP case). reuse->sk is one of the sk + * in the reuseport group. The bpf prog can use reuse->sk to learn + * the local listening ip/port without looking into the skb. 
+ * + * When reuse->migrating_sk is not NULL, reuse->sk is closed and + * reuse->migrating_sk is the socket that needs to be migrated + * to another listening socket. migrating_sk could be a fullsock + * sk that is fully established or a reqsk that is in-the-middle + * of 3-way handshake. + */ + __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(struct bpf_sock *, migrating_sk); }; #define BPF_TAG_SIZE 8 @@ -3862,6 +6577,10 @@ struct bpf_prog_info { __aligned_u64 prog_tags; __u64 run_time_ns; __u64 run_cnt; + __u64 recursion_misses; + __u32 verified_insns; + __u32 attach_btf_obj_id; + __u32 attach_btf_id; } __attribute__((aligned(8))); struct bpf_map_info { @@ -3879,12 +6598,17 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 btf_vmlinux_id; + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { __aligned_u64 btf; __u32 btf_size; __u32 id; + __aligned_u64 name; + __u32 name_len; + __u32 kernel_btf; } __attribute__((aligned(8))); struct bpf_link_info { @@ -3898,21 +6622,120 @@ struct bpf_link_info { } raw_tracepoint; struct { __u32 attach_type; + __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ + __u32 target_btf_id; /* BTF type id inside the object */ } tracing; struct { __u64 cgroup_id; __u32 attach_type; } cgroup; + struct { + __aligned_u64 target_name; /* in/out: target_name buffer ptr */ + __u32 target_name_len; /* in/out: target_name buffer len */ + + /* If the iter specific field is 32 bits, it can be put + * in the first or second union. Otherwise it should be + * put in the second union. 
+ */ + union { + struct { + __u32 map_id; + } map; + }; + union { + struct { + __u64 cgroup_id; + __u32 order; + } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; + }; + } iter; struct { __u32 netns_ino; __u32 attach_type; } netns; + struct { + __u32 ifindex; + } xdp; + struct { + __u32 map_id; + } struct_ops; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; + struct { + __aligned_u64 addrs; + __u32 count; /* in/out: kprobe_multi function count */ + __u32 flags; + __u64 missed; + __aligned_u64 cookies; + } kprobe_multi; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 path_size; /* in/out: real path size on success, including zero byte */ + __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */ + __u32 flags; + __u32 pid; + } uprobe_multi; + struct { + __u32 type; /* enum bpf_perf_event_type */ + __u32 :32; + union { + struct { + __aligned_u64 file_name; /* in/out */ + __u32 name_len; + __u32 offset; /* offset from file_name */ + __u64 cookie; + } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ + struct { + __aligned_u64 func_name; /* in/out */ + __u32 name_len; + __u32 offset; /* offset from func_name */ + __u64 addr; + __u64 missed; + __u64 cookie; + } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ + struct { + __aligned_u64 tp_name; /* in/out */ + __u32 name_len; + __u32 :32; + __u64 cookie; + } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */ + struct { + __u64 config; + __u32 type; + __u32 :32; + __u64 cookie; + } event; /* BPF_PERF_EVENT_EVENT */ + }; + } perf_event; + struct { + __u32 ifindex; + __u32 attach_type; + } tcx; + struct { + __u32 ifindex; + __u32 attach_type; + } netkit; + struct { + __u32 map_id; + __u32 attach_type; + } sockmap; }; } __attribute__((aligned(8))); /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by 
socket (e.g. to bind to, depends on - * attach attach type). + * attach type). */ struct bpf_sock_addr { __u32 user_family; /* Allows 4-byte read, but no write. */ @@ -3987,6 +6810,37 @@ struct bpf_sock_ops { __u64 bytes_received; __u64 bytes_acked; __bpf_md_ptr(struct bpf_sock *, sk); + /* [skb_data, skb_data_end) covers the whole TCP header. + * + * BPF_SOCK_OPS_PARSE_HDR_OPT_CB: The packet received + * BPF_SOCK_OPS_HDR_OPT_LEN_CB: Not useful because the + * header has not been written. + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB: The header and options have + * been written so far. + * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: The SYNACK that concludes + * the 3WHS. + * BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: The ACK that concludes + * the 3WHS. + * + * bpf_load_hdr_opt() can also be used to read a particular option. + */ + __bpf_md_ptr(void *, skb_data); + __bpf_md_ptr(void *, skb_data_end); + __u32 skb_len; /* The total length of a packet. + * It includes the header, options, + * and payload. + */ + __u32 skb_tcp_flags; /* tcp_flags of the header. It provides + * an easy way to check for tcp_flags + * without parsing skb_data. + * + * In particular, the skb_tcp_flags + * will still be available in + * BPF_SOCK_OPS_HDR_OPT_LEN even though + * the outgoing header has not + * been written yet. + */ + __u64 skb_hwtstamp; }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -3995,8 +6849,51 @@ enum { BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), + /* Call bpf for all received TCP headers. The bpf prog will be + * called under sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + * + * It could be used at the client/active side (i.e. 
connect() side) + * when the server told it that the server was in syncookie + * mode and required the active side to resend the bpf-written + * options. The active side can keep writing the bpf-options until + * it received a valid packet from the server side to confirm + * the earlier packet (and options) has been received. The later + * example patch is using it like this at the active side when the + * server is in syncookie mode. + * + * The bpf prog will usually turn this off in the common cases. + */ + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG = (1<<4), + /* Call bpf when kernel has received a header option that + * the kernel cannot handle. The bpf prog will be called under + * sock_ops->op == BPF_SOCK_OPS_PARSE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_PARSE_HDR_OPT_CB + * for the header option related helpers that will be useful + * to the bpf programs. + */ + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG = (1<<5), + /* Call bpf when the kernel is writing header options for the + * outgoing packet. The bpf prog will first be called + * to reserve space in a skb under + * sock_ops->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB. Then + * the bpf prog will be called to write the header option(s) + * under sock_ops->op == BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * + * Please refer to the comment in BPF_SOCK_OPS_HDR_OPT_LEN_CB + * and BPF_SOCK_OPS_WRITE_HDR_OPT_CB for the header option + * related helpers that will be useful to the bpf programs. + * + * The kernel gets its chance to reserve space and write + * options first before the BPF program does. + */ + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG = (1<<6), /* Mask of all currently supported cb flags */ - BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, + BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; /* List of known BPF sock_ops operators. @@ -4051,6 +6948,65 @@ enum { * socket transition to LISTEN state. */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. 
+ * Arg1: measured RTT input (mrtt) + * Arg2: updated srtt + */ + BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. + * It will be called to handle + * the packets received at + * an already established + * connection. + * + * sock_ops->skb_data: + * Referring to the received skb. + * It covers the TCP header only. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option. + */ + BPF_SOCK_OPS_HDR_OPT_LEN_CB, /* Reserve space for writing the + * header option later in + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Not available because no header has + * been written yet. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the + * outgoing skb. (e.g. SYN, ACK, FIN). + * + * bpf_reserve_hdr_opt() should + * be used to reserve space. + */ + BPF_SOCK_OPS_WRITE_HDR_OPT_CB, /* Write the header options + * Arg1: bool want_cookie. (in + * writing SYNACK only) + * + * sock_ops->skb_data: + * Referring to the outgoing skb. + * It covers the TCP header + * that has already been written + * by the kernel and the + * earlier bpf-progs. + * + * sock_ops->skb_tcp_flags: + * The tcp_flags of the outgoing + * skb. (e.g. SYN, ACK, FIN). + * + * bpf_store_hdr_opt() should + * be used to write the + * option. + * + * bpf_load_hdr_opt() can also + * be used to search for a + * particular option that + * has already been written + * by the kernel or the + * earlier bpf-progs. */ }; @@ -4072,6 +7028,7 @@ enum { BPF_TCP_LISTEN, BPF_TCP_CLOSING, /* Now a valid state */ BPF_TCP_NEW_SYN_RECV, + BPF_TCP_BOUND_INACTIVE, BPF_TCP_MAX_STATES /* Leave at the end! 
*/ }; @@ -4079,6 +7036,63 @@ enum { enum { TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ + TCP_BPF_DELACK_MAX = 1003, /* Max delay ack in usecs */ + TCP_BPF_RTO_MIN = 1004, /* Min delay ack in usecs */ + /* Copy the SYN pkt to optval + * + * BPF_PROG_TYPE_SOCK_OPS only. It is similar to the + * bpf_getsockopt(TCP_SAVED_SYN) but it does not limit + * to only getting from the saved_syn. It can either get the + * syn packet from: + * + * 1. the just-received SYN packet (only available when writing the + * SYNACK). It will be useful when it is not necessary to + * save the SYN packet for latter use. It is also the only way + * to get the SYN during syncookie mode because the syn + * packet cannot be saved during syncookie. + * + * OR + * + * 2. the earlier saved syn which was done by + * bpf_setsockopt(TCP_SAVE_SYN). + * + * The bpf_getsockopt(TCP_BPF_SYN*) option will hide where the + * SYN packet is obtained. + * + * If the bpf-prog does not need the IP[46] header, the + * bpf-prog can avoid parsing the IP header by using + * TCP_BPF_SYN. Otherwise, the bpf-prog can get both + * IP[46] and TCP header by using TCP_BPF_SYN_IP. + * + * >0: Total number of bytes copied + * -ENOSPC: Not enough space in optval. Only optlen number of + * bytes is copied. + * -ENOENT: The SYN skb is not available now and the earlier SYN pkt + * is not saved by setsockopt(TCP_SAVE_SYN). + */ + TCP_BPF_SYN = 1005, /* Copy the TCP header */ + TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ + TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ +}; + +enum { + BPF_LOAD_HDR_OPT_TCP_SYN = (1ULL << 0), +}; + +/* args[0] value during BPF_SOCK_OPS_HDR_OPT_LEN_CB and + * BPF_SOCK_OPS_WRITE_HDR_OPT_CB. + */ +enum { + BPF_WRITE_HDR_TCP_CURRENT_MSS = 1, /* Kernel is finding the + * total option spaces + * required for an established + * sk in order to calculate the + * MSS. No skb is actually + * sent. 
+ */ + BPF_WRITE_HDR_TCP_SYNACK_COOKIE = 2, /* Kernel is in syncookie mode + * when sending a SYN. + */ }; struct bpf_perf_event_value { @@ -4115,6 +7129,10 @@ struct bpf_raw_tracepoint_args { enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), + BPF_FIB_LOOKUP_SRC = (1U << 4), + BPF_FIB_LOOKUP_MARK = (1U << 5), }; enum { @@ -4127,6 +7145,7 @@ enum { BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */ }; struct bpf_fib_lookup { @@ -4140,9 +7159,13 @@ struct bpf_fib_lookup { __be16 sport; __be16 dport; - /* total length of packet from network header - used for MTU check */ - __u16 tot_len; + union { /* used for MTU check */ + /* input to lookup */ + __u16 tot_len; /* L3 length from network hdr (iph->tot_len) */ + /* output: MTU value */ + __u16 mtu_result; + } __attribute__((packed, aligned(2))); /* input: L3 device index for lookup * output: device index from FIB lookup */ @@ -4157,6 +7180,9 @@ struct bpf_fib_lookup { __u32 rt_metric; }; + /* input: source address to consider for lookup + * output: source address result from lookup + */ union { __be32 ipv4_src; __u32 ipv6_src[4]; /* in6_addr; network order */ @@ -4171,11 +7197,53 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; - __u8 smac[6]; /* ETH_ALEN */ - __u8 dmac[6]; /* ETH_ALEN */ + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. 
+ */ + __u32 tbid; + }; + + union { + /* input */ + struct { + __u32 mark; /* policy routing */ + /* 2 4-byte holes for input */ + }; + + /* output: source and dest mac */ + struct { + __u8 smac[6]; /* ETH_ALEN */ + __u8 dmac[6]; /* ETH_ALEN */ + }; + }; +}; + +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + +/* bpf_check_mtu flags*/ +enum bpf_check_mtu_flags { + BPF_MTU_CHK_SEGS = (1U << 0), +}; + +enum bpf_check_mtu_ret { + BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */ + BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */ + BPF_MTU_CHK_RET_SEGS_TOOBIG, /* GSO re-segmentation needed to fwd */ }; enum bpf_task_fd_type { @@ -4237,6 +7305,38 @@ struct bpf_spin_lock { __u32 val; }; +struct bpf_timer { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_wq { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_dynptr { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_list_head { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 __opaque[3]; +} __attribute__((aligned(8))); + +struct bpf_rb_root { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + +struct bpf_rb_node { + __u64 __opaque[4]; +} __attribute__((aligned(8))); + +struct bpf_refcount { + __u32 __opaque[1]; +} __attribute__((aligned(4))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. @@ -4261,4 +7361,148 @@ struct bpf_pidns_info { __u32 pid; __u32 tgid; }; -#endif /* __LINUX_BPF_H__ */ + +/* User accessible data for SK_LOOKUP programs. Add new fields at the end. 
*/ +struct bpf_sk_lookup { + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + }; + + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ + __u32 remote_ip4; /* Network byte order */ + __u32 remote_ip6[4]; /* Network byte order */ + __be16 remote_port; /* Network byte order */ + __u16 :16; /* Zero padding */ + __u32 local_ip4; /* Network byte order */ + __u32 local_ip6[4]; /* Network byte order */ + __u32 local_port; /* Host byte order */ + __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ +}; + +/* + * struct btf_ptr is used for typed pointer representation; the + * type id is used to render the pointer data as the appropriate type + * via the bpf_snprintf_btf() helper described above. A flags field - + * potentially to specify additional details about the BTF pointer + * (rather than its mode of display) - is included for future use. + * Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately. + */ +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; /* BTF ptr flags; unused at present. */ +}; + +/* + * Flags to control bpf_snprintf_btf() behaviour. + * - BTF_F_COMPACT: no formatting around type information + * - BTF_F_NONAME: no struct/union member names/types + * - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values; + * equivalent to %px. + * - BTF_F_ZERO: show zero-valued struct/union members; they + * are not displayed by default + */ +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; + +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. It is emitted by llvm and passed to + * libbpf and later to the kernel. 
+ */ +enum bpf_core_relo_kind { + BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ + BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ + BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */ +}; + +/* + * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf + * and from libbpf to the kernel. + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. 
+ * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * - kind - one of enum bpf_core_relo_kind; + * + * Example: + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int *x = &s->a; // encoded as "0:0" (a is field #0) + * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + +/* + * Flags to control bpf_timer_start() behaviour. + * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is + * relative to current time. + * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller. 
+ */ +enum { + BPF_F_TIMER_ABS = (1ULL << 0), + BPF_F_TIMER_CPU_PIN = (1ULL << 1), +}; + +/* BPF numbers iterator state */ +struct bpf_iter_num { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + +#endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/lxd/include/lxd_bpf_common.h b/lxd/include/lxd_bpf_common.h index f0fe1394971d..ee97668bdadb 100644 --- a/lxd/include/lxd_bpf_common.h +++ b/lxd/include/lxd_bpf_common.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_BPF_COMMON_H__ -#define __LINUX_BPF_COMMON_H__ +#ifndef _UAPI__LINUX_BPF_COMMON_H__ +#define _UAPI__LINUX_BPF_COMMON_H__ /* Instruction classes */ #define BPF_CLASS(code) ((code) & 0x07) @@ -54,4 +54,4 @@ #define BPF_MAXINSNS 4096 #endif -#endif /* __LINUX_BPF_COMMON_H__ */ +#endif /* _UAPI__LINUX_BPF_COMMON_H__ */ From 32a975b40141d7c07c46a6d0cb484b5538d61c1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Wed, 27 Mar 2024 17:27:59 -0400 Subject: [PATCH 087/106] lxd/device/disk: Allow hotplug of disks on live-migratable VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit e134d99072870259ea7727e1eb8b2b70cafdfed4) Signed-off-by: Mark Laing License: Apache-2.0 --- lxd/device/disk.go | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/lxd/device/disk.go b/lxd/device/disk.go index da61a882acf4..1abc3fb40172 100644 --- a/lxd/device/disk.go +++ b/lxd/device/disk.go @@ -115,18 +115,7 @@ func (d *disk) sourceIsCeph() bool { // CanHotPlug returns whether the device can be managed whilst the instance is running. func (d *disk) CanHotPlug() bool { - // Containers support hot-plugging all disk types. 
- if d.inst.Type() == instancetype.Container { - return true - } - - // Only VirtioFS works with path hotplug. - // As migration.stateful turns off VirtioFS, this also turns off hotplugging of paths. - if shared.IsTrue(d.inst.ExpandedConfig()["migration.stateful"]) { - return false - } - - // Block disks can be hot-plugged into VMs. + // All disks can be hot-plugged. return true } From 58840d407eb56e42ce822bb12b9fe235a6f2dd0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Wed, 27 Mar 2024 17:49:42 -0400 Subject: [PATCH 088/106] lxd/device/disk: Check for VM live-migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit 356c6842d99b31a4da6083287ae579fe3b73bc64) Signed-off-by: Mark Laing License: Apache-2.0 --- lxd/device/disk.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lxd/device/disk.go b/lxd/device/disk.go index 1abc3fb40172..0e72ff8d56fd 100644 --- a/lxd/device/disk.go +++ b/lxd/device/disk.go @@ -525,6 +525,25 @@ func (d *disk) validateConfig(instConf instance.ConfigReader) error { } } + // Restrict disks allowed when live-migratable. 
+ if instConf.Type() == instancetype.VM && shared.IsTrue(instConf.ExpandedConfig()["migration.stateful"]) { + if d.config["path"] != "" && d.config["path"] != "/" { + return fmt.Errorf("Shared filesystem are incompatible with migration.stateful=true") + } + + if d.config["pool"] == "" { + return fmt.Errorf("Only LXD-managed disks are allowed with migration.stateful=true") + } + + if d.config["io.bus"] == "nvme" { + return fmt.Errorf("NVME disks aren't supported with migration.stateful=true") + } + + if d.config["path"] != "/" && d.pool != nil && !d.pool.Driver().Info().Remote { + return fmt.Errorf("Only additional disks coming from a shared storage pool are supported with migration.stateful=true") + } + } + return nil } From 6c85dbdc57042fbf85331afff7a99f8e2f1d2256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Wed, 27 Mar 2024 17:56:09 -0400 Subject: [PATCH 089/106] lxd/instance: Add ID to ConfigReader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit 72a06a4f33a0a693533f4a5799b4e1c1029501ab) Signed-off-by: Mark Laing License: Apache-2.0 --- lxd/instance/instance_interface.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lxd/instance/instance_interface.go b/lxd/instance/instance_interface.go index cc26005da6df..8763436848d0 100644 --- a/lxd/instance/instance_interface.go +++ b/lxd/instance/instance_interface.go @@ -62,6 +62,8 @@ type ConfigReader interface { Project() api.Project Type() instancetype.Type Architecture() int + ID() int + ExpandedConfig() map[string]string ExpandedDevices() deviceConfig.Devices LocalConfig() map[string]string @@ -128,7 +130,6 @@ type Instance interface { OnHook(hookName string, args map[string]string) error // Properties. 
- ID() int Location() string Name() string CloudInitID() string From 905b8a9661f29a273401b930cf1ca4d9590b13f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Wed, 27 Mar 2024 17:56:29 -0400 Subject: [PATCH 090/106] lxd/device/disk: Allow external disk live-migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit 6499313e75f4539123be2a9b8cead15022ea6140) Signed-off-by: Mark Laing License: Apache-2.0 --- lxd/device/disk.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lxd/device/disk.go b/lxd/device/disk.go index 0e72ff8d56fd..0627a00e3a46 100644 --- a/lxd/device/disk.go +++ b/lxd/device/disk.go @@ -456,8 +456,8 @@ func (d *disk) validateConfig(instConf instance.ConfigReader) error { return fmt.Errorf("Failed checking if custom volume is exclusively attached to another instance: %w", err) } - if remoteInstance != nil { - return fmt.Errorf("Custom volume is already attached to an instance on a different node") + if remoteInstance != nil && remoteInstance.ID != instConf.ID() { + return fmt.Errorf("Custom volume is already attached to an instance on a different cluster member") } // Check that block volumes are *only* attached to VM instances. 
From c8fe61b736a9efe2b6a2b5871c01f58f8fbeb9a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Wed, 27 Mar 2024 21:34:29 -0400 Subject: [PATCH 091/106] lxd/instance/qemu: Support live-migration of instances with extra disks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit 67777f96337938374fbc208d9fa9beed5e227346) Signed-off-by: Mark Laing License: Apache-2.0 --- lxd/instance/drivers/driver_qemu.go | 90 +++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 738c12366bc9..906afb3be1a2 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -6857,6 +6857,51 @@ func (d *qemu) migrateSendLive(pool storagePools.Pool, clusterMoveSourceName str return err } + // Derive the effective storage project name from the instance config's project. + storageProjectName, err := project.StorageVolumeProject(d.state.DB.Cluster, d.project.Name, dbCluster.StoragePoolVolumeTypeCustom) + if err != nil { + return err + } + + // Notify the shared disks that they're going to be accessed from another system. + diskPools := make(map[string]storagePools.Pool, len(d.expandedDevices)) + for _, dev := range d.expandedDevices.Sorted() { + if dev.Config["type"] != "disk" || dev.Config["path"] == "/" { + continue + } + + poolName := dev.Config["pool"] + if poolName == "" { + continue + } + + diskPool, ok := diskPools[poolName] + if !ok { + // Load the pool for the disk. + diskPool, err = storagePools.LoadByName(d.state, poolName) + if err != nil { + return fmt.Errorf("Failed loading storage pool: %w", err) + } + + // Save it to the pools map to avoid loading it from the DB multiple times. + diskPools[poolName] = diskPool + } + + // Setup the volume entry. 
+ extraSourceArgs := &migration.VolumeSourceArgs{ + ClusterMove: true, + } + + vol := diskPool.GetVolume(storageDrivers.VolumeTypeCustom, storageDrivers.ContentTypeBlock, project.StorageVolume(storageProjectName, dev.Config["source"]), nil) + volCopy := storageDrivers.NewVolumeCopy(vol) + + // Call MigrateVolume on the source. + err = diskPool.Driver().MigrateVolume(volCopy, nil, extraSourceArgs, nil) + if err != nil { + return fmt.Errorf("Failed to prepare device %q for migration: %w", dev.Name, err) + } + } + // Non-shared storage snapshot transfer. if !sharedStorage { listener, err := net.Listen("unix", "") @@ -7338,6 +7383,51 @@ func (d *qemu) MigrateReceive(args instance.MigrateReceiveArgs) error { return fmt.Errorf("Failed creating instance on target: %w", err) } + // Derive the effective storage project name from the instance config's project. + storageProjectName, err := project.StorageVolumeProject(d.state.DB.Cluster, d.project.Name, dbCluster.StoragePoolVolumeTypeCustom) + if err != nil { + return err + } + + // Notify the shared disks that they're going to be accessed from another system. + diskPools := make(map[string]storagePools.Pool, len(d.expandedDevices)) + for _, dev := range d.expandedDevices.Sorted() { + if dev.Config["type"] != "disk" || dev.Config["path"] == "/" { + continue + } + + poolName := dev.Config["pool"] + if poolName == "" { + continue + } + + diskPool, ok := diskPools[poolName] + if !ok { + // Load the pool for the disk. + diskPool, err = storagePools.LoadByName(d.state, poolName) + if err != nil { + return fmt.Errorf("Failed loading storage pool: %w", err) + } + + // Save it to the pools map to avoid loading it from the DB multiple times. + diskPools[poolName] = diskPool + } + + // Setup the volume entry. 
+ extraTargetArgs := migration.VolumeTargetArgs{ + ClusterMoveSourceName: args.ClusterMoveSourceName, + } + + vol := diskPool.GetVolume(storageDrivers.VolumeTypeCustom, storageDrivers.ContentTypeBlock, project.StorageVolume(storageProjectName, dev.Config["source"]), nil) + volCopy := storageDrivers.NewVolumeCopy(vol) + + // Create a volume from the migration. + err = diskPool.Driver().CreateVolumeFromMigration(volCopy, nil, extraTargetArgs, nil, nil) + if err != nil { + return fmt.Errorf("Failed to prepare device %q for migration: %w", dev.Name, err) + } + } + // Only delete all instance volumes on error if the pool volume creation has succeeded to // avoid deleting an existing conflicting volume. isRemoteClusterMove := args.ClusterMoveSourceName != "" && poolInfo.Remote From bb409a2ef51aa3a4f4c00e722bcc15656722724f Mon Sep 17 00:00:00 2001 From: Mark Laing Date: Tue, 30 Jul 2024 15:08:47 +0100 Subject: [PATCH 092/106] lxd: Remove restriction on live-migration with attached volumes. Signed-off-by: Mark Laing --- lxd/instance_post.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/lxd/instance_post.go b/lxd/instance_post.go index 74cfac80143f..e39004bc0900 100644 --- a/lxd/instance_post.go +++ b/lxd/instance_post.go @@ -968,15 +968,6 @@ func migrateInstance(s *state.State, r *http.Request, inst instance.Instance, ta return err } - // In case of live migration, only root disk can be migrated. - if req.Live && inst.IsRunning() { - for _, rawConfig := range inst.ExpandedDevices() { - if rawConfig["type"] == "disk" && !instancetype.IsRootDiskDevice(rawConfig) { - return fmt.Errorf("Cannot live migrate instance with attached custom volume") - } - } - } - // Retrieve storage pool of the source instance. 
srcPool, err := storagePools.LoadByInstance(s, inst) if err != nil { From 8c4542a999a8dddb3ee6d169ff0bb88793869b90 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Wed, 31 Jul 2024 16:23:03 +0100 Subject: [PATCH 093/106] lxd/device/device/utils/disk: Use -o flag for xattr mode of virtiofsd To support more versions of virtiofsd. Signed-off-by: Thomas Parrott --- lxd/device/device_utils_disk.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lxd/device/device_utils_disk.go b/lxd/device/device_utils_disk.go index 68d51cb46e94..224e6f817056 100644 --- a/lxd/device/device_utils_disk.go +++ b/lxd/device/device_utils_disk.go @@ -505,7 +505,8 @@ func DiskVMVirtiofsdStart(kernelVersion version.DottedVersion, inst instance.Ins // Start the virtiofsd process in non-daemon mode. args := []string{ "--fd=3", - "--xattr", + // use -o flags for support in wider versions of virtiofsd. + "-o", "xattr", "-o", fmt.Sprintf("source=%s", sharePath), } From d21d1912c7e5070dfb1a1f30675029dbbdba03ce Mon Sep 17 00:00:00 2001 From: Wesley Hershberger Date: Thu, 25 Jul 2024 00:29:33 -0500 Subject: [PATCH 094/106] lxd/instance/drivers: Fix deadlock during failed snapshot creation We need a non-locking delete on an `Instance`. I don't love the type assertion here. However, since CreateInternal returns Instance, we don't have a lot of options beyond enriching that interface or providing an additional one. A solution that doesn't use type assertions would be to define an internal interface for `delete` and possibly a few other functions in the drivers/ package and implement it for common/lxc/qemu. Alternatively, moving the `Interface` interface into the same package as the drivers would allow us to define some private methods on `Interface` instead. 
Signed-off-by: Wesley Hershberger --- lxd/instance/drivers/driver_common.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lxd/instance/drivers/driver_common.go b/lxd/instance/drivers/driver_common.go index 9d547032a899..540357150f3f 100644 --- a/lxd/instance/drivers/driver_common.go +++ b/lxd/instance/drivers/driver_common.go @@ -727,7 +727,16 @@ func (d *common) snapshotCommon(inst instance.Instance, name string, expiry time return fmt.Errorf("Create instance snapshot: %w", err) } - revert.Add(func() { _ = snap.Delete(true) }) + revert.Add(func() { + switch s := snap.(type) { + case *lxc: + _ = s.delete(true) + case *qemu: + _ = s.delete(true) + default: + logger.Error("Failed to delete snapshot during revert", logger.Ctx{"instance": inst.Name(), "snapshot": snap.Name()}) + } + }) // Mount volume for backup.yaml writing. _, err = pool.MountInstance(inst, d.op) From e87c0bdd622d57743b45e35636a46db0e58802d4 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 1 Aug 2024 10:59:30 +0000 Subject: [PATCH 095/106] lxd/storage/backend_lxd: Create missing file when receiving raw image Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index 441f08928c2d..af8b34a6727a 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -1870,7 +1870,7 @@ func (b *lxdBackend) recvVolumeFiller(conn io.ReadWriteCloser, contentType drive } } else { // Receive block volume. 
- to, err := os.OpenFile(rootBlockPath, os.O_WRONLY|os.O_TRUNC, 0) + to, err := os.OpenFile(rootBlockPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) if err != nil { return -1, fmt.Errorf("Error opening file for writing %q: %w", rootBlockPath, err) } From 9e1152403244ccec0f0c69180ad54216c45a00a0 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 1 Aug 2024 11:00:52 +0000 Subject: [PATCH 096/106] lxd/storage/backend_lxd: Set volume size directly on volume config Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index af8b34a6727a..3f4108d2a699 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -2434,10 +2434,6 @@ func (b *lxdBackend) CreateInstanceFromConversion(inst instance.Instance, conn i return err } - // Override args.Name and args.Config to ensure volume is created based on instance. - args.Config = vol.Config() - args.Name = inst.Name() - // Get instance's root disk device from local devices. Do not use expanded devices, as we want // to determine whether the root disk volume size was explicitly set by the client. canResizeRootDiskSize := true @@ -2488,7 +2484,7 @@ func (b *lxdBackend) CreateInstanceFromConversion(inst instance.Instance, conn i if canResizeRootDiskSize { // Set size of the volume to the uncompressed image size. l.Debug("Setting volume size to uncompressed image size", logger.Ctx{"size": fmt.Sprintf("%d", imgBytes)}) - args.Config["size"] = fmt.Sprintf("%d", imgBytes) + vol.SetConfigSize(fmt.Sprintf("%d", imgBytes)) } // Convert received image into intance volume. @@ -2499,7 +2495,7 @@ func (b *lxdBackend) CreateInstanceFromConversion(inst instance.Instance, conn i // block volume will still be able to accommodate it. 
if canResizeRootDiskSize && contentType == drivers.ContentTypeBlock && args.VolumeSize > 0 { l.Debug("Setting volume size to source disk size", logger.Ctx{"size": args.VolumeSize}) - args.Config["size"] = fmt.Sprintf("%d", args.VolumeSize) + vol.SetConfigSize(fmt.Sprintf("%d", args.VolumeSize)) } srcDiskSize = args.VolumeSize From 7100972ceafb25a9c0ae144aec5d00ee40967e78 Mon Sep 17 00:00:00 2001 From: Din Music Date: Thu, 1 Aug 2024 11:05:36 +0000 Subject: [PATCH 097/106] lxd/storage/backend_lxd: Remove unnecessary usage of volume copy Signed-off-by: Din Music --- lxd/storage/backend_lxd.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lxd/storage/backend_lxd.go b/lxd/storage/backend_lxd.go index 3f4108d2a699..0aefc1827ae1 100644 --- a/lxd/storage/backend_lxd.go +++ b/lxd/storage/backend_lxd.go @@ -2525,14 +2525,12 @@ func (b *lxdBackend) CreateInstanceFromConversion(inst instance.Instance, conn i return fmt.Errorf("Volume size (%s) is lower then source disk size (%s)", volSize, imgSize) } - volCopy := drivers.NewVolumeCopy(vol) - - err = b.driver.CreateVolume(volCopy.Volume, &volFiller, op) + err = b.driver.CreateVolume(vol, &volFiller, op) if err != nil { return err } - revert.Add(func() { _ = b.driver.DeleteVolume(volCopy.Volume, op) }) + revert.Add(func() { _ = b.driver.DeleteVolume(vol, op) }) // At this point, the instance's volume is populated. If "virtio" option is enabled, // inject the virtio drivers. From b4ef26f7aaaebd34e97e0dbcd78d561ddc5de3fc Mon Sep 17 00:00:00 2001 From: Wesley Hershberger Date: Thu, 25 Jul 2024 16:16:06 -0500 Subject: [PATCH 098/106] test: Snapshot creation failure This at least runs through the reverter that gets built in case snapshot creation fails. 
Signed-off-by: Wesley Hershberger --- test/main.sh | 1 + test/suites/snapshots.sh | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/test/main.sh b/test/main.sh index 52209d29c54e..2fc2dcc44405 100755 --- a/test/main.sh +++ b/test/main.sh @@ -317,6 +317,7 @@ if [ "${1:-"all"}" != "cluster" ]; then run_test test_snap_expiry "snapshot expiry" run_test test_snap_schedule "snapshot scheduling" run_test test_snap_volume_db_recovery "snapshot volume database record recovery" + run_test test_snap_fail "snapshot creation failure" run_test test_config_profiles "profiles and configuration" run_test test_config_edit "container configuration edit" run_test test_property "container property" diff --git a/test/suites/snapshots.sh b/test/suites/snapshots.sh index 9056f198f7db..e360a99288cb 100644 --- a/test/suites/snapshots.sh +++ b/test/suites/snapshots.sh @@ -415,3 +415,23 @@ test_snap_volume_db_recovery() { lxc start c1 lxc delete -f c1 } + +test_snap_fail() { + local lxd_backend + lxd_backend=$(storage_backend "$LXD_DIR") + + ensure_import_testimage + + if [ "${lxd_backend}" = "zfs" ]; then + # Containers should fail to snapshot when root is full (can't write to backup.yaml) + lxc launch testimage c1 --device root,size=2MiB + lxc exec c1 -- dd if=/dev/urandom of=/root/big.bin count=100 bs=100K || true + + ! lxc snapshot c1 || false + + # Make sure that the snapshot creation failed (c1 has 0 snapshots) + [ "$(lxc ls --columns nS --format csv | awk --field-separator , '/c1/{print $2}')" -eq 0 ] + + lxc delete --force c1 + fi +} From 4d4a2fe07adc22e69c9db082a30afa20132caf41 Mon Sep 17 00:00:00 2001 From: Wesley Hershberger Date: Thu, 1 Aug 2024 17:17:00 -0500 Subject: [PATCH 099/106] lxd/instance/drivers: Allow zfs containers with full... ... rootfs to stop. 
Signed-off-by: Wesley Hershberger --- lxd/instance/drivers/driver_lxc.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index 5a832bda3e0f..eb55d59a2d27 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -2954,16 +2954,25 @@ func (d *lxc) onStop(args map[string]string) error { d.cleanupDevices(false, "") // Remove directory ownership (to avoid issue if uidmap is re-used) + // Fails on zfs when the dataset is full due to CoW err := os.Chown(d.Path(), 0, 0) if err != nil { - op.Done(fmt.Errorf("Failed clearing ownership: %w", err)) - return + if !strings.Contains(err.Error(), "disk quota exceeded") { + op.Done(fmt.Errorf("Failed clearing ownership: %w", err)) + return + } + + d.logger.Error("Failed clearing ownership; skipping", logger.Ctx{"err": err}) } err = os.Chmod(d.Path(), 0100) if err != nil { - op.Done(fmt.Errorf("Failed clearing permissions: %w", err)) - return + if !strings.Contains(err.Error(), "disk quota exceeded") { + op.Done(fmt.Errorf("Failed clearing permissions: %w", err)) + return + } + + d.logger.Error("Failed clearing permissions; skipping", logger.Ctx{"err": err}) } // Stop the storage for this container From 6f0f230cd2af31f1da7118fa66280ce2cd133d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Thu, 21 Dec 2023 13:57:05 -0500 Subject: [PATCH 100/106] lxd/instances/qemu: Don't fail event sending on missing agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit 2fbdbe9518f8739d5e4adcca0e9e963e96f41053) Signed-off-by: Mark Bolton License: Apache-2.0 --- lxd/instance/drivers/driver_qemu.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index 906afb3be1a2..b71184a06e35 100644 --- 
a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -8588,6 +8588,11 @@ func (d *qemu) devlxdEventSend(eventType string, eventMessage map[string]any) er client, err := d.getAgentClient() if err != nil { + // Don't fail if the VM simply doesn't have an agent. + if err == errQemuAgentOffline { + return nil + } + return err } From 2edfc4ce55aabd0415d9ecb2d37d980d5771d866 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Fri, 2 Aug 2024 10:36:32 +0100 Subject: [PATCH 101/106] i18n: Update translation templates Signed-off-by: Thomas Parrott --- po/ar.po | 35 ++++++++++++++++++++++------------- po/ber.po | 35 ++++++++++++++++++++++------------- po/bg.po | 35 ++++++++++++++++++++++------------- po/ca.po | 35 ++++++++++++++++++++++------------- po/cs.po | 35 ++++++++++++++++++++++------------- po/de.po | 36 +++++++++++++++++++++++------------- po/el.po | 35 ++++++++++++++++++++++------------- po/eo.po | 35 ++++++++++++++++++++++------------- po/es.po | 36 +++++++++++++++++++++++------------- po/fa.po | 35 ++++++++++++++++++++++------------- po/fi.po | 35 ++++++++++++++++++++++------------- po/fr.po | 36 +++++++++++++++++++++++------------- po/he.po | 35 ++++++++++++++++++++++------------- po/hi.po | 35 ++++++++++++++++++++++------------- po/id.po | 35 ++++++++++++++++++++++------------- po/it.po | 36 +++++++++++++++++++++++------------- po/ja.po | 36 +++++++++++++++++++++++------------- po/ka.po | 35 ++++++++++++++++++++++------------- po/ko.po | 35 ++++++++++++++++++++++------------- po/lxd.pot | 35 ++++++++++++++++++++++------------- po/mr.po | 35 ++++++++++++++++++++++------------- po/nb_NO.po | 35 ++++++++++++++++++++++------------- po/nl.po | 35 ++++++++++++++++++++++------------- po/pa.po | 35 ++++++++++++++++++++++------------- po/pl.po | 35 ++++++++++++++++++++++------------- po/pt.po | 35 ++++++++++++++++++++++------------- po/pt_BR.po | 36 +++++++++++++++++++++++------------- po/ru.po | 36 +++++++++++++++++++++++------------- 
po/si.po | 35 ++++++++++++++++++++++------------- po/sl.po | 35 ++++++++++++++++++++++------------- po/sr.po | 35 ++++++++++++++++++++++------------- po/sv.po | 35 ++++++++++++++++++++++------------- po/te.po | 35 ++++++++++++++++++++++------------- po/th.po | 35 ++++++++++++++++++++++------------- po/tr.po | 35 ++++++++++++++++++++++------------- po/tzm.po | 35 ++++++++++++++++++++++------------- po/ug.po | 35 ++++++++++++++++++++++------------- po/uk.po | 35 ++++++++++++++++++++++------------- po/zh_Hans.po | 35 ++++++++++++++++++++++------------- po/zh_Hant.po | 35 ++++++++++++++++++++++------------- 40 files changed, 887 insertions(+), 520 deletions(-) diff --git a/po/ar.po b/po/ar.po index 91ddf601c19d..5bb77f6ec597 100644 --- a/po/ar.po +++ b/po/ar.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: lxd\n" "Report-Msgid-Bugs-To: lxd@lists.canonical.com\n" -"POT-Creation-Date: 2024-07-08 08:58+0100\n" +"POT-Creation-Date: 2024-08-02 10:36+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: Automatically generated\n" "Language-Team: none\n" @@ -766,7 +766,7 @@ msgstr "" msgid "As neither could be found, the raw SPICE socket can be found at:" msgstr "" -#: lxc/init.go:324 lxc/rebuild.go:130 +#: lxc/init.go:333 lxc/rebuild.go:130 msgid "Asked for a VM but image is of type container" msgstr "" @@ -871,7 +871,7 @@ msgstr "" msgid "Bad key/value pair: %s" msgstr "" -#: lxc/copy.go:139 lxc/init.go:224 lxc/move.go:380 lxc/project.go:129 +#: lxc/copy.go:139 lxc/init.go:232 lxc/move.go:380 lxc/project.go:129 #, c-format msgid "Bad key=value pair: %q" msgstr "" @@ -1492,12 +1492,12 @@ msgstr "" msgid "Created: %s" msgstr "" -#: lxc/init.go:170 +#: lxc/init.go:177 #, c-format msgid "Creating %s" msgstr "" -#: lxc/init.go:168 +#: lxc/init.go:175 msgid "Creating the instance" msgstr "" @@ -1819,7 +1819,7 @@ msgstr "" msgid "Device: %s" msgstr "" -#: lxc/init.go:384 +#: lxc/init.go:393 msgid "Didn't get any affected image, instance or snapshot from 
server" msgstr "" @@ -2864,7 +2864,7 @@ msgstr "" msgid "Instance name is mandatory" msgstr "" -#: lxc/init.go:395 +#: lxc/init.go:404 #, c-format msgid "Instance name is: %s" msgstr "" @@ -3051,6 +3051,15 @@ msgstr "" msgid "Last used: never" msgstr "" +#: lxc/init.go:171 +#, c-format +msgid "Launching %s" +msgstr "" + +#: lxc/init.go:169 +msgid "Launching the instance" +msgstr "" + #: lxc/info.go:220 #, c-format msgid "Link detected: %v" @@ -4930,7 +4939,7 @@ msgstr "" msgid "Retrieve the instance's console log" msgstr "" -#: lxc/init.go:338 +#: lxc/init.go:347 #, c-format msgid "Retrieving image: %s" msgstr "" @@ -5565,7 +5574,7 @@ msgstr "" msgid "Start instances" msgstr "" -#: lxc/launch.go:82 +#: lxc/launch.go:87 #, c-format msgid "Starting %s" msgstr "" @@ -5770,7 +5779,7 @@ msgid "" "restarted" msgstr "" -#: lxc/init.go:416 +#: lxc/init.go:425 msgid "The instance you are starting doesn't have any network attached to it." msgstr "" @@ -5931,11 +5940,11 @@ msgstr "" msgid "Timestamps:" msgstr "" -#: lxc/init.go:418 +#: lxc/init.go:427 msgid "To attach a network to an instance, use: lxc network attach" msgstr "" -#: lxc/init.go:417 +#: lxc/init.go:426 msgid "To create a new network, use: lxc network create" msgstr "" @@ -6003,7 +6012,7 @@ msgstr "" msgid "Transmit policy" msgstr "" -#: lxc/action.go:287 lxc/launch.go:114 +#: lxc/action.go:287 lxc/launch.go:119 #, c-format msgid "Try `lxc info --show-log %s` for more info" msgstr "" diff --git a/po/ber.po b/po/ber.po index 8e2c466c7e3a..d69da01fb9ee 100644 --- a/po/ber.po +++ b/po/ber.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: lxd\n" "Report-Msgid-Bugs-To: lxd@lists.canonical.com\n" -"POT-Creation-Date: 2024-07-08 08:58+0100\n" +"POT-Creation-Date: 2024-08-02 10:36+0100\n" "PO-Revision-Date: 2022-03-10 15:10+0000\n" "Last-Translator: Anonymous \n" "Language-Team: Berber \n" "Language-Team: Bulgarian \n" "Language-Team: Catalan \n" "Language-Team: Czech \n" "Language-Team: German \n" 
"Language-Team: Greek \n" "Language-Team: Esperanto \n" "Language-Team: Spanish \n" "Language-Team: Persian \n" "Language-Team: Finnish \n" "Language-Team: French \n" "Language-Team: Hebrew \n" "Language-Team: Hindi \n" "Language-Team: Indonesian \n" "Language-Team: Italian \n" "Language-Team: Japanese \n" "Language-Team: Korean \n" "Language-Team: LANGUAGE \n" @@ -728,7 +728,7 @@ msgstr "" msgid "As neither could be found, the raw SPICE socket can be found at:" msgstr "" -#: lxc/init.go:324 lxc/rebuild.go:130 +#: lxc/init.go:333 lxc/rebuild.go:130 msgid "Asked for a VM but image is of type container" msgstr "" @@ -830,7 +830,7 @@ msgstr "" msgid "Bad key/value pair: %s" msgstr "" -#: lxc/copy.go:139 lxc/init.go:224 lxc/move.go:380 lxc/project.go:129 +#: lxc/copy.go:139 lxc/init.go:232 lxc/move.go:380 lxc/project.go:129 #, c-format msgid "Bad key=value pair: %q" msgstr "" @@ -1405,12 +1405,12 @@ msgstr "" msgid "Created: %s" msgstr "" -#: lxc/init.go:170 +#: lxc/init.go:177 #, c-format msgid "Creating %s" msgstr "" -#: lxc/init.go:168 +#: lxc/init.go:175 msgid "Creating the instance" msgstr "" @@ -1613,7 +1613,7 @@ msgstr "" msgid "Device: %s" msgstr "" -#: lxc/init.go:384 +#: lxc/init.go:393 msgid "Didn't get any affected image, instance or snapshot from server" msgstr "" @@ -2617,7 +2617,7 @@ msgstr "" msgid "Instance name is mandatory" msgstr "" -#: lxc/init.go:395 +#: lxc/init.go:404 #, c-format msgid "Instance name is: %s" msgstr "" @@ -2798,6 +2798,15 @@ msgstr "" msgid "Last used: never" msgstr "" +#: lxc/init.go:171 +#, c-format +msgid "Launching %s" +msgstr "" + +#: lxc/init.go:169 +msgid "Launching the instance" +msgstr "" + #: lxc/info.go:220 #, c-format msgid "Link detected: %v" @@ -4554,7 +4563,7 @@ msgstr "" msgid "Retrieve the instance's console log" msgstr "" -#: lxc/init.go:338 +#: lxc/init.go:347 #, c-format msgid "Retrieving image: %s" msgstr "" @@ -5154,7 +5163,7 @@ msgstr "" msgid "Start instances" msgstr "" -#: lxc/launch.go:82 +#: 
lxc/launch.go:87 #, c-format msgid "Starting %s" msgstr "" @@ -5354,7 +5363,7 @@ msgstr "" msgid "The instance is currently running. Use --force to have it stopped and restarted" msgstr "" -#: lxc/init.go:416 +#: lxc/init.go:425 msgid "The instance you are starting doesn't have any network attached to it." msgstr "" @@ -5509,11 +5518,11 @@ msgstr "" msgid "Timestamps:" msgstr "" -#: lxc/init.go:418 +#: lxc/init.go:427 msgid "To attach a network to an instance, use: lxc network attach" msgstr "" -#: lxc/init.go:417 +#: lxc/init.go:426 msgid "To create a new network, use: lxc network create" msgstr "" @@ -5579,7 +5588,7 @@ msgstr "" msgid "Transmit policy" msgstr "" -#: lxc/action.go:287 lxc/launch.go:114 +#: lxc/action.go:287 lxc/launch.go:119 #, c-format msgid "Try `lxc info --show-log %s` for more info" msgstr "" diff --git a/po/mr.po b/po/mr.po index c43444af7c96..a20329d4a13b 100644 --- a/po/mr.po +++ b/po/mr.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: lxd\n" "Report-Msgid-Bugs-To: lxd@lists.canonical.com\n" -"POT-Creation-Date: 2024-07-08 08:58+0100\n" +"POT-Creation-Date: 2024-08-02 10:36+0100\n" "PO-Revision-Date: 2022-03-10 15:10+0000\n" "Last-Translator: Anonymous \n" "Language-Team: Marathi \n" "Language-Team: Norwegian Bokmål \n" "Language-Team: Dutch \n" "Language-Team: Punjabi \n" "Language-Team: Polish \n" "Language-Team: Portuguese (Brazil) ,=: %s" msgid "Bad key/value pair: %s" msgstr "par de chave/valor inválido %s" -#: lxc/copy.go:139 lxc/init.go:224 lxc/move.go:380 lxc/project.go:129 +#: lxc/copy.go:139 lxc/init.go:232 lxc/move.go:380 lxc/project.go:129 #, fuzzy, c-format msgid "Bad key=value pair: %q" msgstr "par de chave=valor inválido %s" @@ -1801,12 +1801,12 @@ msgstr "" msgid "Created: %s" msgstr "Criado: %s" -#: lxc/init.go:170 +#: lxc/init.go:177 #, c-format msgid "Creating %s" msgstr "Criando %s" -#: lxc/init.go:168 +#: lxc/init.go:175 #, fuzzy msgid "Creating the instance" msgstr "Criando %s" @@ -2146,7 +2146,7 @@ msgstr "" 
msgid "Device: %s" msgstr "Em cache: %s" -#: lxc/init.go:384 +#: lxc/init.go:393 msgid "Didn't get any affected image, instance or snapshot from server" msgstr "" @@ -3236,7 +3236,7 @@ msgstr "" msgid "Instance name is mandatory" msgstr "" -#: lxc/init.go:395 +#: lxc/init.go:404 #, c-format msgid "Instance name is: %s" msgstr "" @@ -3425,6 +3425,16 @@ msgstr "" msgid "Last used: never" msgstr "" +#: lxc/init.go:171 +#, fuzzy, c-format +msgid "Launching %s" +msgstr "Criando %s" + +#: lxc/init.go:169 +#, fuzzy +msgid "Launching the instance" +msgstr "Criando %s" + #: lxc/info.go:220 #, fuzzy, c-format msgid "Link detected: %v" @@ -5389,7 +5399,7 @@ msgstr "" msgid "Retrieve the instance's console log" msgstr "" -#: lxc/init.go:338 +#: lxc/init.go:347 #, c-format msgid "Retrieving image: %s" msgstr "" @@ -6068,7 +6078,7 @@ msgstr "" msgid "Start instances" msgstr "" -#: lxc/launch.go:82 +#: lxc/launch.go:87 #, c-format msgid "Starting %s" msgstr "" @@ -6278,7 +6288,7 @@ msgid "" "restarted" msgstr "" -#: lxc/init.go:416 +#: lxc/init.go:425 msgid "The instance you are starting doesn't have any network attached to it." 
msgstr "" @@ -6441,11 +6451,11 @@ msgstr "" msgid "Timestamps:" msgstr "" -#: lxc/init.go:418 +#: lxc/init.go:427 msgid "To attach a network to an instance, use: lxc network attach" msgstr "" -#: lxc/init.go:417 +#: lxc/init.go:426 msgid "To create a new network, use: lxc network create" msgstr "" @@ -6513,7 +6523,7 @@ msgstr "Editar arquivos no container" msgid "Transmit policy" msgstr "" -#: lxc/action.go:287 lxc/launch.go:114 +#: lxc/action.go:287 lxc/launch.go:119 #, c-format msgid "Try `lxc info --show-log %s` for more info" msgstr "" diff --git a/po/ru.po b/po/ru.po index 86f975e19bd7..6a714405d8b9 100644 --- a/po/ru.po +++ b/po/ru.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: lxd\n" "Report-Msgid-Bugs-To: lxd@lists.canonical.com\n" -"POT-Creation-Date: 2024-07-08 08:58+0100\n" +"POT-Creation-Date: 2024-08-02 10:36+0100\n" "PO-Revision-Date: 2022-03-10 15:06+0000\n" "Last-Translator: Александр Киль \n" "Language-Team: Russian \n" "Language-Team: Sinhala \n" "Language-Team: Slovenian \n" "Language-Team: Serbian \n" "Language-Team: Swedish \n" "Language-Team: Telugu \n" "Language-Team: Turkish \n" "Language-Team: Tamazight (Central Atlas) \n" "Language-Team: Uyghur \n" "Language-Team: Ukrainian \n" "Language-Team: Chinese (Simplified) \n" "Language-Team: Chinese (Traditional) Date: Fri, 2 Aug 2024 10:39:50 +0100 Subject: [PATCH 102/106] gomod: Update dependencies Signed-off-by: Thomas Parrott --- go.mod | 18 +++++++++--------- go.sum | 36 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/go.mod b/go.mod index 57e0fc755451..97a4b30c4f88 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.22.5 require ( github.com/Rican7/retry v0.3.1 github.com/armon/go-proxyproto v0.1.0 - github.com/canonical/go-dqlite v1.21.0 + github.com/canonical/go-dqlite v1.22.0 github.com/checkpoint-restore/go-criu/v6 v6.3.0 github.com/dell/goscaleio v1.15.0 github.com/digitalocean/go-qemu v0.0.0-20230711162256-2e3d0186973e @@ 
-24,7 +24,7 @@ require ( github.com/gosexy/gettext v0.0.0-20160830220431-74466a0a0c4a github.com/j-keck/arping v1.0.3 github.com/jaypipes/pcidb v1.0.1 - github.com/jochenvg/go-udev v0.0.0-20171110120927-d6b62d56d37b + github.com/jochenvg/go-udev v0.0.0-20240801134859-b65ed646224b github.com/juju/gomaasapi v0.0.0-20200602032615-aa561369c767 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/lxc/go-lxc v0.0.0-20240606200241-27b3d116511f @@ -38,10 +38,10 @@ require ( github.com/mitchellh/mapstructure v1.5.0 github.com/oklog/ulid/v2 v2.1.0 github.com/olekukonko/tablewriter v0.0.5 - github.com/openfga/api/proto v0.0.0-20240722084519-a9261bb50796 + github.com/openfga/api/proto v0.0.0-20240723155248-7e5be7b65c27 github.com/openfga/language/pkg/go v0.2.0-beta.0 github.com/openfga/openfga v1.5.6 - github.com/osrg/gobgp/v3 v3.28.0 + github.com/osrg/gobgp/v3 v3.29.0 github.com/pkg/sftp v1.13.6 github.com/pkg/xattr v0.4.10 github.com/robfig/cron/v3 v3.0.1 @@ -51,7 +51,7 @@ require ( github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 github.com/vishvananda/netlink v1.2.1-beta.2 github.com/zitadel/oidc/v3 v3.26.0 - go.starlark.net v0.0.0-20240705175910-70002002b310 + go.starlark.net v0.0.0-20240725214946-42030a7cedce go.uber.org/zap v1.27.0 golang.org/x/crypto v0.25.0 golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 @@ -90,7 +90,7 @@ require ( github.com/google/renameio v1.0.1 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.21.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/hcl v1.0.0 // indirect @@ -109,7 +109,7 @@ require ( github.com/kr/fs v0.1.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mattn/go-isatty 
v0.0.20 // indirect - github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/mdlayher/socket v0.5.1 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect @@ -151,8 +151,8 @@ require ( golang.org/x/mod v0.19.0 // indirect golang.org/x/net v0.27.0 // indirect golang.org/x/tools v0.23.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240711142825-46eb208f015d // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240711142825-46eb208f015d // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240730163845-b1a4ccb954bf // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf // indirect google.golang.org/grpc v1.65.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect diff --git a/go.sum b/go.sum index b9bd20673bc4..17415ec552da 100644 --- a/go.sum +++ b/go.sum @@ -71,8 +71,8 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I= github.com/bmatcuk/doublestar/v4 v4.6.1/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= -github.com/canonical/go-dqlite v1.21.0 h1:4gLDdV2GF+vg0yv9Ff+mfZZNQ1JGhnQ3GnS2GeZPHfA= -github.com/canonical/go-dqlite v1.21.0/go.mod h1:Uvy943N8R4CFUAs59A1NVaziWY9nJ686lScY7ywurfg= +github.com/canonical/go-dqlite v1.22.0 h1:DuJmfcREl4gkQJyvZzjl2GHFZROhbPyfdjDRQXpkOyw= +github.com/canonical/go-dqlite v1.22.0/go.mod h1:Uvy943N8R4CFUAs59A1NVaziWY9nJ686lScY7ywurfg= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod 
h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -281,8 +281,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vb github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 h1:pRhl55Yx1eC7BZ1N+BBWwnKaMyD8uC+34TLdndZMAKk= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0/go.mod h1:XKMd7iuf/RGPSMJ/U4HP0zS2Z9Fh8Ps9a+6X26m/tmI= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.21.0 h1:CWyXh/jylQWp2dtiV33mY4iSSp6yf4lmn+c7/tN+ObI= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.21.0/go.mod h1:nCLIt0w3Ept2NwF8ThLmrppXsfT07oC8k0XNDxd8sVU= github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -340,8 +340,8 @@ github.com/jeremija/gosubmit v0.2.7 h1:At0OhGCFGPXyjPYAsCchoBUhE099pcBXmsb4iZqRO github.com/jeremija/gosubmit v0.2.7/go.mod h1:Ui+HS073lCFREXBbdfrJzMB57OI/bdxTiLtrDHHhFPI= github.com/jkeiser/iter v0.0.0-20200628201005-c8aa0ae784d1 h1:smvLGU3obGU5kny71BtE/ibR0wIXRUiRFDmSn0Nxz1E= github.com/jkeiser/iter v0.0.0-20200628201005-c8aa0ae784d1/go.mod h1:fP/NdyhRVOv09PLRbVXrSqHhrfQypdZwgE2L4h2U5C8= -github.com/jochenvg/go-udev v0.0.0-20171110120927-d6b62d56d37b h1:dgF9Rx3oPIz2d816jKSjnShkJfmtYc/N/DxGDFv2CGk= -github.com/jochenvg/go-udev v0.0.0-20171110120927-d6b62d56d37b/go.mod h1:IBDUGq30U56w969YNPomhMbRje1GrhUsCh7tHdwgLXA= +github.com/jochenvg/go-udev v0.0.0-20240801134859-b65ed646224b h1:Pzf7tldbCVqwl3NnOnTamEWdh/rL41fsoYCn2HdHgRA= +github.com/jochenvg/go-udev v0.0.0-20240801134859-b65ed646224b/go.mod 
h1:IBDUGq30U56w969YNPomhMbRje1GrhUsCh7tHdwgLXA= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= @@ -474,8 +474,8 @@ github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= @@ -541,15 +541,15 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= -github.com/openfga/api/proto v0.0.0-20240722084519-a9261bb50796 h1:4z3o/S0mxuoB3n1p4Xrg72cLY1V6Zj4z+I6sEFHcPpk= -github.com/openfga/api/proto v0.0.0-20240722084519-a9261bb50796/go.mod h1:gil5LBD8tSdFQbUkCQdnXsoeU9kDJdJgbGdHkgJfcd0= 
+github.com/openfga/api/proto v0.0.0-20240723155248-7e5be7b65c27 h1:4DI5cY46DLLjZoKNGpe9FRluNl+mN4yxkRL9VjWRbFk= +github.com/openfga/api/proto v0.0.0-20240723155248-7e5be7b65c27/go.mod h1:gil5LBD8tSdFQbUkCQdnXsoeU9kDJdJgbGdHkgJfcd0= github.com/openfga/language/pkg/go v0.2.0-beta.0 h1:dTvgDkQImfNnH1iDvxnUIbz4INvKr4kS46dI12oAEzM= github.com/openfga/language/pkg/go v0.2.0-beta.0/go.mod h1:mCwEY2IQvyNgfEwbfH0C0ERUwtL8z6UjSAF8zgn5Xbg= github.com/openfga/openfga v1.5.6 h1:V5VPXbDnThXHORJaP0Hv0kdw0gtS62eV4H0IQk0EqfE= github.com/openfga/openfga v1.5.6/go.mod h1:Iv2BfL2b6ANYrqWIANSoEveZPh51LV2YnoexrUI8bvU= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/osrg/gobgp/v3 v3.28.0 h1:Oy96v6TUiCxMq32b2cmfcREhPFwBoNK+JtBKwjhGQgw= -github.com/osrg/gobgp/v3 v3.28.0/go.mod h1:ZGeSti9mURR/o5hf5R6T1FM5g1yiEBZbhP+TuqYJUpI= +github.com/osrg/gobgp/v3 v3.29.0 h1:ISWjY5YQ45THcvXWdG2ykzXWxS22rgE6U9YWdaI/ki8= +github.com/osrg/gobgp/v3 v3.29.0/go.mod h1:ZGeSti9mURR/o5hf5R6T1FM5g1yiEBZbhP+TuqYJUpI= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= @@ -714,8 +714,8 @@ go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+ go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= -go.starlark.net v0.0.0-20240705175910-70002002b310 h1:tEAOMoNmN2MqVNi0MMEWpTtPI4YNCXgxmAGtuv3mST0= -go.starlark.net v0.0.0-20240705175910-70002002b310/go.mod h1:YKMCv9b1WrfWmeqdV5MAuEHWsu5iC+fe6kYl2sQjdI8= +go.starlark.net 
v0.0.0-20240725214946-42030a7cedce h1:YyGqCjZtGZJ+mRPaenEiB87afEO2MFRzLiJNZ0Z0bPw= +go.starlark.net v0.0.0-20240725214946-42030a7cedce/go.mod h1:YKMCv9b1WrfWmeqdV5MAuEHWsu5iC+fe6kYl2sQjdI8= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= @@ -1069,10 +1069,10 @@ google.golang.org/genproto v0.0.0-20210310155132-4ce2db91004e/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20210319143718-93e7006c17a6/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210402141018-6c239bbf2bb1/go.mod h1:9lPAdzaEmUacj36I+k7YKbEc5CXzPIeORRgDAUOu28A= google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c/go.mod h1:UODoCrxHCcBojKKwX1terBiRUaqAsFqJiF615XL43r0= -google.golang.org/genproto/googleapis/api v0.0.0-20240711142825-46eb208f015d h1:kHjw/5UfflP/L5EbledDrcG4C2597RtymmGRZvHiCuY= -google.golang.org/genproto/googleapis/api v0.0.0-20240711142825-46eb208f015d/go.mod h1:mw8MG/Qz5wfgYr6VqVCiZcHe/GJEfI+oGGDCohaVgB0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240711142825-46eb208f015d h1:JU0iKnSg02Gmb5ZdV8nYsKEKsP6o/FGVWTrw4i1DA9A= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240711142825-46eb208f015d/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +google.golang.org/genproto/googleapis/api v0.0.0-20240730163845-b1a4ccb954bf h1:GillM0Ef0pkZPIB+5iO6SDK+4T9pf6TpaYR6ICD5rVE= +google.golang.org/genproto/googleapis/api v0.0.0-20240730163845-b1a4ccb954bf/go.mod h1:OFMYQFHJ4TM3JRlWDZhJbZfra2uqc3WLBZiaaqP4DtU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf h1:liao9UHurZLtiEwBgT9LMOnKYsHze6eA6w1KQCMVN2Q= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= google.golang.org/grpc v1.19.0/go.mod 
h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= From 2018aebb8f1bbbeebccc460365646a5856eeeb49 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Fri, 2 Aug 2024 10:48:38 +0100 Subject: [PATCH 103/106] Makefile: Pin go mod deps for websocket and openfga websocket package had regressions on riscv64 on LP. openfga package changes its function signatures and so needs work in LXD to build. Signed-off-by: Thomas Parrott --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index e9b8a5d669bd..2604e5107d65 100644 --- a/Makefile +++ b/Makefile @@ -90,6 +90,8 @@ ifneq "$(LXD_OFFLINE)" "" exit 1 endif go get -t -v -d -u ./... + go get github.com/gorilla/websocket@v1.5.1 # Due to riscv64 crashes in LP + go get github.com/openfga/openfga@v1.5.6 # Due to build failures go mod tidy -go=$(GOMIN) @echo "Dependencies updated" From 1f583c11aca3d55e7a863deaac77587a5755ccae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Wed, 31 Jul 2024 00:44:09 -0400 Subject: [PATCH 104/106] lxd/storage/drivers/zfs: Always call tryGetVolumeDiskPathFromDataset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber (cherry picked from commit ecbe1e1f6cc029b4f4f2c479c81a49b8f4a8cadd) Signed-off-by: Din Music License: Apache-2.0 --- lxd/storage/drivers/driver_zfs_volumes.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lxd/storage/drivers/driver_zfs_volumes.go b/lxd/storage/drivers/driver_zfs_volumes.go index 293737f4d30a..037a40514673 100644 --- a/lxd/storage/drivers/driver_zfs_volumes.go +++ b/lxd/storage/drivers/driver_zfs_volumes.go @@ -1977,7 +1977,11 @@ func (d *zfs) getVolumeDiskPathFromDataset(dataset string) (string, error) { // GetVolumeDiskPath returns the location of a root disk 
block device. func (d *zfs) GetVolumeDiskPath(vol Volume) (string, error) { - return d.getVolumeDiskPathFromDataset(d.dataset(vol, false)) + // Wait up to 30 seconds for the device to appear. + ctx, cancel := context.WithTimeout(d.state.ShutdownCtx, 30*time.Second) + defer cancel() + + return d.tryGetVolumeDiskPathFromDataset(ctx, d.dataset(vol, false)) } // ListVolumes returns a list of LXD volumes in storage pool. From ed5ca859f9b4a10a57339d5afbc7b2bb8c8de62c Mon Sep 17 00:00:00 2001 From: Din Music Date: Fri, 2 Aug 2024 11:37:14 +0000 Subject: [PATCH 105/106] lxd/storage/drivers/zfs: Call GetVolumeDiskPath instead of creating new contexts Signed-off-by: Din Music --- lxd/storage/drivers/driver_zfs_volumes.go | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/lxd/storage/drivers/driver_zfs_volumes.go b/lxd/storage/drivers/driver_zfs_volumes.go index 037a40514673..4a8c702a9db3 100644 --- a/lxd/storage/drivers/driver_zfs_volumes.go +++ b/lxd/storage/drivers/driver_zfs_volumes.go @@ -216,11 +216,7 @@ func (d *zfs) CreateVolume(vol Volume, filler *VolumeFiller, op *operations.Oper } if vol.contentType == ContentTypeFS { - // Wait up to 30 seconds for the device to appear. - ctx, cancel := context.WithTimeout(d.state.ShutdownCtx, 30*time.Second) - defer cancel() - - devPath, err := d.tryGetVolumeDiskPathFromDataset(ctx, d.dataset(vol, false)) + devPath, err := d.GetVolumeDiskPath(vol) if err != nil { return err } @@ -2129,11 +2125,7 @@ func (d *zfs) activateVolume(vol Volume) (bool, error) { revert.Add(func() { _ = d.setDatasetProperties(dataset, fmt.Sprintf("volmode=%s", current)) }) - // Wait up to 30 seconds for the device to appear. 
- ctx, cancel := context.WithTimeout(d.state.ShutdownCtx, 30*time.Second) - defer cancel() - - _, err := d.tryGetVolumeDiskPathFromDataset(ctx, dataset) + _, err := d.GetVolumeDiskPath(vol) if err != nil { return false, fmt.Errorf("Failed to activate volume: %v", err) } From 5ed661476151d1addb8e5a132aa16cb86a67ddf4 Mon Sep 17 00:00:00 2001 From: Thomas Parrott Date: Fri, 2 Aug 2024 14:06:14 +0100 Subject: [PATCH 106/106] test/godeps/lxd-agent: Update dependencies Signed-off-by: Thomas Parrott --- test/godeps/lxd-agent.list | 1 + 1 file changed, 1 insertion(+) diff --git a/test/godeps/lxd-agent.list b/test/godeps/lxd-agent.list index 137b7543a2e3..cea6d8df2dac 100644 --- a/test/godeps/lxd-agent.list +++ b/test/godeps/lxd-agent.list @@ -217,6 +217,7 @@ golang.org/x/oauth2 golang.org/x/oauth2/clientcredentials golang.org/x/oauth2/internal golang.org/x/sync/errgroup +golang.org/x/sync/semaphore golang.org/x/sys/unix golang.org/x/text/cases golang.org/x/text/encoding