diff --git a/cli/cli.go b/cli/cli.go
index 06b465af8..9ecaf5922 100644
--- a/cli/cli.go
+++ b/cli/cli.go
@@ -60,7 +60,7 @@ func Execute(main *cobra.Command) {
 		"Alias for --log-level=DEBUG")
 
 	WrapPreRun(main, func(cmd *cobra.Command, args []string) error {
-		startLogging(cmd)
+		StartLogging(cmd)
 		return nil
 	})
 
@@ -78,7 +78,7 @@ func setRepoLogLevel(repo string, l capnslog.LogLevel) {
 	r.SetRepoLogLevel(l)
 }
 
-func startLogging(cmd *cobra.Command) {
+func StartLogging(cmd *cobra.Command) {
 	switch {
 	case logDebug:
 		logLevel = capnslog.DEBUG
diff --git a/cmd/kola/kola.go b/cmd/kola/kola.go
index 81d91b27b..8b831007f 100644
--- a/cmd/kola/kola.go
+++ b/cmd/kola/kola.go
@@ -134,13 +134,11 @@ func runRun(cmd *cobra.Command, args []string) {
 
 	// needs to be after RunTests() because harness empties the directory
 	if err := writeProps(); err != nil {
-		fmt.Fprintf(os.Stderr, "%v\n", err)
-		os.Exit(1)
+		plog.Fatal(err)
 	}
 
 	if runErr != nil {
-		fmt.Fprintf(os.Stderr, "%v\n", runErr)
-		os.Exit(1)
+		plog.Fatal(runErr)
 	}
 }
 
diff --git a/cmd/kola/options.go b/cmd/kola/options.go
index 2d9781080..514ca8fdd 100644
--- a/cmd/kola/options.go
+++ b/cmd/kola/options.go
@@ -124,6 +124,7 @@ func init() {
 	sv(&kola.AzureOptions.ResourceGroup, "azure-resource-group", "", "Deploy resources in an existing resource group")
 	sv(&kola.AzureOptions.AvailabilitySet, "azure-availability-set", "", "Deploy instances with an existing availibity set")
 	sv(&kola.AzureOptions.KolaVnet, "azure-kola-vnet", "", "Pass the vnet/subnet that kola is being ran from to restrict network access to created storage accounts")
+	bv(&kola.AzureOptions.TrustedLaunch, "azure-trusted-launch", false, "Enable trusted launch for VMs (default \"false\")")
 
 	// do-specific options
 	sv(&kola.DOOptions.ConfigPath, "do-config-file", "", "DigitalOcean config file (default \"~/"+auth.DOConfigPath+"\")")
diff --git a/cmd/ore/azure/azure.go b/cmd/ore/azure/azure.go
index 9892411cd..cd22fdb0b 100644
--- a/cmd/ore/azure/azure.go
+++ b/cmd/ore/azure/azure.go
@@ -19,6 +19,7 @@ import (
 	"github.com/spf13/cobra"
 
 	"github.com/flatcar/mantle/cli"
+	"github.com/flatcar/mantle/platform"
 	"github.com/flatcar/mantle/platform/api/azure"
 )
 
@@ -42,10 +43,12 @@ func init() {
 }
 
 func preauth(cmd *cobra.Command, args []string) error {
+	cli.StartLogging(cmd)
 	plog.Printf("Creating Azure API...")
 
 	a, err := azure.New(&azure.Options{
 		Location: azureLocation,
+		Options:  &platform.Options{},
 	})
 	if err != nil {
 		plog.Fatalf("Failed to create Azure API: %v", err)
diff --git a/cmd/ore/azure/create-gallery-image.go b/cmd/ore/azure/create-gallery-image.go
new file mode 100644
index 000000000..f8e16409c
--- /dev/null
+++ b/cmd/ore/azure/create-gallery-image.go
@@ -0,0 +1,126 @@
+// Copyright 2018 CoreOS, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package azure
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/flatcar/mantle/platform/api/azure"
+	"github.com/flatcar/mantle/sdk"
+	"github.com/spf13/cobra"
+)
+
+var (
+	cmdCreateGalleryImage = &cobra.Command{
+		Use:   "create-gallery-image",
+		Short: "Create Azure Gallery Image",
+		Long:  "Create Azure Gallery Image from a VHD image",
+		RunE:  runCreateGalleryImage,
+	}
+
+	// Values of the create-gallery-image command-line flags.
+	vhd              string
+	blobName         string
+	storageAccount   string
+	resourceGrp      string
+	hyperVGeneration string
+	board            string
+)
+
+func init() {
+	sv := cmdCreateGalleryImage.Flags().StringVar
+
+	sv(&imageName, "image-name", "", "image name (optional)")
+	sv(&blobName, "blob-name", "", "source blob name (optional)")
+	sv(&vhd, "file", defaultUploadFile(), "source VHD file")
+	sv(&resourceGrp, "resource-group", "", "resource group name (optional)")
+	sv(&hyperVGeneration, "hyper-v-generation", "V2", "Hyper-V generation (V2 or V1)")
+	sv(&board, "board", "amd64-usr", "board name (amd64-usr or arm64-usr)")
+	sv(&storageAccount, "storage-account", "", "storage account name (optional)")
+
+	Azure.AddCommand(cmdCreateGalleryImage)
+}
+
+// azureSanitize replaces every "." and "+" in name with "-".
+func azureSanitize(name string) string {
+	name = strings.ReplaceAll(name, ".", "-")
+	name = strings.ReplaceAll(name, "+", "-")
+	return name
+}
+
+// runCreateGalleryImage uploads the VHD file to the "vhds" container of the
+// storage account and creates a gallery image from the resulting blob. Blob
+// name, image name, resource group and storage account are derived or
+// created when the corresponding flag is empty. The ID of the image is
+// written to stdout as JSON. Every failure is reported through plog.Fatalf,
+// so the only value this function itself returns is nil.
+func runCreateGalleryImage(cmd *cobra.Command, args []string) error {
+	var err error
+	if err = api.SetupClients(); err != nil {
+		plog.Fatalf("setting up clients: %v\n", err)
+	}
+	api.Opts.Board = board
+	api.Opts.HyperVGeneration = hyperVGeneration
+
+	if blobName == "" {
+		ver, err := sdk.VersionsFromDir(filepath.Dir(vhd))
+		if err != nil {
+			plog.Fatalf("Unable to get version from image directory, provide a -blob-name flag or include a version.txt in the image directory: %v\n", err)
+		}
+		blobName = fmt.Sprintf("flatcar-dev-%s-%s", os.Getenv("USER"), ver.Version)
+	}
+	if imageName == "" {
+		imageName = azureSanitize(strings.TrimSuffix(blobName, ".vhd"))
+	}
+	if resourceGrp == "" {
+		resourceGrp, err = api.CreateResourceGroup("kola-cluster-image")
+		if err != nil {
+			plog.Fatalf("Couldn't create resource group: %v\n", err)
+		}
+	}
+	if storageAccount == "" {
+		storageAccount, err = api.CreateStorageAccount(resourceGrp)
+		if err != nil {
+			plog.Fatalf("Couldn't create storage account: %v\n", err)
+		}
+	}
+	client, err := api.GetBlobServiceClient(storageAccount)
+	if err != nil {
+		plog.Fatalf("failed to create blob service client for %q: %v", storageAccount, err)
+	}
+
+	container := "vhds"
+	if err := azure.UploadBlob(client, vhd, container, blobName, true); err != nil {
+		plog.Fatalf("Uploading blob failed: %v", err)
+	}
+	blobURL := azure.BlobURL(client, container, blobName)
+	imgID, err := api.CreateGalleryImage(imageName, resourceGrp, storageAccount, blobURL)
+	if err != nil {
+		plog.Fatalf("Couldn't create gallery image: %v\n", err)
+	}
+	err = json.NewEncoder(os.Stdout).Encode(&struct {
+		ID *string
+	}{
+		ID: &imgID,
+	})
+	if err != nil {
+		plog.Fatalf("Couldn't encode result: %v\n", err)
+	}
+	return nil
+}
diff --git a/cmd/ore/azure/upload-blob.go b/cmd/ore/azure/upload-blob.go
index ba49fd940..21febdfb2 100644
--- a/cmd/ore/azure/upload-blob.go
+++ b/cmd/ore/azure/upload-blob.go
@@ -64,7 +64,7 @@ func init() {
 
 func defaultUploadFile() string {
 	build := sdk.BuildRoot()
-	return build + "/images/amd64-usr/latest/coreos_production_azure_image.vhd"
+	return build + "/images/amd64-usr/latest/flatcar_production_azure_image.vhd"
 }
 
 func runUploadBlob(cmd *cobra.Command, args []string) {
@@ -73,7 +73,7 @@ func runUploadBlob(cmd *cobra.Command, args []string) {
 		if err != nil {
 			plog.Fatalf("Unable to get version from image directory, provide a -blob-name flag or include a version.txt in the image directory: %v\n", err)
 		}
-		ubo.blob = fmt.Sprintf("Container-Linux-dev-%s-%s.vhd", os.Getenv("USER"), ver.Version)
+		ubo.blob = fmt.Sprintf("flatcar-dev-%s-%s.vhd", os.Getenv("USER"), ver.Version)
 	}
 
 	if err := api.SetupClients(); err != nil {
diff --git a/kola/harness.go b/kola/harness.go
index cb59f5f5d..be66629f3 100644
--- a/kola/harness.go
+++ b/kola/harness.go
@@ -432,7 +432,7 @@ func RunTests(patterns []string, channel, offering, pltfrm, outputDir string, ss
 
 		version, err := getClusterSemver(flight, outputDir)
 		if err != nil {
-			plog.Fatal(err)
+			return fmt.Errorf("getClusterSemver: %w", err)
 		}
 
 		// If the version is > 3033, we can safely use user-data instead of custom-data for
@@ -453,7 +453,7 @@ func RunTests(patterns []string, channel, offering, pltfrm, outputDir string, ss
 		// one more filter pass now that we know real version
 		tests, err = FilterTests(tests, patterns, channel, offering, pltfrm, *version)
 		if err != nil {
-			plog.Fatal(err)
+			return fmt.Errorf("FilterTests: %w", err)
 		}
 	}
 
diff --git a/kola/tests/misc/nvidia.go b/kola/tests/misc/nvidia.go
index 7a6f24edf..49a1e6881 100644
--- a/kola/tests/misc/nvidia.go
+++ b/kola/tests/misc/nvidia.go
@@ -3,8 +3,10 @@ package misc
 import (
 	"bytes"
 	"fmt"
+	"strings"
 	"time"
 
+	"github.com/coreos/go-semver/semver"
 	"github.com/coreos/pkg/capnslog"
 	"github.com/flatcar/mantle/kola"
 	"github.com/flatcar/mantle/kola/cluster"
@@ -28,13 +30,21 @@ func init() {
 		Platforms:     []string{"azure"},
 		Architectures: []string{"amd64"},
 		Flags:         []register.Flag{register.NoEnableSelinux},
+		SkipFunc:      skipOnNonGpu,
 	})
 }
 
-func verifyNvidiaInstallation(c cluster.TestCluster) {
-	if kola.AzureOptions.Size != "Standard_NC6s_v3" {
-		c.Skip("skipping due to wrong instance size")
+// skipOnNonGpu reports whether the test has to be skipped: it returns false
+// only on Azure when the configured instance size contains an "N".
+func skipOnNonGpu(version semver.Version, channel, arch, platform string) bool {
+	// N stands for GPU instance obviously :)
+	if platform == "azure" && strings.Contains(kola.AzureOptions.Size, "N") {
+		return false
 	}
+	return true
+}
+
+func verifyNvidiaInstallation(c cluster.TestCluster) {
 	m := c.Machines()[0]
 
 	nvidiaStatusRetry := func() error {
diff --git a/platform/api/azure/gallery-image-template.json b/platform/api/azure/gallery-image-template.json
index 794181de6..643f4785d 100644
--- a/platform/api/azure/gallery-image-template.json
+++ b/platform/api/azure/gallery-image-template.json
@@ -84,7 +84,15 @@
           {
             "name": "DiskControllerTypes",
             "value": "NVMe,SCSI"
-          }
+          },
+          {
+            "name": "SecurityType",
+            "value": "[if(equals(parameters('hyperVGeneration'), 'V2'), 'TrustedLaunchSupported', 'None')]"
+          },
+          {
+            "name": "IsAcceleratedNetworkSupported",
+            "value": "true"
+          }
         ]
       },
       "type": "Microsoft.Compute/galleries/images"
diff --git a/platform/api/azure/image.go b/platform/api/azure/image.go
index afc1e346b..9ed7c1366 100644
--- a/platform/api/azure/image.go
+++ b/platform/api/azure/image.go
@@ -152,8 +152,8 @@ func (a *API) resolveImage() error {
 	if a.Opts.DiskURI != "" || a.Opts.BlobURL != "" || a.Opts.ImageFile != "" || a.Opts.Version != "" || a.Opts.Sku == "" {
 		return nil
 	}
-
-	resp, err := http.DefaultClient.Get(fmt.Sprintf("https://%s.release.flatcar-linux.net/amd64-usr/current/version.txt", a.Opts.Sku))
+	sku := strings.TrimSuffix(a.Opts.Sku, "-gen2")
+	resp, err := http.DefaultClient.Get(fmt.Sprintf("https://%s.release.flatcar-linux.net/amd64-usr/current/version.txt", sku))
 	if err != nil {
 		return fmt.Errorf("unable to fetch release bucket %v version: %v", a.Opts.Sku, err)
 	}
diff --git a/platform/api/azure/instance.go b/platform/api/azure/instance.go
index df3fb8768..079a884b7 100644
--- a/platform/api/azure/instance.go
+++ b/platform/api/azure/instance.go
@@ -19,7 +19,7 @@ import (
 	"encoding/base64"
 	"fmt"
 	"io"
-	"regexp"
+	"net/http"
 	"time"
 
 	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
@@ -30,12 +30,20 @@ import (
 	"github.com/flatcar/mantle/util"
 )
 
+type MachineState int
+
+const (
+	READY MachineState = iota
+	PROVISIONING
+)
+
 type Machine struct {
 	ID               string
 	PublicIPAddress  string
 	PrivateIPAddress string
 	InterfaceName    string
 	PublicIPName     string
+	State            MachineState
 }
 
 func (a *API) getAvset() string {
@@ -148,13 +156,31 @@ func (a *API) getVMParameters(name, sshkey, storageAccountURI string, userdata *
 			},
 			DiagnosticsProfile: &armcompute.DiagnosticsProfile{
 				BootDiagnostics: &armcompute.BootDiagnostics{
-					Enabled:    to.Ptr(true),
-					StorageURI: &storageAccountURI,
+					Enabled: to.Ptr(true),
 				},
 			},
 		},
 	}
 
+	if a.Opts.TrustedLaunch {
+		// Request the security profile only where trusted launch is
+		// supported; otherwise warn and leave the VM parameters untouched.
+		switch {
+		case a.Opts.HyperVGeneration != string(armcompute.HyperVGenerationTypeV2):
+			plog.Warningf("TrustedLaunch is only supported for HyperVGeneration v2; ignoring")
+		case a.Opts.Board != "amd64-usr":
+			plog.Warningf("TrustedLaunch is only supported for amd64-usr; ignoring")
+		default:
+			vm.Properties.SecurityProfile = &armcompute.SecurityProfile{
+				SecurityType: to.Ptr(armcompute.SecurityTypesTrustedLaunch),
+				UefiSettings: &armcompute.UefiSettings{
+					SecureBootEnabled: to.Ptr(false),
+					VTpmEnabled:       to.Ptr(true),
+				},
+			}
+		}
+	}
+
 	switch a.Opts.DiskController {
 	case "nvme":
 		vm.Properties.StorageProfile.DiskControllerType = to.Ptr(armcompute.DiskControllerTypesNVMe)
@@ -209,7 +235,7 @@ func (a *API) CreateInstance(name, sshkey, resourceGroup, storageAccount string,
 
 	clean := func() {
 		_, _ = a.compClient.BeginDelete(context.TODO(), vmResourceGroup, name, &armcompute.VirtualMachinesClientBeginDeleteOptions{
-			ForceDeletion: to.Ptr(true),
+			ForceDeletion: to.Ptr(false),
 		})
 		_, _ = a.intClient.BeginDelete(context.TODO(), resourceGroup, *nic.Name, nil)
 		_, _ = a.ipClient.BeginDelete(context.TODO(), resourceGroup, *ip.Name, nil)
@@ -222,8 +248,7 @@ func (a *API) CreateInstance(name, sshkey, resourceGroup, storageAccount string,
 	}
 	_, err = poller.PollUntilDone(context.TODO(), nil)
 	if err != nil {
-		clean()
-		return nil, err
+		return &Machine{ID: name, State: PROVISIONING}, fmt.Errorf("PollUntilDone(%s): %w", name, err)
 	}
 	plog.Infof("Instance %s created", name)
 
@@ -277,55 +302,74 @@ func (a *API) CreateInstance(name, sshkey, resourceGroup, storageAccount string,
 func (a *API) TerminateInstance(machine *Machine, resourceGroup string) error {
 	resourceGroup = a.getVMRG(resourceGroup)
 	_, err := a.compClient.BeginDelete(context.TODO(), resourceGroup, machine.ID, &armcompute.VirtualMachinesClientBeginDeleteOptions{
-		ForceDeletion: to.Ptr(true),
+		ForceDeletion: to.Ptr(false),
 	})
 	// We used to wait for the VM to be deleted here, but it's not necessary as
 	// we will also delete the resource group later.
 	return err
 }
 
-func (a *API) GetConsoleOutput(name, resourceGroup, storageAccount string) ([]byte, error) {
+// GetScreenshot fetches the console screenshot that the boot diagnostics
+// of the VM called name provide and copies it to output.
+func (a *API) GetScreenshot(name, resourceGroup string, output io.Writer) error {
 	vmResourceGroup := a.getVMRG(resourceGroup)
-	vm, err := a.compClient.Get(context.TODO(), vmResourceGroup, name, &armcompute.VirtualMachinesClientGetOptions{
-		Expand: to.Ptr(armcompute.InstanceViewTypesInstanceView),
-	})
+	param := &armcompute.VirtualMachinesClientRetrieveBootDiagnosticsDataOptions{
+		SasURIExpirationTimeInMinutes: to.Ptr[int32](5),
+	}
+	resp, err := a.compClient.RetrieveBootDiagnosticsData(context.TODO(), vmResourceGroup, name, param)
 	if err != nil {
-		return nil, fmt.Errorf("could not get VM: %v", err)
+		return fmt.Errorf("could not retrieve boot diagnostics data: %w", err)
 	}
-
-	consoleURI := vm.Properties.InstanceView.BootDiagnostics.SerialConsoleLogBlobURI
-	if consoleURI == nil {
-		return nil, fmt.Errorf("serial console URI is nil")
+	if resp.ConsoleScreenshotBlobURI == nil {
+		return fmt.Errorf("console screenshot URI is nil")
 	}
 
-	// Only the full URI to the logs are present in the virtual machine
-	// properties. Parse out the container & file name to use the GetBlob
-	// API call directly.
-	uri := []byte(*consoleURI)
-	containerPat := regexp.MustCompile(`bootdiagnostics-[a-z0-9\-]+`)
-	container := string(containerPat.Find(uri))
-	if container == "" {
-		return nil, fmt.Errorf("could not find container name in URI: %q", *consoleURI)
+	var data io.ReadCloser
+	err = util.Retry(6, 10*time.Second, func() error {
+		reply, err := http.Get(*resp.ConsoleScreenshotBlobURI)
+		if err != nil {
+			return fmt.Errorf("could not GET console screenshot: %v", err)
+		}
+		// http.Get returns a nil error for non-2xx responses; report them
+		// as errors so that util.Retry can try again.
+		if reply.StatusCode != http.StatusOK {
+			reply.Body.Close()
+			return fmt.Errorf("could not GET console screenshot: %s", reply.Status)
+		}
+		data = reply.Body
+		return nil
+	})
+	if err != nil {
+		return err
 	}
-	namePat := regexp.MustCompile(`[a-z0-9\-\.]+.serialconsole.log`)
-	blobname := string(namePat.Find(uri))
-	if blobname == "" {
-		return nil, fmt.Errorf("could not find blob name in URI: %q", *consoleURI)
+	defer data.Close()
+	written, err := io.Copy(output, data)
+	if err == nil {
+		plog.Debugf("wrote %d bytes to screenshot", written)
 	}
+	return err
+}
 
-	client, err := a.GetBlobServiceClient(storageAccount)
+func (a *API) GetConsoleOutput(name, resourceGroup, storageAccount string) ([]byte, error) {
+	vmResourceGroup := a.getVMRG(resourceGroup)
+	param := &armcompute.VirtualMachinesClientRetrieveBootDiagnosticsDataOptions{
+		SasURIExpirationTimeInMinutes: to.Ptr[int32](5),
+	}
+	resp, err := a.compClient.RetrieveBootDiagnosticsData(context.TODO(), vmResourceGroup, name, param)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("could not get VM: %v", err)
 	}
+	if resp.SerialConsoleLogBlobURI == nil {
+		return nil, fmt.Errorf("serial console URI is nil")
+	}
+
 	var data io.ReadCloser
 	err = util.Retry(6, 10*time.Second, func() error {
-		data, err = GetBlob(client, container, blobname)
+		reply, err := http.Get(*resp.SerialConsoleLogBlobURI)
 		if err != nil {
-			return fmt.Errorf("could not get blob for container %q, blobname %q: %v", container, blobname, err)
-		}
-		if data == nil {
-			return fmt.Errorf("empty data while getting blob for container %q, blobname %q", container, blobname)
+			return fmt.Errorf("could not GET console output: %v", err)
 		}
+		data = reply.Body
 		return nil
 	})
 	if err != nil {
diff --git a/platform/api/azure/options.go b/platform/api/azure/options.go
index e95a5bbd5..9690fe8e4 100644
--- a/platform/api/azure/options.go
+++ b/platform/api/azure/options.go
@@ -52,6 +52,7 @@ type Options struct {
 	KolaVnet       string
 	UseGallery     bool
 	UsePrivateIPs  bool
+	TrustedLaunch  bool
 
 	DiskController string
 
diff --git a/platform/machine/azure/cluster.go b/platform/machine/azure/cluster.go
index 8b91cee6f..90b2dcaaa 100644
--- a/platform/machine/azure/cluster.go
+++ b/platform/machine/azure/cluster.go
@@ -27,11 +27,12 @@ import (
 
 type cluster struct {
 	*platform.BaseCluster
-	flight         *flight
-	sshKey         string
-	ResourceGroup  string
-	StorageAccount string
-	Network        azure.Network
+	flight           *flight
+	sshKey           string
+	ResourceGroup    string
+	StorageAccountRG string
+	StorageAccount   string
+	Network          azure.Network
 }
 
 func (ac *cluster) vmname() string {
@@ -48,21 +49,24 @@ func (ac *cluster) NewMachine(userdata *conf.UserData) (platform.Machine, error)
 		return nil, err
 	}
 
-	instance, err := ac.flight.Api.CreateInstance(ac.vmname(), ac.sshKey, ac.ResourceGroup, ac.StorageAccount, conf, ac.Network)
-	if err != nil {
-		return nil, err
-	}
-
+	instance, createErr := ac.flight.Api.CreateInstance(ac.vmname(), ac.sshKey, ac.ResourceGroup, ac.StorageAccount, conf, ac.Network)
 	mach := &machine{
 		cluster: ac,
 		mach:    instance,
 	}
+	if instance == nil {
+		return nil, createErr
+	}
 
 	mach.dir = filepath.Join(ac.RuntimeConf().OutputDir, mach.ID())
 	if err := os.Mkdir(mach.dir, 0777); err != nil {
 		mach.Destroy()
 		return nil, err
 	}
+	if createErr != nil {
+		mach.Destroy()
+		return nil, createErr
+	}
 
 	confPath := filepath.Join(mach.dir, "user-data")
 	if err := conf.WriteFile(confPath); err != nil {
diff --git a/platform/machine/azure/flight.go b/platform/machine/azure/flight.go
index 834a4a7c7..aa0aa80fe 100644
--- a/platform/machine/azure/flight.go
+++ b/platform/machine/azure/flight.go
@@ -41,6 +41,7 @@ type flight struct {
 	ImageResourceGroup  string
 	ImageStorageAccount string
 	Network             azure.Network
+	UseFlightRG         bool
 }
 
 // NewFlight creates an instance of a Flight suitable for spawning
@@ -79,21 +80,21 @@ func NewFlight(opts *azure.Options) (platform.Flight, error) {
 		return nil, err
 	}
 
+	af.ImageResourceGroup, err = af.Api.CreateResourceGroup("kola-cluster-image")
+	if err != nil {
+		return nil, err
+	}
+	af.ImageStorageAccount, err = af.Api.CreateStorageAccount(af.ImageResourceGroup)
+	if err != nil {
+		return nil, err
+	}
+
 	if opts.BlobURL != "" || opts.ImageFile != "" {
+		af.UseFlightRG = true
 		imageName := fmt.Sprintf("%v", time.Now().UnixNano())
 		blobName := imageName + ".vhd"
 		container := "temp"
 
-		af.ImageResourceGroup, err = af.Api.CreateResourceGroup("kola-cluster-image")
-		if err != nil {
-			return nil, err
-		}
-
-		af.ImageStorageAccount, err = af.Api.CreateStorageAccount(af.ImageResourceGroup)
-		if err != nil {
-			return nil, err
-		}
-
 		af.Network, err = af.Api.PrepareNetworkResources(af.ImageResourceGroup)
 		if err != nil {
 			af.Destroy()
@@ -162,21 +163,17 @@ func (af *flight) NewCluster(rconf *platform.RuntimeConfig) (platform.Cluster, e
 		ac.sshKey = af.FakeSSHKey
 	}
 
-	if af.ImageResourceGroup != "" && af.ImageStorageAccount != "" {
+	ac.StorageAccountRG = af.ImageResourceGroup
+	ac.StorageAccount = af.ImageStorageAccount
+
+	if af.UseFlightRG {
 		ac.ResourceGroup = af.ImageResourceGroup
-		ac.StorageAccount = af.ImageStorageAccount
 		ac.Network = af.Network
 	} else {
 		ac.ResourceGroup, err = af.Api.CreateResourceGroup("kola-cluster")
 		if err != nil {
 			return nil, err
 		}
-
-		ac.StorageAccount, err = af.Api.CreateStorageAccount(ac.ResourceGroup)
-		if err != nil {
-			return nil, err
-		}
-
 		ac.Network, err = af.Api.PrepareNetworkResources(ac.ResourceGroup)
 		if err != nil {
 			ac.Destroy()
diff --git a/platform/machine/azure/machine.go b/platform/machine/azure/machine.go
index 2470f5dbb..4c2e80ff0 100644
--- a/platform/machine/azure/machine.go
+++ b/platform/machine/azure/machine.go
@@ -126,6 +126,16 @@ func (am *machine) saveConsole() error {
 		return fmt.Errorf("failed writing console to file: %v", err)
 	}
 
+	if am.mach.State == azure.PROVISIONING {
+		path := filepath.Join(am.dir, "screenshot.bmp")
+		f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+		return am.cluster.flight.Api.GetScreenshot(am.ID(), am.ResourceGroup(), f)
+	}
+
 	return nil
 }
 
diff --git a/util/retry.go b/util/retry.go
index 554f6fbe7..8fe43f0ba 100644
--- a/util/retry.go
+++ b/util/retry.go
@@ -57,8 +57,6 @@ func WaitUntilReady(timeout, delay time.Duration, checkFunction func() (bool, er
 		default:
 		}
 
-		time.Sleep(delay)
-
 		done, err := checkFunction()
 		if err != nil {
 			return err
@@ -67,6 +65,8 @@ func WaitUntilReady(timeout, delay time.Duration, checkFunction func() (bool, er
 		if done {
 			break
 		}
+
+		time.Sleep(delay)
 	}
 	return nil
 }