diff --git a/Makefile b/Makefile
index 9702ba8cca..975dce1956 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
 MCO_COMPONENTS = daemon controller server operator
-EXTRA_COMPONENTS = apiserver-watcher
+EXTRA_COMPONENTS = apiserver-watcher machine-os-builder
 ALL_COMPONENTS = $(patsubst %,machine-config-%,$(MCO_COMPONENTS)) $(EXTRA_COMPONENTS)
 PREFIX ?= /usr
 GO111MODULE?=on
diff --git a/cmd/machine-os-builder/main.go b/cmd/machine-os-builder/main.go
new file mode 100644
index 0000000000..d7c75f788d
--- /dev/null
+++ b/cmd/machine-os-builder/main.go
@@ -0,0 +1,33 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"time"
+
+	"github.com/spf13/cobra"
+)
+
+// componentName is the name of the binary and of the root cobra command.
+const componentName = "machine-os-builder"
+
+var (
+	rootCmd = &cobra.Command{
+		Use:   componentName,
+		Short: "Run Machine OS Builder",
+		Long:  "",
+	}
+)
+
+func init() {
+	rootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine)
+}
+
+// main prints a greeting and then blocks for 876000 hours (~100 years), which
+// keeps the process alive.
+// NOTE(review): rootCmd is never executed here, so the start and version
+// subcommands are unreachable - presumably a placeholder; confirm.
+func main() {
+	fmt.Println("Hello, World!")
+	<-time.After(876000 * time.Hour)
+}
diff --git a/cmd/machine-os-builder/start.go b/cmd/machine-os-builder/start.go
new file mode 100644
index 0000000000..695b4f8f2f
--- /dev/null
+++ b/cmd/machine-os-builder/start.go
@@ -0,0 +1,41 @@
+package main
+
+import (
+	"flag"
+
+	"github.com/openshift/machine-config-operator/pkg/version"
+	"github.com/spf13/cobra"
+	"k8s.io/klog/v2"
+)
+
+var (
+	startCmd = &cobra.Command{
+		Use:   "start",
+		Short: "Starts Machine OS Builder",
+		Long:  "",
+		Run:   runStartCmd,
+	}
+
+	startOpts struct {
+		kubeconfig string
+	}
+)
+
+func init() {
+	rootCmd.AddCommand(startCmd)
+	startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)")
+}
+
+// runStartCmd is the handler of the "start" subcommand. It sets the
+// logtostderr flag, then logs the parsed options and the build version.
+func runStartCmd(_ *cobra.Command, _ []string) {
+	if err := flag.Set("logtostderr", "true"); err != nil {
+		klog.Warningf("Could not set logtostderr: %v", err)
+	}
+	flag.Parse()
+
+	klog.V(2).Infof("Options parsed: %+v", startOpts)
+
+	// To help debugging, immediately log version
+	klog.Infof("Version: %+v (%s)", version.Raw, version.Hash)
+}
diff --git 
a/cmd/machine-os-builder/version.go b/cmd/machine-os-builder/version.go
new file mode 100644
index 0000000000..c261e58075
--- /dev/null
+++ b/cmd/machine-os-builder/version.go
@@ -0,0 +1,33 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+
+	"github.com/openshift/machine-config-operator/pkg/version"
+	"github.com/spf13/cobra"
+)
+
+var (
+	versionCmd = &cobra.Command{
+		Use:   "version",
+		Short: "Print the version number of Machine OS Builder",
+		Long:  `All software has versions. This is Machine OS Builder's.`,
+		Run:   runVersionCmd,
+	}
+)
+
+func init() {
+	rootCmd.AddCommand(versionCmd)
+}
+
+// runVersionCmd prints the program name followed by "<raw version>-<hash>".
+func runVersionCmd(_ *cobra.Command, _ []string) {
+	flag.Set("logtostderr", "true")
+	flag.Parse()
+
+	program := "MachineOSBuilder"
+	version := version.Raw + "-" + version.Hash
+
+	fmt.Println(program, version)
+}
diff --git a/manifests/machineosbuilder/clusterrole.yaml b/manifests/machineosbuilder/clusterrole.yaml
new file mode 100644
index 0000000000..33fbb73661
--- /dev/null
+++ b/manifests/machineosbuilder/clusterrole.yaml
@@ -0,0 +1,66 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: machine-os-builder
+rules:
+- apiGroups: [""]
+  resources: ["nodes"]
+  verbs: ["get", "list", "watch", "patch"]
+- apiGroups: ["machineconfiguration.openshift.io"]
+  resources: ["*"]
+  verbs: ["*"]
+- apiGroups: [""]
+  resources: ["configmaps", "secrets"]
+  verbs: ["*"]
+- apiGroups: ["config.openshift.io"]
+  resources: ["images", "clusterversions", "featuregates", "nodes", "nodes/status"]
+  verbs: ["*"]
+- apiGroups: ["config.openshift.io"]
+  resources: ["schedulers", "apiservers", "infrastructures", "imagedigestmirrorsets", "imagetagmirrorsets"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["operator.openshift.io"]
+  resources: ["imagecontentsourcepolicies"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["operator.openshift.io"]
+  resources: ["etcds"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["image.openshift.io"]
+  resources: 
["images"] + verbs: ["get","list","watch","create","update","patch","delete"] +- apiGroups: ["image.openshift.io"] + resources: ["imagestreams"] + verbs: ["get","list","watch","create","update","patch","delete"] +- apiGroups: ["build.openshift.io"] + resources: ["builds","buildconfigs","buildconfigs/instantiate"] + verbs: ["get","list","watch","create","update","patch","delete"] +- apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "create", "delete"] +- apiGroups: ["extensions"] + resources: ["daemonsets"] + verbs: ["get"] +- apiGroups: ["apps"] + resources: ["daemonsets"] + verbs: ["get"] +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + - subjectaccessreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - "*" diff --git a/manifests/machineosbuilder/clusterrolebinding-anyuid.yaml b/manifests/machineosbuilder/clusterrolebinding-anyuid.yaml new file mode 100644 index 0000000000..1af2ae14b4 --- /dev/null +++ b/manifests/machineosbuilder/clusterrolebinding-anyuid.yaml @@ -0,0 +1,16 @@ +# (zzlotnik): Grant the machine-os-builder service account the ability to start +# pods with UID 1000 for builds. This allows us to run Buildah in an +# unprivileged pod for better security than allowing it to run in a privileged +# pod. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: machine-os-builder-anyuid +roleRef: + name: "system:openshift:scc:anyuid" + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole +subjects: + - name: machine-os-builder + kind: ServiceAccount + namespace: "{{.TargetNamespace}}" diff --git a/manifests/machineosbuilder/clusterrolebinding-service-account.yaml b/manifests/machineosbuilder/clusterrolebinding-service-account.yaml new file mode 100644 index 0000000000..1b4adda269 --- /dev/null +++ b/manifests/machineosbuilder/clusterrolebinding-service-account.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: machine-os-builder +roleRef: + kind: ClusterRole + name: machine-os-builder +subjects: + - kind: ServiceAccount + namespace: "{{.TargetNamespace}}" + name: machine-os-builder diff --git a/manifests/machineosbuilder/deployment.yaml b/manifests/machineosbuilder/deployment.yaml new file mode 100644 index 0000000000..faae5af6c1 --- /dev/null +++ b/manifests/machineosbuilder/deployment.yaml @@ -0,0 +1,25 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: machine-os-builder + namespace: "{{.TargetNamespace}}" +spec: + replicas: 1 + selector: + matchLabels: + k8s-app: machine-os-builder + template: + metadata: + labels: + k8s-app: machine-os-builder + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + spec: + containers: + - name: machine-os-builder + image: "{{.Images.MachineConfigOperator}}" + command: ["/usr/bin/machine-os-builder"] + args: + - start + - -v4 + serviceAccountName: machine-os-builder diff --git a/manifests/machineosbuilder/events-clusterrole.yaml b/manifests/machineosbuilder/events-clusterrole.yaml new file mode 100644 index 0000000000..572457e46a --- /dev/null +++ b/manifests/machineosbuilder/events-clusterrole.yaml @@ -0,0 +1,8 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole 
+metadata: + name: machine-os-builder-events +rules: +- apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch"] diff --git a/manifests/machineosbuilder/events-rolebinding-default.yaml b/manifests/machineosbuilder/events-rolebinding-default.yaml new file mode 100644 index 0000000000..f01fa0bd6c --- /dev/null +++ b/manifests/machineosbuilder/events-rolebinding-default.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: machine-os-builder-events + namespace: default +roleRef: + kind: ClusterRole + name: machine-os-builder-events +subjects: +- kind: ServiceAccount + namespace: {{.TargetNamespace}} + name: machine-os-builder diff --git a/manifests/machineosbuilder/events-rolebinding-target.yaml b/manifests/machineosbuilder/events-rolebinding-target.yaml new file mode 100644 index 0000000000..ef004f40cd --- /dev/null +++ b/manifests/machineosbuilder/events-rolebinding-target.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: machine-os-builder-events + namespace: {{.TargetNamespace}} +roleRef: + kind: ClusterRole + name: machine-os-builder-events +subjects: +- kind: ServiceAccount + namespace: {{.TargetNamespace}} + name: machine-os-builder diff --git a/manifests/machineosbuilder/sa.yaml b/manifests/machineosbuilder/sa.yaml new file mode 100644 index 0000000000..f69a1b84e3 --- /dev/null +++ b/manifests/machineosbuilder/sa.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: {{.TargetNamespace}} + name: machine-os-builder diff --git a/pkg/controller/common/constants.go b/pkg/controller/common/constants.go index 571fc825f2..bcb33185e7 100644 --- a/pkg/controller/common/constants.go +++ b/pkg/controller/common/constants.go @@ -39,6 +39,7 @@ const ( // MachineConfigPoolMaster is the MachineConfigPool name given to the master MachineConfigPoolMaster = "master" + // MachineConfigPoolWorker is the MachineConfigPool name given to the 
worker MachineConfigPoolWorker = "worker" diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 6122bca055..35e42ae577 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -362,6 +362,7 @@ func (optr *Operator) sync(key string) error { {"MachineConfigDaemon", optr.syncMachineConfigDaemon}, {"MachineConfigController", optr.syncMachineConfigController}, {"MachineConfigServer", optr.syncMachineConfigServer}, + {"MachineOSBuilder", optr.syncMachineOSBuilder}, // this check must always run last since it makes sure the pools are in sync/upgrading correctly {"RequiredPools", optr.syncRequiredMachineConfigPools}, } diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index ba87e572a3..a9a466ab16 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -22,6 +22,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/selection" kubeErrs "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" @@ -40,6 +41,8 @@ import ( daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants" "github.com/openshift/machine-config-operator/pkg/server" "github.com/openshift/machine-config-operator/pkg/version" + + autoscalingv1 "k8s.io/api/autoscaling/v1" ) const ( @@ -73,6 +76,15 @@ const ( mccClusterRoleBindingManifestPath = "manifests/machineconfigcontroller/clusterrolebinding.yaml" mccServiceAccountManifestPath = "manifests/machineconfigcontroller/sa.yaml" + // Machine OS Builder manifest paths + mobClusterRoleManifestPath = "manifests/machineosbuilder/clusterrole.yaml" + mobEventsClusterRoleManifestPath = "manifests/machineosbuilder/events-clusterrole.yaml" + mobEventsRoleBindingDefaultManifestPath = "manifests/machineosbuilder/events-rolebinding-default.yaml" + mobEventsRoleBindingTargetManifestPath = "manifests/machineosbuilder/events-rolebinding-target.yaml" + 
mobClusterRoleBindingServiceAccountManifestPath = "manifests/machineosbuilder/clusterrolebinding-service-account.yaml"
+	mobClusterRoleBindingAnyUIDManifestPath         = "manifests/machineosbuilder/clusterrolebinding-anyuid.yaml"
+	mobServiceAccountManifestPath                   = "manifests/machineosbuilder/sa.yaml"
+
 	// Machine Config Daemon manifest paths
 	mcdClusterRoleManifestPath = "manifests/machineconfigdaemon/clusterrole.yaml"
 	mcdEventsClusterRoleManifestPath = "manifests/machineconfigdaemon/events-clusterrole.yaml"
@@ -668,6 +680,154 @@ func (optr *Operator) syncMachineConfigController(config *renderConfig) error {
 	return optr.syncControllerConfig(config)
 }
 
+// syncMachineOSBuilder applies the Machine OS Builder RBAC and service account
+// manifests, renders its Deployment manifest, and hands the Deployment to
+// reconcileMachineOSBuilder.
+func (optr *Operator) syncMachineOSBuilder(config *renderConfig) error {
+	klog.V(4).Info("Machine OS Builder sync started")
+	defer func() {
+		klog.V(4).Info("Machine OS Builder sync complete")
+	}()
+
+	paths := manifestPaths{
+		clusterRoles: []string{
+			mobClusterRoleManifestPath,
+			mobEventsClusterRoleManifestPath,
+		},
+		roleBindings: []string{
+			mobEventsRoleBindingDefaultManifestPath,
+			mobEventsRoleBindingTargetManifestPath,
+		},
+		clusterRoleBindings: []string{
+			mobClusterRoleBindingServiceAccountManifestPath,
+			mobClusterRoleBindingAnyUIDManifestPath,
+		},
+		serviceAccounts: []string{
+			mobServiceAccountManifestPath,
+		},
+	}
+
+	// It's probably fine to leave these around if we don't have an opted-in
+	// pool, since they don't consume any resources.
+	if err := optr.applyManifests(config, paths); err != nil {
+		return fmt.Errorf("failed to apply machine os builder manifests: %w", err)
+	}
+
+	mobBytes, err := renderAsset(config, "manifests/machineosbuilder/deployment.yaml")
+	if err != nil {
+		return fmt.Errorf("could not render Machine OS Builder deployment asset: %w", err)
+	}
+
+	mob := resourceread.ReadDeploymentV1OrDie(mobBytes)
+
+	return optr.reconcileMachineOSBuilder(mob)
+}
+
+// Determines if the Machine OS Builder deployment is in the correct state
+// based upon whether we have opted-in pools or not.
+func (optr *Operator) reconcileMachineOSBuilder(mob *appsv1.Deployment) error {
+	// First, check if we have any MachineConfigPools opted in.
+	layeredMCPs, err := optr.getLayeredMachineConfigPools()
+	if err != nil {
+		return fmt.Errorf("could not get layered MachineConfigPools: %w", err)
+	}
+
+	isRunning, err := optr.isMachineOSBuilderRunning(mob)
+	// Any error other than NotFound is unexpected, so bail out here.
+	if err != nil && !apierrors.IsNotFound(err) {
+		return fmt.Errorf("could not determine if Machine OS Builder is running: %w", err)
+	}
+
+	// If the deployment does not exist and we do not have any opted-in pools, we
+	// should create the deployment with zero replicas so that it exists.
+	if apierrors.IsNotFound(err) && len(layeredMCPs) == 0 {
+		klog.Infof("Creating Machine OS Builder deployment")
+		return optr.updateMachineOSBuilderDeployment(mob, 0)
+	}
+
+	// If we have opted-in pools and the deployment is missing (NotFound leaves
+	// isRunning false) or scaled to zero, bring it up with one replica.
+	if len(layeredMCPs) != 0 && !isRunning {
+		layeredMCPNames := []string{}
+		for _, mcp := range layeredMCPs {
+			layeredMCPNames = append(layeredMCPNames, mcp.Name)
+		}
+		klog.Infof("Starting Machine OS Builder pod because MachineConfigPool(s) opted into layering: %v", layeredMCPNames)
+		return optr.updateMachineOSBuilderDeployment(mob, 1)
+	}
+
+	// If we do not have opted-in pools and the Machine OS Builder deployment is
+	// running, scale it down.
+	if len(layeredMCPs) == 0 && isRunning {
+		klog.Infof("Shutting down Machine OS Builder pod because no MachineConfigPool(s) opted into layering")
+		return optr.updateMachineOSBuilderDeployment(mob, 0)
+	}
+
+	// Already in the desired state. NOTE(review): mob is not re-applied here - confirm intended.
+	return nil
+}
+
+// Determines if the Machine OS Builder is running based upon how many replicas
+// we have. If an error is encountered, it is assumed that no Deployments are
+// running.
+func (optr *Operator) isMachineOSBuilderRunning(mob *appsv1.Deployment) (bool, error) {
+	apiMob, err := optr.deployLister.Deployments(ctrlcommon.MCONamespace).Get(mob.Name)
+	if err != nil {
+		return false, err
+	}
+
+	// A nil replica count is defaulted to 1 by the API, so treat it as running.
+	return apiMob.Spec.Replicas == nil || *apiMob.Spec.Replicas != 0, nil
+}
+
+// Updates the Machine OS Builder Deployment, creating it if it does not exist.
+func (optr *Operator) updateMachineOSBuilderDeployment(mob *appsv1.Deployment, replicas int32) error {
+	_, updated, err := mcoResourceApply.ApplyDeployment(optr.kubeClient.AppsV1(), mob)
+	if err != nil {
+		return fmt.Errorf("could not apply Machine OS Builder deployment: %w", err)
+	}
+
+	scale := &autoscalingv1.Scale{
+		ObjectMeta: mob.ObjectMeta,
+		Spec: autoscalingv1.ScaleSpec{
+			Replicas: replicas,
+		},
+	}
+
+	_, err = optr.kubeClient.AppsV1().Deployments(ctrlcommon.MCONamespace).UpdateScale(context.TODO(), mob.Name, scale, metav1.UpdateOptions{})
+	if err != nil {
+		return fmt.Errorf("could not scale Machine OS Builder: %w", err)
+	}
+
+	if updated {
+		if err := optr.waitForDeploymentRollout(mob); err != nil {
+			return fmt.Errorf("could not wait for Machine OS Builder deployment rollout: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// Returns a list of MachineConfigPools which have opted in to layering.
+// Returns an empty list if none have opted in.
+func (optr *Operator) getLayeredMachineConfigPools() ([]*mcfgv1.MachineConfigPool, error) {
+	// Pools opt in to layering by carrying ctrlcommon.LayeringEnabledPoolLabel;
+	// the selector only checks that the label exists, so its value is
+	// irrelevant.
+	requirement, err := labels.NewRequirement(ctrlcommon.LayeringEnabledPoolLabel, selection.Exists, []string{})
+	if err != nil {
+		return nil, err
+	}
+
+	selector := labels.NewSelector().Add(*requirement)
+	pools, err := optr.mcpLister.List(selector)
+	if err != nil {
+		return nil, err
+	}
+
+	return pools, nil
+}
+
 func (optr *Operator) syncMachineConfigDaemon(config *renderConfig) error {
 	paths := manifestPaths{
 		clusterRoles: []string{
diff --git a/test/e2e/mob_test.go b/test/e2e/mob_test.go
new file mode 100644
index 0000000000..8813741de3
--- /dev/null
+++ b/test/e2e/mob_test.go
@@ -0,0 +1,58 @@
+package e2e_test
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/openshift/machine-config-operator/test/framework"
+	"github.com/openshift/machine-config-operator/test/helpers"
+	"github.com/stretchr/testify/require"
+
+	ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/util/retry"
+)
+
+func TestMachineOSBuilder(t *testing.T) {
+	cs := framework.NewClientSet("")
+	mcpName := "test-mcp"
+	namespace := "openshift-machine-config-operator"
+	mobPodNamePrefix := "machine-os-builder"
+
+	cleanup := helpers.CreateMCP(t, cs, mcpName)
+	time.Sleep(5 * time.Second) // Wait a bit to ensure MCP is fully created
+
+	// added retry because another process was modifying the MCP concurrently
+	retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+
+		// Get the latest MCP
+		mcp, err := cs.MachineConfigPools().Get(context.TODO(), mcpName, metav1.GetOptions{})
+		if err != nil {
+			return err
+		}
+
+		// Set the label; writing to a nil map panics, so initialise Labels first.
+		if mcp.Labels == nil {
+			mcp.Labels = map[string]string{}
+		}
+		mcp.Labels[ctrlcommon.LayeringEnabledPoolLabel] = ""
+
+		// Try to update the MCP
+		_, err = cs.MachineConfigPools().Update(context.TODO(), mcp, metav1.UpdateOptions{})
+		return err
+	})
+	require.NoError(t, retryErr)
+
+	// wait for Machine OS Builder pod to start
+	err := 
helpers.WaitForPodStart(cs, mobPodNamePrefix, namespace)
+	require.NoError(t, err, "Failed to start the Machine OS Builder pod")
+
+	// delete the MachineConfigPool
+	cleanup()
+	time.Sleep(5 * time.Second)
+
+	// wait for Machine OS Builder pod to stop
+	err = helpers.WaitForPodStop(cs, mobPodNamePrefix, namespace)
+	require.NoError(t, err, "Failed to stop the Machine OS Builder pod")
+}
diff --git a/test/helpers/utils.go b/test/helpers/utils.go
index 6d72be7bd7..002d78ddc2 100644
--- a/test/helpers/utils.go
+++ b/test/helpers/utils.go
@@ -338,6 +338,49 @@ func WaitForPausedConfig(t *testing.T, cs *framework.ClientSet, pool string) err
 	return nil
 }
 
+// WaitForPodStart polls every 2s, for up to 5m, until a pod in namespace whose name starts with podPrefix reports Ready.
+func WaitForPodStart(cs *framework.ClientSet, podPrefix, namespace string) error {
+	pollCtx := context.TODO()
+
+	return wait.PollUntilContextTimeout(pollCtx, 2*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
+		pods, err := cs.CoreV1Interface.Pods(namespace).List(ctx, metav1.ListOptions{})
+		if err != nil {
+			return false, err
+		}
+		for i := range pods.Items {
+			if !strings.HasPrefix(pods.Items[i].Name, podPrefix) {
+				continue
+			}
+			for _, cond := range pods.Items[i].Status.Conditions {
+				if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue {
+					return true, nil
+				}
+			}
+		}
+		return false, nil
+	})
+}
+
+// WaitForPodStop waits for a pod with the given name prefix in the given namespace to stop (i.e., to be deleted).
+func WaitForPodStop(cs *framework.ClientSet, podPrefix, namespace string) error {
+	pollCtx := context.TODO()
+
+	return wait.PollUntilContextTimeout(pollCtx, 2*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
+		pods, err := cs.CoreV1Interface.Pods(namespace).List(ctx, metav1.ListOptions{})
+		if err != nil {
+			return false, err
+		}
+		for i := range pods.Items {
+			if strings.HasPrefix(pods.Items[i].Name, podPrefix) {
+				// A matching pod is still present, so keep polling.
+				return false, nil
+			}
+		}
+		// No pod with the given prefix remains, so the wait is over.
+		return true, nil
+	})
+}
+
 // GetMonitoringToken retrieves the token from the openshift-monitoring secrets in the prometheus-k8s namespace.
 // It is equivalent to "oc sa get-token prometheus-k8s -n openshift-monitoring"
 func GetMonitoringToken(_ *testing.T, cs *framework.ClientSet) (string, error) {