Merge pull request #573 from amazonlinux/dogswatch-sitrep
dogswatch: fix Agent handler and policy check
jahkeup authored Dec 9, 2019
2 parents b2f1355 + d5fccf9 commit 391dfa0
Showing 33 changed files with 1,731 additions and 467 deletions.
2 changes: 2 additions & 0 deletions extras/dogswatch/.dockerignore
@@ -1,5 +1,7 @@
.direnv/
dev/
*.nix
.envrc
*.el
*.tar*
Makefile
6 changes: 5 additions & 1 deletion extras/dogswatch/Dockerfile
@@ -1,9 +1,13 @@

# syntax=docker/dockerfile:experimental
FROM golang:1.13 as builder
ARG BUILD_LDFLAGS
ENV BUILD_LDFLAGS=$BUILD_LDFLAGS
ENV GOPROXY=direct
COPY ./ /go/src/github.com/amazonlinux/thar/dogswatch/
RUN cd /go/src/github.com/amazonlinux/thar/dogswatch && \
    CGO_ENABLED=0 GOOS=linux go build -mod=readonly ${BUILD_LDFLAGS:+-ldflags "$BUILD_LDFLAGS"} \
    -o dogswatch . && mv dogswatch /dogswatch

FROM scratch
COPY --from=builder /dogswatch /etc/ssl /
23 changes: 15 additions & 8 deletions extras/dogswatch/Makefile
@@ -7,6 +7,7 @@ GOBIN = ./bin/
DOCKER_IMAGE := dogswatch
DOCKER_IMAGE_REF_RELEASE := $(DOCKER_IMAGE):$(DOGSWATCH_VERSION)
DOCKER_IMAGE_REF := $(DOCKER_IMAGE):$(shell git rev-parse --short=8 HEAD)
DEBUG_LDFLAGS := -X $(GOPKG)/pkg/logging.DebugEnable=true

build: $(GOBIN)
cd $(GOBIN) && \
@@ -17,21 +18,27 @@ $(GOBIN):
mkdir -p $(GOBIN)

test:
	go test -ldflags '$(DEBUG_LDFLAGS)' $(GOPKGS)

container:
docker build --network=host \
--tag $(DOCKER_IMAGE_REF)\
--build-arg BUILD_LDFLAGS='' \
.

debug-container:
docker build --network=host \
--tag $(DOCKER_IMAGE_REF)\
--build-arg BUILD_LDFLAGS='$(DEBUG_LDFLAGS)' \
.

release-container: container
docker tag $(DOCKER_IMAGE_REF) $(DOCKER_IMAGE_REF_RELEASE)

load: container
kind load docker-image $(DOCKER_IMAGE)

vendor: go.sum go.mod
CGO_ENABLED=0 GOOS=linux go mod vendor
touch vendor/

deploy:
sed 's,@containerRef@,$(DOCKER_IMAGE_REF),g' ./dev/deployment.yaml \
| kubectl apply -f -
@@ -51,4 +58,4 @@ dashboard:
kubectl proxy

get-nodes-status:
	kubectl get nodes -o json | jq -C -S '.items | map(.metadata|{(.name): (.annotations*.labels|to_entries|map(select(.key|startswith("thar")))|from_entries)}) | add'
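
For context on `DEBUG_LDFLAGS` above: Go's `-X` linker flag overwrites a package-level string variable at link time, so the same source can produce either a quiet or a debug-enabled binary. A minimal sketch of the pattern follows; the actual contents of dogswatch's `pkg/logging` may differ.

``` go
// Sketch of a link-time debug switch (illustrative; not dogswatch's actual code).
package logging

// DebugEnable is a string rather than a bool because the linker's -X flag
// can only set string variables:
//
//	go build -ldflags '-X <module>/pkg/logging.DebugEnable=true'
var DebugEnable string

// Debuggable reports whether debug logging was baked in at build time.
func Debuggable() bool {
	return DebugEnable == "true"
}
```
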
80 changes: 58 additions & 22 deletions extras/dogswatch/README.md
@@ -1,22 +1,22 @@
# Dogswatch: Update Operator

Dogswatch is a [Kubernetes operator](https://Kubernetes.io/docs/concepts/extend-Kubernetes/operator/) that coordinates update activities on Thar hosts in a Kubernetes cluster.

## How to Run on Kubernetes


To run the Dogswatch Operator in a Kubernetes cluster, the following resources and configuration are required (examples are given in the [./dev/deployment.yaml](./dev/deployment.yaml) template):

- **`dogswatch` Container Image**

Holding the Dogswatch binaries and its supporting environment.

- **Controller Deployment**

Scheduling a stop-restart-tolerant Controller process on available Nodes.

- **Agent DaemonSet**

Scheduling the Agent on Thar hosts.

- **Thar Namespace**
@@ -28,67 +28,103 @@ To run the Dogswatch Operator in your Kubernetes cluster, the following are requ
Configured for authenticating the Agent process on Kubernetes APIs.

- **Cluster privileged credentials with read-write access to Nodes for Agent**

Applied to Agent Service Account to update annotations on the Node resource that the Agent is running under.

- **Service Account for the Controller**

Configured for authenticating the Controller process on Kubernetes APIs.

- **Cluster privileged credentials with access to Pods and Nodes for Controller**

Applied to the Controller Service Account for manipulating annotations on Node resources as well as cordoning & uncordoning Nodes for updates.
The Controller must also be able to un-schedule (`delete`) Pods running on Nodes that will be updated.

Cluster administrators can deploy dogswatch with the [suggested configuration defined here](./dogswatch.yaml) - this includes the above resources and Thar's published container images.
The dogswatch deployment can be applied to a cluster by calling `kubectl apply -f ./dogswatch.yaml` with an appropriately configured `kubectl` client for the target cluster.

Once these resources are in place, one last step is required to let the Kubernetes scheduler place the required Pods.
The deployments control scheduling of the dogswatch pods by limiting Pods to appropriate Thar hosts using labels.
For now, these labels are not applied automatically at boot and will need to be set on each Node resource using a tool like `kubectl`.

Each Node that is running Thar must be labeled with the Node's `platform-version` (a host compatibility indicator) in order to have `dogswatch` Pods scheduled on it; the label `thar.amazonaws.com/platform-version` is used for this:

``` text
thar.amazonaws.com/platform-version=1.0.0
```

`kubectl` may be used to set this label on a Node:

``` sh
: kubectl label node $NODE_NAME thar.amazonaws.com/platform-version=1.0.0
```

If all Nodes in the cluster are running Thar, they can all be labeled at the same time with a single command:

``` sh
: kubectl label node $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}') thar.amazonaws.com/platform-version=1.0.0
```

In the [development example deployment](./dev/deployment.yaml), the resources specify conditions that the Kubernetes scheduler uses to place Pods in the Cluster.
These conditions, among others, include a constraint on each Node being labeled as having support for the Operator to function on it: the `thar.amazonaws.com/platform-version` label.
With this label present and the workloads scheduled, the Agent and Controller processes will coordinate an update as soon as the Agent annotates its Node (by default, only one update will happen at a time).

To use the [suggested deployment](./dogswatch.yaml) or [development deployment](./dev/deployment.yaml) as a base, any customized resources must be updated to use a container image that is available to your kubelets.
Then, with the configured deployment, use `kubectl apply -f $UPDATED_DEPLOYMENT.yaml` to prepare the above resources and schedule the Dogswatch Pods in a Cluster.

## What Makes Up Dogswatch

Dogswatch is made up of two distinct processes, one of which runs on each host; a sketch of this single-binary dispatch follows the list below.

- `dogswatch -controller`

The coordinating process responsible for handling updates of Thar nodes
cooperatively with the cluster's workloads.

- `dogswatch -agent`

The on-host process responsible for publishing update metadata and executing
update activities.
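
As a rough illustration of the single-binary dispatch referenced above, a hypothetical `main` might select the role from these flags (`runController` and `runAgent` are placeholder names, not dogswatch's actual functions):

``` go
// Illustrative role dispatch for a single binary; function names are placeholders.
package main

import (
	"flag"
	"log"
)

func main() {
	controller := flag.Bool("controller", false, "run the cluster-side Controller process")
	agent := flag.Bool("agent", false, "run the on-host Agent process")
	flag.Parse()

	switch {
	case *controller:
		runController()
	case *agent:
		runAgent()
	default:
		log.Fatal("exactly one of -controller or -agent must be given")
	}
}

func runController() { log.Println("controller: coordinating updates across Nodes") }
func runAgent()      { log.Println("agent: publishing update metadata for this host") }
```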

## How It Coordinates

The Dogswatch processes communicate by applying updates to the Kubernetes Node resources' Annotations.
The Annotations are used to communicate the Agent activity (called an `intent`) as determined by the Controller process, the Agent's current activity in response to that intent, and the Host's update status as known by the Agent process.

The Agent and Controller processes listen to an event stream from the Kubernetes cluster in order to quickly and reliably handle communicated `intent` in addition to updated metadata pertinent to updates and the Operator itself.
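
As a hedged sketch of this mechanism (the annotation key and value below are illustrative, not dogswatch's real schema), an agent-side process could publish its state by merge-patching its own Node object using `client-go`:

``` go
// Illustrative only: publish agent state as a Node annotation via client-go.
package main

import (
	"context"
	"os"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

func main() {
	cfg, err := rest.InClusterConfig() // running in-cluster with a ServiceAccount
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	nodeName := os.Getenv("NODE_NAME") // typically injected via the downward API

	// Hypothetical key/value; the Controller watches Node events and reacts
	// when annotations like this change.
	patch := []byte(`{"metadata":{"annotations":{"thar.amazonaws.com/example-state":"update-available"}}}`)
	if _, err := client.CoreV1().Nodes().Patch(
		context.TODO(), nodeName, types.MergePatchType, patch, metav1.PatchOptions{},
	); err != nil {
		panic(err)
	}
}
```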

### Observing Progress and State

Dogswatch's operation can be observed simply by inspecting the labels and annotations on the Node resources.
The state and pending activity are posted as progress is made.

``` sh
# With a configured kubectl and jq available on $PATH
kubectl get nodes -o json \
| jq -C -S '.items | map(.metadata|{(.name): (.annotations*.labels|to_entries|map(select(.key|startswith("thar")))|from_entries)}) | add'
```

### Current Limitations

- Pod replication & healthy count is not taken into consideration (#502)
- Nodes update without pause between each Node (#503)
- Single Node cluster becomes unschedulable on update (#501)
- Node labels are not automatically applied to allow scheduling (#504)

## How to Contribute and Develop Changes for Dogswatch

Working on Dogswatch requires a fully configured, working Kubernetes cluster.
For the sake of development workflow, we suggest using a cluster that is containerized or virtualized - tools to manage these are available: [`kind`](https://github.com/Kubernetes-sigs/kind) (containerized) and [`minikube`](https://github.com/Kubernetes/minikube) (virtualized).
The `dev/` directory contains several resources that may be used for development and debugging purposes:

- `dashboard.yaml` - A **development environment** set of Kubernetes resources (these use insecure settings and *are not suitable for use in Production*!)
- `deployment.yaml` - A _template_ for Kubernetes resources for Dogswatch that schedule a controller and set up a DaemonSet
- `kind-cluster.yml` - A `kind` Cluster definition that may be used to stand up a local development cluster

Much of the development workflow can be accommodated by the `Makefile` provided alongside the code.
Each of these targets utilizes the tools and environments it is configured to access - for example: `kubectl`, as configured on the host, will be used.
If `kubectl` is configured with access to production, please take steps to reconfigure it so that it affects only a development cluster.

**General use targets**

2 changes: 2 additions & 0 deletions extras/dogswatch/dev/deployment.yaml
@@ -153,6 +153,8 @@ spec:
- name: dogswatch
image: "@containerRef@"
imagePullPolicy: Always
# XXX: tty required to exec binaries that use `simplelog` until #576 is resolved.
tty: true
args:
- -agent
- -debug