diff --git a/.drone.yml b/.drone.yml deleted file mode 100644 index 024ef67..0000000 --- a/.drone.yml +++ /dev/null @@ -1,18 +0,0 @@ -pipeline: - build: - when: - event: push - image: plugins/docker - repo: registry.usw.co/cloud/node-problem-detector - tags: - - "${DRONE_COMMIT}" - - "${DRONE_BRANCH}" - - build-tag: - when: - event: tag - branch: master - image: plugins/docker - repo: registry.usw.co/cloud/node-problem-detector - tags: - - "${DRONE_TAG}" diff --git a/.github/rvu/labels.yaml b/.github/rvu/labels.yaml new file mode 100644 index 0000000..7b192d6 --- /dev/null +++ b/.github/rvu/labels.yaml @@ -0,0 +1 @@ +service.rvu.co.uk/brand: airship diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml new file mode 100644 index 0000000..5dd5b10 --- /dev/null +++ b/.github/workflows/push.yaml @@ -0,0 +1,27 @@ +name: push +on: push +permissions: + contents: read + id-token: write +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Login to Quay.io + uses: docker/login-action@v3 + with: + registry: quay.io + username: ${{ secrets.QUAY_USERNAME }} + password: ${{ secrets.QUAY_PASSWORD }} + - id: meta + uses: docker/metadata-action@v5 + with: + images: quay.io/uswitch/node-problem-detector + tags: type=sha,prefix=,format=long + - uses: docker/build-push-action@v6 + with: + context: . + labels: ${{ steps.meta.outputs.labels }} + push: true + tags: ${{ steps.meta.outputs.tags }} \ No newline at end of file diff --git a/README.md b/README.md index 8d06473..fc55c5a 100644 --- a/README.md +++ b/README.md @@ -1 +1,16 @@ -Adding our own scripts to https://github.com/kubernetes/node-problem-detector +# Node Problem Detector custom scripts + +Adding our own scripts to https://github.com/kubernetes/node-problem-detector and sharing them in case you might find those handy for your use cases. 
+ + +The script details can be found in `/config/plugin/` but ultimately, they are: +* `launch-config-drift`: a way to check if your instance's launch template has diverged from your ASG's launch template +* `spot-termination`: uses the `meta-data/spot/instance-action` endpoint to check for an EC2 Spot Instance interruption notice +* `local-dns-resolver`: checks the response status value received (if any) from the local DNS resolver IP +* `upstream-dns-resolver`: checks if we receive an IPv4 address for a given A record. +* `uptime`: every 5 seconds, checks if the information detailing how long the system has been on since its last restart is acceptable (for us, the threshold is 604800 seconds) + + +## Notes +*July 2024 -* The custom `node problem detector` image is now stored in the `uswitch/node-problem-detector` repository on Quay. +
\ No newline at end of file diff --git a/config/plugin/launch_config_drift.sh b/config/plugin/launch_config_drift.sh index 05b1c16..e37cb2d 100755 --- a/config/plugin/launch_config_drift.sh +++ b/config/plugin/launch_config_drift.sh @@ -16,7 +16,7 @@ then fi instance="$(echo "${instances}" | jq '.AutoScalingInstances[0]')" -instance_launch_config="$(echo "${instance}" | jq -r .LaunchConfigurationName)" +instance_launch_config="$(echo "${instance}" | jq -r .LaunchTemplate.LaunchTemplateName)" instance_asg="$(echo "${instance}" | jq -r .AutoScalingGroupName)" asgs="$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ${instance_asg})" @@ -26,11 +26,11 @@ then exit $UNKNOWN fi -asg_launch_config="$(echo "${asgs}" | jq -r '.AutoScalingGroups[0].LaunchConfigurationName')" +asg_launch_config="$(echo "${asgs}" | jq -r '.AutoScalingGroups[0].MixedInstancesPolicy.LaunchTemplate.LaunchTemplateSpecification.LaunchTemplateName')" if [ "${instance_launch_config}" = "${asg_launch_config}" ] then exit $OK else exit $NONOK -fi +fi \ No newline at end of file