diff --git a/.github/workflows/aks-auto-shutdown.yaml b/.github/workflows/aks-auto-shutdown.yaml index 8e7474c8..debee598 100644 --- a/.github/workflows/aks-auto-shutdown.yaml +++ b/.github/workflows/aks-auto-shutdown.yaml @@ -3,6 +3,8 @@ on: workflow_dispatch: schedule: - cron: "0 19,22 * * *" # Every day at 20:00 and 23:00 BST +env: + DEV_ENV: ${{ secrets.DEV_ENV }} permissions: id-token: write jobs: @@ -19,16 +21,48 @@ jobs: tenant-id: 531ff96d-0ae9-462a-8d2d-bec7c0b42082 # HMCTS.NET allow-no-subscriptions: true - - name: AKS Auto Shutdown - run: ./scripts/aks/auto-start-stop.sh stop + - name: Staging - AKS Auto Stop + run: ./scripts/aks/auto-start-stop.sh stop staging env: - DEV_ENV: ${{ secrets.DEV_ENV }} + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Test - AKS Auto Stop + run: ./scripts/aks/auto-start-stop.sh stop testing + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Demo - AKS Auto Stop + run: ./scripts/aks/auto-start-stop.sh stop demo + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Development - AKS Auto Stop + run: ./scripts/aks/auto-start-stop.sh stop development + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Sandbox - AKS Auto Stop + run: ./scripts/aks/auto-start-stop.sh stop sandbox + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: ITHC - AKS Auto Stop + run: ./scripts/aks/auto-start-stop.sh stop ithc + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Untagged - AKS Auto Stop + run: ./scripts/aks/auto-start-stop.sh stop untagged + env: + DEV_ENV: ${{ env.DEV_ENV }} - name: Output log file run: ./scripts/common/log-output.sh - - name: Wait for clusters to stop + - name: Wait for cluster to stop + if: env.DEV_ENV != 'true' run: sleep 600 - name: AKS Auto Shutdown status check + if: env.DEV_ENV != 'true' run: ./scripts/aks/auto-shutdown-status.sh stop ${{ secrets.REGISTRYSLACKWEBHOOK }} diff --git a/.github/workflows/aks-auto-start.yaml b/.github/workflows/aks-auto-start.yaml index 45af7c9d..053acc85 100644 --- a/.github/workflows/aks-auto-start.yaml +++ 
b/.github/workflows/aks-auto-start.yaml @@ -1,15 +1,10 @@ name: aks-auto-start on: workflow_dispatch: - inputs: - DRYRUN: - options: - - "false" - - "true" schedule: - cron: "30 5 * * 1-5" # Every weekday at 6:30am BST env: - DRYRUN: ${{ inputs.DRYRUN }} + DEV_ENV: ${{ secrets.DEV_ENV }} permissions: id-token: write jobs: @@ -26,16 +21,49 @@ jobs: tenant-id: 531ff96d-0ae9-462a-8d2d-bec7c0b42082 # HMCTS.NET allow-no-subscriptions: true - - name: AKS Auto Start - run: ./scripts/aks/auto-start-stop.sh start + - name: Staging - AKS Auto Start + run: ./scripts/aks/auto-start-stop.sh start staging env: - DEV_ENV: ${{ secrets.DEV_ENV }} + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Test - AKS Auto Start + run: ./scripts/aks/auto-start-stop.sh start testing + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Demo - AKS Auto Start + run: ./scripts/aks/auto-start-stop.sh start demo + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Development - AKS Auto Start + run: ./scripts/aks/auto-start-stop.sh start development + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Sandbox - AKS Auto Start + run: ./scripts/aks/auto-start-stop.sh start sandbox + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: ITHC - AKS Auto Start + run: ./scripts/aks/auto-start-stop.sh start ithc + env: + DEV_ENV: ${{ env.DEV_ENV }} + + - name: Untagged - AKS Auto Start + run: ./scripts/aks/auto-start-stop.sh start untagged + env: + DEV_ENV: ${{ env.DEV_ENV }} - name: Output log file run: ./scripts/common/log-output.sh - name: Wait for cluster to start + if: env.DEV_ENV != 'true' run: sleep 600 - name: AKS Auto Shutdown status check + # Skip the status check on DEV_ENV dry runs, matching aks-auto-shutdown.yaml + if: env.DEV_ENV != 'true' run: ./scripts/aks/auto-shutdown-status.sh start ${{ secrets.REGISTRYSLACKWEBHOOK }} diff --git a/scripts/aks/auto-shutdown-status.sh b/scripts/aks/auto-shutdown-status.sh index 1b43e133..9f2e7cf3 100755 --- a/scripts/aks/auto-shutdown-status.sh +++ b/scripts/aks/auto-shutdown-status.sh @@ -10,14 +10,15 @@ source scripts/common/common-functions.sh 
MODE=${1:-start} registrySlackWebhook=$2 -SUBSCRIPTIONS=$(az account list -o json) -jq -c '.[]' <<< $SUBSCRIPTIONS | while read subscription; do - get_subscription_clusters +CLUSTERS=$(get_clusters) +clusters_count=$(jq -c -r '.count' <<<$CLUSTERS) +log "$clusters_count AKS Clusters found" +log "----------------------------------------------" -jq -c '.[]' <<< $CLUSTERS | while read cluster; do +jq -c '.data[]' <<<$CLUSTERS | while read cluster; do get_cluster_details - cluster_data=$(az aks show -n $CLUSTER_NAME -g $RESOURCE_GROUP -o json) - cluster_status=$(jq -r '.powerState.code' <<< "$cluster_data") + # get_clusters projects `properties`, so the power state is read from .properties on the row held in $cluster + cluster_status=$(jq -r '.properties.powerState.code' <<<"$cluster") if [[ $cluster_status == "Stopped" ]]; then echo -e "${GREEN}$CLUSTER_NAME is $cluster_status" @@ -25,7 +26,6 @@ jq -c '.[]' <<< $CLUSTERS | while read cluster; do echo -e "${AMBER}$CLUSTER_NAME is $cluster_status" fi if [[ $MODE == "start" ]]; then - check_cluster_status + check_cluster_status fi done -done diff --git a/scripts/aks/auto-start-stop.sh b/scripts/aks/auto-start-stop.sh index 97a5a5eb..8682b979 100755 --- a/scripts/aks/auto-start-stop.sh +++ b/scripts/aks/auto-start-stop.sh @@ -10,48 +10,49 @@ MODE=${1:-start} SKIP="false" if [[ "$MODE" != "start" && "$MODE" != "stop" ]]; then - echo "Invalid MODE. Please use 'start' or 'stop'." - exit 1 + echo "Invalid MODE. Please use 'start' or 'stop'." 
+ exit 1 fi -SUBSCRIPTIONS=$(az account list -o json) -jq -c '.[]' <<< $SUBSCRIPTIONS | while read subscription; do - get_subscription_clusters - jq -c '.[]' <<< $CLUSTERS | while read cluster; do - get_cluster_details - cluster_env=$(echo $CLUSTER_NAME | cut -d'-' -f2) - - if [[ $cluster_env == "sbox" ]]; then - cluster_env=${cluster_env/#sbox/Sandbox} - elif [[ $cluster_env == "ptlsbox" ]]; then - cluster_env=${cluster_env/ptlsbox/Sandbox} - elif [[ $cluster_env == "stg" ]]; then - cluster_env=${cluster_env/stg/Staging} - fi +CLUSTERS=$(get_clusters "$2") +clusters_count=$(jq -c -r '.count' <<<$CLUSTERS) +log "$clusters_count AKS Clusters found" +log "----------------------------------------------" + +jq -c '.data[]' <<<$CLUSTERS | while read cluster; do + get_cluster_details + cluster_env=$(echo $CLUSTER_NAME | cut -d'-' -f2) + + if [[ $cluster_env == "sbox" ]]; then + cluster_env=${cluster_env/#sbox/Sandbox} + elif [[ $cluster_env == "ptlsbox" ]]; then + cluster_env=${cluster_env/ptlsbox/Sandbox} + elif [[ $cluster_env == "stg" ]]; then + cluster_env=${cluster_env/stg/Staging} + fi + + cluster_business_area=$(echo $CLUSTER_NAME | cut -d'-' -f1) + cluster_business_area=${cluster_business_area/ss/cross-cutting} - cluster_business_area=$(echo $CLUSTER_NAME | cut -d'-' -f1) - cluster_business_area=${cluster_business_area/ss/cross-cutting} + log "=====================================================" + log "Processing Cluster: $CLUSTER_NAME" + log "=====================================================" - log "=====================================================" - log "Processing Cluster: $CLUSTER_NAME" - log "=====================================================" + log "checking skip logic for cluster_env: $cluster_env, cluster_business_area: $cluster_business_area, mode: $MODE" + SKIP=$(should_skip_start_stop $cluster_env $cluster_business_area $MODE) - log "checking skip logic for cluster_env: $cluster_env, cluster_business_area: $cluster_business_area, mode: 
$MODE" - SKIP=$(should_skip_start_stop $cluster_env $cluster_business_area $MODE) + log "SKIP evalulated to $SKIP" - log "SKIP evalulated to $SKIP" + if [[ $SKIP == "false" ]]; then + if [[ $DEV_ENV != "true" ]]; then + aks_state_messages + az aks $MODE --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --no-wait || echo Ignoring any errors while $MODE operation on cluster - if [[ $SKIP == "false" ]]; then - if [[ $DEV_ENV != "true" ]]; then - aks_state_messages - az aks $MODE --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --no-wait || echo Ignoring any errors while $MODE operation on cluster - - else - ts_echo_color BLUE "Development Env: simulating state commands only." - aks_state_messages - fi else - ts_echo_color AMBER "cluster $CLUSTER_NAME (rg:$RESOURCE_GROUP) has been skipped from today's $MODE operation schedule" + ts_echo_color BLUE "Development Env: simulating state commands only." + aks_state_messages fi - done -done \ No newline at end of file + else + ts_echo_color AMBER "cluster $CLUSTER_NAME (rg:$RESOURCE_GROUP) has been skipped from today's $MODE operation schedule" + fi +done diff --git a/scripts/aks/common-functions.sh b/scripts/aks/common-functions.sh index f8f11993..eab3874f 100644 --- a/scripts/aks/common-functions.sh +++ b/scripts/aks/common-functions.sh @@ -1,26 +1,44 @@ #!/bin/bash +function get_clusters() { + #MS az graph query to find and return a list of all AKS tagged to be included in the auto-shutdown process. + log "----------------------------------------------" + log "Running az graph query..." 
-function get_subscription_clusters() { - SUBSCRIPTION_ID=$(jq -r '.id' <<< $subscription) - az account set -s $SUBSCRIPTION_ID - CLUSTERS=$(az resource list --resource-type Microsoft.ContainerService/managedClusters --query "[?tags.autoShutdown == 'true']" -o json) + if [ -z "$1" ]; then + env_selector="" + elif [ "$1" == "untagged" ]; then + env_selector="| where isnull(tags.environment)" + else + env_selector="| where tags.environment == '$1'" + fi + + az graph query -q " + resources + | where type =~ 'Microsoft.ContainerService/managedClusters' + | where tags.autoShutdown == 'true' + $env_selector + | project name, resourceGroup, subscriptionId, ['tags'], properties, ['id'] + " --first 1000 -o json + + log "az graph query complete" } function get_cluster_details() { - RESOURCE_GROUP=$(jq -r '.resourceGroup' <<< $cluster) - CLUSTER_NAME=$(jq -r '.name' <<< $cluster) - STARTUP_MODE=$(jq -r '.tags.startupMode' <<< $cluster) + RESOURCE_GROUP=$(jq -r '.resourceGroup' <<<$cluster) + CLUSTER_NAME=$(jq -r '.name' <<<$cluster) + STARTUP_MODE=$(jq -r '.tags.startupMode' <<<$cluster) + CLUSTER_STATUS=$(jq -r '.properties.powerState.code' <<<$cluster) } function check_cluster_status() { - BUSINESS_AREA=$(jq -r '.tags.businessArea' <<< $cluster) + BUSINESS_AREA=$(jq -r '.tags.businessArea' <<<$cluster) if [[ "$BUSINESS_AREA" == "Cross-Cutting" ]]; then APP="toffee" elif [[ "$BUSINESS_AREA" == "CFT" ]]; then APP="plum" fi - ENVIRONMENT=$(jq -r '.tags.environment' <<< $cluster) + ENVIRONMENT=$(jq -r '.tags.environment' <<<$cluster) local env_variants=( "sandbox/Sandbox:sbox" @@ -31,15 +49,15 @@ function check_cluster_status() { ) local -A notify_channel_map=( - [sandbox]="sbox" - [testing]="perftest" - [staging]="aat" + [sandbox]="sbox" + [testing]="perftest" + [staging]="aat" ) if [ -n "${notify_channel_map[$ENVIRONMENT]}" ]; then - SLACK_CHANNEL_SUFFIX="${notify_channel_map[$ENVIRONMENT]}" + SLACK_CHANNEL_SUFFIX="${notify_channel_map[$ENVIRONMENT]}" else - 
SLACK_CHANNEL_SUFFIX="$ENVIRONMENT" + SLACK_CHANNEL_SUFFIX="$ENVIRONMENT" fi for variant in "${env_variants[@]}"; do @@ -54,7 +72,7 @@ function check_cluster_status() { ts_echo "Test that $APP works in $ENVIRONMENT after $CLUSTER_NAME start-up" - statuscode=$(curl --max-time 30 --retry 20 --retry-delay 15 -s -o /dev/null -w "%{http_code}" https://$APPLICATION.platform.hmcts.net) + statuscode=$(curl --max-time 30 --retry 20 --retry-delay 15 -s -o /dev/null -w "%{http_code}" https://$APPLICATION.platform.hmcts.net) if [[ ("$ENVIRONMENT" == "demo" || $statuscode -eq 200) ]]; then notification "#aks-monitor-$SLACK_CHANNEL_SUFFIX" "$APP works in $ENVIRONMENT after $CLUSTER_NAME start-up" @@ -67,6 +85,6 @@ function check_cluster_status() { } function aks_state_messages() { - ts_echo_color GREEN "Running $MODE operation on cluster $CLUSTER_NAME (rg:$RESOURCE_GROUP)" - ts_echo_color GREEN "az aks $MODE --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --no-wait || echo Ignoring any errors while $MODE operation on cluster" -} + ts_echo_color GREEN "Running $MODE operation on cluster $CLUSTER_NAME (rg:$RESOURCE_GROUP)" + ts_echo_color GREEN "az aks $MODE --resource-group $RESOURCE_GROUP --name $CLUSTER_NAME --no-wait || echo Ignoring any errors while $MODE operation on cluster" +} diff --git a/scripts/aks/set-subscription.sh b/scripts/aks/set-subscription.sh deleted file mode 100644 index 72968c3c..00000000 --- a/scripts/aks/set-subscription.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -# Define an associative array for subscription names, cluster name prefixes -declare -A SUBSCRIPTION_CLUSTERMAP - -SUBSCRIPTION_CLUSTERMAP["CFT,Sandbox"]="DCD-CFTAPPS-SBOX,CFT-SBOX" -SUBSCRIPTION_CLUSTERMAP["CFT,sbox"]="DCD-CFTAPPS-SBOX,CFT-SBOX" -SUBSCRIPTION_CLUSTERMAP["CFT,AAT / Staging"]="DCD-CFTAPPS-STG,CFT-AAT" -SUBSCRIPTION_CLUSTERMAP["CFT,Test / Perftest"]="DCD-CFTAPPS-TEST,CFT-PERFTEST" -SUBSCRIPTION_CLUSTERMAP["CFT,Preview / Dev"]="DCD-CFTAPPS-DEV,CFT-PREVIEW" 
-SUBSCRIPTION_CLUSTERMAP["CFT,Demo"]="DCD-CFTAPPS-DEMO,CFT-DEMO" -SUBSCRIPTION_CLUSTERMAP["CFT,ITHC"]="DCD-CFTAPPS-ITHC,CFT-ITHC" -SUBSCRIPTION_CLUSTERMAP["CFT,PTL"]="DTS-CFTPTL-INTSVC,CFT-PTL" -SUBSCRIPTION_CLUSTERMAP["CFT,PTLSBOX"]="DTS-CFTSBOX-INTSVC,CFT-PTLSBOX" - -SUBSCRIPTION_CLUSTERMAP["SDS,Sandbox"]="DTS-SHAREDSERVICES-SBOX,SS-SBOX" -SUBSCRIPTION_CLUSTERMAP["SDS,sbox"]="DTS-SHAREDSERVICES-SBOX,SS-SBOX" -SUBSCRIPTION_CLUSTERMAP["SDS,AAT / Staging"]="DTS-SHAREDSERVICES-STG,SS-STG" -SUBSCRIPTION_CLUSTERMAP["SDS,Test / Perftest"]="DTS-SHAREDSERVICES-TEST,SS-TEST" -SUBSCRIPTION_CLUSTERMAP["SDS,Preview / Dev"]="DTS-SHAREDSERVICES-DEV,SS-DEV" -SUBSCRIPTION_CLUSTERMAP["SDS,Demo"]="DTS-SHAREDSERVICES-DEMO,SS-DEMO" -SUBSCRIPTION_CLUSTERMAP["SDS,ITHC"]="DTS-SHAREDSERVICES-ITHC,SS-ITHC" -SUBSCRIPTION_CLUSTERMAP["SDS,PTL"]="DTS-SHAREDSERVICESPTL,SS-PTL" -SUBSCRIPTION_CLUSTERMAP["SDS,PTLSBOX"]="DTS-SHAREDSERVICESPTL-SBOX,SS-PTLSBOX" - -function subscription() { - key="${PROJECT},${SELECTED_ENV}" - SUBSCRIPTION=$(echo ${SUBSCRIPTION_CLUSTERMAP[$key]} | cut -d ',' -f 1) - CLUSTER_PREFIX=$(echo ${SUBSCRIPTION_CLUSTERMAP[$key]} | cut -d ',' -f 2) - - az account set -n $SUBSCRIPTION - ts_echo $SUBSCRIPTION selected -} - -function ts_echo() { - date +"%H:%M:%S $(printf "%s " "$@")" -}