Skip to content

Commit

Permalink
testing
Browse files Browse the repository at this point in the history
  • Loading branch information
sprutton1 committed Aug 28, 2024
1 parent e923c58 commit c4875ba
Show file tree
Hide file tree
Showing 15 changed files with 80 additions and 41 deletions.
33 changes: 23 additions & 10 deletions .github/workflows/deploy-stack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ jobs:

down-services:
needs:
- set-latest-versions
- set-maintenance-mode
strategy:
fail-fast: false
Expand All @@ -63,7 +62,6 @@ jobs:

upgrade-web:
needs:
- set-latest-versions
- set-maintenance-mode
uses: ./.github/workflows/upgrade-web.yml
with:
Expand All @@ -72,34 +70,48 @@ jobs:

upgrade-veritech:
needs:
- set-latest-versions
- set-service-versions
- set-maintenance-mode
uses: ./.github/workflows/upgrade-veritech.yml
with:
environment: ${{ inputs.environment }}
secrets: inherit

migrate-sdf:
upgrade-services:
needs:
- set-service-versions
- down-services
uses: ./.github/workflows/migrate-sdf.yml
strategy:
fail-fast: false
matrix:
service: [ "pinga", "rebaser", "sdf" ]
uses: ./.github/workflows/down-service.yml
with:
environment: ${{ inputs.environment }}
service: ${{ matrix.service }}
secrets: inherit

upgrade-services:
up-services:
needs:
- migrate-sdf
- upgrade-services
strategy:
fail-fast: false
matrix:
service: [ "pinga", "rebaser", "sdf" ]
uses: ./.github/workflows/down-service.yml
service: [ "pinga", "rebaser" ]
uses: ./.github/workflows/up-service.yml
with:
environment: ${{ inputs.environment }}
service: ${{ matrix.service }}
secrets: inherit

migrate-and-up-sdf:
needs:
- up-services
uses: ./.github/workflows/migrate-sdf.yml
with:
environment: ${{ inputs.environment }}
secrets: inherit

e2e-validation:
# We want to ensure that in-progress cron runs against tools-prod
# are canceled when we do a deploy so they don't fail erroneously
Expand All @@ -109,7 +121,8 @@ jobs:
needs:
- upgrade-web
- upgrade-veritech
- upgrade-services
- migrate-and-up-sdf
if: always()
uses: ./.github/workflows/e2e-validation.yml
with:
environment: ${{ inputs.environment }}
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/instance-refresh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,18 @@ on:
jobs:
replace:
environment: ${{ inputs.environment }}
concurrency:
group: instance-refresh-${{ inputs.environment }}-${{ inputs.service }}
cancel-in-progress: true
name: Instance refresh
runs-on: ubuntu-latest
steps:
- name: Configure AWS credentials for ${{ inputs.environment }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_KEY }}
aws-region: us-east-1
- name: Instance refresh
run: |
poll_instance_refresh() {
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/migrate-sdf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Down service
- name: Migrate and up service
run: |
component/toolbox/awsi.sh upgrade -p pull-from-env -r us-east-1 -a y -s sdf
component/toolbox/awsi.sh migrate -p pull-from-env -r us-east-1 -a y -s sdf
component/toolbox/awsi.sh service-state -p pull-from-env -r us-east-1 -a y -s sdf -S up
2 changes: 1 addition & 1 deletion .github/workflows/set-service-version.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
environment: ${{ inputs.environment }}
runs-on: ubuntu-latest
steps:
- name: Configure AWS credentials for production
- name: Configure AWS credentials for ${{ inputs.environment }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/upgrade-veritech.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ jobs:
secrets: inherit

up-veritech:
needs: upgrade-veritech
uses: ./.github/workflows/up-service.yml
with:
environment: ${{ inputs.environment }}
Expand Down
1 change: 1 addition & 0 deletions component/init/configs/service.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
migration_mode = "skip"
pkgs_path = "/tmp"
create_workspace_permissions = "$SI_WORKSPACE_PERMISSIONS"
create_workspace_allowlist = [ "$SI_WORKSPACE_ALLOW_LIST" ]
Expand Down
2 changes: 1 addition & 1 deletion component/toolbox/awsi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ docker run --rm "${terminal}" \
-e AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID}" \
-e AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY}" \
-e AWS_SESSION_TOKEN="${AWS_SESSION_TOKEN}" \
systeminit/toolbox:stable "$*"
systeminit/toolbox:test "$*"
11 changes: 10 additions & 1 deletion component/toolbox/scripts/migrate
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,18 @@ mkdir -p "$results_directory/"
# get the first SDF and go do the thing
while read -r line; do
instance_id=$(echo "$line" | awk '{print $2}')
start_and_track_ssm_session "$instance_id" "$sdf_migrate_script" "sdf" "migrate" "$results_directory" &
start_and_track_ssm_session "$instance_id" "$sdf_migrate_script" "$results_directory" "InstanceId=$instance_id"
break
done <<< "$instances"

await_file_results "$results_directory" 2
concat_and_output_json "$results_directory" "$check_results_file"

# Gate the script's exit code on the collected SSM results: jq -e exits 0 only
# when every result object reports status "success".
# The messages describe the SDF migration itself; the previous text was copied
# from the service-state script and referred to a service/state and to
# maintenance mode, none of which this step acts on.
if jq -e 'all(.[]; .status == "success")' "$results_directory/$check_results_file" > /dev/null; then
  echo "The SDF migration completed successfully"
  echo "----------------------------------------"
  exit 0
else
  echo "Error: The SDF migration did not report success, try again later or look at the logs"
  exit 2
fi
13 changes: 10 additions & 3 deletions component/toolbox/scripts/service-state
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
fi

# Parse flags
while getopts ":p:r:a:s:" opt; do
while getopts ":p:r:a:s:S:" opt; do
case ${opt} in
p)
profile=$OPTARG
Expand Down Expand Up @@ -120,12 +120,19 @@ i=1
while read -r line; do
instance_id=$(echo "$line" | awk '{print $2}')
service_name=$(echo "$line" | awk '{print $1}' | awk -F- '{print $2}')
start_and_track_ssm_session "$instance_id" "$service_state_script" "$service_name" "$state" "$results_directory" # Serially
start_and_track_ssm_session "$instance_id" "$service_state_script" "$results_directory" "InstanceId=$instance_id,Service=$service_name,Action=$state" # Serially
((i++))
done <<< "$instances"

await_file_results "$results_directory" $((i - 1))

concat_and_output_json "$results_directory" "$check_results_file"

echo "All active binary services of ${service} have been set to ${state}"
# Gate the script's exit code on the collected SSM results: jq -e exits 0 only
# when every result object reports status "success".
if jq -e 'all(.[]; .status == "success")' "$results_directory/$check_results_file" > /dev/null; then
  echo "All running service nodes of ${service} have had their state set to $state"
  echo "----------------------------------------"
  exit 0
else
  # This script changes service state; it does not touch maintenance mode, so
  # report the service and the requested state that could not be applied.
  echo "Error: One or more nodes of ${service} failed to have their state set to ${state}, try again later or look at the logs"
  exit 2
fi
10 changes: 8 additions & 2 deletions component/toolbox/scripts/ssm-scripts/si-check-node-upgrade
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ parameters:
type: "String"
description: "Action to execute [not yet used]"
default: "N/A"
Environment:
type: "String"
description: "Environment to run in"
default: "N/A"

mainSteps:
- action: "aws:runShellScript"
Expand All @@ -24,8 +28,10 @@ mainSteps:
# JW: This assessment blindly assumes that there are no additional configuration changes to the binaries or runtimes and that they are a direct application code replacement
# this is a little naive but we can check the deployment specs/ymls if we wish to conduct this check too.

DESIRED_VERSION=$(aws ssm get-parameter --query "Parameter.Value" --output text --name "si-version-$SI_SERVICE")
NEW_VERSION=$(curl -Ls https://artifacts.systeminit.com/{{ Service }}/${DESIRED_VERSION}/omnibus/linux/$(arch)/{{ Service }}-${DESIRED_VERSION}-omnibus-linux-$(arch).tar.gz.metadata.json | jq -r '.version')
export SI_SERVICE={{ Service }}
export SI_HOSTENV={{ Environment }}
export SI_VERSION=$(aws ssm get-parameter --query "Parameter.Value" --output text --name "$SI_HOSTENV-si-version-$SI_SERVICE")
NEW_VERSION=$(curl -Ls https://artifacts.systeminit.com/{{ Service }}/${SI_VERSION}/omnibus/linux/$(arch)/{{ Service }}-${SI_VERSION}-omnibus-linux-$(arch).tar.gz.metadata.json | jq -r '.version')
RUNNING_VERSION=$(sudo find / -wholename '/etc/nix-omnibus/{{ Service }}/**/metadata.json' | tail -n 1 | xargs cat | jq -r '.version')

# Check if both versions are set to non-empty values
Expand Down
9 changes: 0 additions & 9 deletions component/toolbox/scripts/ssm-scripts/si-migrate-sdf
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,10 @@
schemaVersion: "2.2"
description: "Run a oneshot SDF with MigrationMode=RunAndQuit"
parameters:
Service:
type: "String"
description: "Service to Run on Node"
default: "N/A"
InstanceId:
type: "String"
description: "InstanceId of the executing node"
default: "N/A"
Action:
type: "String"
description: "Action to execute [not yet used]"
default: "N/A"

mainSteps:
- action: "aws:runShellScript"
name: "example"
Expand Down
6 changes: 4 additions & 2 deletions component/toolbox/scripts/ssm-scripts/si-service-state
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@ mainSteps:
inputs:
runCommand:
- |
case {{ Action }} in
case "{{ Action }}" in
"down")
service {{ Service }} stop
echo "{\"instance_id\": \"{{ InstanceId }}\", \"status\": \"success\", \"service\": \"{{ Service }}\", \"state\": \"{{ Action }}\" }"
;;
"up")
service {{ Service }} restart
echo "{\"instance_id\": \"{{ InstanceId }}\", \"status\": \"success\", \"service\": \"{{ Service }}\", \"state\": \"{{ Action }}\" }"
;;
"upgrade")
export SI_SERVICE={{ Service }}
Expand All @@ -42,7 +44,7 @@ mainSteps:
docker-compose -f /run/app/docker-compose.yaml --profile $SI_SERVICE up --wait

wget https://artifacts.systeminit.com/{{ Service }}/${SI_VERSION}/omnibus/linux/$(arch)/{{ Service }}-${SI_VERSION}-omnibus-linux-$(arch).tar.gz -O - | tar -xzf - -C /
METADATA==$(sudo find / -wholename '/etc/nix-omnibus/{{ Service }}/**/metadata.json' | tail -n 1 | xargs cat | jq)
METADATA=$(sudo find / -wholename '/etc/nix-omnibus/{{ Service }}/**/metadata.json' | tail -n 1 | xargs cat | jq)
COMMIT=$(echo $METADATA | jq -r '.commit')
RUNNING_VERSION=$(echo $METADATA | jq -r '.version')

Expand Down
7 changes: 3 additions & 4 deletions component/toolbox/scripts/supporting-funcs/ssm-funcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@ start_and_track_ssm_session() {

instance_id=$1
script=$2
service=$3
action=$4
results_directory=$5
results_directory=$3
params=$4

output=$(aws ssm send-command --instance-ids "$instance_id" --document-name "$script" --parameters "Service=$service,InstanceId=$instance_id,Action=$action" 2>&1)
output=$(aws ssm send-command --instance-ids "$instance_id" --document-name "$script" --parameters "$params" 2>&1)

status=$?

Expand Down
5 changes: 3 additions & 2 deletions component/toolbox/scripts/toggle-maintenance
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
# ---------------------------------------------------------------------------------------------------
# Identifies all the service replicas/machines and permits you to toggle maintenance mode on or off via
# an SSM document execution on the host. If the json output from the SSM executions is not enough to
# an SSM document execution on the host. If the json output from the SSM executions is not enough to
# debug just look in AWS and you'll see the whole execution history in SSM Command Execution History.
# ---------------------------------------------------------------------------------------------------

Expand All @@ -10,7 +10,7 @@
set -eo pipefail

# Find & Import all the supporting functions from the supporting folder
# Get the directory of the current script to figure out where the
# Get the directory of the current script to figure out where the
# Supporting funcs are
IMPORT_DIR=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)

Expand Down Expand Up @@ -126,6 +126,7 @@ while read -r line; do
instance_id=$(echo "$line" | awk '{print $2}')
service_name=$(echo "$line" | awk '{print $1}' | awk -F- '{print $2}')
# Dispatch exactly one SSM command per instance, in the background, using the
# (instance_id, document, results_directory, params) argument order.
# The earlier five-argument call that used to sit beside this one passed the
# service name where the results directory is expected and sent a second
# command to every instance, so it is dropped.
start_and_track_ssm_session "$instance_id" "$service_maintenance_script" "$results_directory" "InstanceId=$instance_id,Action=$maintenance" &
((i++))
done <<< "$instances"

Expand Down
8 changes: 4 additions & 4 deletions component/toolbox/scripts/upgrade
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
fi

# Parse flags
while getopts ":p:r:a:s:" opt; do
while getopts ":p:r:a:s:e:" opt; do
case ${opt} in
p)
profile=$OPTARG
Expand Down Expand Up @@ -130,7 +130,7 @@ i=1
while read -r line; do
instance_id=$(echo "$line" | awk '{print $2}')
service_name=$(echo "$line" | awk '{print $1}' | awk -F- '{print $2}')
start_and_track_ssm_session "$instance_id" "$upgrade_check_script" "$service_name" "check" "$results_directory" &
start_and_track_ssm_session "$instance_id" "$upgrade_check_script" "$results_directory" "InstanceId=$instance_id,Service=$service_name,Environment=$environment" &
((i++))
done <<< "$instances"

Expand Down Expand Up @@ -162,7 +162,7 @@ upgrade_hosts_num=$(jq 'map(select(.service == "veritech")) | .[]' <<< $upgrade_
jq 'map(select(.service == "veritech")) | .[]' <<< $upgrade_candidates_json | jq -c '.' | while read -r line; do
instance_id=$(echo "$line" | jq -r '.instance_id')
service_name=$(echo "$line" | jq -r '.service')
start_and_track_ssm_session "$instance_id" "$service_state_script" "$service_name" "upgrade" "$results_directory" -e "$environment" # Serially
start_and_track_ssm_session "$instance_id" "$service_state_script" "$results_directory" "InstanceId=$instance_id,Service=$service_name,Action=upgrade,Environment=$environment"
done

# Wait until all the results arrive
Expand All @@ -173,7 +173,7 @@ upgrade_hosts_num=$(jq 'map(select(.service != "veritech")) | .[]' <<< $upgrade_
jq 'map(select(.service != "veritech")) | .[]' <<< $upgrade_candidates_json | jq -c '.' | while read -r line; do
instance_id=$(echo "$line" | jq -r '.instance_id')
service_name=$(echo "$line" | jq -r '.service')
start_and_track_ssm_session "$instance_id" "$service_state_script" "$service_name" "upgrade" "$results_directory" -e "$environment" & # In Parallel
start_and_track_ssm_session "$instance_id" "$service_state_script" "$results_directory" "InstanceId=$instance_id,Service=$service_name,Action=upgrade,Environment=$environment" &
((i++))
done

Expand Down

0 comments on commit c4875ba

Please sign in to comment.