From 5efb1aa3cc95a93190796cf49f40813594c341aa Mon Sep 17 00:00:00 2001
From: Theo Nam Truong <theotr@amazon.com>
Date: Tue, 11 Oct 2022 15:27:03 -0600
Subject: [PATCH] Implemented Retry for OpenSearch Container (#304)

Signed-off-by: Theo Truong <theotr@amazon.com>
---
 .ci/opensearch/Dockerfile.opensearch |  6 ++++++
 .ci/opensearch/docker-compose.yml    | 12 ++++++++++++
 .github/workflows/compatibility.yml  |  2 ++
 Makefile                             |  2 +-
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/.ci/opensearch/Dockerfile.opensearch b/.ci/opensearch/Dockerfile.opensearch
index 67a1a3b13..e248a6206 100644
--- a/.ci/opensearch/Dockerfile.opensearch
+++ b/.ci/opensearch/Dockerfile.opensearch
@@ -5,4 +5,10 @@ ARG opensearch_path=/usr/share/opensearch
 ARG opensearch_yml=$opensearch_path/config/opensearch.yml
 
 ARG SECURE_INTEGRATION
+
+HEALTHCHECK --start-period=20s --interval=5s --retries=2 --timeout=1s \
+  CMD if [ "$SECURE_INTEGRATION" != "true" ]; \
+  then curl --fail localhost:9200/_cat/health; \
+  else curl --fail -k https://localhost:9200/_cat/health -u admin:admin; fi
+
 RUN if [ "$SECURE_INTEGRATION" != "true" ] ; then $opensearch_path/bin/opensearch-plugin remove opensearch-security; fi
diff --git a/.ci/opensearch/docker-compose.yml b/.ci/opensearch/docker-compose.yml
index 9f8b2a525..977dbb3b1 100644
--- a/.ci/opensearch/docker-compose.yml
+++ b/.ci/opensearch/docker-compose.yml
@@ -2,6 +2,7 @@ version: '3'
 
 services:
   opensearch:
+    restart: always
     build:
       context: .
       dockerfile: Dockerfile.opensearch
@@ -11,6 +12,17 @@ services:
     environment:
       - discovery.type=single-node
       - bootstrap.memory_lock=true
+      - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false}
     ports:
       - '9200:9200'
     user: opensearch
+  autoheal:
+    restart: always
+    image: willfarrell/autoheal
+    environment:
+      - AUTOHEAL_CONTAINER_LABEL=all
+      - AUTOHEAL_START_PERIOD=30
+      - AUTOHEAL_INTERVAL=5
+      - AUTOHEAL_DEFAULT_STOP_TIMEOUT=30
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
diff --git a/.github/workflows/compatibility.yml b/.github/workflows/compatibility.yml
index 207c00bb2..4bd99fef8 100644
--- a/.github/workflows/compatibility.yml
+++ b/.github/workflows/compatibility.yml
@@ -34,6 +34,7 @@ jobs:
           sudo sysctl -w vm.max_map_count=262144
 
       - name: Runs OpenSearch cluster
+        id: start_opensearch_cluster
         run: |
           export OPENSEARCH_VERSION=${{ matrix.entry.opensearch_version }}
           export SECURE_INTEGRATION=${{ matrix.secured }}
@@ -59,5 +60,6 @@ jobs:
           npm run test:integration:helpers-secure
 
       - name: Stop the OpenSearch cluster
+        if: ${{ steps.start_opensearch_cluster.outcome == 'success'}}
         run: |
           make cluster.opensearch.stop
diff --git a/Makefile b/Makefile
index 1ddd73f75..8a0f5085a 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ cluster.opensearch.build:
 
 cluster.opensearch.start:
 	docker-compose --project-directory .ci/opensearch up -d ;
-	sleep 20;
+	sleep 60;
 
 cluster.opensearch.stop:
 	docker-compose --project-directory .ci/opensearch down ;