Skip to content

Commit

Permalink
Stop running healthcheck when interrupt signal is received
Browse files Browse the repository at this point in the history
This doesn't fully address #14, so the added test is still currently failing
  • Loading branch information
perk11 committed Sep 23, 2024
1 parent a45e1bc commit 91282d5
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 16 deletions.
5 changes: 5 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ func (rm ResourceManager) createRunningService(serviceConfig ServiceConfig) Runn
// Package-level state used by main and by the helper functions in this file.
var (
config Config
resourceManager ResourceManager
// interrupted is set to true by main once a termination signal has been
// received; performHealthCheck checks it at the top of every loop iteration
// and returns instead of running the healthcheck command again.
// NOTE(review): this is a plain bool with no visible synchronization. If the
// healthcheck loop runs on a different goroutine than the signal handler,
// this is a data race -- confirm, and consider sync/atomic or a context.
interrupted bool = false
)

func main() {
Expand Down Expand Up @@ -124,6 +125,7 @@ func main() {
for {
receivedSignal := <-exit
log.Printf("Received %s signal, terminating all processes", signalToString(receivedSignal))
interrupted = true
// no need to unlock as os.Exit will be called
resourceManager.serviceMutex.Lock()
for name := range resourceManager.runningServices {
Expand Down Expand Up @@ -300,6 +302,9 @@ func performHealthCheck(serviceConfig ServiceConfig) {

log.Printf("[%s] Running healthcheck command \"%s\"", serviceConfig.Name, serviceConfig.HealthcheckCommand)
for {
if interrupted {
return
}
cmd := exec.Command("sh", "-c", serviceConfig.HealthcheckCommand)
err := cmd.Run()

Expand Down
55 changes: 39 additions & 16 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ import (
"time"
)

// connectOnly opens a TCP connection to proxyAddress and neither sends nor
// reads any data. A failed dial is reported through test.Error.
//
// The connection is intentionally kept open while the test body runs, so the
// proxy keeps seeing a connected client. It is closed by a cleanup hook once
// the test finishes, instead of being leaked until the garbage collector
// happens to finalize the socket.
func connectOnly(test *testing.T, proxyAddress string) {
	test.Helper()

	conn, err := net.Dial("tcp", proxyAddress)
	if err != nil {
		test.Error(err)
		return
	}

	test.Cleanup(func() {
		// Best-effort close: by now the peer may already have dropped the
		// connection, and a close error carries no information for the test.
		_ = conn.Close()
	})
}

func minimal(test *testing.T, proxyAddress string) {
conn, err := net.Dial("tcp", proxyAddress)
if err != nil {
Expand Down Expand Up @@ -76,7 +84,7 @@ func isProcessRunning(pid int) bool {
}
return false
}
func startLargeModelProxy(testCaseName string, configPath string) (*exec.Cmd, error) {
func startLargeModelProxy(testCaseName string, configPath string, waitChannel chan error) (*exec.Cmd, error) {
cmd := exec.Command("./large-model-proxy", "-c", configPath)
logFilePath := fmt.Sprintf("logs/test_%s.log", testCaseName)
logFile, err := os.OpenFile(logFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
Expand All @@ -87,18 +95,17 @@ func startLargeModelProxy(testCaseName string, configPath string) (*exec.Cmd, er
log.Printf("Failed to open log file for test %s", logFilePath)
}
if err := cmd.Start(); err != nil {
waitChannel <- err
return nil, err
}
// Create a channel to receive the process exit status
done := make(chan error, 1)
go func() {
done <- cmd.Wait()
waitChannel <- cmd.Wait()
}()

time.Sleep(1 * time.Second)

select {
case err := <-done:
case err := <-waitChannel:
if err != nil {
return nil, fmt.Errorf("large-model-proxy exited prematurely with error %v", err)
} else {
Expand All @@ -118,17 +125,23 @@ func startLargeModelProxy(testCaseName string, configPath string) (*exec.Cmd, er
return cmd, nil
}

func stopApplication(cmd *exec.Cmd) error {
func stopApplication(cmd *exec.Cmd, waitChannel chan error) error {
if err := cmd.Process.Signal(syscall.SIGINT); err != nil {
return err
}
err := cmd.Wait()
if err != nil && err.Error() != "waitid: no child processes" && err.Error() != "wait: no child processes" {
return err

select {
case err := <-waitChannel:
if err != nil && err.Error() != "waitid: no child processes" && err.Error() != "wait: no child processes" {
return err
}
return nil
case <-time.After(15 * time.Second):
// Optionally kill the process if it hasn't exited
_ = cmd.Process.Kill()
return errors.New("large-model-proxy process did not stop within 15 seconds after receiving SIGINT")
}
return nil
}

func checkPortClosed(port string) error {
_, err := net.DialTimeout("tcp", net.JoinHostPort("localhost", port), time.Second)
if err == nil {
Expand Down Expand Up @@ -170,26 +183,36 @@ func TestAppScenarios(test *testing.T) {
"2004",
minimal,
},
{
"healthcheck-stuck",
"test-server/healthcheck-stuck.json",
"2005",
connectOnly,
},
}

for _, testCase := range tests {
testCase := testCase
test.Run(testCase.Name, func(test *testing.T) {
test.Parallel()

cmd, err := startLargeModelProxy(testCase.Name, testCase.ConfigPath)
var waitChannel = make(chan error, 1)
cmd, err := startLargeModelProxy(testCase.Name, testCase.ConfigPath, waitChannel)
if err != nil {
test.Fatalf("could not start application: %v", err)
}

defer func(cmd *exec.Cmd, port string) {
if err := stopApplication(cmd); err != nil {
defer func(cmd *exec.Cmd, port string, waitChannel chan error) {
if cmd == nil {
test.Errorf("not stopping application since there was a start error: %v", err)
return
}
if err := stopApplication(cmd, waitChannel); err != nil {
test.Errorf("failed to stop application: %v", err)
}
if err := checkPortClosed(port); err != nil {
test.Errorf("port %s is still open after application exit: %v", port, err)
}
}(cmd, testCase.Port)
}(cmd, testCase.Port, waitChannel)

proxyAddress := fmt.Sprintf("localhost:%s", testCase.Port)
testCase.TestFunc(test, proxyAddress)
Expand Down
14 changes: 14 additions & 0 deletions test-server/healthcheck-stuck.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"Services": [
{
"Name": "test-server_healthcheck",
"ListenPort": "2005",
"ProxyTargetHost": "localhost",
"ProxyTargetPort": "12005",
"Command": "test-server/test-server",
"Args": "-p 12005 -healthcheck-port 2015",
"HealthcheckCommand": "false",
"HealthcheckIntervalMilliseconds": 200
}
]
}

0 comments on commit 91282d5

Please sign in to comment.