Skip to content

Commit

Permalink
Implement custom healthcheck support
Browse files Browse the repository at this point in the history
  • Loading branch information
perk11 committed Sep 22, 2024
1 parent 92cfd24 commit a45e1bc
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 16 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ Below is an example config.json:
"ProxyTargetPort": "18081",
"Command": "/opt/llama.cpp/llama-server",
"Args": "-m /opt/Gemma-27B-v1_Q4km.gguf -c 8192 -ngl 100 -t 4 --port 18081",
"HealthcheckCommand": "curl --fail http://localhost:18081/",
"HealthcheckIntervalMilliseconds": 200,
"RestartOnConnectionFailure": false,
"ResourceRequirements": {
"VRAM-GPU-1": 20000,
Expand Down
52 changes: 40 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,19 @@ type Config struct {
}

// ServiceConfig holds the settings of a single proxied service, matching the
// per-service keys shown in the example config.json.
type ServiceConfig struct {
Name string
ListenPort string
ProxyTargetHost string
ProxyTargetPort string
Command string
Args string
LogFilePath string
Workdir string
ShutDownAfterInactivitySeconds time.Duration
RestartOnConnectionFailure bool
ResourceRequirements map[string]int `json:"ResourceRequirements"`
Name string
ListenPort string
ProxyTargetHost string
ProxyTargetPort string
Command string
Args string
LogFilePath string
Workdir string
// HealthcheckCommand is optional. When non-empty, performHealthCheck runs it
// through `sh -c` repeatedly until it exits with code 0.
HealthcheckCommand string
// HealthcheckIntervalMilliseconds is the pause between failed healthcheck
// attempts. Despite the time.Duration type it carries a plain count of
// milliseconds: performHealthCheck multiplies it by time.Millisecond.
HealthcheckIntervalMilliseconds time.Duration
ShutDownAfterInactivitySeconds time.Duration
RestartOnConnectionFailure bool
ResourceRequirements map[string]int `json:"ResourceRequirements"`
}
type RunningService struct {
manageMutex *sync.Mutex
Expand Down Expand Up @@ -268,8 +270,8 @@ func startService(serviceConfig ServiceConfig) (net.Conn, error) {
delete(resourceManager.runningServices, serviceConfig.Name)
return nil, fmt.Errorf("failed to run command \"%s %s\"", serviceConfig.Command, serviceConfig.Args)
}
performHealthCheck(serviceConfig)
var serviceConnection = connectWithWaiting(serviceConfig.ProxyTargetHost, serviceConfig.ProxyTargetPort, serviceConfig.Name, 120*time.Second)
time.Sleep(2 * time.Second) //TODO: replace with a custom callback

runningService.cmd = cmd

Expand All @@ -291,6 +293,32 @@ func startService(serviceConfig ServiceConfig) (net.Conn, error) {
return serviceConnection, nil
}

// performHealthCheck blocks until the service's healthcheck command succeeds.
//
// If serviceConfig.HealthcheckCommand is empty, no healthcheck is configured
// and the function returns immediately. Otherwise the command is executed via
// `sh -c` over and over until it exits with code 0, pausing between attempts.
//
// serviceConfig.HealthcheckIntervalMilliseconds is treated as a plain count of
// milliseconds. A missing or non-positive value falls back to 200ms so that a
// failing healthcheck cannot turn into a tight loop that forks a shell as fast
// as possible.
//
// NOTE(review): there is no upper bound on the number of attempts; a
// healthcheck that never succeeds blocks the caller indefinitely.
func performHealthCheck(serviceConfig ServiceConfig) {
	if serviceConfig.HealthcheckCommand == "" {
		return
	}

	const defaultInterval = 200 * time.Millisecond

	// The field stores a millisecond count in a time.Duration, so scale it
	// into a real duration before sleeping.
	interval := serviceConfig.HealthcheckIntervalMilliseconds * time.Millisecond
	if interval <= 0 {
		interval = defaultInterval
	}

	log.Printf("[%s] Running healthcheck command \"%s\"", serviceConfig.Name, serviceConfig.HealthcheckCommand)
	for {
		cmd := exec.Command("sh", "-c", serviceConfig.HealthcheckCommand)
		if err := cmd.Run(); err == nil {
			log.Printf("[%s] Healthcheck \"%s\" returned exit code 0, healthcheck completed", serviceConfig.Name, serviceConfig.HealthcheckCommand)
			return
		}

		// ExitCode reports -1 when the command could not be started or was
		// terminated by a signal; it is safe to call on a nil ProcessState.
		log.Printf(
			"[%s] Healthcheck \"%s\" returned exit code %d, trying again in %dms",
			serviceConfig.Name,
			serviceConfig.HealthcheckCommand,
			cmd.ProcessState.ExitCode(),
			int64(interval/time.Millisecond),
		)
		time.Sleep(interval)
	}
}

func connectToService(serviceConfig ServiceConfig) net.Conn {
log.Printf("[%s] Opening new service connection to %s:%s", serviceConfig.Name, serviceConfig.ProxyTargetHost, serviceConfig.ProxyTargetPort)
serviceConn, err := net.Dial("tcp", net.JoinHostPort(serviceConfig.ProxyTargetHost, serviceConfig.ProxyTargetPort))
Expand Down
4 changes: 3 additions & 1 deletion test-server/healthcheck-immediate-listen-start.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
"ProxyTargetHost": "localhost",
"ProxyTargetPort": "12002",
"Command": "test-server/test-server",
"Args": "-p 12002 -healthcheck-port 2012 -sleep-before-listening-for-healthcheck 3s -startup-duration 5s"
"Args": "-p 12002 -healthcheck-port 2012 -sleep-before-listening-for-healthcheck 3s -startup-duration 5s",
"HealthcheckCommand": "curl --fail http://localhost:2012",
"HealthcheckIntervalMilliseconds": 200
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
"ProxyTargetHost": "localhost",
"ProxyTargetPort": "12003",
"Command": "test-server/test-server",
"Args": "-p 12003 -healthcheck-port 2013 -sleep-before-listening-for-healthcheck 3s -startup-duration 5s"
"Args": "-p 12003 -healthcheck-port 2013 -sleep-before-listening-for-healthcheck 3s -startup-duration 5s",
"HealthcheckCommand": "curl --fail http://localhost:2013",
"HealthcheckIntervalMilliseconds": 200
}
]
}
4 changes: 3 additions & 1 deletion test-server/healthcheck-immediate-startup.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
"ProxyTargetHost": "localhost",
"ProxyTargetPort": "12004",
"Command": "test-server/test-server",
"Args": "-p 12004 -healthcheck-port 2014"
"Args": "-p 12004 -healthcheck-port 2014",
"HealthcheckCommand": "curl --fail http://localhost:2014",
"HealthcheckIntervalMilliseconds": 200
}
]
}
4 changes: 3 additions & 1 deletion test-server/healthcheck.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
"ProxyTargetHost": "localhost",
"ProxyTargetPort": "12001",
"Command": "test-server/test-server",
"Args": "-p 12001 -healthcheck-port 2011 -sleep-before-listening 10s -sleep-before-listening-for-healthcheck 3s -startup-duration 5s"
"Args": "-p 12001 -healthcheck-port 2011 -sleep-before-listening 10s -sleep-before-listening-for-healthcheck 3s -startup-duration 5s",
"HealthcheckCommand": "curl --fail http://localhost:2011",
"HealthcheckIntervalMilliseconds": 200
}
]
}

0 comments on commit a45e1bc

Please sign in to comment.