Skip to content

Commit

Permalink
socket dynamic conf
Browse files Browse the repository at this point in the history
Signed-off-by: 晓杰 <[email protected]>
  • Loading branch information
xiao-jay committed Sep 29, 2023
1 parent 0d6d3b6 commit 01c421c
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 1 deletion.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ require (
github.com/stretchr/testify v1.8.3
go.uber.org/automaxprocs v1.4.0
golang.org/x/crypto v0.1.0
golang.org/x/sys v0.7.0
golang.org/x/time v0.3.0
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.27.2
Expand Down Expand Up @@ -111,7 +112,6 @@ require (
golang.org/x/net v0.9.0 // indirect
golang.org/x/oauth2 v0.7.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.7.0 // indirect
golang.org/x/term v0.7.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/tools v0.7.0 // indirect
Expand Down
5 changes: 5 additions & 0 deletions installer/helm/chart/volcano/templates/scheduler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,15 @@ spec:
volumeMounts:
- name: scheduler-config
mountPath: /volcano.scheduler
- name: klog-sock
mountPath: /tmp/socks
volumes:
- name: scheduler-config
configMap:
name: {{ .Release.Name }}-scheduler-configmap
- name: klog-sock
hostPath:
path: /tmp/socks
---
apiVersion: v1
kind: Service
Expand Down
5 changes: 5 additions & 0 deletions installer/volcano-development.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9181,10 +9181,15 @@ spec:
volumeMounts:
- name: scheduler-config
mountPath: /volcano.scheduler
- name: klog-sock
mountPath: /tmp/socks
volumes:
- name: scheduler-config
configMap:
name: volcano-scheduler-configmap
- name: klog-sock
hostPath:
path: /tmp/socks
---
# Source: volcano/templates/scheduling_v1beta1_podgroup.yaml
apiVersion: apiextensions.k8s.io/v1
Expand Down
10 changes: 10 additions & 0 deletions pkg/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package scheduler

import (
"flag"
"fmt"
"path/filepath"
"sync"
Expand All @@ -27,6 +28,8 @@ import (
"k8s.io/client-go/rest"
"k8s.io/klog/v2"

"volcano.sh/volcano/pkg/util"

"volcano.sh/volcano/cmd/scheduler/app/options"
"volcano.sh/volcano/pkg/filewatcher"
schedcache "volcano.sh/volcano/pkg/scheduler/cache"
Expand Down Expand Up @@ -96,6 +99,7 @@ func (pc *Scheduler) Run(stopCh <-chan struct{}) {
if options.ServerOpts.EnableCacheDumper {
pc.dumper.ListenForSignal(stopCh)
}
go runSchedulerSocket()
}

func (pc *Scheduler) runOnce() {
Expand Down Expand Up @@ -206,3 +210,9 @@ func (pc *Scheduler) watchSchedulerConf(stopCh <-chan struct{}) {
}
}
}

func runSchedulerSocket() {
fs := flag.CommandLine
startKlogLevel := fs.Lookup("v").Value.String()
util.ListenAndServeKlogLogLevel("klog", startKlogLevel, util.SocketDir)
}
214 changes: 214 additions & 0 deletions pkg/util/socket.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
package util

import (
"context"
"fmt"
"html"
"net"
"net/http"
"os"
"path/filepath"
"strconv"
"sync"
"time"

"golang.org/x/sys/unix"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/klog/v2"
)

const (
	// DefaultDuration is the duration used when a set-level request carries no "duration" argument.
	DefaultDuration = "5m"
	// SocketDir is the default directory storing socket files.
	SocketDir = "/tmp/socks"
	// SocketSuffix is appended to the component name to build the socket file name.
	SocketSuffix = ".sock"

	// The HTTP request patterns
	setLogLevelPath = "/setlevel"
	getLogLevelPath = "/getlevel"

	// exampleSocketCli is the help text returned to the client when the "level"
	// or "duration" argument of a set-level request is invalid. The example path
	// follows the <socketDir>/<componentName><SocketSuffix> layout.
	exampleSocketCli = "Failed to change klog log level, because got wrong value from level or duration argument\n" +
		"example: curl --unix-socket /tmp/socks/componentName.sock \"http://localhost/setlevel?level=8&duration=60s\"\n" +
		"level=8 means changing klog log level to 8\n" +
		"duration=60s means maintaining level=8 for 60 seconds[60m -> 60 minutes; 60h -> 60 hours]"
)

var (
	// mutex protects prevCtx, prevCtxCancelFunc and currentLogLevel from data races.
	mutex sync.RWMutex

	// startupLogLevel is the log level the process was started with.
	startupLogLevel string
	// currentLogLevel is the log level that is in effect right now.
	currentLogLevel string

	// prevCtx and prevCtxCancelFunc belong to the most recently registered reset timer.
	// When log level change requests arrive in quick succession the earlier timer may
	// still be pending; cancelling it through prevCtxCancelFunc avoids keeping it around.
	prevCtx           context.Context
	prevCtxCancelFunc context.CancelFunc
)

// responseOk sends okMsg to the client as plain UTF-8 text. No status code is
// written explicitly. A failure to write the body is logged.
func responseOk(w *http.ResponseWriter, okMsg string) {
	rw := *w
	hdr := rw.Header()
	hdr.Set("Content-Type", "text/plain; charset=utf-8")
	hdr.Set("X-Content-Type-Options", "nosniff")
	if _, writeErr := fmt.Fprint(rw, okMsg); writeErr != nil {
		klog.Error(writeErr)
	}
}

// responseError replies to the client with status httpCode and errMsg as the body,
// delegating to http.Error.
func responseError(w *http.ResponseWriter, errMsg string, httpCode int) {
	rw := *w
	http.Error(rw, errMsg, httpCode)
}

// modifyLoglevel switches klog to newLogLevel and records it in currentLogLevel.
// On success the cancel function of the previously registered timer (if any) is
// invoked, and prevCtx / prevCtxCancelFunc are replaced by a fresh cancellable
// context that represents the next timer. The whole operation runs under mutex.
// An error from klog.Level.Set is returned unchanged and leaves all state untouched.
func modifyLoglevel(newLogLevel string) error {
	mutex.Lock()
	defer mutex.Unlock()

	// Ask klog to apply the requested level.
	var lvl klog.Level
	err := lvl.Set(newLogLevel)
	if err != nil {
		return err
	}
	currentLogLevel = newLogLevel

	// Drop the timer registered by the previous request before creating a new context.
	if cancelPrev := prevCtxCancelFunc; cancelPrev != nil {
		cancelPrev()
	}
	prevCtx, prevCtxCancelFunc = context.WithCancel(context.Background())
	return nil
}

// reset waits for duration and then makes klog recover to the start-up log level,
// unless ctx is cancelled first.
//
// ctx is the context of the request that registered this timer; it is cancelled
// when a newer request replaces the log level. duration is how long the changed
// level stays in effect. The function blocks until either event happens, so it is
// meant to be run in its own goroutine.
func reset(ctx context.Context, duration time.Duration) {
	defer runtime.HandleCrash()

	// Use an explicit timer so it can be released as soon as the wait is cancelled,
	// instead of lingering until duration has elapsed.
	timer := time.NewTimer(duration)
	defer timer.Stop()

	select {
	case <-timer.C:
		mutex.Lock()
		defer mutex.Unlock()
		// modifyLoglevel cancels ctx while holding mutex. If the timer fired at about
		// the same time a newer request arrived, ctx is already cancelled here and the
		// newer level must not be reverted.
		if ctx.Err() != nil {
			klog.InfoS("Cancel previous timer successfully")
			return
		}
		var loglevel klog.Level
		if err := loglevel.Set(startupLogLevel); err != nil {
			klog.ErrorS(err, "Failed to recover klog to start-up log level", "startupLogLevel", startupLogLevel)
			return
		}
		currentLogLevel = startupLogLevel
		klog.InfoS("Klog recover to start-up log level successfully", "startupLogLevel", startupLogLevel)
	case <-ctx.Done():
		// A newer request superseded this timer.
		klog.InfoS("Cancel previous timer successfully")
	}
}

// installKlogLogLevelHandler registers the HTTP request patterns that can set/get
// the current klog log level on mux.
//
// startup is recorded as both the current and the start-up log level.
//
//   - setLogLevelPath expects a "level" query argument (positive integer) and an
//     optional "duration" argument (time.ParseDuration syntax, DefaultDuration when
//     empty). Invalid arguments yield 400, a failure to apply the level yields 500.
//   - getLogLevelPath reports the current log level.
func installKlogLogLevelHandler(mux *http.ServeMux, startup string) {
	currentLogLevel, startupLogLevel = startup, startup

	// Register the HTTP request pattern that can change klog log level
	mux.HandleFunc(setLogLevelPath, func(w http.ResponseWriter, r *http.Request) {
		values := r.URL.Query()
		// Escape the data that may be echoed back or logged, to prevent reflected cross-site scripting
		rawLevel := html.EscapeString(values.Get("level"))
		rawDuration := html.EscapeString(values.Get("duration"))
		if rawDuration == "" {
			rawDuration = DefaultDuration
		}

		// Validate arguments in request. klog.Level is an int32, so parse with 32 bits:
		// an out-of-range level is a client error rather than an internal one.
		if level, err := strconv.ParseInt(rawLevel, 10, 32); err != nil || level <= 0 {
			responseError(&w, exampleSocketCli, http.StatusBadRequest)
			return
		}
		duration, err := time.ParseDuration(rawDuration)
		if err != nil || duration.Milliseconds() <= 0 {
			responseError(&w, exampleSocketCli, http.StatusBadRequest)
			return
		}

		if err := modifyLoglevel(rawLevel); err != nil {
			responseError(&w, fmt.Sprintf("Failed to change klog log level. Error: %v\n", err.Error()), http.StatusInternalServerError)
			return
		}

		// Snapshot the shared state under the read lock, then release it before doing
		// any network I/O so a slow client cannot block other requests or the reset timer.
		mutex.RLock()
		ctx, level := prevCtx, currentLogLevel
		mutex.RUnlock()

		// Create a timer to make klog recover to start-up log level.
		go reset(ctx, duration)
		responseOk(&w, fmt.Sprintf("Change klog log level to %s successfully and for %v\n", level, duration))
	})

	// Register the HTTP request pattern that can get current klog log level
	mux.HandleFunc(getLogLevelPath, func(w http.ResponseWriter, r *http.Request) {
		mutex.RLock()
		level := currentLogLevel
		mutex.RUnlock()
		responseOk(&w, fmt.Sprintf("Current klog log level: %s\n", level))
	})
}

// listenUnix does net.Listen for a unix socket.
//
// The socket file is <socketDir>/<componentName><SocketSuffix>; SocketDir is used
// when socketDir is empty. The directory is created when missing, and an existing
// socket file at that path is removed before listening. It returns the listener,
// or an error when the directory cannot be created, the old socket cannot be
// removed, or listening fails.
func listenUnix(componentName string, socketDir string) (net.Listener, error) {
	// Use default directory to store socket files
	if len(socketDir) == 0 {
		socketDir = SocketDir
	}

	// MkdirAll succeeds without doing anything when the directory already exists,
	// and reports an error for every other failure (e.g. the path is a regular file).
	if err := os.MkdirAll(socketDir, 0750); err != nil {
		return nil, fmt.Errorf("error creating klog log level socket dir: %w", err)
	}

	// Specify socket file full path
	socketFileFullPath := filepath.Join(socketDir, componentName+SocketSuffix)

	// Remove any socket, stale or not, but fall through for other files
	fi, err := os.Stat(socketFileFullPath)
	if err == nil && (fi.Mode()&os.ModeSocket) != 0 {
		if err := os.Remove(socketFileFullPath); err != nil {
			klog.ErrorS(err, "failed to remove socket file", "file", socketFileFullPath)
			return nil, err
		}
	}

	// Default to only user accessible socket, caller can open up later if desired
	// Result perm: 777 - 077 = 700
	oldmask := unix.Umask(0077)
	l, err := net.Listen("unix", socketFileFullPath)
	unix.Umask(oldmask)

	return l, err
}

// serveOnListener starts an HTTP server with handler m on listener l and blocks
// until the server stops. The error reported by http.Server.Serve is returned.
func serveOnListener(l net.Listener, m *http.ServeMux) error {
	server := &http.Server{
		Handler: m,
		// Bound the time a client may take to send its request headers, so a stalled
		// connection cannot hold a server goroutine forever.
		ReadHeaderTimeout: 10 * time.Second,
	}
	return server.Serve(l)
}

// ListenAndServeKlogLogLevel registers a server on specific component to handle
// the HTTP requests which set/get klog log level.
//
// componentName determines the socket file name, startupLogLevel is the level klog
// recovers to after a temporary change expires, and socketDir is the directory
// holding the socket file (the default directory is used when empty).
// The call blocks while the server is running; failures to listen or serve are logged.
func ListenAndServeKlogLogLevel(componentName string, startupLogLevel string, socketDir string) {
	defer runtime.HandleCrash()

	mux := http.NewServeMux()
	installKlogLogLevelHandler(mux, startupLogLevel)

	listener, err := listenUnix(componentName, socketDir)
	if err != nil {
		klog.ErrorS(err, "Failed to listen on klog log level socket", "component", componentName, "socketDir", socketDir)
		return
	}

	// Serve only returns when the server stops, always with a non-nil error.
	if err := serveOnListener(listener, mux); err != nil {
		klog.ErrorS(err, "Klog log level server stopped", "component", componentName)
	}
}

0 comments on commit 01c421c

Please sign in to comment.