Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move lockfile logging to osquery log adapter #2018

Merged
merged 1 commit into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 91 additions & 6 deletions ee/log/osquerylogs/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"regexp"
"strconv"
"strings"
"time"

"github.com/shirou/gopsutil/v3/host"
"github.com/shirou/gopsutil/v3/process"
Expand All @@ -17,9 +18,10 @@ import (
// OsqueryLogAdapater creates an io.Writer implementation useful for attaching
// to the osquery stdout/stderr
type OsqueryLogAdapter struct {
slogger slog.Logger
level slog.Level
rootDirectory string
slogger slog.Logger
level slog.Level
rootDirectory string
lastLockfileLogTime time.Time
}

type Option func(*OsqueryLogAdapter)
Expand All @@ -30,9 +32,11 @@ func WithLevel(level slog.Level) Option {
}
}

var callerRegexp = regexp.MustCompile(`[\w.]+:\d+]`)

var pidRegex = regexp.MustCompile(`Refusing to kill non-osqueryd process (\d+)`)
var (
callerRegexp = regexp.MustCompile(`[\w.]+:\d+]`)
pidRegex = regexp.MustCompile(`Refusing to kill non-osqueryd process (\d+)`)
lockfileRegex = regexp.MustCompile(`lock file: ([a-zA-Z0-9_\.\s\\\/\-:]*LOCK):`)
)

func extractOsqueryCaller(msg string) string {
return strings.TrimSuffix(callerRegexp.FindString(msg), "]")
Expand Down Expand Up @@ -78,6 +82,22 @@ func (l *OsqueryLogAdapter) Write(p []byte) (int, error) {
go l.logInfoAboutUnrecognizedProcessLockingPidfile(p)
}

// We have noticed the lock file occasionally locked when it shouldn't be -- we think this can happen
// when osquery doesn't get adequate time to shut down gracefully. The held lockfile will prevent
// osquery from starting up entirely.
// See: https://github.com/kolide/launcher/issues/2004.
if bytes.Contains(p, []byte("Rocksdb open failed")) {
// We can get spammed with this log, but we don't want to do all the work to look up info about the process
// using the lockfile each time we see this log -- make sure we only log once every 10 minutes at most.
if time.Since(l.lastLockfileLogTime) > 10*time.Minute {
l.lastLockfileLogTime = time.Now()
l.slogger.Log(context.TODO(), slog.LevelError,
"detected stale lockfile, logging info about file",
)
go l.logInfoAboutProcessHoldingLockfile(context.TODO(), p)
}
}

msg := strings.TrimSpace(string(p))
caller := extractOsqueryCaller(msg)
l.slogger.Log(context.TODO(), level, // nolint:sloglint // it's fine to not have a constant or literal here
Expand Down Expand Up @@ -202,3 +222,68 @@ func getSliceStat(getFunc func() ([]int32, error)) string {
}
return fmt.Sprintf("%+v", stat)
}

// logInfoAboutProcessHoldingLockfile logs information about the osquery database's lock file.
func (l *OsqueryLogAdapter) logInfoAboutProcessHoldingLockfile(ctx context.Context, p []byte) {
matches := lockfileRegex.FindAllStringSubmatch(string(p), -1)
if len(matches) < 1 || len(matches[0]) < 2 {
l.slogger.Log(context.TODO(), slog.LevelError,
"could not extract lockfile path from log line",
"log_line", string(p),
)

return
}

lockFilePath := strings.TrimSpace(matches[0][1]) // We want the group, not the full match
infoToLog := []any{
"lockfile_path", lockFilePath,
}

defer func() {
l.slogger.Log(ctx, slog.LevelInfo,
"detected stale osquery db lock file",
infoToLog...,
)
}()

// Check to see whether the process holding the file still exists
processes, err := getProcessesHoldingFile(ctx, lockFilePath)
if err != nil {
infoToLog = append(infoToLog, "err", err)
return
}

// Grab more info to log from the processes using the lockfile
processStrs := make([]string, len(processes))
for i, p := range processes {
processStrs[i] = processStr(ctx, p)
}
infoToLog = append(infoToLog, "processes", processStrs)
}

func processStr(ctx context.Context, p *process.Process) string {
name := "unknown"
processOwner := "unknown"
runningStatus := "unknown"
cmdline := "unknown"

if gotName, err := p.NameWithContext(ctx); err == nil {
name = gotName
}
if gotUsername, err := p.UsernameWithContext(ctx); err == nil {
processOwner = gotUsername
}
if gotIsRunning, err := p.IsRunningWithContext(ctx); err == nil {
if gotIsRunning {
runningStatus = "running"
} else {
runningStatus = "not running"
}
}
if gotCmdline, err := p.CmdlineWithContext(ctx); err == nil {
cmdline = gotCmdline
}

return fmt.Sprintf("process with name `%s` and PID %d belonging to user `%s` has current status `%s` (%s)", name, p.Pid, processOwner, runningStatus, cmdline)
}
43 changes: 43 additions & 0 deletions ee/log/osquerylogs/log_posix.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@ package osquerylogs

import (
"context"
"errors"
"fmt"
"log/slog"
"path/filepath"
"strconv"
"strings"
"time"

"github.com/kolide/launcher/ee/allowedcmd"
"github.com/shirou/gopsutil/v3/process"
)

// runAndLogPs runs ps filtering on the given PID, and logs the output.
Expand Down Expand Up @@ -110,3 +115,41 @@ func (l *OsqueryLogAdapter) runAndLogLsofOnPidfile() {
"output", string(out),
)
}

func getProcessesHoldingFile(ctx context.Context, pathToFile string) ([]*process.Process, error) {
cmd, err := allowedcmd.Lsof(ctx, "-t", pathToFile)
if err != nil {
return nil, fmt.Errorf("creating lsof command: %w", err)
}

out, err := cmd.CombinedOutput()
if err != nil {
return nil, fmt.Errorf("running lsof: %w", err)
}

pidStr := strings.TrimSpace(string(out))
if pidStr == "" {
return nil, errors.New("no process found using file via lsof")
}

pidStrs := strings.Split(strings.TrimSpace(string(out)), "\n")
if len(pidStrs) == 0 {
return nil, errors.New("no processes found using file via lsof")
}

processes := make([]*process.Process, 0)
for _, pidStr := range pidStrs {
pid, err := strconv.ParseInt(pidStr, 10, 32)
if err != nil {
return nil, fmt.Errorf("invalid pid %s: %w", pidStr, err)
}

p, err := process.NewProcess(int32(pid))
if err != nil {
return nil, fmt.Errorf("getting process for %d: %w", pid, err)
}
processes = append(processes, p)
}

return processes, nil
}
45 changes: 45 additions & 0 deletions ee/log/osquerylogs/log_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@

package osquerylogs

import (
"context"
"errors"
"fmt"
"strings"

"github.com/shirou/gopsutil/v3/process"
)

func (l *OsqueryLogAdapter) runAndLogPs(_ string) {
return
}
Expand All @@ -14,3 +23,39 @@ func (l *OsqueryLogAdapter) runAndLogLsofByPID(_ string) {
func (l *OsqueryLogAdapter) runAndLogLsofOnPidfile() {
return
}

func getProcessesHoldingFile(ctx context.Context, pathToFile string) ([]*process.Process, error) {
allProcesses, err := process.ProcessesWithContext(ctx)
if err != nil {
return nil, fmt.Errorf("getting process list: %w", err)
}
if len(allProcesses) == 0 {
return nil, errors.New("could not get any processes")
}

processes := make([]*process.Process, 0)
for _, p := range allProcesses {
openFiles, err := p.OpenFilesWithContext(ctx)
if err != nil {
continue
}

// Check the process's open files to see if this process is the one using the lockfile
for _, f := range openFiles {
// We check for strings.Contains rather than equals because the open file's path contains
// a `\\?\` prefix.
if !strings.Contains(f.Path, pathToFile) {
continue
}

processes = append(processes, p)
break
}
}

if len(processes) == 0 {
return nil, fmt.Errorf("no processes found using file %s", pathToFile)
}

return processes, nil
}
83 changes: 0 additions & 83 deletions pkg/osquery/runtime/osqueryinstance.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
"github.com/osquery/osquery-go/plugin/config"
"github.com/osquery/osquery-go/plugin/distributed"
osquerylogger "github.com/osquery/osquery-go/plugin/logger"
"github.com/shirou/gopsutil/v3/process"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"

Expand Down Expand Up @@ -250,15 +249,6 @@ func (i *OsqueryInstance) Launch() error {
return fmt.Errorf("could not calculate osquery file paths: %w", err)
}

// Check to see whether lock files exist that could prevent osquery from starting up
if _, err := i.detectStaleDatabaseLock(ctx, paths); err != nil {
// A detection error shouldn't prevent us from creating the process -- log it and move on
i.slogger.Log(ctx, slog.LevelInfo,
"error detecting stale database lock",
"err", err,
)
}

// Populate augeas lenses, if requested
if i.opts.augeasLensFunc != nil {
if err := os.MkdirAll(paths.augeasPath, 0755); err != nil {
Expand Down Expand Up @@ -712,79 +702,6 @@ func calculateOsqueryPaths(rootDirectory string, registrationId string, runId st
return osqueryFilePaths, nil
}

// detectStaleDatabaseLock checks for the presence of a lock file in the given database.
// We have noticed the lock file occasionally hanging around -- we think this can happen
// when osquery doesn't get adequate time to shut down gracefully. The presence of the lock
// file will prevent osquery from starting up entirely. For now, we just detect this stale
// lock file, and log information about it. In the future we will attempt to remediate.
// See: https://github.com/kolide/launcher/issues/2004.
func (i *OsqueryInstance) detectStaleDatabaseLock(ctx context.Context, paths *osqueryFilePaths) (bool, error) {
lockFilePath := filepath.Join(paths.databasePath, "LOCK")

lockFileInfo, err := os.Stat(lockFilePath)
if os.IsNotExist(err) {
// No lock file, nothing to do here
return false, nil
}
if err != nil {
return false, fmt.Errorf("determining whether lock file exists: %w", err)
}

infoToLog := []any{
"lockfile_path", lockFilePath,
"lockfile_modtime", lockFileInfo.ModTime().UTC().String(),
}

defer func() {
i.slogger.Log(ctx, slog.LevelInfo,
"detected stale osquery db lock file",
infoToLog...,
)
}()

// Check to see whether the process holding the file still exists
processes, err := getProcessesHoldingFile(ctx, lockFilePath)
if err != nil {
infoToLog = append(infoToLog, "err", err)
return false, fmt.Errorf("getting process holding file: %w", err)
}

// Grab more info to log from the processes using the lockfile
processStrs := make([]string, len(processes))
for i, p := range processes {
processStrs[i] = processStr(ctx, p)
}
infoToLog = append(infoToLog, "processes", processStrs)

return true, nil
}

func processStr(ctx context.Context, p *process.Process) string {
name := "unknown"
processOwner := "unknown"
runningStatus := "unknown"
cmdline := "unknown"

if gotName, err := p.NameWithContext(ctx); err == nil {
name = gotName
}
if gotUsername, err := p.UsernameWithContext(ctx); err == nil {
processOwner = gotUsername
}
if gotIsRunning, err := p.IsRunningWithContext(ctx); err == nil {
if gotIsRunning {
runningStatus = "running"
} else {
runningStatus = "not running"
}
}
if gotCmdline, err := p.CmdlineWithContext(ctx); err == nil {
cmdline = gotCmdline
}

return fmt.Sprintf("process with name `%s` and PID %d belonging to user `%s` has current status `%s` (%s)", name, p.Pid, processOwner, runningStatus, cmdline)
}

// createOsquerydCommand uses osqueryOptions to return an *exec.Cmd
// which will launch a properly configured osqueryd process.
func (i *OsqueryInstance) createOsquerydCommand(osquerydBinary string, paths *osqueryFilePaths) (*exec.Cmd, error) {
Expand Down
20 changes: 0 additions & 20 deletions pkg/osquery/runtime/osqueryinstance_other_test.go

This file was deleted.

Loading
Loading