diff --git a/Makefile b/Makefile index ea3d1bceb..7cb5018b2 100644 --- a/Makefile +++ b/Makefile @@ -218,8 +218,6 @@ package-builder: .pre-build deps package-builder-windows: .pre-build deps go run cmd/make/make.go -targets=package-builder -linkstamp --os windows -launcher-pummel: - go run cmd/make/make.go -targets=launcher-pummel deps-go: go run cmd/make/make.go -targets=deps-go,install-tools diff --git a/cmd/launcher-pummel/README.md b/cmd/launcher-pummel/README.md deleted file mode 100644 index 198eed1c0..000000000 --- a/cmd/launcher-pummel/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# Osquery Server Load Testing - -## Building the tool - -From the root of the repository, run the following: - -``` -make deps -make launcher-pummel -./build/launcher-pummel --help -``` - -## Tool Usage Instructions - -``` -./build/launcher-pummel \ - --host_path=./path/to/my/host/templates \ - --server_url=fleet.example.co \ - --enroll_secret=mB3XE5kwLt3YryD9FAanjwhm02HoOqll \ - --hosts=mac:100,windows:20,linux:5000 -``` - -You can also define the enroll secret via a file path (`--enroll_secret_path`) or an environment variable (`ENROLL_SECRET`). See `launcher-pummel --help` for more information. diff --git a/cmd/launcher-pummel/launcher-pummel.go b/cmd/launcher-pummel/launcher-pummel.go deleted file mode 100644 index c8f3bfd5d..000000000 --- a/cmd/launcher-pummel/launcher-pummel.go +++ /dev/null @@ -1,193 +0,0 @@ -package main - -import ( - "bytes" - "flag" - "fmt" - "os" - "os/signal" - "strconv" - "strings" - "time" - - "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" - "github.com/kolide/kit/env" - "github.com/kolide/kit/logutil" - "github.com/kolide/kit/version" - "github.com/kolide/launcher/pkg/simulator" -) - -func main() { - var ( - flVersion = flag.Bool( - "version", - env.Bool("VERSION", false), - "Print version and exit", - ) - flDebug = flag.Bool( - "debug", - env.Bool("DEBUG", false), - "Print debug logs", - ) - flJson = flag.Bool( - "json", - env.Bool("JSON", false), - "Print logs in JSON format", - ) - flHostPath = flag.String( - "host_path", - env.String("HOST_PATH", "simulator/hosts"), - "Directory path for loading host yaml files", - ) - flServerURL = flag.String( - "server_url", - env.String("SERVER_URL", "localhost:8080"), - "URL of gRPC server to load test", - ) - flEnrollSecret = flag.String( - "enroll_secret", - env.String("ENROLL_SECRET", ""), - "The enroll secret that is used in your environment", - ) - flEnrollSecretPath = flag.String( - "enroll_secret_path", - env.String("ENROLL_SECRET_PATH", ""), - "Optionally, the path to your enrollment secret", - ) - flHosts = flag.String( - "hosts", - env.String("HOSTS", ""), - "Comma-separated list of host type and quantity i.e.: linux:1000,macos:200", - ) - flInsecureTLS = flag.Bool( - "insecure", - env.Bool("INSECURE", false), - "Do not verify TLS certs for outgoing connections (default: false)", - ) - flInsecureGRPC = flag.Bool( - "insecure_grpc", - env.Bool("INSECURE_GRPC", false), - "Dial GRPC without a TLS config (default: false)", - ) - ) - flag.Parse() - - var logger log.Logger - - if *flJson { - logger = log.NewJSONLogger(log.NewSyncWriter(os.Stdout)) - } else { - logger = log.NewLogfmtLogger(log.NewSyncWriter(os.Stdout)) - } - logger = log.With(logger, - "ts", log.DefaultTimestampUTC, - "component", "simulator", - ) - logger = level.NewInjector(logger, level.InfoValue()) - logger = log.With(logger, "caller", log.DefaultCaller) - - if *flDebug { - logger = level.NewFilter(logger, level.AllowDebug()) - } else { - logger = level.NewFilter(logger, level.AllowInfo()) - } - - if *flVersion { - version.PrintFull() - os.Exit(0) - } - - hosts, err := simulator.LoadHosts(*flHostPath, logger) - if err != nil { - logutil.Fatal(logger, - "msg", "error loading host definitions", - "err", err, - ) - } - - var enrollSecret string - if *flEnrollSecret != "" { - enrollSecret = *flEnrollSecret - } else if *flEnrollSecretPath != "" { - content, err := os.ReadFile(*flEnrollSecretPath) - if err != nil { - logutil.Fatal(logger, "err", fmt.Errorf("could not read enroll_secret_path: %w", err), "enroll_secret_path", *flEnrollSecretPath) - } - enrollSecret = string(bytes.TrimSpace(content)) - } - - if len(enrollSecret) == 0 { - logutil.Fatal(logger, "msg", "--enroll_secret cannot be empty") - } - - level.Info(logger).Log( - "msg", "starting load testing tool", - ) - - hostList := strings.Split(*flHosts, ",") - if len(hostList) == 0 { - logutil.Fatal(logger, "msg", "no hosts specified") - } - - for _, hostSimulation := range hostList { - simulationParts := strings.Split(hostSimulation, ":") - if len(simulationParts) != 2 { - logutil.Fatal(logger, - "msg", "arguments should be of the form host_type:count", - "arg", hostSimulation, - ) - } - hostType, countStr := simulationParts[0], simulationParts[1] - - count, err := strconv.Atoi(countStr) - if err != nil { - logutil.Fatal(logger, - "msg", "unable to parse count", - "arg", hostSimulation, - ) - } - - host, ok := hosts[hostType] - if !ok { - logutil.Fatal(logger, - "msg", "unrecognized host type", - "type", hostType, - ) - } - - level.Info(logger).Log( - "msg", "starting hosts", - "count", count, - ) - - opts := []simulator.SimulationOption{} - if *flInsecureTLS { - opts = append(opts, simulator.WithInsecure()) - } - if *flInsecureGRPC { - opts = append(opts, simulator.WithInsecureGrpc()) - } - - // Start hosts - for i := 0; i < count; i++ { - simulator.LaunchSimulation( - logger, - host, - *flServerURL, - fmt.Sprintf("%s_%d", hostType, i), - enrollSecret, - opts..., - ) - time.Sleep(10 * time.Millisecond) - } - } - - level.Info(logger).Log( - "msg", "all hosts started", - ) - - sig := make(chan os.Signal, 1) - signal.Notify(sig, os.Interrupt) - <-sig -} diff --git a/cmd/launcher/launcher.go b/cmd/launcher/launcher.go index 42baa5dcb..0168caf1d 100644 --- a/cmd/launcher/launcher.go +++ b/cmd/launcher/launcher.go @@ -19,7 +19,6 @@ import ( "github.com/apache/thrift/lib/go/thrift" "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" "github.com/kolide/kit/fsutil" "github.com/kolide/kit/logutil" "github.com/kolide/kit/ulid" @@ -72,22 +71,25 @@ const ( // runLauncher is the entry point into running launcher. It creates a // rungroups with the various options, and goes! If autoupdate is // enabled, the finalizers will trigger various restarts. -func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *multislogger.MultiSlogger, opts *launcher.Options) error { +func runLauncher(ctx context.Context, cancel func(), multiSlogger, systemMultiSlogger *multislogger.MultiSlogger, opts *launcher.Options) error { thrift.ServerConnectivityCheckInterval = 100 * time.Millisecond logger := ctxlog.FromContext(ctx) logger = log.With(logger, "caller", log.DefaultCaller, "session_pid", os.Getpid()) + slogger := multiSlogger.Logger // If delay_start is configured, wait before running launcher. if opts.DelayStart > 0*time.Second { - level.Debug(logger).Log( - "msg", "delay_start configured, waiting before starting launcher", + slogger.Log(ctx, slog.LevelDebug, + "delay_start configured, waiting before starting launcher", "delay_start", opts.DelayStart.String(), ) time.Sleep(opts.DelayStart) } - level.Debug(logger).Log("msg", "runLauncher starting") + slogger.Log(ctx, slog.LevelDebug, + "runLauncher starting", + ) // We've seen launcher intermittently be unable to recover from // DNS failures in the past, so this check gives us a little bit @@ -97,11 +99,14 @@ func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *mul // Note that the SplitN won't work for bare ip6 addresses. if err := backoff.WaitFor(func() error { hostport := strings.SplitN(opts.KolideServerURL, ":", 2) + if len(hostport) < 1 { + return fmt.Errorf("unable to parse url: %s", opts.KolideServerURL) + } _, lookupErr := net.LookupIP(hostport[0]) return lookupErr }, 10*time.Second, 1*time.Second); err != nil { - level.Info(logger).Log( - "msg", "could not successfully perform IP lookup before starting launcher, proceeding anyway", + slogger.Log(ctx, slog.LevelInfo, + "could not successfully perform IP lookup before starting launcher, proceeding anyway", "kolide_server_url", opts.KolideServerURL, "err", err, ) @@ -115,8 +120,9 @@ func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *mul if err != nil { return fmt.Errorf("creating temporary root directory: %w", err) } - level.Info(logger).Log( - "msg", "using default system root directory", + + slogger.Log(ctx, slog.LevelInfo, + "using default system root directory", "path", rootDirectory, ) } @@ -169,8 +175,10 @@ func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *mul fcOpts := []flags.Option{flags.WithCmdLineOpts(opts)} flagController := flags.NewFlagController(logger, stores[storage.AgentFlagsStore], fcOpts...) - k := knapsack.New(stores, flagController, db, slogger, systemSlogger) - + k := knapsack.New(stores, flagController, db, multiSlogger, systemMultiSlogger) + // reassign slogger to knapsack slogger to get launcher run id added to slogger + slogger = k.Slogger() + go runOsqueryVersionCheck(ctx, logger, k.LatestOsquerydPath(ctx)) go timemachine.ExcludeLauncherDB(ctx, k) @@ -210,8 +218,8 @@ func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *mul traceExporter, err = exporter.NewTraceExporter(ctx, k) if err != nil { - level.Debug(logger).Log( - "msg", "could not set up trace exporter", + slogger.Log(ctx, slog.LevelDebug, + "could not set up trace exporter", "err", err, ) } else { @@ -272,7 +280,10 @@ func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *mul powerEventWatcher, err := powereventwatcher.New(k, log.With(logger, "component", "power_event_watcher")) if err != nil { - level.Debug(logger).Log("msg", "could not init power event watcher", "err", err) + slogger.Log(ctx, slog.LevelDebug, + "could not init power event watcher", + "err", err, + ) } else { runGroup.Add("powerEventWatcher", powerEventWatcher.Execute, powerEventWatcher.Interrupt) } @@ -321,7 +332,9 @@ func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *mul // Create the control service and services that depend on it var runner *desktopRunner.DesktopUsersProcessesRunner if k.ControlServerURL() == "" { - level.Debug(logger).Log("msg", "control server URL not set, will not create control service") + slogger.Log(ctx, slog.LevelDebug, + "control server URL not set, will not create control service", + ) } else { controlService, err := createControlService(ctx, k.ControlStore(), k) if err != nil { @@ -392,8 +405,8 @@ func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *mul } if metadataWriter := internal.NewMetadataWriter(logger, k); metadataWriter == nil { - level.Debug(logger).Log( - "msg", "unable to set up metadata writer", + slogger.Log(ctx, slog.LevelDebug, + "unable to set up metadata writer", "err", err, ) } else { @@ -416,7 +429,10 @@ func runLauncher(ctx context.Context, cancel func(), slogger, systemSlogger *mul if err != nil { // For now, log this and move on. It might be a fatal error - level.Error(logger).Log("msg", "Failed to setup localserver", "error", err) + slogger.Log(ctx, slog.LevelError, + "failed to setup local server", + "err", err, + ) } ls.SetQuerier(extension) @@ -522,18 +538,21 @@ func writePidFile(path string) error { // be due to the notarization check taking too long, we execute the binary here ahead // of time in the hopes of getting the check out of the way. This is expected to be called // from a goroutine, and thus does not return an error. -func runOsqueryVersionCheck(ctx context.Context, logger log.Logger, osquerydPath string) { +func runOsqueryVersionCheck(ctx context.Context, slogger *slog.Logger, osquerydPath string) { if runtime.GOOS != "darwin" { return } - logger = log.With(logger, "component", "osquery-version-check") + slogger = slogger.With("component", "osquery-version-check") var output bytes.Buffer osq, err := runsimple.NewOsqueryProcess(osquerydPath, runsimple.WithStdout(&output)) if err != nil { - level.Error(logger).Log("msg", "unable to create process", "err", err) + slogger.Log(ctx, slog.LevelError, + "unable to create process", + "err", err, + ) return } @@ -548,7 +567,8 @@ func runOsqueryVersionCheck(ctx context.Context, logger log.Logger, osquerydPath outTrimmed := strings.TrimSpace(output.String()) if osqErr != nil { - level.Error(logger).Log("msg", "could not check osqueryd version", + slogger.Log(ctx, slog.LevelError, + "could not check osqueryd version", "output", outTrimmed, "err", err, "execution_time_ms", executionTimeMs, @@ -557,7 +577,8 @@ func runOsqueryVersionCheck(ctx context.Context, logger log.Logger, osquerydPath return } - level.Debug(logger).Log("msg", "checked osqueryd version", + slogger.Log(ctx, slog.LevelDebug, + "checked osqueryd version", "version", outTrimmed, "execution_time_ms", executionTimeMs, "osqueryd_path", osquerydPath, diff --git a/cmd/launcher/main.go b/cmd/launcher/main.go index 8e1ed6028..245195322 100644 --- a/cmd/launcher/main.go +++ b/cmd/launcher/main.go @@ -24,6 +24,7 @@ import ( "github.com/kolide/launcher/pkg/log/locallogger" "github.com/kolide/launcher/pkg/log/multislogger" "github.com/kolide/launcher/pkg/log/teelogger" + "github.com/pkg/errors" ) func main() { @@ -131,6 +132,12 @@ func main() { "msg", "panic occurred", "err", r, ) + if err, ok := r.(error); ok { + level.Info(logger).Log( + "msg", "panic stack trace", + "stack_trace", fmt.Sprintf("%+v", errors.WithStack(err)), + ) + } time.Sleep(time.Second) } }() diff --git a/cmd/launcher/svc_windows.go b/cmd/launcher/svc_windows.go index 79facf0c6..f5958dd3f 100644 --- a/cmd/launcher/svc_windows.go +++ b/cmd/launcher/svc_windows.go @@ -22,6 +22,7 @@ import ( "github.com/kolide/launcher/pkg/log/locallogger" "github.com/kolide/launcher/pkg/log/multislogger" "github.com/kolide/launcher/pkg/log/teelogger" + "github.com/pkg/errors" "golang.org/x/sys/windows/svc" "golang.org/x/sys/windows/svc/debug" @@ -115,6 +116,12 @@ func runWindowsSvc(args []string) error { "msg", "panic occurred in windows service", "err", r, ) + if err, ok := r.(error); ok { + level.Info(logger).Log( + "msg", "panic stack trace", + "stack_trace", fmt.Sprintf("%+v", errors.WithStack(err)), + ) + } time.Sleep(time.Second) } }() diff --git a/docs/architecture/2023-10-17_autoupdate_v3.md b/docs/architecture/2023-10-17_autoupdate_v3.md index dfacab815..764fd84c4 100644 --- a/docs/architecture/2023-10-17_autoupdate_v3.md +++ b/docs/architecture/2023-10-17_autoupdate_v3.md @@ -8,8 +8,11 @@ Accepted: October 2023 -Changes rolled out to nightly channel in October 2023. Rollout to beta and -stable is still TBD. +Changes rolled out to nightly channel in October 2023. + +Changes rolled out to beta channel in November 2023. + +Slow rollout to stable channel began January 2024. Supersedes: [Launcher Auto Update Process Version 2](2019-09-05_autoupdate_v2.md) @@ -77,6 +80,7 @@ several ways: `kolide_tuf_autoupdater_errors` table; review and address unexpected errors. 1. Perform a rollout to nightly only, and then to nightly and beta, to test the new system with a limited number of devices. +1. Perform a gradual rollout to stable. 1. Expand the TUF checkup in flare to fetch data about local and remote state, to enable troubleshooting for autoupdate issues. 1. Add an automated test suite that exercises and validates autoupdate diff --git a/docs/launcher.md b/docs/launcher.md index 0dd7b760d..0c6966621 100644 --- a/docs/launcher.md +++ b/docs/launcher.md @@ -4,7 +4,7 @@ Requirements: * Repository checkout (Not dependent on `$GOPATH`) -* Recent go (currently depends on 1.16) +* Recent go (currently depends on 1.21) * [`zig`](https://ziglang.org/) compiler, if and only if, cross compiling for linux Then, from your checkout, run: diff --git a/ee/agent/knapsack/knapsack.go b/ee/agent/knapsack/knapsack.go index 2a100f873..047dc4976 100644 --- a/ee/agent/knapsack/knapsack.go +++ b/ee/agent/knapsack/knapsack.go @@ -48,6 +48,10 @@ func New(stores map[storage.Store]types.KVStore, flags types.Flags, db *bbolt.DB launcherRunId: ulid.New(), } + if k.slogger != nil { + k.slogger.Logger = k.slogger.Logger.With("launcher_run_id", k.launcherRunId) + } + return k } diff --git a/ee/tuf/library_lookup.go b/ee/tuf/library_lookup.go index 1da2543b4..180ad4d2d 100644 --- a/ee/tuf/library_lookup.go +++ b/ee/tuf/library_lookup.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" @@ -64,6 +65,21 @@ func ShouldUseNewAutoupdater(ctx context.Context) bool { // getAutoupdateConfig pulls the configuration values necessary to work with the autoupdate library // from either the given args or from the config file. func getAutoupdateConfig(args []string) (*autoupdateConfig, error) { + // pflag, while mostly great for our usecase here, expects getopt-style flags, which means + // it doesn't support the Golang standard of using single and double dashes interchangeably + // for flags. (e.g., pflag cannot parse `-config`, but Golang treats `-config` the same as + // `--config`.) This transforms all single-dash args to double-dashes so that pflag can parse + // them as expected. + argsToParse := make([]string, len(args)) + for i := 0; i < len(args); i += 1 { + if strings.HasPrefix(args[i], "-") && !strings.HasPrefix(args[i], "--") { + argsToParse[i] = "-" + args[i] + continue + } + + argsToParse[i] = args[i] + } + // Create a flagset with options that are relevant to autoupdate only. // Ensure that we won't fail out when we see other command-line options. pflagSet := pflag.NewFlagSet("autoupdate options", pflag.ContinueOnError) @@ -77,7 +93,7 @@ func getAutoupdateConfig(args []string) (*autoupdateConfig, error) { pflagSet.StringVar(&flUpdateChannel, "update_channel", "", "") pflagSet.StringVar(&flLocalDevelopmentPath, "localdev_path", "", "") - if err := pflagSet.Parse(args); err != nil { + if err := pflagSet.Parse(argsToParse); err != nil { return nil, fmt.Errorf("parsing command-line flags: %w", err) } @@ -88,7 +104,7 @@ func getAutoupdateConfig(args []string) (*autoupdateConfig, error) { // is set) or via command line (flRootDirectory and flUpdateChannel are set), but do not // support a mix of both for this usage. if flConfigFilePath == "" && flRootDirectory == "" && flUpdateChannel == "" { - return getAutoupdateConfigFromFile(launcher.ConfigFilePath(args)) + return getAutoupdateConfigFromFile(launcher.ConfigFilePath(argsToParse)) } if flConfigFilePath != "" { diff --git a/ee/tuf/library_lookup_test.go b/ee/tuf/library_lookup_test.go index dab4c4e8f..c8e393d19 100644 --- a/ee/tuf/library_lookup_test.go +++ b/ee/tuf/library_lookup_test.go @@ -252,14 +252,24 @@ transport jsonrpc require.NoError(t, os.WriteFile(configFilepath, []byte(fileContents), 0755), "expected to set up test config file") - cfg, err := getAutoupdateConfig([]string{"--config", configFilepath}) + cfg1, err := getAutoupdateConfig([]string{"--config", configFilepath}) require.NoError(t, err, "expected no error getting autoupdate config") - require.NotNil(t, cfg, "expected valid autoupdate config") - require.Equal(t, testRootDir, cfg.rootDirectory, "root directory is incorrect") - require.Equal(t, "", cfg.updateDirectory, "update directory should not have been set") - require.Equal(t, testChannel, cfg.channel, "channel is incorrect") - require.Equal(t, "", cfg.localDevelopmentPath, "local development path should not have been set") + require.NotNil(t, cfg1, "expected valid autoupdate config") + require.Equal(t, testRootDir, cfg1.rootDirectory, "root directory is incorrect") + require.Equal(t, "", cfg1.updateDirectory, "update directory should not have been set") + require.Equal(t, testChannel, cfg1.channel, "channel is incorrect") + require.Equal(t, "", cfg1.localDevelopmentPath, "local development path should not have been set") + + // Same thing, just one - instead of 2 + cfg2, err := getAutoupdateConfig([]string{"-config", configFilepath}) + require.NoError(t, err, "expected no error getting autoupdate config") + + require.NotNil(t, cfg2, "expected valid autoupdate config") + require.Equal(t, testRootDir, cfg2.rootDirectory, "root directory is incorrect") + require.Equal(t, "", cfg2.updateDirectory, "update directory should not have been set") + require.Equal(t, testChannel, cfg2.channel, "channel is incorrect") + require.Equal(t, "", cfg2.localDevelopmentPath, "local development path should not have been set") } func Test_getAutoupdateConfig_ConfigFlagNotSet(t *testing.T) { @@ -287,3 +297,29 @@ func Test_getAutoupdateConfig_ConfigFlagNotSet(t *testing.T) { require.Equal(t, testChannel, cfg.channel, "channel is incorrect") require.Equal(t, testLocaldevPath, cfg.localDevelopmentPath, "local development path is incorrect") } + +func Test_getAutoupdateConfig_ConfigFlagNotSet_SingleHyphen(t *testing.T) { + t.Parallel() + + testRootDir := t.TempDir() + testUpdateDir := t.TempDir() + testChannel := "nightly" + testLocaldevPath := filepath.Join("some", "path", "to", "a", "local", "build") + + cfg, err := getAutoupdateConfig([]string{ + "-root_directory", testRootDir, + "-osquery_flag", "enable_watchdog_debug=true", + "-update_directory", testUpdateDir, + "-autoupdate", + "-update_channel", testChannel, + "-localdev_path", testLocaldevPath, + "-transport", "jsonrpc", + }) + require.NoError(t, err, "expected no error getting autoupdate config") + + require.NotNil(t, cfg, "expected valid autoupdate config") + require.Equal(t, testRootDir, cfg.rootDirectory, "root directory is incorrect") + require.Equal(t, testUpdateDir, cfg.updateDirectory, "update directory is incorrect") + require.Equal(t, testChannel, cfg.channel, "channel is incorrect") + require.Equal(t, testLocaldevPath, cfg.localDevelopmentPath, "local development path is incorrect") +} diff --git a/go.mod b/go.mod index 987550e16..b232ccb01 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,6 @@ require ( github.com/Masterminds/semver v1.4.2 github.com/Microsoft/go-winio v0.6.1 github.com/clbanning/mxj v1.8.4 - github.com/ghodss/yaml v1.0.0 github.com/go-bindata/go-bindata v1.0.0 github.com/go-ini/ini v1.61.0 github.com/go-kit/kit v0.9.0 @@ -15,7 +14,6 @@ require ( github.com/google/uuid v1.3.0 github.com/gorilla/websocket v1.4.2 github.com/groob/plist v0.0.0-20190114192801-a99fbe489d03 - github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 github.com/knightsc/system_policy v1.1.1-0.20211029142728-5f4c0d5419cc github.com/kolide/kit v0.0.0-20221107170827-fb85e3d59eab github.com/kolide/krypto v0.1.1-0.20231219012048-5859599c50aa diff --git a/go.sum b/go.sum index 776cb528d..3a1735fbb 100644 --- a/go.sum +++ b/go.sum @@ -129,7 +129,6 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= -github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-bindata/go-bindata v1.0.0 h1:DZ34txDXWn1DyWa+vQf7V9ANc2ILTtrEjtlsdJRF26M= github.com/go-bindata/go-bindata v1.0.0/go.mod h1:xK8Dsgwmeed+BBsSy2XTopBn/8uK2HWuGSnA11C3Joo= diff --git a/pkg/simulator/hosts.go b/pkg/simulator/hosts.go deleted file mode 100644 index 1fc51f67e..000000000 --- a/pkg/simulator/hosts.go +++ /dev/null @@ -1,169 +0,0 @@ -package simulator - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "regexp" - "strings" - "sync" - - "github.com/ghodss/yaml" - "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" -) - -type queryRunner struct { - // Name of this host type. - Name string `json:"name"` - // parentName is the name of the parent type. - ParentName string `json:"parent"` - // parent is a pointer to the parent (nil if no parent) used for query - // result inheritance. - parent *queryRunner - //QueryResults maps from regexp pattern to query results that should be - //returned. - Queries []matcher `json:"queries"` - - // The following members facilitate logging unmatched queries. - logger log.Logger - unmatchedMutex sync.Mutex - unmatchedQueries map[string]bool -} - -// matcher contains a regex for matching input queries, and the results to -// return if the query matches. -type matcher struct { - // Pattern is a regexp for the query patterns this should match. - Pattern *regexp.Regexp `json:"pattern"` - // Results is the results to return for matched queries - Results []map[string]string `json:"results"` -} - -// querySpec exists for loading from the YAML files. After it is parsed into -// this structure, a queryRunner is created by compiling the regexes and -// linking the parents. -type querySpec struct { - Name string `json:"name"` - ParentName string `json:"parent"` - Queries []struct { - Pattern string `json:"pattern"` - Results []map[string]string `json:"results"` - } `json:"queries"` -} - -// LoadHosts will load the host specifications and return a map of the -// queryRunners representing these host types. -func LoadHosts(dir string, logger log.Logger) (map[string]*queryRunner, error) { - files, err := os.ReadDir(dir) - if err != nil { - return nil, fmt.Errorf("listing hosts directory: %w", err) - } - - hostMap := map[string]*queryRunner{} - - // Load all files - for _, file := range files { - if strings.HasSuffix(file.Name(), ".yaml") { - path := filepath.Join(dir, file.Name()) - contents, err := os.ReadFile(path) - if err != nil { - return nil, fmt.Errorf("reading file %s: %w", path, err) - } - - var h querySpec - err = yaml.Unmarshal(contents, &h) - if err != nil { - return nil, fmt.Errorf("unmarshal yaml for %s: %w", path, err) - } - - runner := &queryRunner{ - Name: h.Name, - ParentName: h.ParentName, - Queries: []matcher{}, - unmatchedQueries: make(map[string]bool), - logger: logger, - } - - for _, q := range h.Queries { - re, err := regexp.Compile(strings.ToLower(q.Pattern)) - if err != nil { - return nil, fmt.Errorf("compile regexp for %s: %w", path, err) - } - runner.Queries = append(runner.Queries, matcher{re, q.Results}) - } - - // Check for duplicate host type name. It is user error - // to provide multiple definitions for the same host - // type. - if _, exists := hostMap[runner.Name]; exists { - return nil, fmt.Errorf("duplicate host type: %s", runner.Name) - } - - hostMap[runner.Name] = runner - } - } - - // Link parents - for _, runner := range hostMap { - if runner.ParentName == "" { - continue - } - - parent, ok := hostMap[runner.ParentName] - if !ok { - return nil, fmt.Errorf("missing parent named: %s", runner.ParentName) - } - runner.parent = parent - } - - // TODO check for cycles - - return hostMap, nil -} - -func (h *queryRunner) RunQuery(sql string) (rows []map[string]string, err error) { - sql = strings.ToLower(sql) - defer func() { - if err == nil { - // Query was matched - return - } - - h.unmatchedMutex.Lock() - defer h.unmatchedMutex.Unlock() - - if h.unmatchedQueries[sql] { - // Already logged this one - return - } - - h.unmatchedQueries[sql] = true - level.Info(h.logger).Log( - "msg", "host has no match for query", - "host_type", h.Name, - "sql", sql, - ) - }() - - return h.runQueryRecurse(sql) -} - -func (h *queryRunner) runQueryRecurse(sql string) ([]map[string]string, error) { - // Try matching patterns - for _, q := range h.Queries { - if q.Pattern.MatchString(sql) { - return q.Results, nil - } - } - - // No patterns matched - if h.parent == nil { - // No parent exists - return nil, errors.New("no matching query pattern") - } - - // Recursive call to inherited patterns of parent - return h.parent.runQueryRecurse(sql) -} diff --git a/pkg/simulator/hosts/base_darwin.yaml b/pkg/simulator/hosts/base_darwin.yaml deleted file mode 100644 index 195fb872b..000000000 --- a/pkg/simulator/hosts/base_darwin.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: - base_darwin -queries: - - pattern: select 1$ - results: [ - {"1":"1"} - ] - - - pattern: select \* from interface_details id join interface_addresses ia - results: [ - {"address":"192.168.2.9","broadcast":"192.168.2.255","collisions":"0","flags":"34915","ibytes":"1355573248","idrops":"0","ierrors":"0","interface":"en0","ipackets":"1549777","last_change":"1512082514","mac":"f3:0f:23:34:e3:ef","mask":"255.255.255.0","metric":"0","mtu":"1500","obytes":"225253376","odrops":"0","oerrors":"0","opackets":"923337","point_to_point":"","type":""} - ] - - - pattern: select \* from os_version - results: [ - {"build":"17B1002","codename":"","major":"10","minor":"13","name":"Mac OS X","patch":"1","platform":"darwin","platform_like":"darwin","version":"10.13.1"} - ] - - pattern: select \* from osquery_info - results: [ - {"build_distro":"10.12","build_platform":"darwin","config_hash":"","config_valid":"0","extensions":"inactive","instance_id":"70ccb066-10f7-455c-8da1-c50e2e12d807","pid":"38454","start_time":"1512083519","uuid":"47F44EDB-237A-5DA1-904A-F05F62ECF4D9","version":"2.10.2","watcher":"-1"} - ] - - - pattern: select \* from system_info - results: [ - {"computer_name":"mac","cpu_brand":"Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz","cpu_logical_cores":"8","cpu_physical_cores":"4","cpu_subtype":"Intel x86-64h Haswell","cpu_type":"x86_64h","hardware_model":"MacBookPro13,3 ","hardware_serial":"C02SQ1ALH040","hardware_vendor":"Apple Inc. ","hardware_version":"1.0 ","hostname":"mac.local","local_hostname":"mac","physical_memory":"17179869184","uuid":"47F44EDB-237A-5DA1-904A-F05F62ECF4D9"} - ] - - - pattern: select \* from uptime - results: [ - {"days":"0","hours":"6","minutes":"40","seconds":"4","total_seconds":"24004"} - ] diff --git a/pkg/simulator/hosts/out_of_date_darwin.yaml b/pkg/simulator/hosts/out_of_date_darwin.yaml deleted file mode 100644 index d66cbfe54..000000000 --- a/pkg/simulator/hosts/out_of_date_darwin.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: - out_of_date_darwin -parent: - base_darwin -queries: - - pattern: select \* from os_version - results: [ - {"build":"16B2657","codename":"","major":"10","minor":"12","name":"Mac OS X","patch":"1","platform":"darwin","platform_like":"darwin","version":"10.12.1"} - ] - - - pattern: select 1 from os_version where minor < 13 - results: [ - {"1": "1"} - ] diff --git a/pkg/simulator/hosts_test.go b/pkg/simulator/hosts_test.go deleted file mode 100644 index 9f61fc746..000000000 --- a/pkg/simulator/hosts_test.go +++ /dev/null @@ -1,187 +0,0 @@ -package simulator - -import ( - "regexp" - "testing" - - "github.com/go-kit/kit/log" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestLoadHostsErrors(t *testing.T) { - t.Parallel() - - t.Skip("TODO: Windows tests") - testCases := []struct { - dir string - matchErr string - }{ - { - "testdata/invalid_dir", - "listing hosts directory", - }, - { - "testdata/bad_symlink", - "reading file", - }, - { - "testdata/invalid_yaml", - "unmarshal yaml", - }, - { - "testdata/duplicate", - "duplicate host", - }, - { - "testdata/missing_parent", - "missing parent", - }, - { - "testdata/invalid_regexp", - "compile regexp", - }, - // TODO add test with cycle - } - - for _, tt := range testCases { - tt := tt - t.Run(tt.matchErr, func(t *testing.T) { - t.Parallel() - - hosts, err := LoadHosts(tt.dir, log.NewNopLogger()) - require.Nil(t, hosts) - require.Error(t, err) - assert.Contains(t, err.Error(), tt.matchErr) - }) - } -} - -func TestLoadHosts(t *testing.T) { - t.Parallel() - - hosts, err := LoadHosts("testdata/valid1", log.NewNopLogger()) - require.Nil(t, err) - - foo, bar := hosts["foo"], hosts["bar"] - require.NotNil(t, foo) - require.NotNil(t, bar) - - assert.Equal(t, "foo", foo.Name) - assert.Equal(t, - []matcher{ - { - regexp.MustCompile("select hour, minutes from time"), - []map[string]string{{"hour": "19", "minutes": "34"}}, - }, - { - regexp.MustCompile("select platform from osquery_info"), - []map[string]string{{"platform": "darwin"}}, - }, - }, - foo.Queries, - ) - assert.Nil(t, foo.parent) - - assert.Equal(t, "bar", bar.Name) - assert.Equal(t, - []matcher{ - { - regexp.MustCompile("select version from osquery_info"), - []map[string]string{{"version": "2.10.2"}}, - }, - }, - bar.Queries, - ) - assert.Equal(t, foo, bar.parent) -} - -func TestRunQuery(t *testing.T) { - t.Parallel() - - h1 := &queryRunner{ - Queries: []matcher{ - {regexp.MustCompile(".*time.*"), []map[string]string{{"foo": "bar"}}}, - }, - unmatchedQueries: make(map[string]bool), - logger: log.NewNopLogger(), - } - h2 := &queryRunner{ - Queries: []matcher{ - {regexp.MustCompile("select \\* from osquery_info"), []map[string]string{{"osquery": "info"}}}, - }, - parent: h1, - unmatchedQueries: make(map[string]bool), - logger: log.NewNopLogger(), - } - h3 := &queryRunner{ - Queries: []matcher{ - {regexp.MustCompile("select hour from time"), []map[string]string{{"hour": "12"}}}, - {regexp.MustCompile("select .* from time"), []map[string]string{{"minute": "36"}}}, - }, - parent: h1, - unmatchedQueries: make(map[string]bool), - logger: log.NewNopLogger(), - } - - testCases := []struct { - Host *queryRunner - Query string - Result []map[string]string - }{ - { - h1, - "select * from time", - []map[string]string{{"foo": "bar"}}, - }, - { - h1, - "select nomatch", - nil, - }, - { - h2, - "select * from osquery_info", - []map[string]string{{"osquery": "info"}}, - }, - { - h2, - "select hour from time", - []map[string]string{{"foo": "bar"}}, - }, - { - h2, - "select nomatch", - nil, - }, - { - h3, - "select hour from time", - []map[string]string{{"hour": "12"}}, - }, - { - h3, - "select day from time", - []map[string]string{{"minute": "36"}}, - }, - { - h3, - "select day from osquery_info join time", - []map[string]string{{"foo": "bar"}}, - }, - } - - for _, tt := range testCases { - tt := tt - t.Run("", func(t *testing.T) { - t.Parallel() - - res, err := tt.Host.RunQuery(tt.Query) - if tt.Result != nil { - assert.Equal(t, tt.Result, res) - } else { - assert.NotNil(t, err) - } - }) - } -} diff --git a/pkg/simulator/simulator.go b/pkg/simulator/simulator.go deleted file mode 100644 index b14296fbd..000000000 --- a/pkg/simulator/simulator.go +++ /dev/null @@ -1,455 +0,0 @@ -package simulator - -import ( - "context" - "crypto/tls" - "errors" - "fmt" - "log/slog" - "math" - "net" - "os" - "sync" - "time" - - "google.golang.org/grpc" - "google.golang.org/grpc/credentials" - - "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" - "github.com/kolide/launcher/ee/agent/knapsack" - "github.com/kolide/launcher/pkg/log/multislogger" - "github.com/kolide/launcher/pkg/service" - "github.com/osquery/osquery-go/plugin/distributed" - "github.com/osquery/osquery-go/plugin/logger" -) - -// QueryRunner is the interface which defines the pluggable behavior of a simulated -// host. Each "type" of host may have their own implementation of this interface. -type QueryRunner interface { - RunQuery(sql string) (results []map[string]string, err error) -} - -// HostSimulation is the type which contains the state of a simulated host -type HostSimulation struct { - // the following define the configurable aspects of the simulation - host QueryRunner - uuid string - enrollSecret string - insecure bool - insecureGrpc bool - requestQueriesInterval time.Duration - requestConfigInterval time.Duration - publishLogsInterval time.Duration - logger log.Logger - - // The state of the simulation is gated with a read/write lock. - // To read something in state: - // - // h.state.lock.RLock() - // defer h.state.lock.RUnlock() - // - // To write state based on the on-going simulation: - // - // h.state.lock.Lock() - // defer h.state.lock.Unlock() - state *hostSimulationState - - shutdown chan chan struct{} -} - -// Enroll is the implementation of the host simulation's enrollment functionality -func (h *HostSimulation) Enroll() error { - h.state.lock.Lock() - defer h.state.lock.Unlock() - - enrollmentAttempts := 5 - - var err error - var nodeKey string - var invalid bool - - for currentAttempt := 1; currentAttempt <= enrollmentAttempts; currentAttempt++ { - if currentAttempt != 1 { - level.Debug(h.logger).Log( - "msg", "first enrollment attempt failed, re-trying enrollment", - "attempt", currentAttempt, - ) - time.Sleep(time.Duration(math.Pow(2, float64(currentAttempt))) * time.Second) - } - nodeKey, invalid, err = h.state.serviceClient.RequestEnrollment(context.Background(), h.enrollSecret, h.uuid, service.EnrollmentDetails{}) - if err != nil { - level.Debug(h.logger).Log( - "msg", "transport error in enrollment", - "err", err.Error(), - "uuid", h.uuid, - ) - err = fmt.Errorf("transport error in enrollment: %w", err) - continue - } - if invalid { - level.Debug(h.logger).Log( - "msg", "server responded that enrollment was invalid", - "uuid", h.uuid, - ) - err = fmt.Errorf("enrollment invalid for host with uuid: %s", h.uuid) - continue - } - h.state.nodeKey = nodeKey - return nil - } - - level.Debug(h.logger).Log( - "msg", "successfully enrolled host", - "uuid", h.uuid, - ) - - return err -} - -// RequestConfig is the implementation of the host simulation's config retrieval -// functionality -func (h *HostSimulation) RequestConfig() error { - h.state.lock.Lock() - defer h.state.lock.Unlock() - - // TODO: When we request the config, we are throwing away what the server - // returns. An enhancement to this simulator should take the packs in the - // config and execute the queries on a realistic schedule. - // Further, the config may contain options as well which could influence the - // desired rate at which a host should be checking in to the server which has - // an obvious effect on the integrity of a load test. - config, invalid, err := h.state.serviceClient.RequestConfig(context.Background(), h.state.nodeKey) - if err != nil { - level.Debug(h.logger).Log( - "msg", "transport error requesting config", - "err", err.Error(), - "uuid", h.uuid, - ) - return fmt.Errorf("transport error retrieving config: %w", err) - } - if invalid { - level.Debug(h.logger).Log( - "msg", "server responded that config request was invalid", - "uuid", h.uuid, - ) - return errors.New("enrollment invalid in request config") - } - - level.Debug(h.logger).Log( - "msg", "successfully requested config", - "config", config, - "uuid", h.uuid, - ) - - return nil -} - -// PublishLogs is the implementation of the host simulation's log publishing -// functionality -func (h *HostSimulation) PublishLogs() error { - h.state.lock.Lock() - defer h.state.lock.Unlock() - - logs := []string{"foo", "bar", "baz"} - _, _, invalid, err := h.state.serviceClient.PublishLogs(context.Background(), h.state.nodeKey, logger.LogTypeStatus, logs) - if err != nil { - level.Debug(h.logger).Log( - "msg", "transport error publishing logs", - "err", err.Error(), - "uuid", h.uuid, - ) - return fmt.Errorf("transport error publishing logs: %w", err) - } - if invalid { - level.Debug(h.logger).Log( - "msg", "server responded that log publish was invalid", - "uuid", h.uuid, - ) - return errors.New("enrollment invalid in publish logs") - } - - level.Debug(h.logger).Log( - "msg", "successfully published logs", - "uuid", h.uuid, - ) - - return nil -} - -// RequestQueries is the implementation of the host simulation's request new -// queries functionality -func (h *HostSimulation) RequestQueries() error { - h.state.lock.Lock() - defer h.state.lock.Unlock() - - queries, invalid, err := h.state.serviceClient.RequestQueries(context.Background(), h.state.nodeKey) - if err != nil { - level.Debug(h.logger).Log( - "msg", "transport error requesting queries", - "err", err.Error(), - "uuid", h.uuid, - ) - return fmt.Errorf("transport error requesting queries: %w", err) - } - if invalid { - level.Debug(h.logger).Log( - "msg", "server responded that requesting queries was invalid", - "uuid", h.uuid, - ) - return errors.New("enrollment invalid in request queries") - } - - if len(queries.Queries) == 0 { - // No queries to run - return nil - } - - results := []distributed.Result{} - for name, sql := range queries.Queries { - level.Debug(h.logger).Log( - "msg", "running live query", - "name", name, - "sql", sql, - "uuid", h.uuid, - ) - rows, err := h.host.RunQuery(sql) - if err != nil { - level.Debug(h.logger).Log( - "msg", "error running query", - "err", err.Error(), - "uuid", h.uuid, - ) - rows = []map[string]string{} - } - results = append(results, - distributed.Result{QueryName: name, Status: 0, Rows: rows}, - ) - } - - level.Debug(h.logger).Log( - "msg", "publishing results for live queries", - "uuid", h.uuid, - ) - - _, _, invalid, err = h.state.serviceClient.PublishResults(context.Background(), h.state.nodeKey, results) - if err != nil { - level.Debug(h.logger).Log( - "msg", "transport error publishing distributed query results", - "err", err.Error(), - "uuid", h.uuid, - ) - return fmt.Errorf("transport error publishing results: %w", err) - } - if invalid { - level.Debug(h.logger).Log( - "msg", "server responded that publishing distributed query results was invalid", - "uuid", h.uuid, - ) - return errors.New("enrollment invalid in publish results") - } - - return nil -} - -// hostSimulationState is a light container around simulation state management -type hostSimulationState struct { - lock sync.RWMutex - serviceClient service.KolideService - nodeKey string - failed bool - started bool -} - -// SimulationOption is a functional option pattern for defining how a host -// simulation instance should be configured. For more information on this -// patter, see the following blog post: -// https://dave.cheney.net/2014/10/17/functional-options-for-friendly-apis -type SimulationOption func(*HostSimulation) - -// WithRequestQueriesInterval is a functional option which allows the user to -// specify how often the simulation host should check-in to the server and ask -// for queries to run -func WithRequestQueriesInterval(interval time.Duration) SimulationOption { - return func(i *HostSimulation) { - i.requestQueriesInterval = interval - } -} - -// WithRequestConfigInterval is a functional option which allows the user to -// specify how often the simulation host should check-in to the server and ask -// for a new config -func WithRequestConfigInterval(interval time.Duration) SimulationOption { - return func(i *HostSimulation) { - i.requestConfigInterval = interval - } -} - -// WithPublishLogsInterval is a functional option which allows the user to -// specify how often the simulation host should log status and results logs -func WithPublishLogsInterval(interval time.Duration) SimulationOption { - return func(i *HostSimulation) { - i.publishLogsInterval = interval - } -} - -// WithInsecure is a functional option which allows the user to declare that -// when connecting to the remote API, the client should skip TLS certificate -// verification but still use TLS -func WithInsecure() SimulationOption { - return func(i *HostSimulation) { - i.insecure = true - } -} - -// WithInsecureGrpc is a functional option which allows the user to declare that -// WithInsecure is a functional option which allows the user to declare that -// when connecting to the remote API, the client should turn off TLS entirely -func WithInsecureGrpc() SimulationOption { - return func(i *HostSimulation) { - i.insecureGrpc = true - } -} - -// createSimulationRuntime is an internal helper which creates an instance of -// *HostSimulation given a set of supplied functional options -func createSimulationRuntime(logger log.Logger, host QueryRunner, uuid, enrollSecret string, opts ...SimulationOption) *HostSimulation { - h := &HostSimulation{ - logger: logger, - host: host, - uuid: uuid, - enrollSecret: enrollSecret, - requestQueriesInterval: 2 * time.Second, - requestConfigInterval: 5 * time.Second, - publishLogsInterval: 10 * time.Second, - shutdown: make(chan chan struct{}), - state: &hostSimulationState{}, - } - for _, opt := range opts { - opt(h) - } - - return h -} - -// LaunchSimulation is a utility which allows the user to configure and run an -// asynchronous instance of a host simulation given a set of functional options -func LaunchSimulation(logger log.Logger, host QueryRunner, grpcURL, uuid, enrollSecret string, opts ...SimulationOption) *HostSimulation { - h := createSimulationRuntime(logger, host, uuid, enrollSecret, opts...) - go func() { - h.state.lock.Lock() - h.state.started = true - - grpcOpts := []grpc.DialOption{ - grpc.WithTimeout(time.Second), - } - if h.insecureGrpc { - grpcOpts = append(grpcOpts, grpc.WithInsecure()) - } else { - host, _, err := net.SplitHostPort(grpcURL) - if err != nil { - err = fmt.Errorf("split grpc server host and port: %s: %w", grpcURL, err) - h.state.failed = true - h.state.lock.Unlock() - level.Info(logger).Log("msg", "got error exiting simulator goroutine", "err", err) - return - } - creds := credentials.NewTLS(&tls.Config{ - ServerName: host, - InsecureSkipVerify: h.insecure, - }) - grpcOpts = append(grpcOpts, grpc.WithTransportCredentials(creds)) - } - conn, err := grpc.Dial(grpcURL, grpcOpts...) - if err != nil { - h.state.failed = true - h.state.lock.Unlock() - level.Info(logger).Log("msg", "got error exiting simulator goroutine", "err", err) - return - } - defer conn.Close() - - multislogger := multislogger.New(slog.NewJSONHandler(os.Stdout, nil)) - knapsack := knapsack.New(nil, nil, nil, multislogger, nil) - h.state.serviceClient = service.NewGRPCClient(knapsack, conn) - - h.state.lock.Unlock() - - err = h.Enroll() - if err != nil { - h.state.lock.Lock() - level.Debug(h.logger).Log( - "msg", "enrollment failed", - "uuid", h.uuid, - "err", err, - ) - h.state.failed = true - h.state.lock.Unlock() - return - } - - requestQueriesTicker := time.NewTicker(h.requestQueriesInterval) - defer requestQueriesTicker.Stop() - requestConfigTicker := time.NewTicker(h.requestConfigInterval) - defer requestConfigTicker.Stop() - publishLogsTicker := time.NewTicker(h.publishLogsInterval) - defer publishLogsTicker.Stop() - - for { - var err error - select { - case <-requestQueriesTicker.C: - err = h.RequestQueries() - case <-requestConfigTicker.C: - err = h.RequestConfig() - case <-publishLogsTicker.C: - err = h.PublishLogs() - case done := <-h.shutdown: - close(done) - return - } - if err != nil { - level.Debug(h.logger).Log( - "msg", "an error occurred in simulation", - "err", err.Error(), - "uuid", h.uuid, - ) - h.state.lock.Lock() - h.state.failed = true - h.state.lock.Unlock() - return - } - } - - }() - return h -} - -// Healthy is a helper which performs an introspection on the simulation -// instance to determine whether or not it is healthy -func (h *HostSimulation) Healthy() bool { - // we're going to be reading the state of the instance, so we first must - // acquire a read lock on the state - h.state.lock.RLock() - defer h.state.lock.RUnlock() - - if h.state.started { - return !h.state.failed - } - return true -} - -// Shutdown will attempt to gracefully shutdown the simulation -func (h *HostSimulation) Shutdown() error { - done := make(chan struct{}) - h.shutdown <- done - - timer := time.NewTimer(time.Second) - select { - case <-done: - return nil - case <-timer.C: - } - - return errors.New("simulation did not shut down in time") -} diff --git a/pkg/simulator/simulator_test.go b/pkg/simulator/simulator_test.go deleted file mode 100644 index c89a14f64..000000000 --- a/pkg/simulator/simulator_test.go +++ /dev/null @@ -1,27 +0,0 @@ -package simulator - -import ( - "testing" - - "github.com/go-kit/kit/log" - "github.com/stretchr/testify/require" -) - -func TestFunctionalOptions(t *testing.T) { - t.Parallel() - - simulation := createSimulationRuntime( - log.NewNopLogger(), - nil, "", "", - WithInsecure(), - ) - - // verify the functional options are correct - require.True(t, simulation.insecure) - require.False(t, simulation.insecureGrpc) - - // we haven't started the simulation yet, so the instance should think it's - // healthy still - require.False(t, simulation.state.started) - require.True(t, simulation.Healthy()) -} diff --git a/pkg/simulator/testdata/bad_symlink/symlink.yaml b/pkg/simulator/testdata/bad_symlink/symlink.yaml deleted file mode 120000 index 5a9668899..000000000 --- a/pkg/simulator/testdata/bad_symlink/symlink.yaml +++ /dev/null @@ -1 +0,0 @@ -foo/bar/bing/bang/boom \ No newline at end of file diff --git a/pkg/simulator/testdata/duplicate/duplicate.yaml b/pkg/simulator/testdata/duplicate/duplicate.yaml deleted file mode 100644 index 41c79e887..000000000 --- a/pkg/simulator/testdata/duplicate/duplicate.yaml +++ /dev/null @@ -1,5 +0,0 @@ -name: - foobar -queries: - - pattern: ".*time.*" - results: diff --git a/pkg/simulator/testdata/duplicate/host.yaml b/pkg/simulator/testdata/duplicate/host.yaml deleted file mode 100644 index db7910c18..000000000 --- a/pkg/simulator/testdata/duplicate/host.yaml +++ /dev/null @@ -1,2 +0,0 @@ -name: - foobar \ No newline at end of file diff --git a/pkg/simulator/testdata/invalid_regexp/bar.yaml b/pkg/simulator/testdata/invalid_regexp/bar.yaml deleted file mode 100644 index fe9bd1f22..000000000 --- a/pkg/simulator/testdata/invalid_regexp/bar.yaml +++ /dev/null @@ -1,8 +0,0 @@ -name: - bar -parent: - foo -queries: - - pattern: "select version from osquery_info" - results: - - {"version":"2.10.2"} \ No newline at end of file diff --git a/pkg/simulator/testdata/invalid_regexp/foo.yaml b/pkg/simulator/testdata/invalid_regexp/foo.yaml deleted file mode 100644 index 94922abb2..000000000 --- a/pkg/simulator/testdata/invalid_regexp/foo.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: - foo -queries: - - pattern: "select hour, minutes from time" - results: - - {"hour":"19","minutes":"34"} - - pattern: "select platform from osquery_info" - results: - - {"platform": "darwin"} \ No newline at end of file diff --git a/pkg/simulator/testdata/invalid_regexp/invalid.yaml b/pkg/simulator/testdata/invalid_regexp/invalid.yaml deleted file mode 100644 index 123d8c7db..000000000 --- a/pkg/simulator/testdata/invalid_regexp/invalid.yaml +++ /dev/null @@ -1,8 +0,0 @@ -name: - bar -parent: - foo -queries: - - pattern: "select version (" - results: - - {"version":"2.10.2"} \ No newline at end of file diff --git a/pkg/simulator/testdata/invalid_yaml/invalid.yaml b/pkg/simulator/testdata/invalid_yaml/invalid.yaml deleted file mode 100644 index 1b41843f5..000000000 --- a/pkg/simulator/testdata/invalid_yaml/invalid.yaml +++ /dev/null @@ -1 +0,0 @@ -this is not valid yaml \ No newline at end of file diff --git a/pkg/simulator/testdata/invalid_yaml/valid.yaml b/pkg/simulator/testdata/invalid_yaml/valid.yaml deleted file mode 100644 index 8b2f51859..000000000 --- a/pkg/simulator/testdata/invalid_yaml/valid.yaml +++ /dev/null @@ -1,2 +0,0 @@ -foo: - bar \ No newline at end of file diff --git a/pkg/simulator/testdata/missing_parent/bar.yaml b/pkg/simulator/testdata/missing_parent/bar.yaml deleted file mode 100644 index 4aa4ba9a9..000000000 --- a/pkg/simulator/testdata/missing_parent/bar.yaml +++ /dev/null @@ -1,2 +0,0 @@ -name: - bar \ No newline at end of file diff --git a/pkg/simulator/testdata/missing_parent/foo.yaml b/pkg/simulator/testdata/missing_parent/foo.yaml deleted file mode 100644 index 33e4ee074..000000000 --- a/pkg/simulator/testdata/missing_parent/foo.yaml +++ /dev/null @@ -1,4 +0,0 @@ -name: - foo -parent: - baz \ No newline at end of file diff --git a/pkg/simulator/testdata/valid1/bar.yaml b/pkg/simulator/testdata/valid1/bar.yaml deleted file mode 100644 index fe9bd1f22..000000000 --- a/pkg/simulator/testdata/valid1/bar.yaml +++ /dev/null @@ -1,8 +0,0 @@ -name: - bar -parent: - foo -queries: - - pattern: "select version from osquery_info" - results: - - {"version":"2.10.2"} \ No newline at end of file diff --git a/pkg/simulator/testdata/valid1/foo.yaml b/pkg/simulator/testdata/valid1/foo.yaml deleted file mode 100644 index 94922abb2..000000000 --- a/pkg/simulator/testdata/valid1/foo.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: - foo -queries: - - pattern: "select hour, minutes from time" - results: - - {"hour":"19","minutes":"34"} - - pattern: "select platform from osquery_info" - results: - - {"platform": "darwin"} \ No newline at end of file diff --git a/tools/autoupdate-v1-tests/README.md b/tools/autoupdate-v1-tests/README.md deleted file mode 100644 index 3b278b198..000000000 --- a/tools/autoupdate-v1-tests/README.md +++ /dev/null @@ -1,83 +0,0 @@ -# Upgrade Exec Service Testing - -This is a test program to explore update functionality, primarily -focuses on windows.It pairs with the [Autoupdate ADR](/ -docs/architecture/2019-03-11_autoupdate.md) - -## Observations - -You cannot remove an exe that's linked to a _running_ service. You can -rename it. You can't overwrite it. This manifests as a permission -error. If the service is loaded, but stopped, you can overwrite. - -Note that if you're using the restart-on-failure, this can create a -small race condition. If the service manager attempts a restart while -there is _no_ binary on the expected path, the service transitions to -a stopped state. This timing is effected by the restart delay -settings. - -### Windows Services Restart On Failure - -Windows Service Manager can restart services on failure. This is -controlled by the Recovery portion of the settings. Behavior seems the -same regardless of exit code. Process _does_ get a new PID on launch - -These configs can be examined using the `sc.exe` tool. (Note: you -might need an extra param to adjust the buffer size.) - -For example: - -``` shell -PS C:\Users\example> sc.exe qfailure upgradetest 5000 -[SC] QueryServiceConfig2 SUCCESS - -SERVICE_NAME: upgradetest - RESET_PERIOD (in seconds) : 0 - REBOOT_MESSAGE : - COMMAND_LINE : -``` - -Further Reading: - -* [WiX ServiceConfig](http://wixtoolset.org/documentation/manual/v3/xsd/util/serviceconfig.html) -* [go configs](https://godoc.org/golang.org/x/sys/windows/svc/mgr#Service.RecoveryActions) -* [StackExchange](https://serverfault.com/questions/48600/how-can-i-automatically-restart-a-windows-service-if-it-crashes) - -**Note:** The MSI options to configure this are broken. The -recommendation is to use a Custom Action to call out to -`sc.exe`. Instead, we handle inside the service start. - -### Replace before restart - -Manually testing the idea of moving a binary aside, and dropping in a -new one and then calling `Exit(0)` and letting the service manager -restart... This seems to work - -Test Process: -1. Use my test case of run 5s, exit. -2. svc manager restarts -3. Observe new PIDs. -4. During a 5s loop, move old binary aside and scp new binary in -5. Observe the format of the log messages change on restart - -## Shell Debugging Snippets - -Viewing Event Log: -``` -# Old interface -Get-EventLog -LogName Application -Newest 10 - -# New interface, with full bodies -Get-WinEvent -LogName System -MaxEvents 10 -Get-WinEvent -LogName Application -MaxEvents 10 | Format-Table TimeCreated,Message -wrap -``` - - -Various ways to see service status: -``` shell -Get-Service upgradetest - -sc.exe query upgradetest -sc.exe qc upgradetest -sc.exe qfailure upgradetest 5000 -``` diff --git a/tools/autoupdate-v1-tests/main.go b/tools/autoupdate-v1-tests/main.go deleted file mode 100644 index 30e03bc08..000000000 --- a/tools/autoupdate-v1-tests/main.go +++ /dev/null @@ -1,67 +0,0 @@ -package main - -import ( - "fmt" - "os" - "time" - - "github.com/kardianos/osext" -) - -type processNotes struct { - Pid int - Path string - Size int64 - ModTime time.Time -} - -var ProcessNotes processNotes - -func main() { - - binaryName, err := osext.Executable() - if err != nil { - panic(fmt.Errorf("osext.Executable: %w", err)) - } - - processStat, err := os.Stat(binaryName) - if err != nil { - panic(fmt.Errorf("os.Stat: %w", err)) - } - - ProcessNotes.Pid = os.Getpid() - ProcessNotes.Path = binaryName - ProcessNotes.Size = processStat.Size() - ProcessNotes.ModTime = processStat.ModTime() - - if len(os.Args) < 2 { - // Let's assume this should be windows services for now - //panic("Need an argument") - _ = runWindowsSvc([]string{}) - } - - var run func([]string) error - - switch os.Args[1] { - case "run": - run = runLoop - case "svc": - run = runWindowsSvc - case "svc-fg": - run = runWindowsSvcForeground - case "install": - run = runInstallService - case "uninstall": - run = runRemoveService - default: - panic("Unknown option") - } - - err = run(os.Args[2:]) - if err != nil { - panic(fmt.Errorf("Running subcommand %s: %w", os.Args[1], err)) - } - - fmt.Printf("Finished Main (pid %d)\n", ProcessNotes.Pid) - -} diff --git a/tools/autoupdate-v1-tests/runlauncher_return.go b/tools/autoupdate-v1-tests/runlauncher_return.go deleted file mode 100644 index c8ce75060..000000000 --- a/tools/autoupdate-v1-tests/runlauncher_return.go +++ /dev/null @@ -1,37 +0,0 @@ -package main - -import ( - "context" - "fmt" - "time" - - "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" -) - -func runLauncher(ctx context.Context, cancel func(), args []string, logger log.Logger) error { - count := 0 - - for { - count = count + 1 - level.Debug(logger).Log( - "msg", "Launcher Loop", - "count", count, - "pid", ProcessNotes.Pid, - "size", ProcessNotes.Size, - "modtime", ProcessNotes.ModTime, - ) - time.Sleep(5 * time.Second) - - if count > 4 { - if err := triggerUpgrade(ctx, cancel, logger); err != nil { - return fmt.Errorf("triggerUpgrade: %w", err) - } - break - } - } - - level.Debug(logger).Log("msg", "I guess we're exiting", "pid", ProcessNotes.Pid) - cancel() - return nil -} diff --git a/tools/autoupdate-v1-tests/runloop.go b/tools/autoupdate-v1-tests/runloop.go deleted file mode 100644 index 2bf9947fb..000000000 --- a/tools/autoupdate-v1-tests/runloop.go +++ /dev/null @@ -1,15 +0,0 @@ -package main - -import ( - "context" - - "github.com/kolide/kit/logutil" -) - -func runLoop(args []string) error { - ctx, cancel := context.WithCancel(context.Background()) - logger := logutil.NewServerLogger(true) - - return runLauncher(ctx, cancel, args, logger) - -} diff --git a/tools/autoupdate-v1-tests/svc-manager_windows.go b/tools/autoupdate-v1-tests/svc-manager_windows.go deleted file mode 100644 index 3538046a7..000000000 --- a/tools/autoupdate-v1-tests/svc-manager_windows.go +++ /dev/null @@ -1,66 +0,0 @@ -//go:build windows -// +build windows - -package main - -import ( - "fmt" - - "github.com/kardianos/osext" - - "golang.org/x/sys/windows/svc/mgr" -) - -func runInstallService(args []string) error { - exepath, err := osext.Executable() - if err != nil { - return fmt.Errorf("osext.Executable: %w", err) - } - - m, err := mgr.Connect() - if err != nil { - return fmt.Errorf("mgr.Connect: %w", err) - } - defer m.Disconnect() - - s, err := m.OpenService(serviceName) - if err == nil { - s.Close() - return fmt.Errorf("service %s already exists", serviceName) - } - - cfg := mgr.Config{DisplayName: serviceDesc, StartType: mgr.StartAutomatic} - - //ra := mgr.RecoveryAction{Type: mgr.ServiceRestart, Delay: 5 * time.Second} - - s, err = m.CreateService(serviceName, exepath, cfg, "svc") - if err != nil { - return fmt.Errorf("CreateService: %w", err) - } - defer s.Close() - - //if err := s.SetRecoveryActions([]mgr.RecoveryAction{ra}, 3); err != nil { - //return errors.Wrap(err, "SetRecoveryActions") - //} - - return nil -} - -func runRemoveService(args []string) error { - m, err := mgr.Connect() - if err != nil { - return fmt.Errorf("mgr.Connect: %w", err) - } - defer m.Disconnect() - - s, err := m.OpenService(serviceName) - if err != nil { - s.Close() - return fmt.Errorf("service %s is not installed", serviceName) - } - defer s.Close() - - err = s.Delete() - return err - -} diff --git a/tools/autoupdate-v1-tests/svc.go b/tools/autoupdate-v1-tests/svc.go deleted file mode 100644 index 2fa5c4865..000000000 --- a/tools/autoupdate-v1-tests/svc.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build !windows -// +build !windows - -package main - -import ( - "errors" -) - -func runWindowsSvc(args []string) error { - return errors.New("This isn't windows") -} - -func runWindowsSvcForeground(args []string) error { - return errors.New("This isn't windows") -} - -func runInstallService(args []string) error { - return errors.New("This isn't windows") -} - -func runRemoveService(args []string) error { - return errors.New("This isn't windows") -} diff --git a/tools/autoupdate-v1-tests/svc_windows.go b/tools/autoupdate-v1-tests/svc_windows.go deleted file mode 100644 index b15a31476..000000000 --- a/tools/autoupdate-v1-tests/svc_windows.go +++ /dev/null @@ -1,162 +0,0 @@ -//go:build windows -// +build windows - -package main - -import ( - "context" - "fmt" - "os" - "time" - - "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" - "github.com/kolide/kit/logutil" - "github.com/kolide/launcher/pkg/log/eventlog" - - "golang.org/x/sys/windows/svc" - "golang.org/x/sys/windows/svc/debug" - "golang.org/x/sys/windows/svc/mgr" -) - -const ( - serviceName = "upgradetest" - serviceDesc = "Launcher Auto Upgrade Testing" -) - -func runWindowsSvc(args []string) error { - eventLogWriter, err := eventlog.NewWriter(serviceName) - if err != nil { - return fmt.Errorf("create eventlog writer: %w", err) - } - defer eventLogWriter.Close() - - logger := eventlog.New(eventLogWriter) - level.Debug(logger).Log("msg", "service start requested") - - run := svc.Run - return run(serviceName, &winSvc{logger: logger, args: args}) -} - -func runWindowsSvcForeground(args []string) error { - // Foreground mode is inherently a debug mode. So we start the - // logger in debugging mode, instead of looking at opts.debug - logger := logutil.NewCLILogger(true) - level.Debug(logger).Log("msg", "foreground service start requested (debug mode)") - - run := debug.Run - return run(serviceName, &winSvc{logger: logger, args: args}) -} - -type winSvc struct { - logger log.Logger - args []string -} - -func (w *winSvc) Execute(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) { - const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown - changes <- svc.Status{State: svc.StartPending} - level.Debug(w.logger).Log("msg", "windows service starting") - changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted} - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // TODO: needs a gofunc probably - if err := fixRecoveryActions(serviceName); err != nil { - level.Info(w.logger).Log("msg", "Failed to fixRecoveryActions", "err", err) - // TODO: Do we actually want to exit here? Not sure. - changes <- svc.Status{State: svc.Stopped, Accepts: cmdsAccepted} - os.Exit(1) - } - - go func() { - err := runLauncher(ctx, cancel, w.args, w.logger) - if err != nil { - level.Info(w.logger).Log("err", err, "stack", fmt.Sprintf("%+v", err)) - changes <- svc.Status{State: svc.Stopped, Accepts: cmdsAccepted} - os.Exit(1) - } - - level.Info(w.logger).Log("msg", "runLauncher gofunc ended cleanly", "pid", os.Getpid()) - - // Case 1 -- Nothing here - // - // If we do not exit here, we sorta just hang. This doesn't seem - // surprising -- What else would happen. The launcher go routine - // ends, but the signal handling forloop remains. - - // Case 2 -- os.Exit(0) - // - // Logs stop, and the service shows as stopped. Eg: windows - // services saw the clean exit and assumed it was - // intentional. Note that this may depend on some service - // installation parameter. - level.Info(w.logger).Log("msg", "exit(0)") - os.Exit(0) - - // Case 3 -- os.Exit(1) - // - // Same as Case 2. Makes me think something is set oddly in the - // windows service recovery stuff. It really oughgt be - // retrying. Need to figure out how to get to those settings - //level.Info(w.logger).Log("msg", "let's exit(1)") - //os.Exit(1) - }() - - for { - select { - case c := <-r: - switch c.Cmd { - case svc.Interrogate: - level.Info(w.logger).Log("case", "Interrogate") - changes <- c.CurrentStatus - // Testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4 - time.Sleep(100 * time.Millisecond) - changes <- c.CurrentStatus - case svc.Stop, svc.Shutdown: - level.Info(w.logger).Log("case", "stop/shutdown") - changes <- svc.Status{State: svc.StopPending} - cancel() - time.Sleep(100 * time.Millisecond) - changes <- svc.Status{State: svc.Stopped, Accepts: cmdsAccepted} - return ssec, errno - default: - level.Info(w.logger).Log("err", "unexpected control request", "control_request", c) - } - } - } -} - -// Fix the Recovery Actions. -// -// This is all a hack around MSI's inability to set the recovery actions. -// -// Doing this requires the service name. We ought be able to get it -// inside the service, but I can't see how. So, we'll make some -// assumptions about how the service has been installed. Bummer. -func fixRecoveryActions(name string) error { - m, err := mgr.Connect() - if err != nil { - return fmt.Errorf("mgr.Connect: %w", err) - } - defer m.Disconnect() - - s, err := m.OpenService(name) - if err != nil { - return fmt.Errorf("service %s is not installed", name) - } - defer s.Close() - - // Action, and time to wait before performing action - ra := mgr.RecoveryAction{Type: mgr.ServiceRestart, Delay: 1 * time.Second} - - // How many seconds of stable daemon activity resets the RecoveryAction cycle - resetPeriod := uint32(3) - - if err := s.SetRecoveryActions([]mgr.RecoveryAction{ra}, resetPeriod); err != nil { - return fmt.Errorf("SetRecoveryActions: %w", err) - } - - return nil -} diff --git a/tools/autoupdate-v1-tests/upgrade.go b/tools/autoupdate-v1-tests/upgrade.go deleted file mode 100644 index 0dcc07436..000000000 --- a/tools/autoupdate-v1-tests/upgrade.go +++ /dev/null @@ -1,62 +0,0 @@ -package main - -import ( - "context" - "fmt" - "os" - "runtime" - "syscall" - - "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" - "github.com/kolide/kit/fsutil" -) - -func triggerUpgrade(ctx context.Context, cancel func(), logger log.Logger) error { - level.Info(logger).Log( - "msg", "Starting Upgrade", - "origpid", ProcessNotes.Pid, - ) - - // Should this get a random append? - stagedFile := fmt.Sprintf("%s-staged", ProcessNotes.Path) - - // To emulate a new version, just copy the current binary to the staged location - level.Debug(logger).Log("msg", "fsutil.CopyFile") - if err := fsutil.CopyFile(ProcessNotes.Path, stagedFile); err != nil { - return (fmt.Errorf("fsutil.CopyFile: %w", err)) - } - - oldFile := fmt.Sprintf("%s-old", ProcessNotes.Path) - level.Debug(logger).Log("msg", "os.Rename cur to old") - if err := os.Rename(ProcessNotes.Path, oldFile); err != nil { - return fmt.Errorf("os.Rename cur top old: %w", err) - } - - level.Debug(logger).Log("msg", "os.Rename stage to cur") - if err := os.Rename(stagedFile, ProcessNotes.Path); err != nil { - return fmt.Errorf("os.Rename staged to cur: %w", err) - } - - level.Debug(logger).Log("msg", "os.Chmod") - if err := os.Chmod(ProcessNotes.Path, 0755); err != nil { - return fmt.Errorf("os.Chmod: %w", err) - } - - // Our normal process here is to exec the new binary. However, this - // doesn't work on windows -- windows has no exec. So instead, we - // exit, and let the service manager restart us. - if runtime.GOOS == "windows" { - level.Info(logger).Log("msg", "Exiting, so service manager can restart new version") - return nil - } - - // For non-windows machine, exec the new version - level.Debug(logger).Log("msg", "syscall.Exec") - if err := syscall.Exec(ProcessNotes.Path, os.Args, os.Environ()); err != nil { - return fmt.Errorf("syscall.Exec: %w", err) - } - - // Getting here, means the exec call returned - return nil -}