From dda79dfed7311c3b3f901bace7392e4898c3f9da Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Wed, 26 Jun 2024 12:42:18 -0400 Subject: [PATCH 01/21] Construct KATC tables --- ee/katc/config.go | 118 +++++++++++++++++++++++++++++++++++++ ee/katc/config_test.go | 78 ++++++++++++++++++++++++ ee/katc/snappy.go | 19 ++++++ ee/katc/sqlite.go | 84 ++++++++++++++++++++++++++ ee/katc/table.go | 58 ++++++++++++++++++ pkg/osquery/table/table.go | 22 +++---- 6 files changed, 365 insertions(+), 14 deletions(-) create mode 100644 ee/katc/config.go create mode 100644 ee/katc/config_test.go create mode 100644 ee/katc/snappy.go create mode 100644 ee/katc/sqlite.go create mode 100644 ee/katc/table.go diff --git a/ee/katc/config.go b/ee/katc/config.go new file mode 100644 index 000000000..715866640 --- /dev/null +++ b/ee/katc/config.go @@ -0,0 +1,118 @@ +package katc + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log/slog" + "runtime" + + "github.com/osquery/osquery-go" + "github.com/osquery/osquery-go/plugin/table" +) + +/* +Open qs: +- Should we go with the EAV approach rather than with columns? Look at how dataflatten does it + +TODOs: +- Need to do queryContext filtering +*/ + +type katcTableType struct { + name string + dataFunc func(ctx context.Context, path string, query string, columns []string, slogger *slog.Logger) ([]map[string][]byte, error) +} + +const ( + sqliteTableType = "sqlite" +) + +func (ktt *katcTableType) UnmarshalJSON(data []byte) error { + var s string + err := json.Unmarshal(data, &s) + if err != nil { + return fmt.Errorf("unmarshalling string: %w", err) + } + + switch s { + case sqliteTableType: + ktt.name = sqliteTableType + ktt.dataFunc = sqliteData + return nil + default: + return fmt.Errorf("unknown table type %s", s) + } +} + +type dataProcessingStep struct { + name string + processingFunc func(ctx context.Context, data []byte, slogger *slog.Logger) ([]byte, error) +} + +const ( + snappyDecodeProcessingStep = "snappy" + jsObjectDecodeProcessingStep = "js_object" +) + +func (d *dataProcessingStep) UnmarshalJSON(data []byte) error { + var s string + err := json.Unmarshal(data, &s) + if err != nil { + return fmt.Errorf("unmarshalling string: %w", err) + } + + switch s { + case snappyDecodeProcessingStep: + d.name = snappyDecodeProcessingStep + d.processingFunc = snappyDecode + return nil + case jsObjectDecodeProcessingStep: + d.name = jsObjectDecodeProcessingStep + return errors.New("not yet implemented") + default: + return fmt.Errorf("unknown data processing step %s", s) + } +} + +type katcTableConfig struct { + Type katcTableType `json:"type"` + Platform string `json:"platform"` + Columns []string `json:"columns"` + Path string `json:"path"` // Path to file holding data (e.g. sqlite file) -- wildcards supported + Query string `json:"query"` // Query to run against `path` + DataProcessingSteps []dataProcessingStep `json:"data_processing_steps"` +} + +func ConstructKATCTables(config map[string]string, slogger *slog.Logger) []osquery.OsqueryPlugin { + plugins := make([]osquery.OsqueryPlugin, 0) + for tableName, tableConfigStr := range config { + var cfg katcTableConfig + if err := json.Unmarshal([]byte(tableConfigStr), &cfg); err != nil { + slogger.Log(context.TODO(), slog.LevelWarn, + "unable to unmarshal config for Kolide ATC table, skipping", + "table_name", tableName, + "err", err, + ) + continue + } + + if cfg.Platform != runtime.GOOS { + continue + } + + columns := make([]table.ColumnDefinition, len(cfg.Columns)) + for i := 0; i < len(cfg.Columns); i += 1 { + columns[i] = table.ColumnDefinition{ + Name: cfg.Columns[i], + Type: table.ColumnTypeText, + } + } + + t := newKatcTable(tableName, cfg, slogger) + plugins = append(plugins, table.NewPlugin(tableName, columns, t.generate)) + } + + return plugins +} diff --git a/ee/katc/config_test.go b/ee/katc/config_test.go new file mode 100644 index 000000000..fbedfd299 --- /dev/null +++ b/ee/katc/config_test.go @@ -0,0 +1,78 @@ +package katc + +import ( + _ "embed" + "fmt" + "runtime" + "testing" + + "github.com/kolide/launcher/pkg/log/multislogger" + "github.com/stretchr/testify/require" +) + +func TestConstructKATCTables(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + testCaseName string + katcConfig map[string]string + expectedPluginCount int + }{ + { + testCaseName: "snappy_sqlite", + katcConfig: map[string]string{ + "kolide_snappy_sqlite_test": fmt.Sprintf(`{ + "type": "sqlite", + "platform": "%s", + "columns": ["data"], + "path": "/some/path/to/db.sqlite", + "query": "SELECT data FROM object_data JOIN object_store ON (object_data.object_store_id = object_store.id) WHERE object_store.name=\"testtable\";", + "data_processing_steps": ["snappy"] + }`, runtime.GOOS), + }, + expectedPluginCount: 1, + }, + { + testCaseName: "malformed config", + katcConfig: map[string]string{ + "malformed_test": "this is not a config", + }, + expectedPluginCount: 0, + }, + { + testCaseName: "invalid table type", + katcConfig: map[string]string{ + "kolide_snappy_test": fmt.Sprintf(`{ + "type": "unknown_type", + "platform": "%s", + "columns": ["data"], + "path": "/some/path/to/db.sqlite", + "query": "SELECT data FROM object_data;" + }`, runtime.GOOS), + }, + expectedPluginCount: 0, + }, + { + testCaseName: "invalid data processing step type", + katcConfig: map[string]string{ + "kolide_snappy_test": fmt.Sprintf(`{ + "type": "sqlite", + "platform": "%s", + "columns": ["data"], + "path": "/some/path/to/db.sqlite", + "query": "SELECT data FROM object_data;", + "data_processing_steps": ["unknown_step"] + }`, runtime.GOOS), + }, + expectedPluginCount: 0, + }, + } { + tt := tt + t.Run(tt.testCaseName, func(t *testing.T) { + t.Parallel() + + plugins := ConstructKATCTables(tt.katcConfig, multislogger.NewNopLogger()) + require.Equal(t, tt.expectedPluginCount, len(plugins), "unexpected number of plugins") + }) + } +} diff --git a/ee/katc/snappy.go b/ee/katc/snappy.go new file mode 100644 index 000000000..1d1219a19 --- /dev/null +++ b/ee/katc/snappy.go @@ -0,0 +1,19 @@ +package katc + +import ( + "context" + "fmt" + "log/slog" + + "github.com/golang/snappy" +) + +// snappyDecode is a dataProcessingStep that decodes data compressed with snappy +func snappyDecode(ctx context.Context, data []byte, slogger *slog.Logger) ([]byte, error) { + decodedResultBytes, err := snappy.Decode(nil, data) + if err != nil { + return nil, fmt.Errorf("decoding column: %w", err) + } + + return decodedResultBytes, nil +} diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go new file mode 100644 index 000000000..a6d04db1c --- /dev/null +++ b/ee/katc/sqlite.go @@ -0,0 +1,84 @@ +package katc + +import ( + "context" + "database/sql" + "fmt" + "log/slog" + "path/filepath" + + _ "modernc.org/sqlite" +) + +// sqliteData is the dataFunc for sqlite KATC tables +func sqliteData(ctx context.Context, pathPattern string, query string, columns []string, slogger *slog.Logger) ([]map[string][]byte, error) { + sqliteDbs, err := filepath.Glob(pathPattern) + if err != nil { + return nil, fmt.Errorf("globbing for files with pattern %s: %w", pathPattern, err) + } + + results := make([]map[string][]byte, 0) + for _, sqliteDb := range sqliteDbs { + resultsFromDb, err := querySqliteDb(ctx, sqliteDb, query, columns, slogger) + if err != nil { + return nil, fmt.Errorf("querying %s: %w", sqliteDb, err) + } + results = append(results, resultsFromDb...) + } + + return results, nil +} + +// querySqliteDb queries the database at the given path, returning rows of results +func querySqliteDb(ctx context.Context, path string, query string, columns []string, slogger *slog.Logger) ([]map[string][]byte, error) { + conn, err := sql.Open("sqlite", path) + if err != nil { + return nil, fmt.Errorf("opening sqlite db: %w", err) + } + defer func() { + if err := conn.Close(); err != nil { + slogger.Log(ctx, slog.LevelWarn, + "closing sqlite db after query", + "err", err, + ) + } + }() + + rows, err := conn.QueryContext(ctx, query) + if err != nil { + return nil, fmt.Errorf("running query: %w", err) + } + defer func() { + if err := rows.Close(); err != nil { + slogger.Log(ctx, slog.LevelWarn, + "closing rows after scanning results", + "err", err, + ) + } + }() + + results := make([]map[string][]byte, 0) + + // Prepare destination for scan + rawResult := make([][]byte, len(columns)) + scanDest := make([]any, len(columns)) + for i := 0; i < len(columns); i += 1 { + scanDest[i] = &rawResult[i] + } + + // Scan all rows + for rows.Next() { + if err := rows.Scan(scanDest...); err != nil { + return nil, fmt.Errorf("scanning query results: %w", err) + } + + row := make(map[string][]byte) + for i := 0; i < len(columns); i += 1 { + row[columns[i]] = rawResult[i] + } + + results = append(results, row) + } + + return results, nil +} diff --git a/ee/katc/table.go b/ee/katc/table.go new file mode 100644 index 000000000..5b159c303 --- /dev/null +++ b/ee/katc/table.go @@ -0,0 +1,58 @@ +package katc + +import ( + "context" + "fmt" + "log/slog" + + "github.com/osquery/osquery-go/plugin/table" +) + +type katcTable struct { + cfg katcTableConfig + slogger *slog.Logger +} + +func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) *katcTable { + return &katcTable{ + cfg: cfg, + slogger: slogger.With( + "table_name", tableName, + "table_type", cfg.Type, + "table_path", cfg.Path, + ), + } +} + +func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContext) ([]map[string]string, error) { + // Fetch data from our table source + dataRaw, err := k.cfg.Type.dataFunc(ctx, k.cfg.Path, k.cfg.Query, k.cfg.Columns, k.slogger) + if err != nil { + return nil, fmt.Errorf("fetching data: %w", err) + } + + // Process data + results := make([]map[string]string, 0) + for _, dataRawRow := range dataRaw { + rowData := make(map[string]string) + for key, val := range dataRawRow { + // Run any processing steps on the data value + for _, dataProcessingStep := range k.cfg.DataProcessingSteps { + val, err = dataProcessingStep.processingFunc(ctx, val, k.slogger) + if err != nil { + return nil, fmt.Errorf("transforming data at key %s: %w", key, err) + } + } + + // Now transform byte => string + rowData[key] = string(val) + } + + results = append(results, rowData) + } + + // Now, filter data as needed + // TODO queryContext + + return results, nil +} diff --git a/pkg/osquery/table/table.go b/pkg/osquery/table/table.go index 39f3edf6b..e5e2d0517 100644 --- a/pkg/osquery/table/table.go +++ b/pkg/osquery/table/table.go @@ -10,6 +10,7 @@ import ( "github.com/kolide/launcher/ee/agent/startupsettings" "github.com/kolide/launcher/ee/agent/types" "github.com/kolide/launcher/ee/allowedcmd" + "github.com/kolide/launcher/ee/katc" "github.com/kolide/launcher/ee/tables/cryptoinfotable" "github.com/kolide/launcher/ee/tables/dataflattentable" "github.com/kolide/launcher/ee/tables/desktopprocs" @@ -98,29 +99,22 @@ func kolideCustomAtcTables(k types.Knapsack, slogger *slog.Logger) []osquery.Osq } } - // In the future, we would construct the plugins from the configuration here. - // For now, we just log. - slogger.Log(context.TODO(), slog.LevelDebug, - "retrieved Kolide ATC config", - "config", config, - ) - - return nil + return katc.ConstructKATCTables(config, slogger) } func katcFromDb(k types.Knapsack) (map[string]string, error) { if k == nil || k.KatcConfigStore() == nil { return nil, errors.New("stores in knapsack not available") } - loggableConfig := make(map[string]string) + katcCfg := make(map[string]string) if err := k.KatcConfigStore().ForEach(func(k []byte, v []byte) error { - loggableConfig[string(k)] = string(v) + katcCfg[string(k)] = string(v) return nil }); err != nil { return nil, fmt.Errorf("retrieving contents of Kolide ATC config store: %w", err) } - return loggableConfig, nil + return katcCfg, nil } func katcFromStartupSettings(k types.Knapsack) (map[string]string, error) { @@ -135,10 +129,10 @@ func katcFromStartupSettings(k types.Knapsack) (map[string]string, error) { return nil, fmt.Errorf("error getting katc_config from startup settings: %w", err) } - var loggableConfig map[string]string - if err := json.Unmarshal([]byte(katcConfig), &loggableConfig); err != nil { + var katcCfg map[string]string + if err := json.Unmarshal([]byte(katcConfig), &katcCfg); err != nil { return nil, fmt.Errorf("unmarshalling katc_config: %w", err) } - return loggableConfig, nil + return katcCfg, nil } From 5530c170217c7ff977aa490fed7a222d1960fc56 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Wed, 26 Jun 2024 16:22:06 -0400 Subject: [PATCH 02/21] Add support for deserializing structured_clone javascript objects --- ee/katc/config.go | 17 ++- ee/katc/structured_clone.go | 257 ++++++++++++++++++++++++++++++++++++ ee/katc/table.go | 2 +- 3 files changed, 269 insertions(+), 7 deletions(-) create mode 100644 ee/katc/structured_clone.go diff --git a/ee/katc/config.go b/ee/katc/config.go index 715866640..3cc8f8c7b 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -26,7 +26,8 @@ type katcTableType struct { } const ( - sqliteTableType = "sqlite" + sqliteTableType = "sqlite" + indexedDBTableType = "indexeddb" ) func (ktt *katcTableType) UnmarshalJSON(data []byte) error { @@ -41,6 +42,9 @@ func (ktt *katcTableType) UnmarshalJSON(data []byte) error { ktt.name = sqliteTableType ktt.dataFunc = sqliteData return nil + case indexedDBTableType: + ktt.name = indexedDBTableType + return errors.New("indexeddb is not yet implemented") default: return fmt.Errorf("unknown table type %s", s) } @@ -52,8 +56,8 @@ type dataProcessingStep struct { } const ( - snappyDecodeProcessingStep = "snappy" - jsObjectDecodeProcessingStep = "js_object" + snappyDecodeProcessingStep = "snappy" + structuredCloneDeserializeProcessingStep = "structured_clone" ) func (d *dataProcessingStep) UnmarshalJSON(data []byte) error { @@ -68,9 +72,10 @@ func (d *dataProcessingStep) UnmarshalJSON(data []byte) error { d.name = snappyDecodeProcessingStep d.processingFunc = snappyDecode return nil - case jsObjectDecodeProcessingStep: - d.name = jsObjectDecodeProcessingStep - return errors.New("not yet implemented") + case structuredCloneDeserializeProcessingStep: + d.name = structuredCloneDeserializeProcessingStep + d.processingFunc = structuredCloneDeserialize + return nil default: return fmt.Errorf("unknown data processing step %s", s) } diff --git a/ee/katc/structured_clone.go b/ee/katc/structured_clone.go new file mode 100644 index 000000000..5ace4e1c1 --- /dev/null +++ b/ee/katc/structured_clone.go @@ -0,0 +1,257 @@ +package katc + +import ( + "bytes" + "context" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "log/slog" + + "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" +) + +const ( + tagFloatMax uint32 = 0xfff00000 + tagHeader uint32 = 0xfff10000 + tagNull uint32 = 0xffff0000 + tagUndefined uint32 = 0xffff0001 + tagBoolean uint32 = 0xffff0002 + tagInt32 uint32 = 0xffff0003 + tagString uint32 = 0xffff0004 + tagDateObject uint32 = 0xffff0005 + tagRegexpObject uint32 = 0xffff0006 + tagArrayObject uint32 = 0xffff0007 + tagObjectObject uint32 = 0xffff0008 + tagArrayBufferObjectV2 uint32 = 0xffff0009 + tagBooleanObject uint32 = 0xffff000a + tagStringObject uint32 = 0xffff000b + tagNumberObject uint32 = 0xffff000c + tagBackReferenceObject uint32 = 0xffff000d + tagDoNotUse1 uint32 = 0xffff000e + tagDoNotUse2 uint32 = 0xffff000f + tagTypedArrayObjectV2 uint32 = 0xffff0010 + tagMapObject uint32 = 0xffff0011 + tagSetObject uint32 = 0xffff0012 + tagEndOfKeys uint32 = 0xffff0013 +) + +// structuredCloneDeserialize deserializes a JS object that has been stored in IndexedDB +// by Firefox. +// References: +// * https://stackoverflow.com/a/59923297 +// * https://searchfox.org/mozilla-central/source/js/public/StructuredClone.h +// * https://searchfox.org/mozilla-central/source/js/src/vm/StructuredClone.cpp (see especially JSStructuredCloneReader::read) +// * https://html.spec.whatwg.org/multipage/structured-data.html#structureddeserialize +func structuredCloneDeserialize(ctx context.Context, data []byte, slogger *slog.Logger) ([]byte, error) { + srcReader := bytes.NewReader(data) + + // First, read the header + firstTag, _, err := nextPair(srcReader) + if err != nil { + return nil, fmt.Errorf("reading header pair: %w", err) + } + if firstTag != tagHeader { + return nil, fmt.Errorf("unknown header tag %x", firstTag) + } + + // Next up should be our top-level object + objectTag, _, err := nextPair(srcReader) + if err != nil { + return nil, fmt.Errorf("reading top-level object tag: %w", err) + } + if objectTag != tagObjectObject { + return nil, fmt.Errorf("object not found after header: expected %x, got %x", tagObjectObject, objectTag) + } + + // Read all entries in our object + resultObj, err := deserializeObject(srcReader) + if err != nil { + return nil, fmt.Errorf("reading top-level object: %w", err) + } + + // Marshal the object to return + objRaw, err := json.Marshal(resultObj) + if err != nil { + return nil, fmt.Errorf("marshalling result: %w", err) + } + + return objRaw, nil +} + +func nextPair(srcReader io.ByteReader) (uint32, uint32, error) { + // Tags and data are written as a singular little-endian uint64 value. + // For example, the pair (`tagBoolean`, 1) is written as 01 00 00 00 02 00 FF FF, + // where 0xffff0002 is `tagBoolean`. + // To read the pair, we read the next 8 bytes in reverse order, treating the + // first four as the tag and the next four as the data. + var err error + pairBytes := make([]byte, 8) + for i := 7; i >= 0; i -= 1 { + pairBytes[i], err = srcReader.ReadByte() + if err != nil { + return 0, 0, fmt.Errorf("reading byte in pair: %w", err) + } + } + + return binary.BigEndian.Uint32(pairBytes[0:4]), binary.BigEndian.Uint32(pairBytes[4:]), nil +} + +func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { + resultObj := make(map[string]any, 0) + + for { + nextObjTag, nextObjData, err := nextPair(srcReader) + if err != nil { + return nil, fmt.Errorf("reading next pair in object: %w", err) + } + + if nextObjTag == tagEndOfKeys { + // All done! Return object + break + } + + // Read key + if nextObjTag != tagString { + return nil, fmt.Errorf("unsupported key type %x", nextObjTag) + } + nextKey, err := deserializeString(nextObjData, srcReader) + if err != nil { + return nil, fmt.Errorf("reading string for tag %x: %w", nextObjTag, err) + } + + // Read value + valTag, valData, err := nextPair(srcReader) + if err != nil { + return nil, fmt.Errorf("reading next pair for value in object: %w", err) + } + + switch valTag { + case tagInt32: + resultObj[nextKey] = valData + case tagString, tagStringObject: + str, err := deserializeString(valData, srcReader) + if err != nil { + return nil, fmt.Errorf("reading string for key %s: %w", nextKey, err) + } + resultObj[nextKey] = str + case tagObjectObject: + obj, err := deserializeObject(srcReader) + if err != nil { + return nil, fmt.Errorf("reading object for key %s: %w", nextKey, err) + } + resultObj[nextKey] = obj + case tagArrayObject: + arr, err := deserializeArray(valData, srcReader) + if err != nil { + return nil, fmt.Errorf("reading array for key %s: %w", nextKey, err) + } + resultObj[nextKey] = arr + case tagNull, tagUndefined: + resultObj[nextKey] = nil + default: + return nil, fmt.Errorf("cannot process %s: unknown tag type %x", nextKey, valTag) + } + } + + return resultObj, nil +} + +func deserializeString(strData uint32, srcReader io.ByteReader) (string, error) { + strLen := strData & bitMask(31) + isAscii := strData & (1 << 31) + + if isAscii != 0 { + return deserializeAsciiString(strLen, srcReader) + } + + return deserializeUtf16String(strLen, srcReader) +} + +func deserializeAsciiString(strLen uint32, srcReader io.ByteReader) (string, error) { + // Read bytes for string + var i uint32 + var err error + strBytes := make([]byte, strLen) + for i = 0; i < strLen; i += 1 { + strBytes[i], err = srcReader.ReadByte() + if err != nil { + return "", fmt.Errorf("reading byte in string: %w", err) + } + } + + // Now, read padding and discard -- data is stored in 8-byte words + bytesIntoNextWord := strLen % 8 + if bytesIntoNextWord > 0 { + paddingLen := 8 - bytesIntoNextWord + for i = 0; i < paddingLen; i += 1 { + _, _ = srcReader.ReadByte() + } + } + + return string(strBytes), nil +} + +func deserializeUtf16String(strLen uint32, srcReader io.ByteReader) (string, error) { + // Two bytes per char + lenToRead := strLen * 2 + var i uint32 + var err error + strBytes := make([]byte, lenToRead) + for i = 0; i < lenToRead; i += 1 { + strBytes[i], err = srcReader.ReadByte() + if err != nil { + return "", fmt.Errorf("reading byte in string: %w", err) + } + } + + // Now, read padding and discard -- data is stored in 8-byte words + bytesIntoNextWord := lenToRead % 8 + if bytesIntoNextWord > 0 { + paddingLen := 8 - bytesIntoNextWord + for i = 0; i < paddingLen; i += 1 { + _, _ = srcReader.ReadByte() + } + } + + // Decode string + utf16Reader := transform.NewReader(bytes.NewReader(strBytes), unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()) + decoded, err := io.ReadAll(utf16Reader) + if err != nil { + return "", fmt.Errorf("decoding: %w", err) + } + return string(decoded), nil +} + +func deserializeArray(arrayLength uint32, srcReader io.ByteReader) ([]any, error) { + resultArr := make([]any, arrayLength) + + // We discard the next pair before reading the array. + _, _, _ = nextPair(srcReader) + + for i := 0; i < int(arrayLength); i += 1 { + itemTag, _, err := nextPair(srcReader) + if err != nil { + return nil, fmt.Errorf("reading item at index %d in array: %w", i, err) + } + + switch itemTag { + case tagObjectObject: + obj, err := deserializeObject(srcReader) + if err != nil { + return nil, fmt.Errorf("reading object at index %d in array: %w", i, err) + } + resultArr[i] = obj + default: + return nil, fmt.Errorf("cannot process item at index %d in array: unsupported tag type %x", i, itemTag) + } + } + + return resultArr, nil +} + +func bitMask(n uint32) uint32 { + return (1 << n) - 1 +} diff --git a/ee/katc/table.go b/ee/katc/table.go index 5b159c303..74c00320e 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -40,7 +40,7 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex for _, dataProcessingStep := range k.cfg.DataProcessingSteps { val, err = dataProcessingStep.processingFunc(ctx, val, k.slogger) if err != nil { - return nil, fmt.Errorf("transforming data at key %s: %w", key, err) + return nil, fmt.Errorf("transforming data at key `%s`: %w", key, err) } } From 80b380bd088e7b039b7121c323e90e5eac91efc3 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Thu, 27 Jun 2024 09:41:07 -0400 Subject: [PATCH 03/21] Reorder function args --- ee/katc/config.go | 2 +- ee/katc/sqlite.go | 6 +++--- ee/katc/table.go | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index 3cc8f8c7b..b3df68a48 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -22,7 +22,7 @@ TODOs: type katcTableType struct { name string - dataFunc func(ctx context.Context, path string, query string, columns []string, slogger *slog.Logger) ([]map[string][]byte, error) + dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string, columns []string) ([]map[string][]byte, error) } const ( diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index a6d04db1c..52028f20d 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -11,7 +11,7 @@ import ( ) // sqliteData is the dataFunc for sqlite KATC tables -func sqliteData(ctx context.Context, pathPattern string, query string, columns []string, slogger *slog.Logger) ([]map[string][]byte, error) { +func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, query string, columns []string) ([]map[string][]byte, error) { sqliteDbs, err := filepath.Glob(pathPattern) if err != nil { return nil, fmt.Errorf("globbing for files with pattern %s: %w", pathPattern, err) @@ -19,7 +19,7 @@ func sqliteData(ctx context.Context, pathPattern string, query string, columns [ results := make([]map[string][]byte, 0) for _, sqliteDb := range sqliteDbs { - resultsFromDb, err := querySqliteDb(ctx, sqliteDb, query, columns, slogger) + resultsFromDb, err := querySqliteDb(ctx, slogger, sqliteDb, query, columns) if err != nil { return nil, fmt.Errorf("querying %s: %w", sqliteDb, err) } @@ -30,7 +30,7 @@ func sqliteData(ctx context.Context, pathPattern string, query string, columns [ } // querySqliteDb queries the database at the given path, returning rows of results -func querySqliteDb(ctx context.Context, path string, query string, columns []string, slogger *slog.Logger) ([]map[string][]byte, error) { +func querySqliteDb(ctx context.Context, slogger *slog.Logger, path string, query string, columns []string) ([]map[string][]byte, error) { conn, err := sql.Open("sqlite", path) if err != nil { return nil, fmt.Errorf("opening sqlite db: %w", err) diff --git a/ee/katc/table.go b/ee/katc/table.go index 74c00320e..6c1ea94e2 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -26,7 +26,7 @@ func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) * func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContext) ([]map[string]string, error) { // Fetch data from our table source - dataRaw, err := k.cfg.Type.dataFunc(ctx, k.cfg.Path, k.cfg.Query, k.cfg.Columns, k.slogger) + dataRaw, err := k.cfg.Type.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query, k.cfg.Columns) if err != nil { return nil, fmt.Errorf("fetching data: %w", err) } From 38c9e4639c90dc928ac2cee91e97686af66c44c8 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Thu, 27 Jun 2024 09:44:42 -0400 Subject: [PATCH 04/21] Rename type => source --- ee/katc/config.go | 20 ++++++++++---------- ee/katc/config_test.go | 8 ++++---- ee/katc/table.go | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index b3df68a48..35453d2a6 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -20,17 +20,17 @@ TODOs: - Need to do queryContext filtering */ -type katcTableType struct { +type katcSourceType struct { name string dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string, columns []string) ([]map[string][]byte, error) } const ( - sqliteTableType = "sqlite" - indexedDBTableType = "indexeddb" + sqliteSourceType = "sqlite" + indexedDBSourceType = "indexeddb" ) -func (ktt *katcTableType) UnmarshalJSON(data []byte) error { +func (kst *katcSourceType) UnmarshalJSON(data []byte) error { var s string err := json.Unmarshal(data, &s) if err != nil { @@ -38,12 +38,12 @@ func (ktt *katcTableType) UnmarshalJSON(data []byte) error { } switch s { - case sqliteTableType: - ktt.name = sqliteTableType - ktt.dataFunc = sqliteData + case sqliteSourceType: + kst.name = sqliteSourceType + kst.dataFunc = sqliteData return nil - case indexedDBTableType: - ktt.name = indexedDBTableType + case indexedDBSourceType: + kst.name = indexedDBSourceType return errors.New("indexeddb is not yet implemented") default: return fmt.Errorf("unknown table type %s", s) @@ -82,7 +82,7 @@ func (d *dataProcessingStep) UnmarshalJSON(data []byte) error { } type katcTableConfig struct { - Type katcTableType `json:"type"` + Source katcSourceType `json:"source"` Platform string `json:"platform"` Columns []string `json:"columns"` Path string `json:"path"` // Path to file holding data (e.g. sqlite file) -- wildcards supported diff --git a/ee/katc/config_test.go b/ee/katc/config_test.go index fbedfd299..166263e87 100644 --- a/ee/katc/config_test.go +++ b/ee/katc/config_test.go @@ -22,7 +22,7 @@ func TestConstructKATCTables(t *testing.T) { testCaseName: "snappy_sqlite", katcConfig: map[string]string{ "kolide_snappy_sqlite_test": fmt.Sprintf(`{ - "type": "sqlite", + "source": "sqlite", "platform": "%s", "columns": ["data"], "path": "/some/path/to/db.sqlite", @@ -40,10 +40,10 @@ func TestConstructKATCTables(t *testing.T) { expectedPluginCount: 0, }, { - testCaseName: "invalid table type", + testCaseName: "invalid table source", katcConfig: map[string]string{ "kolide_snappy_test": fmt.Sprintf(`{ - "type": "unknown_type", + "source": "unknown_source", "platform": "%s", "columns": ["data"], "path": "/some/path/to/db.sqlite", @@ -56,7 +56,7 @@ func TestConstructKATCTables(t *testing.T) { testCaseName: "invalid data processing step type", katcConfig: map[string]string{ "kolide_snappy_test": fmt.Sprintf(`{ - "type": "sqlite", + "source": "sqlite", "platform": "%s", "columns": ["data"], "path": "/some/path/to/db.sqlite", diff --git a/ee/katc/table.go b/ee/katc/table.go index 6c1ea94e2..a55fb6ce8 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -18,7 +18,7 @@ func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) * cfg: cfg, slogger: slogger.With( "table_name", tableName, - "table_type", cfg.Type, + "table_type", cfg.Source, "table_path", cfg.Path, ), } @@ -26,7 +26,7 @@ func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) * func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContext) ([]map[string]string, error) { // Fetch data from our table source - dataRaw, err := k.cfg.Type.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query, k.cfg.Columns) + dataRaw, err := k.cfg.Source.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query, k.cfg.Columns) if err != nil { return nil, fmt.Errorf("fetching data: %w", err) } From 90a930b0eafabccfa560d58ec66f52fa79113484 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Thu, 27 Jun 2024 10:59:36 -0400 Subject: [PATCH 05/21] Fetch columns from query results --- ee/katc/config.go | 2 +- ee/katc/sqlite.go | 14 ++++++++++---- ee/katc/table.go | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index 35453d2a6..32d45bc4d 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -22,7 +22,7 @@ TODOs: type katcSourceType struct { name string - dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string, columns []string) ([]map[string][]byte, error) + dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string) ([]map[string][]byte, error) } const ( diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index 52028f20d..91c91a714 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -11,7 +11,7 @@ import ( ) // sqliteData is the dataFunc for sqlite KATC tables -func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, query string, columns []string) ([]map[string][]byte, error) { +func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, query string) ([]map[string][]byte, error) { sqliteDbs, err := filepath.Glob(pathPattern) if err != nil { return nil, fmt.Errorf("globbing for files with pattern %s: %w", pathPattern, err) @@ -19,7 +19,7 @@ func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, q results := make([]map[string][]byte, 0) for _, sqliteDb := range sqliteDbs { - resultsFromDb, err := querySqliteDb(ctx, slogger, sqliteDb, query, columns) + resultsFromDb, err := querySqliteDb(ctx, slogger, sqliteDb, query) if err != nil { return nil, fmt.Errorf("querying %s: %w", sqliteDb, err) } @@ -30,7 +30,7 @@ func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, q } // querySqliteDb queries the database at the given path, returning rows of results -func querySqliteDb(ctx context.Context, slogger *slog.Logger, path string, query string, columns []string) ([]map[string][]byte, error) { +func querySqliteDb(ctx context.Context, slogger *slog.Logger, path string, query string) ([]map[string][]byte, error) { conn, err := sql.Open("sqlite", path) if err != nil { return nil, fmt.Errorf("opening sqlite db: %w", err) @@ -59,7 +59,13 @@ func querySqliteDb(ctx context.Context, slogger *slog.Logger, path string, query results := make([]map[string][]byte, 0) - // Prepare destination for scan + // Fetch columns so we know how many values per row we will scan + columns, err := rows.Columns() + if err != nil { + return nil, fmt.Errorf("getting columns from query result: %w", err) + } + + // Prepare scan destination rawResult := make([][]byte, len(columns)) scanDest := make([]any, len(columns)) for i := 0; i < len(columns); i += 1 { diff --git a/ee/katc/table.go b/ee/katc/table.go index a55fb6ce8..0b91d7534 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -26,7 +26,7 @@ func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) * func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContext) ([]map[string]string, error) { // Fetch data from our table source - dataRaw, err := k.cfg.Source.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query, k.cfg.Columns) + dataRaw, err := k.cfg.Source.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query) if err != nil { return nil, fmt.Errorf("fetching data: %w", err) } From 42ae269cb3dba9e22a296428d66e44b7bb2ac056 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Thu, 27 Jun 2024 16:31:51 -0400 Subject: [PATCH 06/21] Ensure path is included in results; reorder func args --- ee/katc/config.go | 20 +++++++++++++++----- ee/katc/snappy.go | 2 +- ee/katc/sqlite.go | 11 +++++++---- ee/katc/structured_clone.go | 2 +- ee/katc/table.go | 29 ++++++++++++++++------------- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index 32d45bc4d..642314f05 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -22,7 +22,12 @@ TODOs: type katcSourceType struct { name string - dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string) ([]map[string][]byte, error) + dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string) ([]sourceData, error) +} + +type sourceData struct { + path string + rows []map[string][]byte } const ( @@ -52,7 +57,7 @@ func (kst *katcSourceType) UnmarshalJSON(data []byte) error { type dataProcessingStep struct { name string - processingFunc func(ctx context.Context, data []byte, slogger *slog.Logger) ([]byte, error) + processingFunc func(ctx context.Context, slogger *slog.Logger, data []byte) ([]byte, error) } const ( @@ -107,12 +112,17 @@ func ConstructKATCTables(config map[string]string, slogger *slog.Logger) []osque continue } - columns := make([]table.ColumnDefinition, len(cfg.Columns)) + columns := []table.ColumnDefinition{ + { + Name: sourcePathColumnName, + Type: table.ColumnTypeText, + }, + } for i := 0; i < len(cfg.Columns); i += 1 { - columns[i] = table.ColumnDefinition{ + columns = append(columns, table.ColumnDefinition{ Name: cfg.Columns[i], Type: table.ColumnTypeText, - } + }) } t := newKatcTable(tableName, cfg, slogger) diff --git a/ee/katc/snappy.go b/ee/katc/snappy.go index 1d1219a19..99d3370b2 100644 --- a/ee/katc/snappy.go +++ b/ee/katc/snappy.go @@ -9,7 +9,7 @@ import ( ) // snappyDecode is a dataProcessingStep that decodes data compressed with snappy -func snappyDecode(ctx context.Context, data []byte, slogger *slog.Logger) ([]byte, error) { +func snappyDecode(ctx context.Context, _ *slog.Logger, data []byte) ([]byte, error) { decodedResultBytes, err := snappy.Decode(nil, data) if err != nil { return nil, fmt.Errorf("decoding column: %w", err) diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index 91c91a714..278bbe385 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -11,19 +11,22 @@ import ( ) // sqliteData is the dataFunc for sqlite KATC tables -func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, query string) ([]map[string][]byte, error) { +func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, query string) ([]sourceData, error) { sqliteDbs, err := filepath.Glob(pathPattern) if err != nil { return nil, fmt.Errorf("globbing for files with pattern %s: %w", pathPattern, err) } - results := make([]map[string][]byte, 0) + results := make([]sourceData, 0) for _, sqliteDb := range sqliteDbs { - resultsFromDb, err := querySqliteDb(ctx, slogger, sqliteDb, query) + rowsFromDb, err := querySqliteDb(ctx, slogger, sqliteDb, query) if err != nil { return nil, fmt.Errorf("querying %s: %w", sqliteDb, err) } - results = append(results, resultsFromDb...) + results = append(results, sourceData{ + path: sqliteDb, + rows: rowsFromDb, + }) } return results, nil diff --git a/ee/katc/structured_clone.go b/ee/katc/structured_clone.go index 5ace4e1c1..3e322816f 100644 --- a/ee/katc/structured_clone.go +++ b/ee/katc/structured_clone.go @@ -45,7 +45,7 @@ const ( // * https://searchfox.org/mozilla-central/source/js/public/StructuredClone.h // * https://searchfox.org/mozilla-central/source/js/src/vm/StructuredClone.cpp (see especially JSStructuredCloneReader::read) // * https://html.spec.whatwg.org/multipage/structured-data.html#structureddeserialize -func structuredCloneDeserialize(ctx context.Context, data []byte, slogger *slog.Logger) ([]byte, error) { +func structuredCloneDeserialize(ctx context.Context, slogger *slog.Logger, data []byte) ([]byte, error) { srcReader := bytes.NewReader(data) // First, read the header diff --git a/ee/katc/table.go b/ee/katc/table.go index 0b91d7534..08e35c477 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -8,6 +8,8 @@ import ( "github.com/osquery/osquery-go/plugin/table" ) +const sourcePathColumnName = "source_path" + type katcTable struct { cfg katcTableConfig slogger *slog.Logger @@ -33,22 +35,23 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex // Process data results := make([]map[string]string, 0) - for _, dataRawRow := range dataRaw { - rowData := make(map[string]string) - for key, val := range dataRawRow { - // Run any processing steps on the data value - for _, dataProcessingStep := range k.cfg.DataProcessingSteps { - val, err = dataProcessingStep.processingFunc(ctx, val, k.slogger) - if err != nil { - return nil, fmt.Errorf("transforming data at key `%s`: %w", key, err) + for _, s := range dataRaw { + for _, dataRawRow := range s.rows { + rowData := map[string]string{ + sourcePathColumnName: s.path, + } + for key, val := range dataRawRow { + // Run any processing steps on the data value + for _, dataProcessingStep := range k.cfg.DataProcessingSteps { + val, err = dataProcessingStep.processingFunc(ctx, k.slogger, val) + if err != nil { + return nil, fmt.Errorf("transforming data at key `%s`: %w", key, err) + } } + rowData[key] = string(val) } - - // Now transform byte => string - rowData[key] = string(val) + results = append(results, rowData) } - - results = append(results, rowData) } // Now, filter data as needed From 15f7ea5f1fbc86bc76dbced0ba9e644d859bcd12 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 10:32:35 -0400 Subject: [PATCH 07/21] Read-only --- ee/katc/sqlite.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index 278bbe385..f392a68f9 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -34,7 +34,8 @@ func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, q // querySqliteDb queries the database at the given path, returning rows of results func querySqliteDb(ctx context.Context, slogger *slog.Logger, path string, query string) ([]map[string][]byte, error) { - conn, err := sql.Open("sqlite", path) + dsn := fmt.Sprintf("file://%s?mode=ro", path) + conn, err := sql.Open("sqlite", dsn) if err != nil { return nil, fmt.Errorf("opening sqlite db: %w", err) } From f52843e961fc911450b606a902e3510467060180 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 11:08:56 -0400 Subject: [PATCH 08/21] Transform entire row instead of individual data to properly unwrap indexeddb values --- ee/katc/config.go | 39 +++++++--------- ee/katc/config_test.go | 4 +- ee/katc/snappy.go | 16 +++++-- ee/katc/structured_clone.go | 93 ++++++++++++++++++++++++------------- ee/katc/table.go | 19 +++++--- 5 files changed, 104 insertions(+), 67 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index 642314f05..f1a2b71b7 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -13,9 +13,6 @@ import ( ) /* -Open qs: -- Should we go with the EAV approach rather than with columns? Look at how dataflatten does it - TODOs: - Need to do queryContext filtering */ @@ -55,17 +52,17 @@ func (kst *katcSourceType) UnmarshalJSON(data []byte) error { } } -type dataProcessingStep struct { - name string - processingFunc func(ctx context.Context, slogger *slog.Logger, data []byte) ([]byte, error) +type rowTransformStep struct { + name string + transformFunc func(ctx context.Context, slogger *slog.Logger, row map[string][]byte) (map[string][]byte, error) } const ( - snappyDecodeProcessingStep = "snappy" - structuredCloneDeserializeProcessingStep = "structured_clone" + snappyDecodeTransformStep = "snappy" + structuredCloneDeserializeTransformStep = "structured_clone" ) -func (d *dataProcessingStep) UnmarshalJSON(data []byte) error { +func (r *rowTransformStep) UnmarshalJSON(data []byte) error { var s string err := json.Unmarshal(data, &s) if err != nil { @@ -73,13 +70,13 @@ func (d *dataProcessingStep) UnmarshalJSON(data []byte) error { } switch s { - case snappyDecodeProcessingStep: - d.name = snappyDecodeProcessingStep - d.processingFunc = snappyDecode + case snappyDecodeTransformStep: + r.name = snappyDecodeTransformStep + r.transformFunc = snappyDecode return nil - case structuredCloneDeserializeProcessingStep: - d.name = structuredCloneDeserializeProcessingStep - d.processingFunc = structuredCloneDeserialize + case structuredCloneDeserializeTransformStep: + r.name = structuredCloneDeserializeTransformStep + r.transformFunc = structuredCloneDeserialize return nil default: return fmt.Errorf("unknown data processing step %s", s) @@ -87,12 +84,12 @@ func (d *dataProcessingStep) UnmarshalJSON(data []byte) error { } type katcTableConfig struct { - Source katcSourceType `json:"source"` - Platform string `json:"platform"` - Columns []string `json:"columns"` - Path string `json:"path"` // Path to file holding data (e.g. sqlite file) -- wildcards supported - Query string `json:"query"` // Query to run against `path` - DataProcessingSteps []dataProcessingStep `json:"data_processing_steps"` + Source katcSourceType `json:"source"` + Platform string `json:"platform"` + Columns []string `json:"columns"` + Path string `json:"path"` // Path to file holding data (e.g. sqlite file) -- wildcards supported + Query string `json:"query"` // Query to run against `path` + RowTransformSteps []rowTransformStep `json:"row_transform_steps"` } func ConstructKATCTables(config map[string]string, slogger *slog.Logger) []osquery.OsqueryPlugin { diff --git a/ee/katc/config_test.go b/ee/katc/config_test.go index 166263e87..112e36d84 100644 --- a/ee/katc/config_test.go +++ b/ee/katc/config_test.go @@ -27,7 +27,7 @@ func TestConstructKATCTables(t *testing.T) { "columns": ["data"], "path": "/some/path/to/db.sqlite", "query": "SELECT data FROM object_data JOIN object_store ON (object_data.object_store_id = object_store.id) WHERE object_store.name=\"testtable\";", - "data_processing_steps": ["snappy"] + "row_transform_steps": ["snappy"] }`, runtime.GOOS), }, expectedPluginCount: 1, @@ -61,7 +61,7 @@ func TestConstructKATCTables(t *testing.T) { "columns": ["data"], "path": "/some/path/to/db.sqlite", "query": "SELECT data FROM object_data;", - "data_processing_steps": ["unknown_step"] + "row_transform_steps": ["unknown_step"] }`, runtime.GOOS), }, expectedPluginCount: 0, diff --git a/ee/katc/snappy.go b/ee/katc/snappy.go index 99d3370b2..6e2613e76 100644 --- a/ee/katc/snappy.go +++ b/ee/katc/snappy.go @@ -9,11 +9,17 @@ import ( ) // snappyDecode is a dataProcessingStep that decodes data compressed with snappy -func snappyDecode(ctx context.Context, _ *slog.Logger, data []byte) ([]byte, error) { - decodedResultBytes, err := snappy.Decode(nil, data) - if err != nil { - return nil, fmt.Errorf("decoding column: %w", err) +func snappyDecode(ctx context.Context, _ *slog.Logger, row map[string][]byte) (map[string][]byte, error) { + decodedRow := make(map[string][]byte) + + for k, v := range row { + decodedResultBytes, err := snappy.Decode(nil, v) + if err != nil { + return nil, fmt.Errorf("decoding data for key %s: %w", k, err) + } + + decodedRow[k] = decodedResultBytes } - return decodedResultBytes, nil + return decodedRow, nil } diff --git a/ee/katc/structured_clone.go b/ee/katc/structured_clone.go index 3e322816f..625b4b038 100644 --- a/ee/katc/structured_clone.go +++ b/ee/katc/structured_clone.go @@ -5,9 +5,11 @@ import ( "context" "encoding/binary" "encoding/json" + "errors" "fmt" "io" "log/slog" + "strconv" "golang.org/x/text/encoding/unicode" "golang.org/x/text/transform" @@ -45,7 +47,14 @@ const ( // * https://searchfox.org/mozilla-central/source/js/public/StructuredClone.h // * https://searchfox.org/mozilla-central/source/js/src/vm/StructuredClone.cpp (see especially JSStructuredCloneReader::read) // * https://html.spec.whatwg.org/multipage/structured-data.html#structureddeserialize -func structuredCloneDeserialize(ctx context.Context, slogger *slog.Logger, data []byte) ([]byte, error) { +func structuredCloneDeserialize(ctx context.Context, slogger *slog.Logger, row map[string][]byte) (map[string][]byte, error) { + // IndexedDB data is stored by key "data" pointing to the serialized object. We want to + // extract that serialized object, and discard the top-level "data" key. + data, ok := row["data"] + if !ok { + return nil, errors.New("row missing top-level data key") + } + srcReader := bytes.NewReader(data) // First, read the header @@ -72,13 +81,7 @@ func structuredCloneDeserialize(ctx context.Context, slogger *slog.Logger, data return nil, fmt.Errorf("reading top-level object: %w", err) } - // Marshal the object to return - objRaw, err := json.Marshal(resultObj) - if err != nil { - return nil, fmt.Errorf("marshalling result: %w", err) - } - - return objRaw, nil + return resultObj, nil } func nextPair(srcReader io.ByteReader) (uint32, uint32, error) { @@ -99,8 +102,8 @@ func nextPair(srcReader io.ByteReader) (uint32, uint32, error) { return binary.BigEndian.Uint32(pairBytes[0:4]), binary.BigEndian.Uint32(pairBytes[4:]), nil } -func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { - resultObj := make(map[string]any, 0) +func deserializeObject(srcReader io.ByteReader) (map[string][]byte, error) { + resultObj := make(map[string][]byte, 0) for { nextObjTag, nextObjData, err := nextPair(srcReader) @@ -121,6 +124,7 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { if err != nil { return nil, fmt.Errorf("reading string for tag %x: %w", nextObjTag, err) } + nextKeyStr := string(nextKey) // Read value valTag, valData, err := nextPair(srcReader) @@ -130,36 +134,36 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { switch valTag { case tagInt32: - resultObj[nextKey] = valData + resultObj[nextKeyStr] = []byte(strconv.Itoa(int(valData))) case tagString, tagStringObject: str, err := deserializeString(valData, srcReader) if err != nil { - return nil, fmt.Errorf("reading string for key %s: %w", nextKey, err) + return nil, fmt.Errorf("reading string for key %s: %w", nextKeyStr, err) } - resultObj[nextKey] = str + resultObj[nextKeyStr] = str case tagObjectObject: - obj, err := deserializeObject(srcReader) + obj, err := deserializeNestedObject(srcReader) if err != nil { - return nil, fmt.Errorf("reading object for key %s: %w", nextKey, err) + return nil, fmt.Errorf("reading object for key %s: %w", nextKeyStr, err) } - resultObj[nextKey] = obj + resultObj[nextKeyStr] = obj case tagArrayObject: arr, err := deserializeArray(valData, srcReader) if err != nil { - return nil, fmt.Errorf("reading array for key %s: %w", nextKey, err) + return nil, fmt.Errorf("reading array for key %s: %w", nextKeyStr, err) } - resultObj[nextKey] = arr + resultObj[nextKeyStr] = arr case tagNull, tagUndefined: - resultObj[nextKey] = nil + resultObj[nextKeyStr] = nil default: - return nil, fmt.Errorf("cannot process %s: unknown tag type %x", nextKey, valTag) + return nil, fmt.Errorf("cannot process %s: unknown tag type %x", nextKeyStr, valTag) } } return resultObj, nil } -func deserializeString(strData uint32, srcReader io.ByteReader) (string, error) { +func deserializeString(strData uint32, srcReader io.ByteReader) ([]byte, error) { strLen := strData & bitMask(31) isAscii := strData & (1 << 31) @@ -170,7 +174,7 @@ func deserializeString(strData uint32, srcReader io.ByteReader) (string, error) return deserializeUtf16String(strLen, srcReader) } -func deserializeAsciiString(strLen uint32, srcReader io.ByteReader) (string, error) { +func deserializeAsciiString(strLen uint32, srcReader io.ByteReader) ([]byte, error) { // Read bytes for string var i uint32 var err error @@ -178,7 +182,7 @@ func deserializeAsciiString(strLen uint32, srcReader io.ByteReader) (string, err for i = 0; i < strLen; i += 1 { strBytes[i], err = srcReader.ReadByte() if err != nil { - return "", fmt.Errorf("reading byte in string: %w", err) + return nil, fmt.Errorf("reading byte in string: %w", err) } } @@ -191,10 +195,10 @@ func deserializeAsciiString(strLen uint32, srcReader io.ByteReader) (string, err } } - return string(strBytes), nil + return strBytes, nil } -func deserializeUtf16String(strLen uint32, srcReader io.ByteReader) (string, error) { +func deserializeUtf16String(strLen uint32, srcReader io.ByteReader) ([]byte, error) { // Two bytes per char lenToRead := strLen * 2 var i uint32 @@ -203,7 +207,7 @@ func deserializeUtf16String(strLen uint32, srcReader io.ByteReader) (string, err for i = 0; i < lenToRead; i += 1 { strBytes[i], err = srcReader.ReadByte() if err != nil { - return "", fmt.Errorf("reading byte in string: %w", err) + return nil, fmt.Errorf("reading byte in string: %w", err) } } @@ -220,12 +224,12 @@ func deserializeUtf16String(strLen uint32, srcReader io.ByteReader) (string, err utf16Reader := transform.NewReader(bytes.NewReader(strBytes), unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()) decoded, err := io.ReadAll(utf16Reader) if err != nil { - return "", fmt.Errorf("decoding: %w", err) + return nil, fmt.Errorf("decoding: %w", err) } - return string(decoded), nil + return decoded, nil } -func deserializeArray(arrayLength uint32, srcReader io.ByteReader) ([]any, error) { +func deserializeArray(arrayLength uint32, srcReader io.ByteReader) ([]byte, error) { resultArr := make([]any, arrayLength) // We discard the next pair before reading the array. @@ -239,17 +243,42 @@ func deserializeArray(arrayLength uint32, srcReader io.ByteReader) ([]any, error switch itemTag { case tagObjectObject: - obj, err := deserializeObject(srcReader) + obj, err := deserializeNestedObject(srcReader) if err != nil { return nil, fmt.Errorf("reading object at index %d in array: %w", i, err) } - resultArr[i] = obj + resultArr[i] = string(obj) // cast to string so it's readable when marshalled again below default: return nil, fmt.Errorf("cannot process item at index %d in array: unsupported tag type %x", i, itemTag) } } - return resultArr, nil + arrBytes, err := json.Marshal(resultArr) + if err != nil { + return nil, fmt.Errorf("marshalling array: %w", err) + } + + return arrBytes, nil +} + +func deserializeNestedObject(srcReader io.ByteReader) ([]byte, error) { + nestedObj, err := deserializeObject(srcReader) + if err != nil { + return nil, fmt.Errorf("deserializing nested object: %w", err) + } + + // Make nested object values readable -- cast []byte to string + readableNestedObj := make(map[string]string) + for k, v := range nestedObj { + readableNestedObj[k] = string(v) + } + + resultObj, err := json.Marshal(readableNestedObj) + if err != nil { + return nil, fmt.Errorf("marshalling nested object: %w", err) + } + + return resultObj, nil } func bitMask(n uint32) uint32 { diff --git a/ee/katc/table.go b/ee/katc/table.go index 08e35c477..f8f63f015 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -37,17 +37,22 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex results := make([]map[string]string, 0) for _, s := range dataRaw { for _, dataRawRow := range s.rows { + // Make sure source is included in row data rowData := map[string]string{ sourcePathColumnName: s.path, } - for key, val := range dataRawRow { - // Run any processing steps on the data value - for _, dataProcessingStep := range k.cfg.DataProcessingSteps { - val, err = dataProcessingStep.processingFunc(ctx, k.slogger, val) - if err != nil { - return nil, fmt.Errorf("transforming data at key `%s`: %w", key, err) - } + + // Run any needed transformations on the row data + for _, step := range k.cfg.RowTransformSteps { + dataRawRow, err = step.transformFunc(ctx, k.slogger, dataRawRow) + if err != nil { + return nil, fmt.Errorf("running transform func %s: %w", step.name, err) } + } + + // After transformations have been applied, we can cast the data from []byte + // to string to return to osquery. + for key, val := range dataRawRow { rowData[key] = string(val) } results = append(results, rowData) From 9868c167d9d8deee4552b11f9a7e47ee3218d63e Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 12:32:12 -0400 Subject: [PATCH 09/21] Add source path constraint filtering so we don't run queries against unmatched sources --- ee/katc/config.go | 22 +------- ee/katc/sqlite.go | 12 +++- ee/katc/table.go | 139 ++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 141 insertions(+), 32 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index f1a2b71b7..072468869 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -12,14 +12,9 @@ import ( "github.com/osquery/osquery-go/plugin/table" ) -/* -TODOs: -- Need to do queryContext filtering -*/ - type katcSourceType struct { name string - dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string) ([]sourceData, error) + dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string, sourceConstraints *table.ConstraintList) ([]sourceData, error) } type sourceData struct { @@ -109,20 +104,7 @@ func ConstructKATCTables(config map[string]string, slogger *slog.Logger) []osque continue } - columns := []table.ColumnDefinition{ - { - Name: sourcePathColumnName, - Type: table.ColumnTypeText, - }, - } - for i := 0; i < len(cfg.Columns); i += 1 { - columns = append(columns, table.ColumnDefinition{ - Name: cfg.Columns[i], - Type: table.ColumnTypeText, - }) - } - - t := newKatcTable(tableName, cfg, slogger) + t, columns := newKatcTable(tableName, cfg, slogger) plugins = append(plugins, table.NewPlugin(tableName, columns, t.generate)) } diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index f392a68f9..1d041de7a 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -7,11 +7,12 @@ import ( "log/slog" "path/filepath" + "github.com/osquery/osquery-go/plugin/table" _ "modernc.org/sqlite" ) // sqliteData is the dataFunc for sqlite KATC tables -func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, query string) ([]sourceData, error) { +func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, query string, sourceConstraints *table.ConstraintList) ([]sourceData, error) { sqliteDbs, err := filepath.Glob(pathPattern) if err != nil { return nil, fmt.Errorf("globbing for files with pattern %s: %w", pathPattern, err) @@ -19,6 +20,15 @@ func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, q results := make([]sourceData, 0) for _, sqliteDb := range sqliteDbs { + // Check to make sure `sqliteDb` adheres to sourceConstraints + valid, err := sourcePathAdheresToSourceConstraints(sqliteDb, sourceConstraints) + if err != nil { + return nil, fmt.Errorf("checking source path constraints: %w", err) + } + if !valid { + continue + } + rowsFromDb, err := querySqliteDb(ctx, slogger, sqliteDb, query) if err != nil { return nil, fmt.Errorf("querying %s: %w", sqliteDb, err) diff --git a/ee/katc/table.go b/ee/katc/table.go index f8f63f015..93f9db851 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -4,6 +4,8 @@ import ( "context" "fmt" "log/slog" + "regexp" + "strings" "github.com/osquery/osquery-go/plugin/table" ) @@ -11,30 +13,49 @@ import ( const sourcePathColumnName = "source_path" type katcTable struct { - cfg katcTableConfig - slogger *slog.Logger + cfg katcTableConfig + columnLookup map[string]struct{} + slogger *slog.Logger } -func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) *katcTable { +func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) (*katcTable, []table.ColumnDefinition) { + columns := []table.ColumnDefinition{ + { + Name: sourcePathColumnName, + Type: table.ColumnTypeText, + }, + } + columnLookup := map[string]struct{}{ + sourcePathColumnName: {}, + } + for i := 0; i < len(cfg.Columns); i += 1 { + columns = append(columns, table.ColumnDefinition{ + Name: cfg.Columns[i], + Type: table.ColumnTypeText, + }) + columnLookup[cfg.Columns[i]] = struct{}{} + } + return &katcTable{ - cfg: cfg, + cfg: cfg, + columnLookup: columnLookup, slogger: slogger.With( "table_name", tableName, "table_type", cfg.Source, "table_path", cfg.Path, ), - } + }, columns } func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContext) ([]map[string]string, error) { // Fetch data from our table source - dataRaw, err := k.cfg.Source.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query) + dataRaw, err := k.cfg.Source.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query, getSourceConstraint(queryContext)) if err != nil { return nil, fmt.Errorf("fetching data: %w", err) } // Process data - results := make([]map[string]string, 0) + transformedResults := make([]map[string]string, 0) for _, s := range dataRaw { for _, dataRawRow := range s.rows { // Make sure source is included in row data @@ -55,12 +76,108 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex for key, val := range dataRawRow { rowData[key] = string(val) } - results = append(results, rowData) + transformedResults = append(transformedResults, rowData) + } + } + + // Now, filter data to ensure we only return columns in k.columnLookup + filteredResults := make([]map[string]string, 0) + for _, row := range transformedResults { + includeRow := true + filteredRow := make(map[string]string) + for column, data := range row { + if _, expectedColumn := k.columnLookup[column]; !expectedColumn { + k.slogger.Log(ctx, slog.LevelWarn, + "results contained unknown column, discarding", + "column", column, + ) + continue + } + + filteredRow[column] = data + + // No need to check the rest of the row + if !includeRow { + break + } } + + if includeRow { + filteredResults = append(filteredResults, filteredRow) + } + } + + return filteredResults, nil +} + +func getSourceConstraint(queryContext table.QueryContext) *table.ConstraintList { + sourceConstraint, sourceConstraintExists := queryContext.Constraints[sourcePathColumnName] + if sourceConstraintExists { + return &sourceConstraint + } + return nil +} + +func sourcePathAdheresToSourceConstraints(sourcePath string, sourceConstraints *table.ConstraintList) (bool, error) { + if sourceConstraints == nil { + return true, nil } - // Now, filter data as needed - // TODO queryContext + validPath := true + for _, sourceConstraint := range sourceConstraints.Constraints { + switch sourceConstraint.Operator { + case table.OperatorEquals: + if sourcePath != sourceConstraint.Expression { + validPath = false + } + case table.OperatorLike: + // Transform the expression into a regex to test if we have a match. + likeRegexpStr := regexp.QuoteMeta(sourceConstraint.Expression) + // % matches zero or more characters + likeRegexpStr = strings.Replace(likeRegexpStr, "%", `.*`, -1) + // _ matches a single character + likeRegexpStr = strings.Replace(likeRegexpStr, "_", `.`, -1) + // LIKE is case-insensitive + likeRegexpStr = `(?i)` + likeRegexpStr + r, err := regexp.Compile(likeRegexpStr) + if err != nil { + return false, fmt.Errorf("invalid LIKE statement: %w", err) + } + if !r.MatchString(sourcePath) { + validPath = false + } + case table.OperatorGlob: + // Transform the expression into a regex to test if we have a match. + // Unlike LIKE, GLOB is case-sensitive. + globRegexpStr := regexp.QuoteMeta(sourceConstraint.Expression) + // * matches zero or more characters + globRegexpStr = strings.Replace(globRegexpStr, `\*`, `.*`, -1) + // ? matches a single character + globRegexpStr = strings.Replace(globRegexpStr, `\?`, `.`, -1) + r, err := regexp.Compile(globRegexpStr) + if err != nil { + return false, fmt.Errorf("invalid GLOB statement: %w", err) + } + if !r.MatchString(sourcePath) { + validPath = false + } + case table.OperatorRegexp: + r, err := regexp.Compile(sourceConstraint.Expression) + if err != nil { + return false, fmt.Errorf("invalid regex: %w", err) + } + if !r.MatchString(sourcePath) { + validPath = false + } + default: + return false, fmt.Errorf("operator %v not valid source constraint", sourceConstraint.Operator) + } + + // No need to check other constraints + if !validPath { + break + } + } - return results, nil + return validPath, nil } From 915936648a7e1deeb9b6271dbc986880526f9f69 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 13:55:11 -0400 Subject: [PATCH 10/21] Add test for constraint checks --- ee/katc/table_test.go | 211 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 ee/katc/table_test.go diff --git a/ee/katc/table_test.go b/ee/katc/table_test.go new file mode 100644 index 000000000..d97335580 --- /dev/null +++ b/ee/katc/table_test.go @@ -0,0 +1,211 @@ +package katc + +import ( + "path/filepath" + "testing" + + "github.com/osquery/osquery-go/plugin/table" + "github.com/stretchr/testify/require" +) + +func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + testCaseName string + sourcePath string + constraints table.ConstraintList + adheres bool + errorExpected bool + }{ + { + testCaseName: "equals", + sourcePath: filepath.Join("some", "path", "to", "a", "source"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorEquals, + Expression: filepath.Join("some", "path", "to", "a", "source"), + }, + }, + }, + adheres: true, + errorExpected: false, + }, + { + testCaseName: "not equals", + sourcePath: filepath.Join("some", "path", "to", "a", "source"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorEquals, + Expression: filepath.Join("a", "path", "to", "a", "different", "source"), + }, + }, + }, + adheres: false, + errorExpected: false, + }, + { + testCaseName: "LIKE with % wildcard", + sourcePath: filepath.Join("a", "path", "to", "db.sqlite"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorLike, + Expression: filepath.Join("a", "path", "to", "%.sqlite"), + }, + }, + }, + adheres: true, + errorExpected: false, + }, + { + testCaseName: "LIKE with underscore wildcard", + sourcePath: filepath.Join("a", "path", "to", "db.sqlite"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorLike, + Expression: filepath.Join("_", "path", "to", "db.sqlite"), + }, + }, + }, + adheres: true, + errorExpected: false, + }, + { + testCaseName: "LIKE is case-insensitive", + sourcePath: filepath.Join("a", "path", "to", "db.sqlite"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorLike, + Expression: filepath.Join("A", "PATH", "TO", "DB.%"), + }, + }, + }, + adheres: true, + }, + { + testCaseName: "GLOB with * wildcard", + sourcePath: filepath.Join("another", "path", "to", "a", "source"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorGlob, + Expression: filepath.Join("another", "*", "to", "a", "source"), + }, + }, + }, + adheres: true, + errorExpected: false, + }, + { + testCaseName: "GLOB with ? wildcard", + sourcePath: filepath.Join("another", "path", "to", "a", "source"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorGlob, + Expression: filepath.Join("another", "path", "to", "?", "source"), + }, + }, + }, + adheres: true, + errorExpected: false, + }, + { + testCaseName: "regexp", + sourcePath: filepath.Join("test", "path", "to", "a", "source"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorRegexp, + Expression: `.*source$`, + }, + }, + }, + adheres: true, + errorExpected: false, + }, + { + testCaseName: "invalid regexp", + sourcePath: filepath.Join("test", "path", "to", "a", "source"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorRegexp, + Expression: `invalid\`, + }, + }, + }, + adheres: false, + errorExpected: true, + }, + { + testCaseName: "unsupported", + sourcePath: filepath.Join("test", "path", "to", "a", "source", "2"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorUnique, + Expression: filepath.Join("test", "path", "to", "a", "source", "2"), + }, + }, + }, + adheres: false, + errorExpected: true, + }, + { + testCaseName: "multiple constraints where one does not match", + sourcePath: filepath.Join("test", "path", "to", "a", "source", "3"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorLike, + Expression: filepath.Join("test", "path", "to", "a", "source", "%"), + }, + { + Operator: table.OperatorEquals, + Expression: filepath.Join("some", "path", "to", "a", "source"), + }, + }, + }, + adheres: false, + errorExpected: false, + }, + { + testCaseName: "multiple constraints where all match", + sourcePath: filepath.Join("test", "path", "to", "a", "source", "3"), + constraints: table.ConstraintList{ + Constraints: []table.Constraint{ + { + Operator: table.OperatorLike, + Expression: filepath.Join("test", "path", "to", "a", "source", "%"), + }, + { + Operator: table.OperatorEquals, + Expression: filepath.Join("test", "path", "to", "a", "source", "3"), + }, + }, + }, + adheres: true, + errorExpected: false, + }, + } { + tt := tt + t.Run(tt.testCaseName, func(t *testing.T) { + t.Parallel() + + adheres, err := sourcePathAdheresToSourceConstraints(tt.sourcePath, &tt.constraints) + if tt.errorExpected { + require.Error(t, err, "expected error on checking constraints") + } else { + require.NoError(t, err, "expected no error on checking constraints") + } + + require.Equal(t, tt.adheres, adheres, "incorrect result checking constraints") + }) + } +} From 1e32327b7fb4a1a6f09bf6f885703c431d050850 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 13:56:21 -0400 Subject: [PATCH 11/21] Rename function for brevity --- ee/katc/sqlite.go | 2 +- ee/katc/table.go | 2 +- ee/katc/table_test.go | 32 ++++++++++++++++---------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index 1d041de7a..e8d649920 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -21,7 +21,7 @@ func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, q results := make([]sourceData, 0) for _, sqliteDb := range sqliteDbs { // Check to make sure `sqliteDb` adheres to sourceConstraints - valid, err := sourcePathAdheresToSourceConstraints(sqliteDb, sourceConstraints) + valid, err := checkSourcePathConstraints(sqliteDb, sourceConstraints) if err != nil { return nil, fmt.Errorf("checking source path constraints: %w", err) } diff --git a/ee/katc/table.go b/ee/katc/table.go index 93f9db851..039da5484 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -118,7 +118,7 @@ func getSourceConstraint(queryContext table.QueryContext) *table.ConstraintList return nil } -func sourcePathAdheresToSourceConstraints(sourcePath string, sourceConstraints *table.ConstraintList) (bool, error) { +func checkSourcePathConstraints(sourcePath string, sourceConstraints *table.ConstraintList) (bool, error) { if sourceConstraints == nil { return true, nil } diff --git a/ee/katc/table_test.go b/ee/katc/table_test.go index d97335580..0418f034d 100644 --- a/ee/katc/table_test.go +++ b/ee/katc/table_test.go @@ -8,14 +8,14 @@ import ( "github.com/stretchr/testify/require" ) -func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { +func Test_checkSourcePathConstraints(t *testing.T) { t.Parallel() for _, tt := range []struct { testCaseName string sourcePath string constraints table.ConstraintList - adheres bool + valid bool errorExpected bool }{ { @@ -29,7 +29,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: true, + valid: true, errorExpected: false, }, { @@ -43,7 +43,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: false, + valid: false, errorExpected: false, }, { @@ -57,7 +57,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: true, + valid: true, errorExpected: false, }, { @@ -71,7 +71,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: true, + valid: true, errorExpected: false, }, { @@ -85,7 +85,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: true, + valid: true, }, { testCaseName: "GLOB with * wildcard", @@ -98,7 +98,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: true, + valid: true, errorExpected: false, }, { @@ -112,7 +112,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: true, + valid: true, errorExpected: false, }, { @@ -126,7 +126,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: true, + valid: true, errorExpected: false, }, { @@ -140,7 +140,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: false, + valid: false, errorExpected: true, }, { @@ -154,7 +154,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: false, + valid: false, errorExpected: true, }, { @@ -172,7 +172,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: false, + valid: false, errorExpected: false, }, { @@ -190,7 +190,7 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { }, }, }, - adheres: true, + valid: true, errorExpected: false, }, } { @@ -198,14 +198,14 @@ func Test_sourcePathAdheresToSourceConstraints(t *testing.T) { t.Run(tt.testCaseName, func(t *testing.T) { t.Parallel() - adheres, err := sourcePathAdheresToSourceConstraints(tt.sourcePath, &tt.constraints) + valid, err := checkSourcePathConstraints(tt.sourcePath, &tt.constraints) if tt.errorExpected { require.Error(t, err, "expected error on checking constraints") } else { require.NoError(t, err, "expected no error on checking constraints") } - require.Equal(t, tt.adheres, adheres, "incorrect result checking constraints") + require.Equal(t, tt.valid, valid, "incorrect result checking constraints") }) } } From af341ae123897b75663ac97adf1d954dd23b6427 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 14:10:39 -0400 Subject: [PATCH 12/21] Add documentation --- ee/katc/config.go | 13 ++++++++++++ ee/katc/snappy.go | 3 ++- ee/katc/structured_clone.go | 41 ++++++++++++++----------------------- ee/katc/table.go | 6 ++++++ 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index 072468869..5ef0ca6c0 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -12,11 +12,17 @@ import ( "github.com/osquery/osquery-go/plugin/table" ) +// katcSourceType defines a source of data for a KATC table. The `name` is the +// identifier parsed from the JSON KATC config, and the `dataFunc` is the function +// that performs the query against the source. type katcSourceType struct { name string dataFunc func(ctx context.Context, slogger *slog.Logger, path string, query string, sourceConstraints *table.ConstraintList) ([]sourceData, error) } +// sourceData holds the result of calling `katcSourceType.dataFunc`. It maps the +// source's path to the query results. (A config may have wildcards in the path, +// allowing for querying against multiple source paths.) type sourceData struct { path string rows []map[string][]byte @@ -47,6 +53,9 @@ func (kst *katcSourceType) UnmarshalJSON(data []byte) error { } } +// rowTransformStep defines an operation performed against a row of data +// returned from a source. The `name` is the identifier parsed from the +// JSON KATC config. type rowTransformStep struct { name string transformFunc func(ctx context.Context, slogger *slog.Logger, row map[string][]byte) (map[string][]byte, error) @@ -78,6 +87,8 @@ func (r *rowTransformStep) UnmarshalJSON(data []byte) error { } } +// katcTableConfig is the configuration for a specific KATC table. The control server +// sends down these configurations. type katcTableConfig struct { Source katcSourceType `json:"source"` Platform string `json:"platform"` @@ -87,6 +98,8 @@ type katcTableConfig struct { RowTransformSteps []rowTransformStep `json:"row_transform_steps"` } +// ConstructKATCTables takes stored configuration of KATC tables, parses the configuration, +// and returns the constructed tables. func ConstructKATCTables(config map[string]string, slogger *slog.Logger) []osquery.OsqueryPlugin { plugins := make([]osquery.OsqueryPlugin, 0) for tableName, tableConfigStr := range config { diff --git a/ee/katc/snappy.go b/ee/katc/snappy.go index 6e2613e76..6b9cece99 100644 --- a/ee/katc/snappy.go +++ b/ee/katc/snappy.go @@ -8,7 +8,8 @@ import ( "github.com/golang/snappy" ) -// snappyDecode is a dataProcessingStep that decodes data compressed with snappy +// snappyDecode is a dataProcessingStep that decodes data compressed with snappy. +// We use this to decode data retrieved from Firefox IndexedDB sqlite-backed databases. func snappyDecode(ctx context.Context, _ *slog.Logger, row map[string][]byte) (map[string][]byte, error) { decodedRow := make(map[string][]byte) diff --git a/ee/katc/structured_clone.go b/ee/katc/structured_clone.go index 625b4b038..854793f09 100644 --- a/ee/katc/structured_clone.go +++ b/ee/katc/structured_clone.go @@ -16,37 +16,24 @@ import ( ) const ( - tagFloatMax uint32 = 0xfff00000 - tagHeader uint32 = 0xfff10000 - tagNull uint32 = 0xffff0000 - tagUndefined uint32 = 0xffff0001 - tagBoolean uint32 = 0xffff0002 - tagInt32 uint32 = 0xffff0003 - tagString uint32 = 0xffff0004 - tagDateObject uint32 = 0xffff0005 - tagRegexpObject uint32 = 0xffff0006 - tagArrayObject uint32 = 0xffff0007 - tagObjectObject uint32 = 0xffff0008 - tagArrayBufferObjectV2 uint32 = 0xffff0009 - tagBooleanObject uint32 = 0xffff000a - tagStringObject uint32 = 0xffff000b - tagNumberObject uint32 = 0xffff000c - tagBackReferenceObject uint32 = 0xffff000d - tagDoNotUse1 uint32 = 0xffff000e - tagDoNotUse2 uint32 = 0xffff000f - tagTypedArrayObjectV2 uint32 = 0xffff0010 - tagMapObject uint32 = 0xffff0011 - tagSetObject uint32 = 0xffff0012 - tagEndOfKeys uint32 = 0xffff0013 + tagHeader uint32 = 0xfff10000 + tagNull uint32 = 0xffff0000 + tagUndefined uint32 = 0xffff0001 + tagBoolean uint32 = 0xffff0002 + tagInt32 uint32 = 0xffff0003 + tagString uint32 = 0xffff0004 + tagArrayObject uint32 = 0xffff0007 + tagObjectObject uint32 = 0xffff0008 + tagBooleanObject uint32 = 0xffff000a + tagStringObject uint32 = 0xffff000b + tagEndOfKeys uint32 = 0xffff0013 ) -// structuredCloneDeserialize deserializes a JS object that has been stored in IndexedDB -// by Firefox. +// structuredCloneDeserialize deserializes a JS object that has been stored by Firefox +// in IndexedDB sqlite-backed databases. // References: // * https://stackoverflow.com/a/59923297 -// * https://searchfox.org/mozilla-central/source/js/public/StructuredClone.h // * https://searchfox.org/mozilla-central/source/js/src/vm/StructuredClone.cpp (see especially JSStructuredCloneReader::read) -// * https://html.spec.whatwg.org/multipage/structured-data.html#structureddeserialize func structuredCloneDeserialize(ctx context.Context, slogger *slog.Logger, row map[string][]byte) (map[string][]byte, error) { // IndexedDB data is stored by key "data" pointing to the serialized object. We want to // extract that serialized object, and discard the top-level "data" key. @@ -84,6 +71,7 @@ func structuredCloneDeserialize(ctx context.Context, slogger *slog.Logger, row m return resultObj, nil } +// nextPair returns the next (tag, data) pair from `srcReader`. func nextPair(srcReader io.ByteReader) (uint32, uint32, error) { // Tags and data are written as a singular little-endian uint64 value. // For example, the pair (`tagBoolean`, 1) is written as 01 00 00 00 02 00 FF FF, @@ -102,6 +90,7 @@ func nextPair(srcReader io.ByteReader) (uint32, uint32, error) { return binary.BigEndian.Uint32(pairBytes[0:4]), binary.BigEndian.Uint32(pairBytes[4:]), nil } +// deserializeObject deserializes the next object from `srcReader`. func deserializeObject(srcReader io.ByteReader) (map[string][]byte, error) { resultObj := make(map[string][]byte, 0) diff --git a/ee/katc/table.go b/ee/katc/table.go index 039da5484..39780c761 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -12,12 +12,15 @@ import ( const sourcePathColumnName = "source_path" +// katcTable is a Kolide ATC table. It queries the source and transforms the response data +// per the configuration in its `cfg`. type katcTable struct { cfg katcTableConfig columnLookup map[string]struct{} slogger *slog.Logger } +// newKatcTable returns a new table with the given `cfg`, as well as the osquery columns for that table. func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) (*katcTable, []table.ColumnDefinition) { columns := []table.ColumnDefinition{ { @@ -47,6 +50,7 @@ func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) ( }, columns } +// generate handles queries against a KATC table. func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContext) ([]map[string]string, error) { // Fetch data from our table source dataRaw, err := k.cfg.Source.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query, getSourceConstraint(queryContext)) @@ -110,6 +114,7 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex return filteredResults, nil } +// getSourceConstraint retrieves any constraints against the `source_path` column func getSourceConstraint(queryContext table.QueryContext) *table.ConstraintList { sourceConstraint, sourceConstraintExists := queryContext.Constraints[sourcePathColumnName] if sourceConstraintExists { @@ -118,6 +123,7 @@ func getSourceConstraint(queryContext table.QueryContext) *table.ConstraintList return nil } +// checkSourcePathConstraints validates whether a given `sourcePath` matches the given constraints. func checkSourcePathConstraints(sourcePath string, sourceConstraints *table.ConstraintList) (bool, error) { if sourceConstraints == nil { return true, nil From 3d0c13547a9ccb15ed80ec675fdea628fefa37a3 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 16:01:26 -0400 Subject: [PATCH 13/21] Add a table test --- ee/katc/table_test.go | 121 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/ee/katc/table_test.go b/ee/katc/table_test.go index 0418f034d..4aed48e82 100644 --- a/ee/katc/table_test.go +++ b/ee/katc/table_test.go @@ -1,13 +1,134 @@ package katc import ( + "context" + "database/sql" + "os" "path/filepath" + "runtime" "testing" + "github.com/golang/snappy" + "github.com/google/uuid" + "github.com/kolide/launcher/pkg/log/multislogger" "github.com/osquery/osquery-go/plugin/table" "github.com/stretchr/testify/require" + + _ "modernc.org/sqlite" ) +func Test_generate_SqliteBackedIndexedDB(t *testing.T) { + t.Parallel() + + // This test validates generation of table results. It uses a sqlite-backed + // IndexedDB as a source, which means it also exercises functionality from + // sqlite.go, snappy.go, and structured_clone.go. + + // First, set up the data we expect to retrieve. + expectedColumn := "uuid" + u, err := uuid.NewRandom() + require.NoError(t, err, "generating test UUID") + expectedColumnValue := u.String() + + // Serialize the row data, reversing the deserialization operation in + // structured_clone.go. + serializedUuid := []byte(expectedColumnValue) + serializedObj := append([]byte{ + // Header + 0x00, 0x00, 0x00, 0x00, // header tag data -- discarded + 0x00, 0x00, 0xf1, 0xff, // LE `tagHeader` + // Begin object + 0x00, 0x00, 0x00, 0x00, // object tag data -- discarded + 0x08, 0x00, 0xff, 0xff, // LE `tagObject` + // Begin UUID key + 0x04, 0x00, 0x00, 0x80, // LE data about upcoming string: length 4 (remaining bytes), is ASCII + 0x04, 0x00, 0xff, 0xff, // LE `tagString` + 0x75, 0x75, 0x69, 0x64, // "uuid" + 0x00, 0x00, 0x00, 0x00, // padding to get to 8-byte word boundary + // End UUID key + // Begin UUID value + 0x24, 0x00, 0x00, 0x80, // LE data about upcoming string: length 36 (remaining bytes), is ASCII + 0x04, 0x00, 0xff, 0xff, // LE `tagString` + }, + serializedUuid..., + ) + serializedObj = append(serializedObj, + 0x00, 0x00, 0x00, 0x00, // padding to get to 8-byte word boundary for UUID string + // End UUID value + 0x00, 0x00, 0x00, 0x00, // tag data -- discarded + 0x13, 0x00, 0xff, 0xff, // LE `tagEndOfKeys` 0xffff0013 + ) + + // Now compress the serialized row data, reversing the decompression operation + // in snappy.go + compressedObj := snappy.Encode(nil, serializedObj) + + // Now, create a sqlite database to store this data in. + databaseDir := t.TempDir() + sourceFilepath := filepath.Join(databaseDir, "test.sqlite") + f, err := os.Create(sourceFilepath) + require.NoError(t, err, "creating source db") + require.NoError(t, f.Close(), "closing source db file") + conn, err := sql.Open("sqlite", sourceFilepath) + require.NoError(t, err) + _, err = conn.Exec(`CREATE TABLE object_data(data TEXT NOT NULL PRIMARY KEY) WITHOUT ROWID;`) + require.NoError(t, err, "creating test table") + + // Insert compressed object into the database + _, err = conn.Exec("INSERT INTO object_data (data) VALUES (?);", compressedObj) + require.NoError(t, err, "inserting into sqlite database") + require.NoError(t, conn.Close(), "closing sqlite database") + + // At long last, our source is adequately configured. + // Move on to constructing our KATC table. + cfg := katcTableConfig{ + Source: katcSourceType{ + name: sqliteSourceType, + dataFunc: sqliteData, + }, + Platform: runtime.GOOS, + Columns: []string{expectedColumn}, + Path: filepath.Join(databaseDir, "*.sqlite"), // All sqlite files in the test directory + Query: "SELECT data FROM object_data;", + RowTransformSteps: []rowTransformStep{ + { + name: snappyDecodeTransformStep, + transformFunc: snappyDecode, + }, + { + name: structuredCloneDeserializeTransformStep, + transformFunc: structuredCloneDeserialize, + }, + }, + } + testTable, _ := newKatcTable("test_katc_table", cfg, multislogger.NewNopLogger()) + + // Make a query context restricting the source to our exact source sqlite database + queryContext := table.QueryContext{ + Constraints: map[string]table.ConstraintList{ + sourcePathColumnName: { + Constraints: []table.Constraint{ + { + Operator: table.OperatorEquals, + Expression: sourceFilepath, + }, + }, + }, + }, + } + + // At long last: run a query + results, err := testTable.generate(context.TODO(), queryContext) + require.NoError(t, err) + + // Validate results + require.Equal(t, 1, len(results), "only one row expected") + require.Contains(t, results[0], sourcePathColumnName, "missing source_path column") + require.Equal(t, sourceFilepath, results[0][sourcePathColumnName]) + require.Contains(t, results[0], expectedColumn, "expected column missing") + require.Equal(t, expectedColumnValue, results[0][expectedColumn], "data mismatch") +} + func Test_checkSourcePathConstraints(t *testing.T) { t.Parallel() From 212b04705732a6f74ecc67aa1f266f5941c995a8 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 16:22:17 -0400 Subject: [PATCH 14/21] discard column log is way too noisy, remove it --- ee/katc/table.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ee/katc/table.go b/ee/katc/table.go index 39780c761..72e643a7e 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -91,10 +91,7 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex filteredRow := make(map[string]string) for column, data := range row { if _, expectedColumn := k.columnLookup[column]; !expectedColumn { - k.slogger.Log(ctx, slog.LevelWarn, - "results contained unknown column, discarding", - "column", column, - ) + // Silently discard the column+data continue } From f7c7eb89c71418fcbe30ce29296922b1e4a4f771 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Fri, 28 Jun 2024 16:32:05 -0400 Subject: [PATCH 15/21] Remove source type until implemented --- ee/katc/config.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index 5ef0ca6c0..cb04358ae 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -3,7 +3,6 @@ package katc import ( "context" "encoding/json" - "errors" "fmt" "log/slog" "runtime" @@ -29,8 +28,7 @@ type sourceData struct { } const ( - sqliteSourceType = "sqlite" - indexedDBSourceType = "indexeddb" + sqliteSourceType = "sqlite" ) func (kst *katcSourceType) UnmarshalJSON(data []byte) error { @@ -45,9 +43,6 @@ func (kst *katcSourceType) UnmarshalJSON(data []byte) error { kst.name = sqliteSourceType kst.dataFunc = sqliteData return nil - case indexedDBSourceType: - kst.name = indexedDBSourceType - return errors.New("indexeddb is not yet implemented") default: return fmt.Errorf("unknown table type %s", s) } From 3efe890ea5a5b0b0b4c2295c2d52cff5e7812182 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Mon, 1 Jul 2024 10:14:58 -0400 Subject: [PATCH 16/21] Rename to disambiguate source (type of table) and source (specific location of data) --- ee/katc/config.go | 8 ++++---- ee/katc/config_test.go | 34 ++++++++++++++++++++++++++------ ee/katc/sqlite.go | 2 +- ee/katc/table.go | 44 +++++++++++++++++++++--------------------- ee/katc/table_test.go | 38 ++++++++++++++++++------------------ 5 files changed, 74 insertions(+), 52 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index cb04358ae..706eddaf8 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -20,8 +20,8 @@ type katcSourceType struct { } // sourceData holds the result of calling `katcSourceType.dataFunc`. It maps the -// source's path to the query results. (A config may have wildcards in the path, -// allowing for querying against multiple source paths.) +// source to the query results. (A config may have wildcards in the source, +// allowing for querying against multiple sources.) type sourceData struct { path string rows []map[string][]byte @@ -85,10 +85,10 @@ func (r *rowTransformStep) UnmarshalJSON(data []byte) error { // katcTableConfig is the configuration for a specific KATC table. The control server // sends down these configurations. type katcTableConfig struct { - Source katcSourceType `json:"source"` + SourceType katcSourceType `json:"source_type"` + Source string `json:"source"` // Describes how to connect to source (e.g. path to db) -- wildcards supported Platform string `json:"platform"` Columns []string `json:"columns"` - Path string `json:"path"` // Path to file holding data (e.g. sqlite file) -- wildcards supported Query string `json:"query"` // Query to run against `path` RowTransformSteps []rowTransformStep `json:"row_transform_steps"` } diff --git a/ee/katc/config_test.go b/ee/katc/config_test.go index 112e36d84..e7ce530e1 100644 --- a/ee/katc/config_test.go +++ b/ee/katc/config_test.go @@ -22,16 +22,38 @@ func TestConstructKATCTables(t *testing.T) { testCaseName: "snappy_sqlite", katcConfig: map[string]string{ "kolide_snappy_sqlite_test": fmt.Sprintf(`{ - "source": "sqlite", + "source_type": "sqlite", "platform": "%s", "columns": ["data"], - "path": "/some/path/to/db.sqlite", + "source": "/some/path/to/db.sqlite", "query": "SELECT data FROM object_data JOIN object_store ON (object_data.object_store_id = object_store.id) WHERE object_store.name=\"testtable\";", "row_transform_steps": ["snappy"] }`, runtime.GOOS), }, expectedPluginCount: 1, }, + { + testCaseName: "multiple plugins", + katcConfig: map[string]string{ + "test_1": fmt.Sprintf(`{ + "source_type": "sqlite", + "platform": "%s", + "columns": ["data"], + "source": "/some/path/to/db.sqlite", + "query": "SELECT data FROM object_data;", + "row_transform_steps": ["snappy"] + }`, runtime.GOOS), + "test_2": fmt.Sprintf(`{ + "source_type": "sqlite", + "platform": "%s", + "columns": ["col1", "col2"], + "source": "/some/path/to/a/different/db.sqlite", + "query": "SELECT col1, col2 FROM some_table;", + "row_transform_steps": [] + }`, runtime.GOOS), + }, + expectedPluginCount: 2, + }, { testCaseName: "malformed config", katcConfig: map[string]string{ @@ -43,10 +65,10 @@ func TestConstructKATCTables(t *testing.T) { testCaseName: "invalid table source", katcConfig: map[string]string{ "kolide_snappy_test": fmt.Sprintf(`{ - "source": "unknown_source", + "source_type": "unknown_source", "platform": "%s", "columns": ["data"], - "path": "/some/path/to/db.sqlite", + "source": "/some/path/to/db.sqlite", "query": "SELECT data FROM object_data;" }`, runtime.GOOS), }, @@ -56,10 +78,10 @@ func TestConstructKATCTables(t *testing.T) { testCaseName: "invalid data processing step type", katcConfig: map[string]string{ "kolide_snappy_test": fmt.Sprintf(`{ - "source": "sqlite", + "source_type": "sqlite", "platform": "%s", "columns": ["data"], - "path": "/some/path/to/db.sqlite", + "source": "/some/path/to/db.sqlite", "query": "SELECT data FROM object_data;", "row_transform_steps": ["unknown_step"] }`, runtime.GOOS), diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index e8d649920..2b3414423 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -21,7 +21,7 @@ func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, q results := make([]sourceData, 0) for _, sqliteDb := range sqliteDbs { // Check to make sure `sqliteDb` adheres to sourceConstraints - valid, err := checkSourcePathConstraints(sqliteDb, sourceConstraints) + valid, err := checkSourceConstraints(sqliteDb, sourceConstraints) if err != nil { return nil, fmt.Errorf("checking source path constraints: %w", err) } diff --git a/ee/katc/table.go b/ee/katc/table.go index 72e643a7e..8dd8af9f3 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -10,7 +10,7 @@ import ( "github.com/osquery/osquery-go/plugin/table" ) -const sourcePathColumnName = "source_path" +const sourceColumnName = "source" // katcTable is a Kolide ATC table. It queries the source and transforms the response data // per the configuration in its `cfg`. @@ -24,12 +24,12 @@ type katcTable struct { func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) (*katcTable, []table.ColumnDefinition) { columns := []table.ColumnDefinition{ { - Name: sourcePathColumnName, + Name: sourceColumnName, Type: table.ColumnTypeText, }, } columnLookup := map[string]struct{}{ - sourcePathColumnName: {}, + sourceColumnName: {}, } for i := 0; i < len(cfg.Columns); i += 1 { columns = append(columns, table.ColumnDefinition{ @@ -44,8 +44,8 @@ func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) ( columnLookup: columnLookup, slogger: slogger.With( "table_name", tableName, - "table_type", cfg.Source, - "table_path", cfg.Path, + "table_type", cfg.SourceType, + "table_source", cfg.Source, ), }, columns } @@ -53,7 +53,7 @@ func newKatcTable(tableName string, cfg katcTableConfig, slogger *slog.Logger) ( // generate handles queries against a KATC table. func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContext) ([]map[string]string, error) { // Fetch data from our table source - dataRaw, err := k.cfg.Source.dataFunc(ctx, k.slogger, k.cfg.Path, k.cfg.Query, getSourceConstraint(queryContext)) + dataRaw, err := k.cfg.SourceType.dataFunc(ctx, k.slogger, k.cfg.Source, k.cfg.Query, getSourceConstraint(queryContext)) if err != nil { return nil, fmt.Errorf("fetching data: %w", err) } @@ -64,7 +64,7 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex for _, dataRawRow := range s.rows { // Make sure source is included in row data rowData := map[string]string{ - sourcePathColumnName: s.path, + sourceColumnName: s.path, } // Run any needed transformations on the row data @@ -111,27 +111,27 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex return filteredResults, nil } -// getSourceConstraint retrieves any constraints against the `source_path` column +// getSourceConstraint retrieves any constraints against the `source` column func getSourceConstraint(queryContext table.QueryContext) *table.ConstraintList { - sourceConstraint, sourceConstraintExists := queryContext.Constraints[sourcePathColumnName] + sourceConstraint, sourceConstraintExists := queryContext.Constraints[sourceColumnName] if sourceConstraintExists { return &sourceConstraint } return nil } -// checkSourcePathConstraints validates whether a given `sourcePath` matches the given constraints. -func checkSourcePathConstraints(sourcePath string, sourceConstraints *table.ConstraintList) (bool, error) { +// checkSourceConstraints validates whether a given `source` matches the given constraints. +func checkSourceConstraints(source string, sourceConstraints *table.ConstraintList) (bool, error) { if sourceConstraints == nil { return true, nil } - validPath := true + validSource := true for _, sourceConstraint := range sourceConstraints.Constraints { switch sourceConstraint.Operator { case table.OperatorEquals: - if sourcePath != sourceConstraint.Expression { - validPath = false + if source != sourceConstraint.Expression { + validSource = false } case table.OperatorLike: // Transform the expression into a regex to test if we have a match. @@ -146,8 +146,8 @@ func checkSourcePathConstraints(sourcePath string, sourceConstraints *table.Cons if err != nil { return false, fmt.Errorf("invalid LIKE statement: %w", err) } - if !r.MatchString(sourcePath) { - validPath = false + if !r.MatchString(source) { + validSource = false } case table.OperatorGlob: // Transform the expression into a regex to test if we have a match. @@ -161,26 +161,26 @@ func checkSourcePathConstraints(sourcePath string, sourceConstraints *table.Cons if err != nil { return false, fmt.Errorf("invalid GLOB statement: %w", err) } - if !r.MatchString(sourcePath) { - validPath = false + if !r.MatchString(source) { + validSource = false } case table.OperatorRegexp: r, err := regexp.Compile(sourceConstraint.Expression) if err != nil { return false, fmt.Errorf("invalid regex: %w", err) } - if !r.MatchString(sourcePath) { - validPath = false + if !r.MatchString(source) { + validSource = false } default: return false, fmt.Errorf("operator %v not valid source constraint", sourceConstraint.Operator) } // No need to check other constraints - if !validPath { + if !validSource { break } } - return validPath, nil + return validSource, nil } diff --git a/ee/katc/table_test.go b/ee/katc/table_test.go index 4aed48e82..511c12e29 100644 --- a/ee/katc/table_test.go +++ b/ee/katc/table_test.go @@ -82,13 +82,13 @@ func Test_generate_SqliteBackedIndexedDB(t *testing.T) { // At long last, our source is adequately configured. // Move on to constructing our KATC table. cfg := katcTableConfig{ - Source: katcSourceType{ + SourceType: katcSourceType{ name: sqliteSourceType, dataFunc: sqliteData, }, Platform: runtime.GOOS, Columns: []string{expectedColumn}, - Path: filepath.Join(databaseDir, "*.sqlite"), // All sqlite files in the test directory + Source: filepath.Join(databaseDir, "*.sqlite"), // All sqlite files in the test directory Query: "SELECT data FROM object_data;", RowTransformSteps: []rowTransformStep{ { @@ -106,7 +106,7 @@ func Test_generate_SqliteBackedIndexedDB(t *testing.T) { // Make a query context restricting the source to our exact source sqlite database queryContext := table.QueryContext{ Constraints: map[string]table.ConstraintList{ - sourcePathColumnName: { + sourceColumnName: { Constraints: []table.Constraint{ { Operator: table.OperatorEquals, @@ -123,8 +123,8 @@ func Test_generate_SqliteBackedIndexedDB(t *testing.T) { // Validate results require.Equal(t, 1, len(results), "only one row expected") - require.Contains(t, results[0], sourcePathColumnName, "missing source_path column") - require.Equal(t, sourceFilepath, results[0][sourcePathColumnName]) + require.Contains(t, results[0], sourceColumnName, "missing source column") + require.Equal(t, sourceFilepath, results[0][sourceColumnName]) require.Contains(t, results[0], expectedColumn, "expected column missing") require.Equal(t, expectedColumnValue, results[0][expectedColumn], "data mismatch") } @@ -134,14 +134,14 @@ func Test_checkSourcePathConstraints(t *testing.T) { for _, tt := range []struct { testCaseName string - sourcePath string + source string constraints table.ConstraintList valid bool errorExpected bool }{ { testCaseName: "equals", - sourcePath: filepath.Join("some", "path", "to", "a", "source"), + source: filepath.Join("some", "path", "to", "a", "source"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -155,7 +155,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "not equals", - sourcePath: filepath.Join("some", "path", "to", "a", "source"), + source: filepath.Join("some", "path", "to", "a", "source"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -169,7 +169,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "LIKE with % wildcard", - sourcePath: filepath.Join("a", "path", "to", "db.sqlite"), + source: filepath.Join("a", "path", "to", "db.sqlite"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -183,7 +183,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "LIKE with underscore wildcard", - sourcePath: filepath.Join("a", "path", "to", "db.sqlite"), + source: filepath.Join("a", "path", "to", "db.sqlite"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -197,7 +197,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "LIKE is case-insensitive", - sourcePath: filepath.Join("a", "path", "to", "db.sqlite"), + source: filepath.Join("a", "path", "to", "db.sqlite"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -210,7 +210,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "GLOB with * wildcard", - sourcePath: filepath.Join("another", "path", "to", "a", "source"), + source: filepath.Join("another", "path", "to", "a", "source"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -224,7 +224,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "GLOB with ? wildcard", - sourcePath: filepath.Join("another", "path", "to", "a", "source"), + source: filepath.Join("another", "path", "to", "a", "source"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -238,7 +238,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "regexp", - sourcePath: filepath.Join("test", "path", "to", "a", "source"), + source: filepath.Join("test", "path", "to", "a", "source"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -252,7 +252,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "invalid regexp", - sourcePath: filepath.Join("test", "path", "to", "a", "source"), + source: filepath.Join("test", "path", "to", "a", "source"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -266,7 +266,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "unsupported", - sourcePath: filepath.Join("test", "path", "to", "a", "source", "2"), + source: filepath.Join("test", "path", "to", "a", "source", "2"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -280,7 +280,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "multiple constraints where one does not match", - sourcePath: filepath.Join("test", "path", "to", "a", "source", "3"), + source: filepath.Join("test", "path", "to", "a", "source", "3"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -298,7 +298,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { }, { testCaseName: "multiple constraints where all match", - sourcePath: filepath.Join("test", "path", "to", "a", "source", "3"), + source: filepath.Join("test", "path", "to", "a", "source", "3"), constraints: table.ConstraintList{ Constraints: []table.Constraint{ { @@ -319,7 +319,7 @@ func Test_checkSourcePathConstraints(t *testing.T) { t.Run(tt.testCaseName, func(t *testing.T) { t.Parallel() - valid, err := checkSourcePathConstraints(tt.sourcePath, &tt.constraints) + valid, err := checkSourceConstraints(tt.source, &tt.constraints) if tt.errorExpected { require.Error(t, err, "expected error on checking constraints") } else { From 7fbdcb82df6ecc3de7821bf7ead98b8fbfff3f59 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Mon, 1 Jul 2024 10:26:36 -0400 Subject: [PATCH 17/21] Rename structured clone to something more intuitive --- ee/katc/config.go | 10 +++++----- .../{structured_clone.go => deserialize_firefox.go} | 4 ++-- ee/katc/table_test.go | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) rename ee/katc/{structured_clone.go => deserialize_firefox.go} (97%) diff --git a/ee/katc/config.go b/ee/katc/config.go index 706eddaf8..21727bdbc 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -57,8 +57,8 @@ type rowTransformStep struct { } const ( - snappyDecodeTransformStep = "snappy" - structuredCloneDeserializeTransformStep = "structured_clone" + snappyDecodeTransformStep = "snappy" + deserializeFirefoxTransformStep = "deserialize_firefox" ) func (r *rowTransformStep) UnmarshalJSON(data []byte) error { @@ -73,9 +73,9 @@ func (r *rowTransformStep) UnmarshalJSON(data []byte) error { r.name = snappyDecodeTransformStep r.transformFunc = snappyDecode return nil - case structuredCloneDeserializeTransformStep: - r.name = structuredCloneDeserializeTransformStep - r.transformFunc = structuredCloneDeserialize + case deserializeFirefoxTransformStep: + r.name = deserializeFirefoxTransformStep + r.transformFunc = deserializeFirefox return nil default: return fmt.Errorf("unknown data processing step %s", s) diff --git a/ee/katc/structured_clone.go b/ee/katc/deserialize_firefox.go similarity index 97% rename from ee/katc/structured_clone.go rename to ee/katc/deserialize_firefox.go index 854793f09..1a50975c3 100644 --- a/ee/katc/structured_clone.go +++ b/ee/katc/deserialize_firefox.go @@ -29,12 +29,12 @@ const ( tagEndOfKeys uint32 = 0xffff0013 ) -// structuredCloneDeserialize deserializes a JS object that has been stored by Firefox +// deserializeFirefox deserializes a JS object that has been stored by Firefox // in IndexedDB sqlite-backed databases. // References: // * https://stackoverflow.com/a/59923297 // * https://searchfox.org/mozilla-central/source/js/src/vm/StructuredClone.cpp (see especially JSStructuredCloneReader::read) -func structuredCloneDeserialize(ctx context.Context, slogger *slog.Logger, row map[string][]byte) (map[string][]byte, error) { +func deserializeFirefox(ctx context.Context, slogger *slog.Logger, row map[string][]byte) (map[string][]byte, error) { // IndexedDB data is stored by key "data" pointing to the serialized object. We want to // extract that serialized object, and discard the top-level "data" key. data, ok := row["data"] diff --git a/ee/katc/table_test.go b/ee/katc/table_test.go index 511c12e29..bc6172a45 100644 --- a/ee/katc/table_test.go +++ b/ee/katc/table_test.go @@ -22,7 +22,7 @@ func Test_generate_SqliteBackedIndexedDB(t *testing.T) { // This test validates generation of table results. It uses a sqlite-backed // IndexedDB as a source, which means it also exercises functionality from - // sqlite.go, snappy.go, and structured_clone.go. + // sqlite.go, snappy.go, and deserialize_firefox.go. // First, set up the data we expect to retrieve. expectedColumn := "uuid" @@ -31,7 +31,7 @@ func Test_generate_SqliteBackedIndexedDB(t *testing.T) { expectedColumnValue := u.String() // Serialize the row data, reversing the deserialization operation in - // structured_clone.go. + // deserialize_firefox.go. serializedUuid := []byte(expectedColumnValue) serializedObj := append([]byte{ // Header @@ -96,8 +96,8 @@ func Test_generate_SqliteBackedIndexedDB(t *testing.T) { transformFunc: snappyDecode, }, { - name: structuredCloneDeserializeTransformStep, - transformFunc: structuredCloneDeserialize, + name: deserializeFirefoxTransformStep, + transformFunc: deserializeFirefox, }, }, } From 97d8647c2365cc5ef9aa7bdc52364bcdeac4b857 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Mon, 1 Jul 2024 11:22:48 -0400 Subject: [PATCH 18/21] Fix dsn for sqlite --- ee/katc/sqlite.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index 2b3414423..5cd6ea66e 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -44,7 +44,7 @@ func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, q // querySqliteDb queries the database at the given path, returning rows of results func querySqliteDb(ctx context.Context, slogger *slog.Logger, path string, query string) ([]map[string][]byte, error) { - dsn := fmt.Sprintf("file://%s?mode=ro", path) + dsn := fmt.Sprintf("file:%s?mode=ro", path) conn, err := sql.Open("sqlite", dsn) if err != nil { return nil, fmt.Errorf("opening sqlite db: %w", err) From 62e40619b222f251c7cbcb5d3c84caa2c8c1e796 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Mon, 1 Jul 2024 13:52:55 -0400 Subject: [PATCH 19/21] Remove unneeded check --- ee/katc/table.go | 10 +--------- ee/katc/table_test.go | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/ee/katc/table.go b/ee/katc/table.go index 8dd8af9f3..15b0a6308 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -87,7 +87,6 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex // Now, filter data to ensure we only return columns in k.columnLookup filteredResults := make([]map[string]string, 0) for _, row := range transformedResults { - includeRow := true filteredRow := make(map[string]string) for column, data := range row { if _, expectedColumn := k.columnLookup[column]; !expectedColumn { @@ -96,16 +95,9 @@ func (k *katcTable) generate(ctx context.Context, queryContext table.QueryContex } filteredRow[column] = data - - // No need to check the rest of the row - if !includeRow { - break - } } - if includeRow { - filteredResults = append(filteredResults, filteredRow) - } + filteredResults = append(filteredResults, filteredRow) } return filteredResults, nil diff --git a/ee/katc/table_test.go b/ee/katc/table_test.go index bc6172a45..5cc9340d4 100644 --- a/ee/katc/table_test.go +++ b/ee/katc/table_test.go @@ -122,7 +122,7 @@ func Test_generate_SqliteBackedIndexedDB(t *testing.T) { require.NoError(t, err) // Validate results - require.Equal(t, 1, len(results), "only one row expected") + require.Equal(t, 1, len(results), "exactly one row expected") require.Contains(t, results[0], sourceColumnName, "missing source column") require.Equal(t, sourceFilepath, results[0][sourceColumnName]) require.Contains(t, results[0], expectedColumn, "expected column missing") From eda263e00861e7720b11b998a281d6d546d8d3e1 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Mon, 1 Jul 2024 14:40:25 -0400 Subject: [PATCH 20/21] Don't need unnecessary variable, return early if constraint not met --- ee/katc/table.go | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/ee/katc/table.go b/ee/katc/table.go index 15b0a6308..d3aa31d0f 100644 --- a/ee/katc/table.go +++ b/ee/katc/table.go @@ -118,12 +118,11 @@ func checkSourceConstraints(source string, sourceConstraints *table.ConstraintLi return true, nil } - validSource := true for _, sourceConstraint := range sourceConstraints.Constraints { switch sourceConstraint.Operator { case table.OperatorEquals: if source != sourceConstraint.Expression { - validSource = false + return false, nil } case table.OperatorLike: // Transform the expression into a regex to test if we have a match. @@ -139,7 +138,7 @@ func checkSourceConstraints(source string, sourceConstraints *table.ConstraintLi return false, fmt.Errorf("invalid LIKE statement: %w", err) } if !r.MatchString(source) { - validSource = false + return false, nil } case table.OperatorGlob: // Transform the expression into a regex to test if we have a match. @@ -154,7 +153,7 @@ func checkSourceConstraints(source string, sourceConstraints *table.ConstraintLi return false, fmt.Errorf("invalid GLOB statement: %w", err) } if !r.MatchString(source) { - validSource = false + return false, nil } case table.OperatorRegexp: r, err := regexp.Compile(sourceConstraint.Expression) @@ -162,17 +161,12 @@ func checkSourceConstraints(source string, sourceConstraints *table.ConstraintLi return false, fmt.Errorf("invalid regex: %w", err) } if !r.MatchString(source) { - validSource = false + return false, nil } default: return false, fmt.Errorf("operator %v not valid source constraint", sourceConstraint.Operator) } - - // No need to check other constraints - if !validSource { - break - } } - return validSource, nil + return true, nil } From 3019cf0a9fb5400bd0f4f4a199907f7db41fc034 Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Mon, 1 Jul 2024 15:13:17 -0400 Subject: [PATCH 21/21] Support LIKE syntax for source rather than glob --- ee/katc/config.go | 2 +- ee/katc/sqlite.go | 15 ++++++++++++++- ee/katc/table_test.go | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/ee/katc/config.go b/ee/katc/config.go index 21727bdbc..6d4470167 100644 --- a/ee/katc/config.go +++ b/ee/katc/config.go @@ -86,7 +86,7 @@ func (r *rowTransformStep) UnmarshalJSON(data []byte) error { // sends down these configurations. type katcTableConfig struct { SourceType katcSourceType `json:"source_type"` - Source string `json:"source"` // Describes how to connect to source (e.g. path to db) -- wildcards supported + Source string `json:"source"` // Describes how to connect to source (e.g. path to db) -- % and _ wildcards supported Platform string `json:"platform"` Columns []string `json:"columns"` Query string `json:"query"` // Query to run against `path` diff --git a/ee/katc/sqlite.go b/ee/katc/sqlite.go index 5cd6ea66e..6ddeea639 100644 --- a/ee/katc/sqlite.go +++ b/ee/katc/sqlite.go @@ -6,13 +6,15 @@ import ( "fmt" "log/slog" "path/filepath" + "strings" "github.com/osquery/osquery-go/plugin/table" _ "modernc.org/sqlite" ) // sqliteData is the dataFunc for sqlite KATC tables -func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, query string, sourceConstraints *table.ConstraintList) ([]sourceData, error) { +func sqliteData(ctx context.Context, slogger *slog.Logger, sourcePattern string, query string, sourceConstraints *table.ConstraintList) ([]sourceData, error) { + pathPattern := sourcePatternToGlobbablePattern(sourcePattern) sqliteDbs, err := filepath.Glob(pathPattern) if err != nil { return nil, fmt.Errorf("globbing for files with pattern %s: %w", pathPattern, err) @@ -42,6 +44,17 @@ func sqliteData(ctx context.Context, slogger *slog.Logger, pathPattern string, q return results, nil } +// sourcePatternToGlobbablePattern translates the source pattern, which adheres to LIKE +// sqlite syntax for consistency with other osquery tables, into a pattern that can be +// accepted by filepath.Glob. +func sourcePatternToGlobbablePattern(sourcePattern string) string { + // % matches zero or more characters in LIKE, corresponds to * in glob syntax + globbablePattern := strings.Replace(sourcePattern, "%", `*`, -1) + // _ matches a single character in LIKE, corresponds to ? in glob syntax + globbablePattern = strings.Replace(globbablePattern, "_", `?`, -1) + return globbablePattern +} + // querySqliteDb queries the database at the given path, returning rows of results func querySqliteDb(ctx context.Context, slogger *slog.Logger, path string, query string) ([]map[string][]byte, error) { dsn := fmt.Sprintf("file:%s?mode=ro", path) diff --git a/ee/katc/table_test.go b/ee/katc/table_test.go index 5cc9340d4..2c3465d09 100644 --- a/ee/katc/table_test.go +++ b/ee/katc/table_test.go @@ -88,7 +88,7 @@ func Test_generate_SqliteBackedIndexedDB(t *testing.T) { }, Platform: runtime.GOOS, Columns: []string{expectedColumn}, - Source: filepath.Join(databaseDir, "*.sqlite"), // All sqlite files in the test directory + Source: filepath.Join(databaseDir, "%.sqlite"), // All sqlite files in the test directory Query: "SELECT data FROM object_data;", RowTransformSteps: []rowTransformStep{ {