From 36afc33d29701d8cd399ae5c062ee0f55046f10d Mon Sep 17 00:00:00 2001 From: Rebecca Mahany-Horton Date: Tue, 2 Jul 2024 15:50:07 -0400 Subject: [PATCH] [KATC] Prepare ee/indexeddb for use in KATC tables (#1767) --- ee/indexeddb/indexeddb.go | 33 ++--------- ee/indexeddb/keys.go | 62 -------------------- ee/indexeddb/keys_test.go | 57 ------------------- ee/indexeddb/values.go | 110 ++++++++++++++++++++++++------------ ee/indexeddb/values_test.go | 9 ++- 5 files changed, 84 insertions(+), 187 deletions(-) diff --git a/ee/indexeddb/indexeddb.go b/ee/indexeddb/indexeddb.go index 2a9c9a410..f35c356d9 100644 --- a/ee/indexeddb/indexeddb.go +++ b/ee/indexeddb/indexeddb.go @@ -29,7 +29,7 @@ var indexeddbComparer = newChromeComparer() // QueryIndexeddbObjectStore queries the indexeddb at the given location `dbLocation`, // returning all objects in the given database that live in the given object store. -func QueryIndexeddbObjectStore(dbLocation string, dbName string, objectStoreName string) ([]map[string]any, error) { +func QueryIndexeddbObjectStore(dbLocation string, dbName string, objectStoreName string) ([]map[string][]byte, error) { // If Chrome is open, we won't be able to open the db. So, copy it to a temporary location first. tempDbCopyLocation, err := copyIndexeddb(dbLocation) if err != nil { @@ -91,21 +91,11 @@ func QueryIndexeddbObjectStore(dbLocation string, dbName string, objectStoreName return nil, errors.New("unable to get object store ID") } - // Get the key path for all objects in this store - keyPathRaw, err := db.Get(objectStoreKeyPathKey(databaseId, objectStoreId), nil) - if err != nil { - return nil, fmt.Errorf("getting key path: %w", err) - } - keyPath, err := decodeIDBKeyPath(keyPathRaw) - if err != nil { - return nil, fmt.Errorf("decoding key path: %w", err) - } - // Get the key prefix for all objects in this store. keyPrefix := objectDataKeyPrefix(databaseId, objectStoreId) - // Now, we can read all records, parsing only the ones with our matching key prefix. - objs := make([]map[string]any, 0) + // Now, we can read all records, keeping only the ones with our matching key prefix. + objs := make([]map[string][]byte, 0) iter := db.NewIterator(nil, nil) for iter.Next() { key := iter.Key() @@ -113,20 +103,9 @@ func QueryIndexeddbObjectStore(dbLocation string, dbName string, objectStoreName continue } - keyVal, err := decodeIDBKey(key, keyPrefix) - if err != nil { - return objs, fmt.Errorf("decoding key: %w", err) - } - - obj, err := deserializeIndexeddbValue(iter.Value()) - if err != nil { - return objs, fmt.Errorf("decoding object: %w", err) - } - - // Set the key path in the object -- add idb_ prefix to avoid collisions - obj[fmt.Sprintf("idb_%s", string(keyPath))] = keyVal - - objs = append(objs, obj) + objs = append(objs, map[string][]byte{ + "data": iter.Value(), + }) } iter.Release() if err := iter.Error(); err != nil { diff --git a/ee/indexeddb/keys.go b/ee/indexeddb/keys.go index a38a4f13c..0685e12f7 100644 --- a/ee/indexeddb/keys.go +++ b/ee/indexeddb/keys.go @@ -1,9 +1,7 @@ package indexeddb import ( - "bytes" "encoding/binary" - "errors" "fmt" "path/filepath" "strings" @@ -22,9 +20,6 @@ const ( // Index IDs objectStoreDataIndexId = 0x01 // 1 - // Key types - keyTypeNumber = 0x03 // 3 - // When parsing the origin from the database location, I have to add @1 at the end for the origin to be complete. // I don't know why. originSuffix = "@1" @@ -77,24 +72,6 @@ func objectStoreNameKey(dbId uint64, objectStoreId uint64) []byte { return append(storeNameKey, 0x00) } -// objectStoreKeyPathKey constructs a query for the key path for the object store with the given ID. -func objectStoreKeyPathKey(dbId uint64, objectStoreId uint64) []byte { - // Key takes the format <0, database id, 0, 0, 50, object store id, 1>. - storeNameKey := []byte{0x00} - storeNameKey = append(storeNameKey, uvarintToBytes(dbId)...) - storeNameKey = append(storeNameKey, - 0x00, - 0x00, - objectStoreMetaDataTypeByte, - ) - - // Add the object store ID - storeNameKey = append(storeNameKey, uvarintToBytes(objectStoreId)...) - - // Add 0x01, indicating we're querying for the object store name - return append(storeNameKey, 0x01) -} - // objectDataKeyPrefix returns the key prefix shared by all objects stored in the given database // and in the given store. func objectDataKeyPrefix(dbId uint64, objectStoreId uint64) []byte { @@ -110,45 +87,6 @@ func decodeUtf16BigEndianBytes(b []byte) ([]byte, error) { return utf16BigEndianDecoder.Bytes(b) } -// decodeIDBKeyPath extracts the key path from the given input. IDBKeyPaths have multiple types. -// This function only supports string types, which take the format 0x00, 0x00, 0x01, StringWithLength. -func decodeIDBKeyPath(b []byte) ([]byte, error) { - if !bytes.HasPrefix(b, []byte{0x00, 0x00, 0x01}) { - return nil, errors.New("unsupported IDBKeyPath type") - } - - if len(b) < 4 { - return nil, fmt.Errorf("IDBKeyPath with length %d is too short to be a string", len(b)) - } - - // Read the StringWithLength's length, but discard it -- we can just decode the remainder - // of the slice. - prefixLen := 3 - _, bytesRead := binary.Uvarint(b[prefixLen:]) - - return decodeUtf16BigEndianBytes(b[prefixLen+bytesRead:]) -} - -// decodeIDBKey extracts the object key from the given data. It currently only supports -// numerical keys. -func decodeIDBKey(key []byte, keyPrefix []byte) (any, error) { - key = bytes.TrimPrefix(key, keyPrefix) - - // Next byte is key type. - switch key[0] { - case keyTypeNumber: - // IEEE754 64-bit (double), in host endianness - buf := bytes.NewReader(key[1:]) - var keyData float64 - if err := binary.Read(buf, binary.NativeEndian, &keyData); err != nil { - return nil, fmt.Errorf("reading double: %w", err) - } - return keyData, nil - default: - return nil, fmt.Errorf("unimplemented key type 0x%02x", key[0]) - } -} - // stringWithLength constructs an appropriate representation of `s`. // See: https://github.com/chromium/chromium/blob/main/content/browser/indexed_db/docs/leveldb_coding_scheme.md#types func stringWithLength(s string) ([]byte, error) { diff --git a/ee/indexeddb/keys_test.go b/ee/indexeddb/keys_test.go index cc81fb105..fedbb576a 100644 --- a/ee/indexeddb/keys_test.go +++ b/ee/indexeddb/keys_test.go @@ -53,26 +53,6 @@ func Test_objectStoreNameKey(t *testing.T) { require.Equal(t, expectedKey, objectStoreNameKey(dbId, objectStoreId), "object store name key format is incorrect") } -func Test_objectStoreKeyPathKey(t *testing.T) { - t.Parallel() - - var dbId uint64 = 2 - var objectStoreId uint64 = 3 - - // Key takes the format <0, database id, 0, 0, 50, object store id, 1>. - expectedKey := []byte{ - 0x00, - 0x02, // DB ID - 0x00, - 0x00, - objectStoreMetaDataTypeByte, - 0x03, // object store ID - 0x01, - } - - require.Equal(t, expectedKey, objectStoreKeyPathKey(dbId, objectStoreId), "object store key path key format is incorrect") -} - func Test_objectDataKeyPrefix(t *testing.T) { t.Parallel() @@ -102,43 +82,6 @@ func Test_decodeUtf16BigEndianBytes(t *testing.T) { require.Equal(t, originalBytes, actualBytes, "decoded bytes do not match") } -func Test_decodeIDBKeyPath(t *testing.T) { - t.Parallel() - - // Prepare key path - keyPath := []byte("id") - utf16BigEndianEncoder := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder() - utf16KeyPathBytes, err := utf16BigEndianEncoder.Bytes(keyPath) - require.NoError(t, err, "encoding bytes") - - testKeyPath := []byte{ - 0x00, 0x00, 0x01, // prefix - 0x02, // length of "id" - } - testKeyPath = append(testKeyPath, utf16KeyPathBytes...) - - resultKeyPath, err := decodeIDBKeyPath(testKeyPath) - require.NoError(t, err, "decoding key path") - require.Equal(t, keyPath, resultKeyPath) -} - -func Test_decodeIDBKey(t *testing.T) { - t.Parallel() - - // Prepare key value - var keyValue float64 = 4 - keyValueBuf := bytes.NewBuffer(make([]byte, 0)) - require.NoError(t, binary.Write(keyValueBuf, binary.NativeEndian, keyValue), "writing key value") - - testKeyPrefix := []byte{0x00, 0x01, 0x01, 0x01} - testKey := append(testKeyPrefix, keyTypeNumber) - testKey = append(testKey, keyValueBuf.Bytes()...) - - actualVal, err := decodeIDBKey(testKey, testKeyPrefix) - require.NoError(t, err, "decoding idb key") - require.Equal(t, keyValue, actualVal) -} - func Test_stringWithLength(t *testing.T) { t.Parallel() diff --git a/ee/indexeddb/values.go b/ee/indexeddb/values.go index 9a75f7e96..d398b5395 100644 --- a/ee/indexeddb/values.go +++ b/ee/indexeddb/values.go @@ -2,9 +2,14 @@ package indexeddb import ( "bytes" + "context" "encoding/binary" + "encoding/json" + "errors" "fmt" "io" + "log/slog" + "strconv" "golang.org/x/text/encoding/unicode" "golang.org/x/text/transform" @@ -36,27 +41,28 @@ const ( tokenNull byte = 0x30 ) -// deserializeIndexeddbValue takes the value in `src` and deserializes it -// into a map. -func deserializeIndexeddbValue(src []byte) (map[string]any, error) { - srcReader := bytes.NewReader(src) - obj := make(map[string]any) +// deserializeChrome deserializes a JS object that has been stored by Chrome +// in IndexedDB LevelDB-backed databases. +func deserializeChrome(_ context.Context, _ *slog.Logger, row map[string][]byte) (map[string][]byte, error) { + data, ok := row["data"] + if !ok { + return nil, errors.New("row missing top-level data key") + } + srcReader := bytes.NewReader(data) // First, read through the header to extract top-level data version, err := readHeader(srcReader) if err != nil { - return obj, fmt.Errorf("reading header: %w", err) + return nil, fmt.Errorf("reading header: %w", err) } - obj["version"] = version // Now, parse the actual data in this row objData, err := deserializeObject(srcReader) - obj["data"] = objData if err != nil { - return obj, fmt.Errorf("decoding obj: %w", err) + return nil, fmt.Errorf("decoding obj for indexeddb version %d: %w", version, err) } - return obj, nil + return objData, nil } // readHeader reads through the header bytes at the start of `srcReader`. @@ -90,8 +96,8 @@ func readHeader(srcReader io.ByteReader) (uint64, error) { } // deserializeObject deserializes the next object from the srcReader. -func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { - obj := make(map[string]any) +func deserializeObject(srcReader io.ByteReader) (map[string][]byte, error) { + obj := make(map[string][]byte) for { // Parse the next property in this object. @@ -142,7 +148,7 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { switch nextByte { case tokenObjectBegin: // Object nested inside this object - nestedObj, err := deserializeObject(srcReader) + nestedObj, err := deserializeNestedObject(srcReader) if err != nil { return obj, fmt.Errorf("decoding nested object: %w", err) } @@ -162,9 +168,9 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { } obj[currentPropertyName] = strVal case tokenTrue: - obj[currentPropertyName] = true + obj[currentPropertyName] = []byte("true") case tokenFalse: - obj[currentPropertyName] = false + obj[currentPropertyName] = []byte("false") case tokenUndefined, tokenNull: obj[currentPropertyName] = nil case tokenInt32: @@ -172,7 +178,7 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { if err != nil { return obj, fmt.Errorf("decoding int32: %w", err) } - obj[currentPropertyName] = propertyInt + obj[currentPropertyName] = []byte(strconv.Itoa(int(propertyInt))) case tokenBeginSparseArray: // This is the only type of array I've encountered so far, so it's the only one implemented. arr, err := deserializeSparseArray(srcReader) @@ -190,7 +196,7 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) { // deserializeSparseArray deserializes the next array from the srcReader. // Currently, it only handles an array of objects. -func deserializeSparseArray(srcReader io.ByteReader) ([]any, error) { +func deserializeSparseArray(srcReader io.ByteReader) ([]byte, error) { // After an array start, the next byte will be the length of the array. arrayLen, err := binary.ReadUvarint(srcReader) if err != nil { @@ -199,10 +205,11 @@ func deserializeSparseArray(srcReader io.ByteReader) ([]any, error) { // Read from srcReader until we've filled the array to the correct size. arrItems := make([]any, arrayLen) + reachedEndOfArray := false for { idxByte, err := srcReader.ReadByte() if err != nil { - return arrItems, fmt.Errorf("reading next byte: %w", err) + return nil, fmt.Errorf("reading next byte: %w", err) } // First, get the index for this item in the array @@ -211,13 +218,13 @@ func deserializeSparseArray(srcReader io.ByteReader) ([]any, error) { case tokenInt32: arrIdx, err := binary.ReadVarint(srcReader) if err != nil { - return arrItems, fmt.Errorf("reading varint: %w", err) + return nil, fmt.Errorf("reading varint: %w", err) } i = int(arrIdx) case tokenUint32: arrIdx, err := binary.ReadUvarint(srcReader) if err != nil { - return arrItems, fmt.Errorf("reading uvarint: %w", err) + return nil, fmt.Errorf("reading uvarint: %w", err) } i = int(arrIdx) case tokenEndSparseArray: @@ -226,64 +233,95 @@ func deserializeSparseArray(srcReader io.ByteReader) ([]any, error) { _, _ = srcReader.ReadByte() _, _ = srcReader.ReadByte() // The array has ended -- return. - return arrItems, nil + reachedEndOfArray = true case 0x01, 0x03: // This occurs immediately before tokenEndSparseArray -- not sure why. We can ignore it. continue default: - return arrItems, fmt.Errorf("unexpected array index type: 0x%02x / `%s`", idxByte, string(idxByte)) + return nil, fmt.Errorf("unexpected array index type: 0x%02x / `%s`", idxByte, string(idxByte)) + } + + if reachedEndOfArray { + break } // Now read item at index nextByte, err := srcReader.ReadByte() if err != nil { - return arrItems, fmt.Errorf("reading next byte: %w", err) + return nil, fmt.Errorf("reading next byte: %w", err) } switch nextByte { case tokenObjectBegin: - obj, err := deserializeObject(srcReader) + obj, err := deserializeNestedObject(srcReader) if err != nil { - return arrItems, fmt.Errorf("decoding object in array: %w", err) + return nil, fmt.Errorf("decoding object in array: %w", err) } - arrItems[i] = obj + arrItems[i] = string(obj) // cast to string so it's readable when marshalled again below default: - return arrItems, fmt.Errorf("unimplemented array type 0x%02x / `%s`", nextByte, string(nextByte)) + return nil, fmt.Errorf("unimplemented array item type 0x%02x / `%s`", nextByte, string(nextByte)) } } + + arrBytes, err := json.Marshal(arrItems) + if err != nil { + return nil, fmt.Errorf("marshalling array: %w", err) + } + + return arrBytes, nil +} + +func deserializeNestedObject(srcReader io.ByteReader) ([]byte, error) { + nestedObj, err := deserializeObject(srcReader) + if err != nil { + return nil, fmt.Errorf("deserializing nested object: %w", err) + } + + // Make nested object values readable -- cast []byte to string + readableNestedObj := make(map[string]string) + for k, v := range nestedObj { + readableNestedObj[k] = string(v) + } + + resultObj, err := json.Marshal(readableNestedObj) + if err != nil { + return nil, fmt.Errorf("marshalling nested object: %w", err) + } + + return resultObj, nil } // deserializeAsciiStr handles the upcoming ascii string in srcReader. -func deserializeAsciiStr(srcReader io.ByteReader) (string, error) { +func deserializeAsciiStr(srcReader io.ByteReader) ([]byte, error) { strLen, err := binary.ReadUvarint(srcReader) if err != nil { - return "", fmt.Errorf("reading uvarint: %w", err) + return nil, fmt.Errorf("reading uvarint: %w", err) } strBytes := make([]byte, strLen) for i := 0; i < int(strLen); i += 1 { nextByte, err := srcReader.ReadByte() if err != nil { - return "", fmt.Errorf("reading next byte in string: %w", err) + return nil, fmt.Errorf("reading next byte in string: %w", err) } strBytes[i] = nextByte } - return string(strBytes), nil + return strBytes, nil } // deserializeUtf16Str handles the upcoming utf-16 string in srcReader. -func deserializeUtf16Str(srcReader io.ByteReader) (string, error) { +func deserializeUtf16Str(srcReader io.ByteReader) ([]byte, error) { strLen, err := binary.ReadUvarint(srcReader) if err != nil { - return "", fmt.Errorf("reading uvarint: %w", err) + return nil, fmt.Errorf("reading uvarint: %w", err) } strBytes := make([]byte, strLen) for i := 0; i < int(strLen); i += 1 { nextByte, err := srcReader.ReadByte() if err != nil { - return "", fmt.Errorf("reading next byte in string: %w", err) + return nil, fmt.Errorf("reading next byte in string: %w", err) } strBytes[i] = nextByte @@ -292,8 +330,8 @@ func deserializeUtf16Str(srcReader io.ByteReader) (string, error) { utf16Reader := transform.NewReader(bytes.NewReader(strBytes), unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()) decoded, err := io.ReadAll(utf16Reader) if err != nil { - return "", fmt.Errorf("reading string as utf-16: %w", err) + return nil, fmt.Errorf("reading string as utf-16: %w", err) } - return string(decoded), nil + return decoded, nil } diff --git a/ee/indexeddb/values_test.go b/ee/indexeddb/values_test.go index b406605c4..bedc64b4a 100644 --- a/ee/indexeddb/values_test.go +++ b/ee/indexeddb/values_test.go @@ -1,8 +1,10 @@ package indexeddb import ( + "context" "testing" + "github.com/kolide/launcher/pkg/log/multislogger" "github.com/stretchr/testify/require" ) @@ -29,12 +31,9 @@ func Test_deserializeIndexeddbValue(t *testing.T) { 0x01, // properties_written } - obj, err := deserializeIndexeddbValue(testBytes) + obj, err := deserializeChrome(context.TODO(), multislogger.NewNopLogger(), map[string][]byte{"data": testBytes}) require.NoError(t, err, "deserializing object") - // Confirm we got a version and data top-level property - require.Contains(t, obj, "version", "expected version property") - require.Contains(t, obj, "data", "expected data property") // Confirm we got an id property for the object - require.Contains(t, obj["data"], "id", "expected id property") + require.Contains(t, obj, "id", "expected id property") }