From 36afc33d29701d8cd399ae5c062ee0f55046f10d Mon Sep 17 00:00:00 2001
From: Rebecca Mahany-Horton <rebeccamahany@gmail.com>
Date: Tue, 2 Jul 2024 15:50:07 -0400
Subject: [PATCH] [KATC] Prepare ee/indexeddb for use in KATC tables (#1767)

---
 ee/indexeddb/indexeddb.go   |  33 ++---------
 ee/indexeddb/keys.go        |  62 --------------------
 ee/indexeddb/keys_test.go   |  57 -------------------
 ee/indexeddb/values.go      | 110 ++++++++++++++++++++++++------------
 ee/indexeddb/values_test.go |   9 ++-
 5 files changed, 84 insertions(+), 187 deletions(-)

diff --git a/ee/indexeddb/indexeddb.go b/ee/indexeddb/indexeddb.go
index 2a9c9a410..f35c356d9 100644
--- a/ee/indexeddb/indexeddb.go
+++ b/ee/indexeddb/indexeddb.go
@@ -29,7 +29,7 @@ var indexeddbComparer = newChromeComparer()
 
 // QueryIndexeddbObjectStore queries the indexeddb at the given location `dbLocation`,
 // returning all objects in the given database that live in the given object store.
-func QueryIndexeddbObjectStore(dbLocation string, dbName string, objectStoreName string) ([]map[string]any, error) {
+func QueryIndexeddbObjectStore(dbLocation string, dbName string, objectStoreName string) ([]map[string][]byte, error) {
 	// If Chrome is open, we won't be able to open the db. So, copy it to a temporary location first.
 	tempDbCopyLocation, err := copyIndexeddb(dbLocation)
 	if err != nil {
@@ -91,21 +91,11 @@ func QueryIndexeddbObjectStore(dbLocation string, dbName string, objectStoreName
 		return nil, errors.New("unable to get object store ID")
 	}
 
-	// Get the key path for all objects in this store
-	keyPathRaw, err := db.Get(objectStoreKeyPathKey(databaseId, objectStoreId), nil)
-	if err != nil {
-		return nil, fmt.Errorf("getting key path: %w", err)
-	}
-	keyPath, err := decodeIDBKeyPath(keyPathRaw)
-	if err != nil {
-		return nil, fmt.Errorf("decoding key path: %w", err)
-	}
-
 	// Get the key prefix for all objects in this store.
 	keyPrefix := objectDataKeyPrefix(databaseId, objectStoreId)
 
-	// Now, we can read all records, parsing only the ones with our matching key prefix.
-	objs := make([]map[string]any, 0)
+	// Now, we can read all records, keeping only the ones with our matching key prefix.
+	objs := make([]map[string][]byte, 0)
 	iter := db.NewIterator(nil, nil)
 	for iter.Next() {
 		key := iter.Key()
@@ -113,20 +103,9 @@ func QueryIndexeddbObjectStore(dbLocation string, dbName string, objectStoreName
 			continue
 		}
 
-		keyVal, err := decodeIDBKey(key, keyPrefix)
-		if err != nil {
-			return objs, fmt.Errorf("decoding key: %w", err)
-		}
-
-		obj, err := deserializeIndexeddbValue(iter.Value())
-		if err != nil {
-			return objs, fmt.Errorf("decoding object: %w", err)
-		}
-
-		// Set the key path in the object -- add idb_ prefix to avoid collisions
-		obj[fmt.Sprintf("idb_%s", string(keyPath))] = keyVal
-
-		objs = append(objs, obj)
+		objs = append(objs, map[string][]byte{
+			"data": append([]byte(nil), iter.Value()...), // copy: the iterator may reuse this buffer on Next()
+		})
 	}
 	iter.Release()
 	if err := iter.Error(); err != nil {
diff --git a/ee/indexeddb/keys.go b/ee/indexeddb/keys.go
index a38a4f13c..0685e12f7 100644
--- a/ee/indexeddb/keys.go
+++ b/ee/indexeddb/keys.go
@@ -1,9 +1,7 @@
 package indexeddb
 
 import (
-	"bytes"
 	"encoding/binary"
-	"errors"
 	"fmt"
 	"path/filepath"
 	"strings"
@@ -22,9 +20,6 @@ const (
 	// Index IDs
 	objectStoreDataIndexId = 0x01 // 1
 
-	// Key types
-	keyTypeNumber = 0x03 // 3
-
 	// When parsing the origin from the database location, I have to add @1 at the end for the origin to be complete.
 	// I don't know why.
 	originSuffix = "@1"
@@ -77,24 +72,6 @@ func objectStoreNameKey(dbId uint64, objectStoreId uint64) []byte {
 	return append(storeNameKey, 0x00)
 }
 
-// objectStoreKeyPathKey constructs a query for the key path for the object store with the given ID.
-func objectStoreKeyPathKey(dbId uint64, objectStoreId uint64) []byte {
-	// Key takes the format <0, database id, 0, 0, 50, object store id, 1>.
-	storeNameKey := []byte{0x00}
-	storeNameKey = append(storeNameKey, uvarintToBytes(dbId)...)
-	storeNameKey = append(storeNameKey,
-		0x00,
-		0x00,
-		objectStoreMetaDataTypeByte,
-	)
-
-	// Add the object store ID
-	storeNameKey = append(storeNameKey, uvarintToBytes(objectStoreId)...)
-
-	// Add 0x01, indicating we're querying for the object store name
-	return append(storeNameKey, 0x01)
-}
-
 // objectDataKeyPrefix returns the key prefix shared by all objects stored in the given database
 // and in the given store.
 func objectDataKeyPrefix(dbId uint64, objectStoreId uint64) []byte {
@@ -110,45 +87,6 @@ func decodeUtf16BigEndianBytes(b []byte) ([]byte, error) {
 	return utf16BigEndianDecoder.Bytes(b)
 }
 
-// decodeIDBKeyPath extracts the key path from the given input. IDBKeyPaths have multiple types.
-// This function only supports string types, which take the format 0x00, 0x00, 0x01, StringWithLength.
-func decodeIDBKeyPath(b []byte) ([]byte, error) {
-	if !bytes.HasPrefix(b, []byte{0x00, 0x00, 0x01}) {
-		return nil, errors.New("unsupported IDBKeyPath type")
-	}
-
-	if len(b) < 4 {
-		return nil, fmt.Errorf("IDBKeyPath with length %d is too short to be a string", len(b))
-	}
-
-	// Read the StringWithLength's length, but discard it -- we can just decode the remainder
-	// of the slice.
-	prefixLen := 3
-	_, bytesRead := binary.Uvarint(b[prefixLen:])
-
-	return decodeUtf16BigEndianBytes(b[prefixLen+bytesRead:])
-}
-
-// decodeIDBKey extracts the object key from the given data. It currently only supports
-// numerical keys.
-func decodeIDBKey(key []byte, keyPrefix []byte) (any, error) {
-	key = bytes.TrimPrefix(key, keyPrefix)
-
-	// Next byte is key type.
-	switch key[0] {
-	case keyTypeNumber:
-		// IEEE754 64-bit (double), in host endianness
-		buf := bytes.NewReader(key[1:])
-		var keyData float64
-		if err := binary.Read(buf, binary.NativeEndian, &keyData); err != nil {
-			return nil, fmt.Errorf("reading double: %w", err)
-		}
-		return keyData, nil
-	default:
-		return nil, fmt.Errorf("unimplemented key type 0x%02x", key[0])
-	}
-}
-
 // stringWithLength constructs an appropriate representation of `s`.
 // See: https://github.com/chromium/chromium/blob/main/content/browser/indexed_db/docs/leveldb_coding_scheme.md#types
 func stringWithLength(s string) ([]byte, error) {
diff --git a/ee/indexeddb/keys_test.go b/ee/indexeddb/keys_test.go
index cc81fb105..fedbb576a 100644
--- a/ee/indexeddb/keys_test.go
+++ b/ee/indexeddb/keys_test.go
@@ -53,26 +53,6 @@ func Test_objectStoreNameKey(t *testing.T) {
 	require.Equal(t, expectedKey, objectStoreNameKey(dbId, objectStoreId), "object store name key format is incorrect")
 }
 
-func Test_objectStoreKeyPathKey(t *testing.T) {
-	t.Parallel()
-
-	var dbId uint64 = 2
-	var objectStoreId uint64 = 3
-
-	// Key takes the format <0, database id, 0, 0, 50, object store id, 1>.
-	expectedKey := []byte{
-		0x00,
-		0x02, // DB ID
-		0x00,
-		0x00,
-		objectStoreMetaDataTypeByte,
-		0x03, // object store ID
-		0x01,
-	}
-
-	require.Equal(t, expectedKey, objectStoreKeyPathKey(dbId, objectStoreId), "object store key path key format is incorrect")
-}
-
 func Test_objectDataKeyPrefix(t *testing.T) {
 	t.Parallel()
 
@@ -102,43 +82,6 @@ func Test_decodeUtf16BigEndianBytes(t *testing.T) {
 	require.Equal(t, originalBytes, actualBytes, "decoded bytes do not match")
 }
 
-func Test_decodeIDBKeyPath(t *testing.T) {
-	t.Parallel()
-
-	// Prepare key path
-	keyPath := []byte("id")
-	utf16BigEndianEncoder := unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder()
-	utf16KeyPathBytes, err := utf16BigEndianEncoder.Bytes(keyPath)
-	require.NoError(t, err, "encoding bytes")
-
-	testKeyPath := []byte{
-		0x00, 0x00, 0x01, // prefix
-		0x02, // length of "id"
-	}
-	testKeyPath = append(testKeyPath, utf16KeyPathBytes...)
-
-	resultKeyPath, err := decodeIDBKeyPath(testKeyPath)
-	require.NoError(t, err, "decoding key path")
-	require.Equal(t, keyPath, resultKeyPath)
-}
-
-func Test_decodeIDBKey(t *testing.T) {
-	t.Parallel()
-
-	// Prepare key value
-	var keyValue float64 = 4
-	keyValueBuf := bytes.NewBuffer(make([]byte, 0))
-	require.NoError(t, binary.Write(keyValueBuf, binary.NativeEndian, keyValue), "writing key value")
-
-	testKeyPrefix := []byte{0x00, 0x01, 0x01, 0x01}
-	testKey := append(testKeyPrefix, keyTypeNumber)
-	testKey = append(testKey, keyValueBuf.Bytes()...)
-
-	actualVal, err := decodeIDBKey(testKey, testKeyPrefix)
-	require.NoError(t, err, "decoding idb key")
-	require.Equal(t, keyValue, actualVal)
-}
-
 func Test_stringWithLength(t *testing.T) {
 	t.Parallel()
 
diff --git a/ee/indexeddb/values.go b/ee/indexeddb/values.go
index 9a75f7e96..d398b5395 100644
--- a/ee/indexeddb/values.go
+++ b/ee/indexeddb/values.go
@@ -2,9 +2,14 @@ package indexeddb
 
 import (
 	"bytes"
+	"context"
 	"encoding/binary"
+	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
+	"log/slog"
+	"strconv"
 
 	"golang.org/x/text/encoding/unicode"
 	"golang.org/x/text/transform"
@@ -36,27 +41,28 @@ const (
 	tokenNull              byte = 0x30
 )
 
-// deserializeIndexeddbValue takes the value in `src` and deserializes it
-// into a map.
-func deserializeIndexeddbValue(src []byte) (map[string]any, error) {
-	srcReader := bytes.NewReader(src)
-	obj := make(map[string]any)
+// deserializeChrome deserializes a JS object that has been stored by Chrome
+// in IndexedDB LevelDB-backed databases.
+func deserializeChrome(_ context.Context, _ *slog.Logger, row map[string][]byte) (map[string][]byte, error) {
+	data, ok := row["data"]
+	if !ok {
+		return nil, errors.New("row missing top-level data key")
+	}
+	srcReader := bytes.NewReader(data)
 
 	// First, read through the header to extract top-level data
 	version, err := readHeader(srcReader)
 	if err != nil {
-		return obj, fmt.Errorf("reading header: %w", err)
+		return nil, fmt.Errorf("reading header: %w", err)
 	}
-	obj["version"] = version
 
 	// Now, parse the actual data in this row
 	objData, err := deserializeObject(srcReader)
-	obj["data"] = objData
 	if err != nil {
-		return obj, fmt.Errorf("decoding obj: %w", err)
+		return nil, fmt.Errorf("decoding obj for indexeddb version %d: %w", version, err)
 	}
 
-	return obj, nil
+	return objData, nil
 }
 
 // readHeader reads through the header bytes at the start of `srcReader`.
@@ -90,8 +96,8 @@ func readHeader(srcReader io.ByteReader) (uint64, error) {
 }
 
 // deserializeObject deserializes the next object from the srcReader.
-func deserializeObject(srcReader io.ByteReader) (map[string]any, error) {
-	obj := make(map[string]any)
+func deserializeObject(srcReader io.ByteReader) (map[string][]byte, error) {
+	obj := make(map[string][]byte)
 
 	for {
 		// Parse the next property in this object.
@@ -142,7 +148,7 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) {
 		switch nextByte {
 		case tokenObjectBegin:
 			// Object nested inside this object
-			nestedObj, err := deserializeObject(srcReader)
+			nestedObj, err := deserializeNestedObject(srcReader)
 			if err != nil {
 				return obj, fmt.Errorf("decoding nested object: %w", err)
 			}
@@ -162,9 +168,9 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) {
 			}
 			obj[currentPropertyName] = strVal
 		case tokenTrue:
-			obj[currentPropertyName] = true
+			obj[currentPropertyName] = []byte("true")
 		case tokenFalse:
-			obj[currentPropertyName] = false
+			obj[currentPropertyName] = []byte("false")
 		case tokenUndefined, tokenNull:
 			obj[currentPropertyName] = nil
 		case tokenInt32:
@@ -172,7 +178,7 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) {
 			if err != nil {
 				return obj, fmt.Errorf("decoding int32: %w", err)
 			}
-			obj[currentPropertyName] = propertyInt
+			obj[currentPropertyName] = []byte(strconv.Itoa(int(propertyInt)))
 		case tokenBeginSparseArray:
 			// This is the only type of array I've encountered so far, so it's the only one implemented.
 			arr, err := deserializeSparseArray(srcReader)
@@ -190,7 +196,7 @@ func deserializeObject(srcReader io.ByteReader) (map[string]any, error) {
 
 // deserializeSparseArray deserializes the next array from the srcReader.
 // Currently, it only handles an array of objects.
-func deserializeSparseArray(srcReader io.ByteReader) ([]any, error) {
+func deserializeSparseArray(srcReader io.ByteReader) ([]byte, error) {
 	// After an array start, the next byte will be the length of the array.
 	arrayLen, err := binary.ReadUvarint(srcReader)
 	if err != nil {
@@ -199,10 +205,11 @@ func deserializeSparseArray(srcReader io.ByteReader) ([]any, error) {
 
 	// Read from srcReader until we've filled the array to the correct size.
 	arrItems := make([]any, arrayLen)
+	reachedEndOfArray := false
 	for {
 		idxByte, err := srcReader.ReadByte()
 		if err != nil {
-			return arrItems, fmt.Errorf("reading next byte: %w", err)
+			return nil, fmt.Errorf("reading next byte: %w", err)
 		}
 
 		// First, get the index for this item in the array
@@ -211,13 +218,13 @@ func deserializeSparseArray(srcReader io.ByteReader) ([]any, error) {
 		case tokenInt32:
 			arrIdx, err := binary.ReadVarint(srcReader)
 			if err != nil {
-				return arrItems, fmt.Errorf("reading varint: %w", err)
+				return nil, fmt.Errorf("reading varint: %w", err)
 			}
 			i = int(arrIdx)
 		case tokenUint32:
 			arrIdx, err := binary.ReadUvarint(srcReader)
 			if err != nil {
-				return arrItems, fmt.Errorf("reading uvarint: %w", err)
+				return nil, fmt.Errorf("reading uvarint: %w", err)
 			}
 			i = int(arrIdx)
 		case tokenEndSparseArray:
@@ -226,64 +233,95 @@ func deserializeSparseArray(srcReader io.ByteReader) ([]any, error) {
 			_, _ = srcReader.ReadByte()
 			_, _ = srcReader.ReadByte()
 			// The array has ended -- return.
-			return arrItems, nil
+			reachedEndOfArray = true
 		case 0x01, 0x03:
 			// This occurs immediately before tokenEndSparseArray -- not sure why. We can ignore it.
 			continue
 		default:
-			return arrItems, fmt.Errorf("unexpected array index type: 0x%02x / `%s`", idxByte, string(idxByte))
+			return nil, fmt.Errorf("unexpected array index type: 0x%02x / `%s`", idxByte, string(idxByte))
+		}
+
+		if reachedEndOfArray {
+			break
 		}
 
 		// Now read item at index
 		nextByte, err := srcReader.ReadByte()
 		if err != nil {
-			return arrItems, fmt.Errorf("reading next byte: %w", err)
+			return nil, fmt.Errorf("reading next byte: %w", err)
 		}
 		switch nextByte {
 		case tokenObjectBegin:
-			obj, err := deserializeObject(srcReader)
+			obj, err := deserializeNestedObject(srcReader)
 			if err != nil {
-				return arrItems, fmt.Errorf("decoding object in array: %w", err)
+				return nil, fmt.Errorf("decoding object in array: %w", err)
 			}
-			arrItems[i] = obj
+			arrItems[i] = string(obj) // cast to string so it's readable when marshalled again below
 		default:
-			return arrItems, fmt.Errorf("unimplemented array type 0x%02x / `%s`", nextByte, string(nextByte))
+			return nil, fmt.Errorf("unimplemented array item type 0x%02x / `%s`", nextByte, string(nextByte))
 		}
 	}
+
+	arrBytes, err := json.Marshal(arrItems)
+	if err != nil {
+		return nil, fmt.Errorf("marshalling array: %w", err)
+	}
+
+	return arrBytes, nil
+}
+
+// deserializeNestedObject deserializes the next object from srcReader and returns it JSON-encoded.
+func deserializeNestedObject(srcReader io.ByteReader) ([]byte, error) {
+	nestedObj, err := deserializeObject(srcReader)
+	if err != nil {
+		return nil, fmt.Errorf("deserializing nested object: %w", err)
+	}
+
+	// Make nested object values readable -- cast []byte to string so they marshal as text, not base64
+	readableNestedObj := make(map[string]string, len(nestedObj))
+	for k, v := range nestedObj {
+		readableNestedObj[k] = string(v)
+	}
+
+	resultObj, err := json.Marshal(readableNestedObj)
+	if err != nil {
+		return nil, fmt.Errorf("marshalling nested object: %w", err)
+	}
+	return resultObj, nil
+}
 
 // deserializeAsciiStr handles the upcoming ascii string in srcReader.
-func deserializeAsciiStr(srcReader io.ByteReader) (string, error) {
+func deserializeAsciiStr(srcReader io.ByteReader) ([]byte, error) {
 	strLen, err := binary.ReadUvarint(srcReader)
 	if err != nil {
-		return "", fmt.Errorf("reading uvarint: %w", err)
+		return nil, fmt.Errorf("reading uvarint: %w", err)
 	}
 
 	strBytes := make([]byte, strLen)
 	for i := 0; i < int(strLen); i += 1 {
 		nextByte, err := srcReader.ReadByte()
 		if err != nil {
-			return "", fmt.Errorf("reading next byte in string: %w", err)
+			return nil, fmt.Errorf("reading next byte in string: %w", err)
 		}
 
 		strBytes[i] = nextByte
 	}
 
-	return string(strBytes), nil
+	return strBytes, nil
 }
 
 // deserializeUtf16Str handles the upcoming utf-16 string in srcReader.
-func deserializeUtf16Str(srcReader io.ByteReader) (string, error) {
+func deserializeUtf16Str(srcReader io.ByteReader) ([]byte, error) {
 	strLen, err := binary.ReadUvarint(srcReader)
 	if err != nil {
-		return "", fmt.Errorf("reading uvarint: %w", err)
+		return nil, fmt.Errorf("reading uvarint: %w", err)
 	}
 
 	strBytes := make([]byte, strLen)
 	for i := 0; i < int(strLen); i += 1 {
 		nextByte, err := srcReader.ReadByte()
 		if err != nil {
-			return "", fmt.Errorf("reading next byte in string: %w", err)
+			return nil, fmt.Errorf("reading next byte in string: %w", err)
 		}
 
 		strBytes[i] = nextByte
@@ -292,8 +330,8 @@ func deserializeUtf16Str(srcReader io.ByteReader) (string, error) {
 	utf16Reader := transform.NewReader(bytes.NewReader(strBytes), unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder())
 	decoded, err := io.ReadAll(utf16Reader)
 	if err != nil {
-		return "", fmt.Errorf("reading string as utf-16: %w", err)
+		return nil, fmt.Errorf("reading string as utf-16: %w", err)
 	}
 
-	return string(decoded), nil
+	return decoded, nil
 }
diff --git a/ee/indexeddb/values_test.go b/ee/indexeddb/values_test.go
index b406605c4..bedc64b4a 100644
--- a/ee/indexeddb/values_test.go
+++ b/ee/indexeddb/values_test.go
@@ -1,8 +1,10 @@
 package indexeddb
 
 import (
+	"context"
 	"testing"
 
+	"github.com/kolide/launcher/pkg/log/multislogger"
 	"github.com/stretchr/testify/require"
 )
 
@@ -29,12 +31,9 @@ func Test_deserializeIndexeddbValue(t *testing.T) {
 		0x01, // properties_written
 	}
 
-	obj, err := deserializeIndexeddbValue(testBytes)
+	obj, err := deserializeChrome(context.TODO(), multislogger.NewNopLogger(), map[string][]byte{"data": testBytes})
 	require.NoError(t, err, "deserializing object")
 
-	// Confirm we got a version and data top-level property
-	require.Contains(t, obj, "version", "expected version property")
-	require.Contains(t, obj, "data", "expected data property")
 	// Confirm we got an id property for the object
-	require.Contains(t, obj["data"], "id", "expected id property")
+	require.Contains(t, obj, "id", "expected id property")
 }