From 25729b35b91a2d201de36b6d476a3369d965f830 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 11 Jul 2024 20:02:53 -0700 Subject: [PATCH 01/12] Implement JSON_TYPE and optimized comparisons for IndexedJsonDocument. --- go/store/prolly/tree/json_indexed_document.go | 182 ++++++++++++++++++ .../prolly/tree/json_indexed_document_test.go | 160 +++++++++++++++ 2 files changed, 342 insertions(+) diff --git a/go/store/prolly/tree/json_indexed_document.go b/go/store/prolly/tree/json_indexed_document.go index 0c8c3eb38c2..2196157842e 100644 --- a/go/store/prolly/tree/json_indexed_document.go +++ b/go/store/prolly/tree/json_indexed_document.go @@ -548,3 +548,185 @@ func (i IndexedJsonDocument) GetBytes() (bytes []byte, err error) { // TODO: Add context parameter to JSONBytes.GetBytes return getBytesFromIndexedJsonMap(i.ctx, i.m) } + +func (i IndexedJsonDocument) getFirstCharacter(ctx context.Context) (byte, error) { + stopIterationError := fmt.Errorf("stop") + var firstCharacter byte + err := i.m.WalkNodes(ctx, func(ctx context.Context, nd Node) error { + if nd.IsLeaf() { + firstCharacter = nd.GetValue(0)[0] + return stopIterationError + } + return nil + }) + if err != stopIterationError { + return 0, err + } + return firstCharacter, nil +} + +type jsonTypeCategory int + +const ( + jsonTypeNull jsonTypeCategory = iota + jsonTypeNumber + jsonTypeString + JsonTypeObject + jsonTypeArray + jsonTypeBoolean +) + +func getTypeCategoryOfValue(val interface{}) (jsonTypeCategory, error) { + if val == nil { + return jsonTypeNull, nil + } + switch val.(type) { + case map[string]interface{}: + return JsonTypeObject, nil + case []interface{}: + return jsonTypeArray, nil + case bool: + return jsonTypeBoolean, nil + case string: + return jsonTypeString, nil + case decimal.Decimal, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64: + return jsonTypeNumber, nil + } + return 0, fmt.Errorf("expected json value, got %v", val) +} + +// getTypeCategoryFromFirstCharacter returns the type of a JSON object by inspecting its first byte. +func getTypeCategoryFromFirstCharacter(c byte) jsonTypeCategory { + switch c { + case '{': + return JsonTypeObject + case '[': + return jsonTypeArray + case 'n': + return jsonTypeNull + case 't', 'f': + return jsonTypeBoolean + case '"': + return jsonTypeString + default: + return jsonTypeNumber + } +} + +func (i IndexedJsonDocument) getTypeCategory() (jsonTypeCategory, error) { + firstCharacter, err := i.getFirstCharacter(i.ctx) + if err != nil { + return 0, err + } + return getTypeCategoryFromFirstCharacter(firstCharacter), nil +} + +func GetTypeCategory(wrapper sql.JSONWrapper) (jsonTypeCategory, error) { + switch doc := wrapper.(type) { + case IndexedJsonDocument: + return doc.getTypeCategory() + case *types.LazyJSONDocument: + return getTypeCategoryFromFirstCharacter(doc.Bytes[0]), nil + default: + val, err := doc.ToInterface() + if err != nil { + return 0, err + } + return getTypeCategoryOfValue(val) + } +} + +// Type implements types.ComparableJson +func (i IndexedJsonDocument) Type(ctx context.Context) (string, error) { + firstCharacter, err := i.getFirstCharacter(ctx) + if err != nil { + return "", err + } + + switch firstCharacter { + case '{': + return "OBJECT", nil + case '[': + return "ARRAY", nil + } + // At this point the value must be a scalar, so it's okay to just load the whole thing. + val, err := i.ToInterface() + if err != nil { + return "", err + } + return sqljson.TypeOfJsonValue(val), nil +} + +// Compare implements types.ComparableJson +func (i IndexedJsonDocument) Compare(other interface{}) (int, error) { + thisTypeCategory, err := i.getTypeCategory() + if err != nil { + return 0, err + } + + otherIndexedDocument, ok := other.(IndexedJsonDocument) + if !ok { + val, err := i.ToInterface() + if err != nil { + return 0, err + } + otherVal := other + if otherWrapper, ok := other.(sql.JSONWrapper); ok { + otherVal, err = otherWrapper.ToInterface() + if err != nil { + return 0, err + } + } + return types.CompareJSON(val, otherVal) + } + + otherTypeCategory, err := otherIndexedDocument.getTypeCategory() + if err != nil { + return 0, err + } + if thisTypeCategory < otherTypeCategory { + return -1, nil + } + if thisTypeCategory > otherTypeCategory { + return 1, nil + } + switch thisTypeCategory { + case jsonTypeNull: + return 0, nil + case jsonTypeArray, JsonTypeObject: + // To compare two values that are both arrays or both objects, we must locate the first location + // where they differ. + + jsonDiffer, err := NewIndexedJsonDiffer(i.ctx, i, otherIndexedDocument) + if err != nil { + return 0, err + } + firstDiff, err := jsonDiffer.Next(i.ctx) + if err == io.EOF { + // The two documents have no differences. + return 0, nil + } + if err != nil { + return 0, err + } + switch firstDiff.Type { + case AddedDiff: + // A key is present in other but not this. + return -1, nil + case RemovedDiff: + return 1, nil + case ModifiedDiff: + // Since both modified values have already been loaded into memory, + // We can just compare them. + return types.JSON.Compare(firstDiff.From, firstDiff.To) + default: + panic("Impossible diff type") + } + default: + val, err := i.ToInterface() + if err != nil { + return 0, err + } + return types.CompareJSON(val, other) + } +} diff --git a/go/store/prolly/tree/json_indexed_document_test.go b/go/store/prolly/tree/json_indexed_document_test.go index 5a9b7e0e0a0..da1d51da304 100644 --- a/go/store/prolly/tree/json_indexed_document_test.go +++ b/go/store/prolly/tree/json_indexed_document_test.go @@ -27,6 +27,7 @@ import ( "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/expression/function/json/jsontests" "github.com/dolthub/go-mysql-server/sql/types" + typetests "github.com/dolthub/go-mysql-server/sql/types/jsontests" "github.com/stretchr/testify/require" ) @@ -301,3 +302,162 @@ func TestIndexedJsonDocument_ContainsPath(t *testing.T) { testCases := jsontests.JsonContainsPathTestCases(t, convertToIndexedJsonDocument) jsontests.RunJsonTests(t, testCases) } + +func TestJsonCompare(t *testing.T) { + ctx := sql.NewEmptyContext() + ns := NewTestNodeStore() + convertToIndexedJsonDocument := func(t *testing.T, left, right interface{}) (interface{}, interface{}) { + if left != nil { + left = newIndexedJsonDocumentFromValue(t, ctx, ns, left) + } + if right != nil { + right = newIndexedJsonDocumentFromValue(t, ctx, ns, right) + } + return left, right + } + convertOnlyLeftToIndexedJsonDocument := func(t *testing.T, left, right interface{}) (interface{}, interface{}) { + if left != nil { + left = newIndexedJsonDocumentFromValue(t, ctx, ns, left) + } + if right != nil { + rightJSON, inRange, err := types.JSON.Convert(right) + require.NoError(t, err) + require.True(t, bool(inRange)) + rightInterface, err := rightJSON.(sql.JSONWrapper).ToInterface() + require.NoError(t, err) + right = types.JSONDocument{Val: rightInterface} + } + return left, right + } + convertOnlyRightToIndexedJsonDocument := func(t *testing.T, left, right interface{}) (interface{}, interface{}) { + right, left = convertOnlyLeftToIndexedJsonDocument(t, right, left) + return left, right + } + + t.Run("small documents", func(t *testing.T) { + tests := append(typetests.JsonCompareTests, typetests.JsonCompareNullsTests...) + t.Run("compare two indexed json documents", func(t *testing.T) { + typetests.RunJsonCompareTests(t, tests, convertToIndexedJsonDocument) + }) + t.Run("compare indexed json document with non-indexed", func(t *testing.T) { + typetests.RunJsonCompareTests(t, tests, convertOnlyLeftToIndexedJsonDocument) + }) + t.Run("compare non-indexed json document with indexed", func(t *testing.T) { + typetests.RunJsonCompareTests(t, tests, convertOnlyRightToIndexedJsonDocument) + }) + }) + + noError := func(j types.MutableJSON, changed bool, err error) types.MutableJSON { + require.NoError(t, err) + require.True(t, changed) + return j + } + + largeArray := createLargeDocumentForTesting(t, ctx, ns) + largeObjectWrapper, err := largeArray.Lookup(ctx, "$[7]") + largeObject := newIndexedJsonDocumentFromValue(t, ctx, ns, largeObjectWrapper) + require.NoError(t, err) + largeDocTests := []typetests.JsonCompareTest{ + { + Name: "large object < boolean", + Left: largeObject, + Right: true, + Cmp: -1, + }, + { + Name: "large object > string", + Left: largeObject, + Right: `"test"`, + Cmp: 1, + }, + { + Name: "large object > number", + Left: largeObject, + Right: 1, + Cmp: 1, + }, + { + Name: "large object > null", + Left: largeObject, + Right: `null`, + Cmp: 1, + }, + { + Name: "inserting into beginning of object makes it greater", + Left: largeObject, + Right: noError(largeObject.Insert(ctx, "$.a", types.MustJSON("1"))), + Cmp: -1, + }, + { + Name: "inserting into end of object makes it greater", + Left: largeObject, + Right: noError(largeObject.Insert(ctx, "$.z", types.MustJSON("1"))), + Cmp: -1, + }, + { + Name: "large array < boolean", + Left: largeArray, + Right: true, + Cmp: -1, + }, + { + Name: "large array > string", + Left: largeArray, + Right: `"test"`, + Cmp: 1, + }, + { + Name: "large array > number", + Left: largeArray, + Right: 1, + Cmp: 1, + }, + { + Name: "large array > null", + Left: largeArray, + Right: `null`, + Cmp: 1, + }, + { + Name: "large array > null", + Left: largeArray, + Right: `null`, + Cmp: 1, + }, + { + Name: "inserting into end of array makes it greater", + Left: largeArray, + Right: noError(largeArray.ArrayAppend("$", types.MustJSON("1"))), + Cmp: -1, + }, + { + Name: "inserting high value into beginning of array makes it greater", + Left: largeArray, + Right: noError(largeArray.ArrayInsert("$[0]", types.MustJSON("true"))), + Cmp: -1, + }, + { + Name: "inserting low value into beginning of array makes it less", + Left: largeArray, + Right: noError(largeArray.ArrayInsert("$[0]", types.MustJSON("1"))), + Cmp: 1, + }, + { + Name: "large array > large object", + Left: largeArray, + Right: largeObject, + Cmp: 1, + }, + } + t.Run("large documents", func(t *testing.T) { + t.Run("compare two indexed json documents", func(t *testing.T) { + typetests.RunJsonCompareTests(t, largeDocTests, convertToIndexedJsonDocument) + }) + t.Run("compare indexed json document with non-indexed", func(t *testing.T) { + typetests.RunJsonCompareTests(t, largeDocTests, convertOnlyLeftToIndexedJsonDocument) + }) + t.Run("compare non-indexed json document with indexed", func(t *testing.T) { + typetests.RunJsonCompareTests(t, largeDocTests, convertOnlyRightToIndexedJsonDocument) + }) + }) +} From e28d4246160b9d335c877baeb8ef97a599f92a74 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 16 Jul 2024 22:30:19 -0700 Subject: [PATCH 02/12] Refactor json_diff.go to use new format for JSON path keys. --- go/store/prolly/tree/json_diff.go | 37 ++++--- go/store/prolly/tree/json_diff_test.go | 133 +++++++++++++++++++++---- 2 files changed, 137 insertions(+), 33 deletions(-) diff --git a/go/store/prolly/tree/json_diff.go b/go/store/prolly/tree/json_diff.go index 61f716d3be8..f59bd6bcab3 100644 --- a/go/store/prolly/tree/json_diff.go +++ b/go/store/prolly/tree/json_diff.go @@ -16,17 +16,17 @@ package tree import ( "bytes" - "fmt" + "context" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/types" "io" "reflect" "strings" - - "github.com/dolthub/go-mysql-server/sql/types" ) type JsonDiff struct { - Key string - From, To *types.JSONDocument + Key []byte + From, To sql.JSONWrapper Type DiffType } @@ -37,31 +37,42 @@ type jsonKeyPair struct { // JsonDiffer computes the diff between two JSON objects. type JsonDiffer struct { - root string + root []byte currentFromPair, currentToPair *jsonKeyPair from, to types.JSONIter subDiffer *JsonDiffer } -func NewJsonDiffer(root string, from, to types.JsonObject) JsonDiffer { +func NewJsonDiffer(from, to types.JsonObject) JsonDiffer { + fromIter := types.NewJSONIter(from) + toIter := types.NewJSONIter(to) + return JsonDiffer{ + root: []byte{byte(startOfValue)}, + from: fromIter, + to: toIter, + } +} + +func (differ *JsonDiffer) newSubDiffer(key string, from, to types.JsonObject) JsonDiffer { fromIter := types.NewJSONIter(from) toIter := types.NewJSONIter(to) + newRoot := differ.appendKey(key) return JsonDiffer{ - root: root, + root: newRoot, from: fromIter, to: toIter, } } -func (differ *JsonDiffer) appendKey(key string) string { +func (differ *JsonDiffer) appendKey(key string) []byte { escapedKey := strings.Replace(key, "\"", "\\\"", -1) - return fmt.Sprintf("%s.\"%s\"", differ.root, escapedKey) + return append(append(differ.root, beginObjectKey), []byte(escapedKey)...) } -func (differ *JsonDiffer) Next() (diff JsonDiff, err error) { +func (differ *JsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error) { for { if differ.subDiffer != nil { - diff, err := differ.subDiffer.Next() + diff, err := differ.subDiffer.Next(ctx) if err == io.EOF { differ.subDiffer = nil differ.currentFromPair = nil @@ -116,7 +127,7 @@ func (differ *JsonDiffer) Next() (diff JsonDiff, err error) { switch from := fromValue.(type) { case types.JsonObject: // Recursively compare the objects to generate diffs. - subDiffer := NewJsonDiffer(differ.appendKey(key), from, toValue.(types.JsonObject)) + subDiffer := differ.newSubDiffer(key, from, toValue.(types.JsonObject)) differ.subDiffer = &subDiffer continue case types.JsonArray: diff --git a/go/store/prolly/tree/json_diff_test.go b/go/store/prolly/tree/json_diff_test.go index 4fa4528825a..b59f7a87e2a 100644 --- a/go/store/prolly/tree/json_diff_test.go +++ b/go/store/prolly/tree/json_diff_test.go @@ -15,6 +15,9 @@ package tree import ( + "bytes" + "context" + "fmt" "io" "testing" @@ -29,6 +32,15 @@ type jsonDiffTest struct { expectedDiffs []JsonDiff } +func makeJsonPathKey(parts ...string) []byte { + result := []byte{byte(startOfValue)} + for _, part := range parts { + result = append(result, beginObjectKey) + result = append(result, []byte(part)...) + } + return result +} + var simpleJsonDiffTests = []jsonDiffTest{ { name: "empty object, no modifications", @@ -42,7 +54,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"a": 1}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\"", + Key: makeJsonPathKey(`a`), From: nil, To: &types.JSONDocument{Val: 1}, Type: AddedDiff, @@ -55,7 +67,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\"", + Key: makeJsonPathKey(`a`), From: &types.JSONDocument{Val: 1}, To: nil, Type: RemovedDiff, @@ -68,7 +80,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"a": 2}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\"", + Key: makeJsonPathKey(`a`), From: &types.JSONDocument{Val: 1}, To: &types.JSONDocument{Val: 2}, Type: ModifiedDiff, @@ -81,7 +93,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"a": types.JsonObject{"b": 1}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), To: &types.JSONDocument{Val: 1}, Type: AddedDiff, }, @@ -93,7 +105,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"a": types.JsonObject{}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), From: &types.JSONDocument{Val: 1}, Type: RemovedDiff, }, @@ -105,7 +117,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"a": types.JsonObject{"b": 2}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), From: &types.JSONDocument{Val: 1}, To: &types.JSONDocument{Val: 2}, Type: ModifiedDiff, @@ -118,7 +130,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), To: &types.JSONDocument{Val: types.JsonObject{"c": 3}}, Type: AddedDiff, }, @@ -130,7 +142,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), From: &types.JSONDocument{Val: 2}, To: &types.JSONDocument{Val: types.JsonObject{"c": 3}}, Type: ModifiedDiff, @@ -143,20 +155,72 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"a": 1}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\"", + Key: makeJsonPathKey(`a`), From: &types.JSONDocument{Val: types.JsonObject{"b": 2}}, To: &types.JSONDocument{Val: 1}, Type: ModifiedDiff, }, }, }, + { + name: "modify to array", + from: types.JsonObject{"a": types.JsonObject{"b": "foo"}}, + to: types.JsonObject{"a": types.JsonObject{"b": types.JsonArray{1, 2}}}, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`a`, `b`), + From: &types.JSONDocument{Val: "foo"}, + To: &types.JSONDocument{Val: types.JsonArray{1, 2}}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "modify from array", + from: types.JsonObject{"a": types.JsonArray{1, 2}}, + to: types.JsonObject{"a": 1}, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`a`), + From: &types.JSONDocument{Val: types.JsonArray{1, 2}}, + To: &types.JSONDocument{Val: 1}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "array to object", + from: types.JsonObject{"a": types.JsonArray{1, 2}}, + to: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`a`), + From: &types.JSONDocument{Val: types.JsonArray{1, 2}}, + To: &types.JSONDocument{Val: types.JsonObject{"b": types.JsonObject{"c": 3}}}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "object to array", + from: types.JsonObject{"a": types.JsonObject{"b": 2}}, + to: types.JsonObject{"a": types.JsonArray{1, 2}}, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`a`), + From: &types.JSONDocument{Val: types.JsonObject{"b": 2}}, + To: &types.JSONDocument{Val: types.JsonArray{1, 2}}, + Type: ModifiedDiff, + }, + }, + }, { name: "remove object", from: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, to: types.JsonObject{"a": types.JsonObject{}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), From: &types.JSONDocument{Val: types.JsonObject{"c": 3}}, Type: RemovedDiff, }, @@ -168,13 +232,13 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"b": "\"2\""}, expectedDiffs: []JsonDiff{ { - Key: "$.\"\\\"a\\\"\"", + Key: makeJsonPathKey(`"a"`), From: &types.JSONDocument{Val: "1"}, To: nil, Type: RemovedDiff, }, { - Key: "$.\"b\"", + Key: makeJsonPathKey(`b`), From: nil, To: &types.JSONDocument{Val: "\"2\""}, Type: AddedDiff, @@ -187,28 +251,28 @@ var simpleJsonDiffTests = []jsonDiffTest{ to: types.JsonObject{"": 1, "a": types.JsonObject{}, "aa": 3, "bb": 5}, expectedDiffs: []JsonDiff{ { - Key: "$.\"\"", + Key: makeJsonPathKey(``), To: &types.JSONDocument{Val: 1}, Type: AddedDiff, }, { - Key: "$.\"a\".\"1\"", + Key: makeJsonPathKey(`a`, `1`), From: &types.JSONDocument{Val: "i"}, Type: RemovedDiff, }, { - Key: "$.\"aa\"", + Key: makeJsonPathKey(`aa`), From: &types.JSONDocument{Val: 2}, To: &types.JSONDocument{Val: 3}, Type: ModifiedDiff, }, { - Key: "$.\"b\"", + Key: makeJsonPathKey(`b`), From: &types.JSONDocument{Val: 6}, Type: RemovedDiff, }, { - Key: "$.\"bb\"", + Key: makeJsonPathKey(`bb`), To: &types.JSONDocument{Val: 5}, Type: AddedDiff, }, @@ -231,10 +295,15 @@ func runTestBatch(t *testing.T, tests []jsonDiffTest) { } func runTest(t *testing.T, test jsonDiffTest) { - differ := NewJsonDiffer("$", test.from, test.to) + ctx := context.Background() + ns := NewTestNodeStore() + from := newIndexedJsonDocumentFromValue(t, ctx, ns, test.from) + to := newIndexedJsonDocumentFromValue(t, ctx, ns, test.to) + differ, err := NewIndexedJsonDiffer(ctx, from, to) + require.NoError(t, err) var actualDiffs []JsonDiff for { - diff, err := differ.Next() + diff, err := differ.Next(ctx) if err == io.EOF { break } @@ -242,5 +311,29 @@ func runTest(t *testing.T, test jsonDiffTest) { actualDiffs = append(actualDiffs, diff) } - require.Equal(t, test.expectedDiffs, actualDiffs) + diffsEqual := func(expected, actual JsonDiff) bool { + if expected.Type != actual.Type { + return false + } + if !bytes.Equal(expected.Key, actual.Key) { + return false + } + cmp, err := types.CompareJSON(expected.From, actual.From) + require.NoError(t, err) + if cmp != 0 { + return false + } + cmp, err = types.CompareJSON(expected.To, actual.To) + require.NoError(t, err) + if cmp != 0 { + return false + } + return true + } + require.Equal(t, len(test.expectedDiffs), len(actualDiffs)) + for i, expected := range test.expectedDiffs { + actual := actualDiffs[i] + require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual)) + + } } From ce4bf84d167d52703dce7f5a997918393259afaf Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 17 Jul 2024 10:25:09 -0700 Subject: [PATCH 03/12] Add JSON diffing implementation for indexed JSON documents. --- .../doltcore/merge/merge_prolly_rows.go | 100 +++++-- .../doltcore/merge/schema_merge_test.go | 58 +++- .../doltcore/merge/three_way_json_differ.go | 73 ++--- go/store/prolly/tree/indexed_json_diff.go | 256 ++++++++++++++++++ go/store/prolly/tree/json_cursor.go | 23 +- go/store/prolly/tree/json_indexed_document.go | 51 ++-- go/store/prolly/tree/json_location.go | 17 ++ 7 files changed, 502 insertions(+), 76 deletions(-) create mode 100644 go/store/prolly/tree/indexed_json_diff.go diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go b/go/libraries/doltcore/merge/merge_prolly_rows.go index 374fef0e0ca..f86242ff2ac 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -26,7 +26,6 @@ import ( "github.com/dolthub/go-mysql-server/sql/expression" "github.com/dolthub/go-mysql-server/sql/transform" "github.com/dolthub/go-mysql-server/sql/types" - "golang.org/x/exp/maps" errorkinds "gopkg.in/src-d/go-errors.v1" "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" @@ -1978,20 +1977,20 @@ func (m *valueMerger) processColumn(ctx *sql.Context, i int, left, right, base v } func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAddr []byte, rightAddr []byte) (resultAddr []byte, conflict bool, err error) { - baseDoc, err := tree.NewJSONDoc(hash.New(baseAddr), m.ns).ToJSONDocument(ctx) + baseDoc, err := tree.NewJSONDoc(hash.New(baseAddr), m.ns).ToIndexedJSONDocument(ctx) if err != nil { return nil, true, err } - leftDoc, err := tree.NewJSONDoc(hash.New(leftAddr), m.ns).ToJSONDocument(ctx) + leftDoc, err := tree.NewJSONDoc(hash.New(leftAddr), m.ns).ToIndexedJSONDocument(ctx) if err != nil { return nil, true, err } - rightDoc, err := tree.NewJSONDoc(hash.New(rightAddr), m.ns).ToJSONDocument(ctx) + rightDoc, err := tree.NewJSONDoc(hash.New(rightAddr), m.ns).ToIndexedJSONDocument(ctx) if err != nil { return nil, true, err } - mergedDoc, conflict, err := mergeJSON(ctx, baseDoc, leftDoc, rightDoc) + mergedDoc, conflict, err := mergeJSON(ctx, m.ns, baseDoc, leftDoc, rightDoc) if err != nil { return nil, true, err } @@ -2015,19 +2014,32 @@ func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAd } -func mergeJSON(ctx context.Context, base types.JSONDocument, left types.JSONDocument, right types.JSONDocument) (resultDoc types.JSONDocument, conflict bool, err error) { +func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSONWrapper) (resultDoc sql.JSONWrapper, conflict bool, err error) { // First, deserialize each value into JSON. // We can only merge if the value at all three commits is a JSON object. - baseObject, baseIsObject := base.Val.(types.JsonObject) - leftObject, leftIsObject := left.Val.(types.JsonObject) - rightObject, rightIsObject := right.Val.(types.JsonObject) + baseTypeCategory, err := tree.GetTypeCategory(base) + if err != nil { + return nil, true, err + } + leftTypeCategory, err := tree.GetTypeCategory(left) + if err != nil { + return nil, true, err + } + rightTypeCategory, err := tree.GetTypeCategory(right) + if err != nil { + return nil, true, err + } + + baseIsObject := baseTypeCategory == tree.JsonTypeObject + leftIsObject := leftTypeCategory == tree.JsonTypeObject + rightIsObject := rightTypeCategory == tree.JsonTypeObject if !baseIsObject || !leftIsObject || !rightIsObject { // At least one of the commits does not have a JSON object. // If both left and right have the same value, use that value. // But if they differ, this is an unresolvable merge conflict. - cmp, err := left.Compare(right) + cmp, err := types.CompareJSON(left, right) if err != nil { return types.JSONDocument{}, true, err } @@ -2039,12 +2051,68 @@ func mergeJSON(ctx context.Context, base types.JSONDocument, left types.JSONDocu } } - mergedObject := maps.Clone(leftObject) - merged := types.JSONDocument{Val: mergedObject} + indexedBase, isBaseIndexed := base.(tree.IndexedJsonDocument) + indexedLeft, isLeftIndexed := left.(tree.IndexedJsonDocument) + indexedRight, isRightIndexed := right.(tree.IndexedJsonDocument) - threeWayDiffer := NewThreeWayJsonDiffer(baseObject, leftObject, rightObject) + // We only do three way merges on values read from tables right now, which are read in as tree.IndexedJsonDocument. + + var leftDiffer IJsonDiffer + if isBaseIndexed && isLeftIndexed { + leftDiffer, err = tree.NewIndexedJsonDiffer(ctx, indexedBase, indexedLeft) + if err != nil { + return nil, true, err + } + } else { + baseObject, err := base.ToInterface() + if err != nil { + return nil, true, err + } + leftObject, err := left.ToInterface() + if err != nil { + return nil, true, err + } + leftDifferValue := tree.NewJsonDiffer(baseObject.(types.JsonObject), leftObject.(types.JsonObject)) + leftDiffer = &leftDifferValue + } + + var rightDiffer IJsonDiffer + if isBaseIndexed && isRightIndexed { + rightDiffer, err = tree.NewIndexedJsonDiffer(ctx, indexedBase, indexedRight) + if err != nil { + return nil, true, err + } + } else { + baseObject, err := base.ToInterface() + if err != nil { + return nil, true, err + } + rightObject, err := right.ToInterface() + if err != nil { + return nil, true, err + } + rightDifferValue := tree.NewJsonDiffer(baseObject.(types.JsonObject), rightObject.(types.JsonObject)) + rightDiffer = &rightDifferValue + } + + threeWayDiffer := ThreeWayJsonDiffer{ + leftDiffer: leftDiffer, + rightDiffer: rightDiffer, + ns: ns, + } // Compute the merged object by applying diffs to the left object as needed. + // If the left object isn't an IndexedJsonDocument, we make one. + var ok bool + var merged tree.IndexedJsonDocument + if merged, ok = left.(tree.IndexedJsonDocument); !ok { + root, err := tree.SerializeJsonToAddr(ctx, ns, left) + if err != nil { + return types.JSONDocument{}, true, err + } + merged = tree.NewIndexedJsonDocument(ctx, root, ns) + } + for { threeWayDiff, err := threeWayDiffer.Next(ctx) if err == io.EOF { @@ -2052,13 +2120,13 @@ func mergeJSON(ctx context.Context, base types.JSONDocument, left types.JSONDocu } switch threeWayDiff.Op { - case tree.DiffOpRightAdd, tree.DiffOpConvergentAdd, tree.DiffOpRightModify, tree.DiffOpConvergentModify: - _, _, err := merged.Set(ctx, threeWayDiff.Key, threeWayDiff.Right) + case tree.DiffOpRightAdd, tree.DiffOpConvergentAdd, tree.DiffOpRightModify, tree.DiffOpConvergentModify, tree.DiffOpDivergentModifyResolved: + merged, _, err = merged.SetWithKey(ctx, threeWayDiff.Key, threeWayDiff.Right) if err != nil { return types.JSONDocument{}, true, err } case tree.DiffOpRightDelete, tree.DiffOpConvergentDelete: - _, _, err := merged.Remove(ctx, threeWayDiff.Key) + merged, _, err = merged.RemoveWithKey(ctx, threeWayDiff.Key) if err != nil { return types.JSONDocument{}, true, err } diff --git a/go/libraries/doltcore/merge/schema_merge_test.go b/go/libraries/doltcore/merge/schema_merge_test.go index 9f0acbd0680..bc79d977a22 100644 --- a/go/libraries/doltcore/merge/schema_merge_test.go +++ b/go/libraries/doltcore/merge/schema_merge_test.go @@ -1207,6 +1207,13 @@ var jsonMergeTests = []schemaMergeTest{ right: singleRow(1, 1, 2, `{ "key1": "value1", "key2": "value4" }`), merged: singleRow(1, 2, 2, `{ "key1": "value3", "key2": "value4" }`), }, + { + name: `parallel array modification`, + ancestor: singleRow(1, 1, 1, `{"a": [1, 2, 1], "b":0, "c":0}`), + left: singleRow(1, 2, 1, `{"a": [2, 1, 2], "b":1, "c":0}`), + right: singleRow(1, 1, 2, `{"a": [2, 1, 2], "b":0, "c":1}`), + merged: singleRow(1, 2, 2, `{"a": [2, 1, 2], "b":1, "c":1}`), + }, { name: `parallel deletion`, ancestor: singleRow(1, 1, 1, `{ "key1": "value1" }`), @@ -1337,7 +1344,7 @@ var jsonMergeTests = []schemaMergeTest{ // Which array element should go first? // We avoid making assumptions and flag this as a conflict. name: "object inside array conflict", - ancestor: singleRow(1, 1, 1, `{ "key1": [ { } ] }`), + ancestor: singleRow(1, 1, 1, `{ "key1": [ ] }`), left: singleRow(1, 2, 1, `{ "key1": [ { "key2": "value2" } ] }`), right: singleRow(1, 1, 2, `{ "key1": [ { "key3": "value3" } ] }`), dataConflict: true, @@ -1354,10 +1361,59 @@ var jsonMergeTests = []schemaMergeTest{ right: singleRow(1, 1, 2, `{ "key1": [ 1, 2 ] }`), dataConflict: true, }, + { + name: "false positive conflict", + ancestor: singleRow(1, 1, 1, `{ "a": 1, "aa":2 }`), + left: singleRow(1, 2, 1, `{ "aa":2 }`), + right: singleRow(1, 1, 2, `{ "a": 1, "aa": 3 }`), + merged: singleRow(1, 2, 2, `{ "aa": 3 }`), + }, }, }, } +// newIndexedJsonDocumentFromValue creates an IndexedJsonDocument from a provided value. +func newIndexedJsonDocumentFromValue(t *testing.T, ctx context.Context, ns tree.NodeStore, v interface{}) tree.IndexedJsonDocument { + doc, _, err := sqltypes.JSON.Convert(v) + require.NoError(t, err) + root, err := tree.SerializeJsonToAddr(ctx, ns, doc.(sql.JSONWrapper)) + require.NoError(t, err) + return tree.NewIndexedJsonDocument(ctx, root, ns) +} + +// createLargeDocumentForTesting creates a JSON document large enough to be split across multiple chunks. +// This is useful for testing mutation operations in large documents. +// Every different possible jsonPathType appears on a chunk boundary, for better test coverage: +// chunk 0 key: $[6].children[2].children[0].number(endOfValue) +// chunk 2 key: $[7].children[5].children[4].children[2].children(arrayInitialElement) +// chunk 5 key: $[8].children[6].children[4].children[3].children[0](startOfValue) +// chunk 8 key: $[8].children[7].children[6].children[5].children[3].children[2].children[1](objectInitialElement) +func createLargeDocumentForTesting(t *testing.T, ctx *sql.Context, ns tree.NodeStore) tree.IndexedJsonDocument { + leafDoc := make(map[string]interface{}) + leafDoc["number"] = float64(1.0) + leafDoc["string"] = "dolt" + docExpression, err := json.NewJSONArray(expression.NewLiteral(newIndexedJsonDocumentFromValue(t, ctx, ns, leafDoc), sqltypes.JSON)) + require.NoError(t, err) + + for level := 0; level < 8; level++ { + childObjectExpression, err := json.NewJSONObject(expression.NewLiteral("children", sqltypes.Text), docExpression) + require.NoError(t, err) + docExpression, err = json.NewJSONArrayAppend(docExpression, expression.NewLiteral("$", sqltypes.Text), childObjectExpression) + require.NoError(t, err) + } + doc, err := docExpression.Eval(ctx, nil) + require.NoError(t, err) + return newIndexedJsonDocumentFromValue(t, ctx, ns, doc) +} + +func jsonMergeLargeDocumentTests(t *testing.T) []schemaMergeTest { + // Test for each possible case in the three-way merge code. + // Test for multiple diffs in the same chunk, + // multiple diffs in adjacent chunks (with a moved boundary) + // and multiple diffs in non-adjacent chunks. + return nil +} + func testSchemaMerge(t *testing.T, tests []schemaMergeTest) { t.Run("merge left to right", func(t *testing.T) { testSchemaMergeHelper(t, tests, false) diff --git a/go/libraries/doltcore/merge/three_way_json_differ.go b/go/libraries/doltcore/merge/three_way_json_differ.go index 5bb89dff3e4..80c78d72280 100644 --- a/go/libraries/doltcore/merge/three_way_json_differ.go +++ b/go/libraries/doltcore/merge/three_way_json_differ.go @@ -17,25 +17,21 @@ package merge import ( "bytes" "context" - "io" - "strings" - - "github.com/dolthub/go-mysql-server/sql/types" - "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/types" + "io" ) +type IJsonDiffer interface { + Next(ctx context.Context) (tree.JsonDiff, error) +} + type ThreeWayJsonDiffer struct { - leftDiffer, rightDiffer tree.JsonDiffer + leftDiffer, rightDiffer IJsonDiffer leftCurrentDiff, rightCurrentDiff *tree.JsonDiff leftIsDone, rightIsDone bool -} - -func NewThreeWayJsonDiffer(base, left, right types.JsonObject) ThreeWayJsonDiffer { - return ThreeWayJsonDiffer{ - leftDiffer: tree.NewJsonDiffer("$", base, left), - rightDiffer: tree.NewJsonDiffer("$", base, right), - } + ns tree.NodeStore } type ThreeWayJsonDiff struct { @@ -43,13 +39,13 @@ type ThreeWayJsonDiff struct { Op tree.DiffOp // a partial set of document values are set // depending on the diffOp - Key string - Base, Left, Right, Merged *types.JSONDocument + Key []byte + Base, Left, Right, Merged sql.JSONWrapper } func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, error) { for { - err := differ.loadNextDiff() + err := differ.loadNextDiff(ctx) if err != nil { return ThreeWayJsonDiff{}, err } @@ -66,13 +62,22 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e // !differ.rightIsDone && !differ.leftIsDone leftDiff := differ.leftCurrentDiff rightDiff := differ.rightCurrentDiff - cmp := bytes.Compare([]byte(leftDiff.Key), []byte(rightDiff.Key)) + leftKey := leftDiff.Key + rightKey := rightDiff.Key + + cmp := bytes.Compare(leftKey, rightKey) + // If both sides modify the same array to different values, we currently consider that to be a conflict. + // This may be relaxed in the future. + if cmp != 0 && tree.JsonKeysModifySameArray(leftKey, rightKey) { + result := ThreeWayJsonDiff{ + Op: tree.DiffOpDivergentModifyConflict, + } + return result, nil + } if cmp > 0 { - if strings.HasPrefix(leftDiff.Key, rightDiff.Key) { - // The left diff must be replacing or deleting an object, - // and the right diff makes changes to that object. - // Note the fact that all keys in these paths are quoted means we don't have to worry about - // one key being a prefix of the other and triggering a false positive here. + if tree.IsJsonKeyPrefix(leftKey, rightKey) { + // The right diff must be replacing or deleting an object, + // and the left diff makes changes to that object. result := ThreeWayJsonDiff{ Op: tree.DiffOpDivergentModifyConflict, } @@ -82,11 +87,9 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e // key only changed on right return differ.processRightSideOnlyDiff(), nil } else if cmp < 0 { - if strings.HasPrefix(rightDiff.Key, leftDiff.Key) { + if tree.IsJsonKeyPrefix(rightKey, leftKey) { // The right diff must be replacing or deleting an object, // and the left diff makes changes to that object. - // Note the fact that all keys in these paths are quoted means we don't have to worry about - // one key being a prefix of the other and triggering a false positive here. result := ThreeWayJsonDiff{ Op: tree.DiffOpDivergentModifyConflict, } @@ -101,12 +104,12 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e if differ.leftCurrentDiff.From == nil { // Key did not exist at base, so both sides are inserts. // Check that they're inserting the same value. - valueCmp, err := differ.leftCurrentDiff.To.Compare(differ.rightCurrentDiff.To) + valueCmp, err := types.CompareJSON(differ.leftCurrentDiff.To, differ.rightCurrentDiff.To) if err != nil { return ThreeWayJsonDiff{}, err } if valueCmp == 0 { - return differ.processMergedDiff(tree.DiffOpConvergentModify, differ.leftCurrentDiff.To), nil + return differ.processMergedDiff(tree.DiffOpConvergentAdd, differ.leftCurrentDiff.To), nil } else { return differ.processMergedDiff(tree.DiffOpDivergentModifyConflict, nil), nil } @@ -120,24 +123,24 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e // If the key existed at base, we can do a recursive three-way merge to resolve // changes to the values. // This shouldn't be necessary: if its an object on all three branches, the original diff is recursive. - mergedValue, conflict, err := mergeJSON(ctx, *differ.leftCurrentDiff.From, - *differ.leftCurrentDiff.To, - *differ.rightCurrentDiff.To) + mergedValue, conflict, err := mergeJSON(ctx, differ.ns, differ.leftCurrentDiff.From, + differ.leftCurrentDiff.To, + differ.rightCurrentDiff.To) if err != nil { return ThreeWayJsonDiff{}, err } if conflict { return differ.processMergedDiff(tree.DiffOpDivergentModifyConflict, nil), nil } else { - return differ.processMergedDiff(tree.DiffOpDivergentModifyResolved, &mergedValue), nil + return differ.processMergedDiff(tree.DiffOpDivergentModifyResolved, mergedValue), nil } } } } -func (differ *ThreeWayJsonDiffer) loadNextDiff() error { +func (differ *ThreeWayJsonDiffer) loadNextDiff(ctx context.Context) error { if differ.leftCurrentDiff == nil && !differ.leftIsDone { - newLeftDiff, err := differ.leftDiffer.Next() + newLeftDiff, err := differ.leftDiffer.Next(ctx) if err == io.EOF { differ.leftIsDone = true } else if err != nil { @@ -147,7 +150,7 @@ func (differ *ThreeWayJsonDiffer) loadNextDiff() error { } } if differ.rightCurrentDiff == nil && !differ.rightIsDone { - newRightDiff, err := differ.rightDiffer.Next() + newRightDiff, err := differ.rightDiffer.Next(ctx) if err == io.EOF { differ.rightIsDone = true } else if err != nil { @@ -193,7 +196,7 @@ func (differ *ThreeWayJsonDiffer) processRightSideOnlyDiff() ThreeWayJsonDiff { } } -func (differ *ThreeWayJsonDiffer) processMergedDiff(op tree.DiffOp, merged *types.JSONDocument) ThreeWayJsonDiff { +func (differ *ThreeWayJsonDiffer) processMergedDiff(op tree.DiffOp, merged sql.JSONWrapper) ThreeWayJsonDiff { result := ThreeWayJsonDiff{ Op: op, Key: differ.leftCurrentDiff.Key, diff --git a/go/store/prolly/tree/indexed_json_diff.go b/go/store/prolly/tree/indexed_json_diff.go new file mode 100644 index 00000000000..3692fb07298 --- /dev/null +++ b/go/store/prolly/tree/indexed_json_diff.go @@ -0,0 +1,256 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tree + +import ( + "context" + "github.com/dolthub/go-mysql-server/sql/types" + "golang.org/x/exp/slices" +) + +type IndexedJsonDiffer struct { + differ Differ[jsonLocationKey, jsonLocationOrdering] + currentFromCursor, currentToCursor *JsonCursor + from, to IndexedJsonDocument +} + +func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*IndexedJsonDiffer, error) { + differ, err := DifferFromRoots[jsonLocationKey, jsonLocationOrdering](ctx, from.m.NodeStore, to.m.NodeStore, from.m.Root, to.m.Root, jsonLocationOrdering{}, false) + if err != nil { + return nil, err + } + return &IndexedJsonDiffer{ + differ: differ, + from: from, + to: to, + }, nil +} + +// Next computes the next diff between the two JSON documents. +func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error) { + for { + if jd.currentFromCursor == nil && jd.currentToCursor == nil { + // Either this is the first iteration, or the last iteration exhausted both chunks at the same time. + // (ie, both chunks ended at the same JSON path). We can use `Differ.Next` to seek to the next difference. + chunkDiff, err := jd.differ.Next(ctx) + if err != nil { + return JsonDiff{}, err + } + jd.currentFromCursor, err = newJsonCursorAtStartOfChunk(ctx, jd.from.m.NodeStore, jd.from.m.Root, []byte(chunkDiff.Key)) + if err != nil { + return JsonDiff{}, err + } + jd.currentToCursor, err = newJsonCursorAtStartOfChunk(ctx, jd.to.m.NodeStore, jd.to.m.Root, []byte(chunkDiff.Key)) + if err != nil { + return JsonDiff{}, err + } + if err != nil { + return JsonDiff{}, err + } + } else if jd.currentFromCursor == nil { + // We exhausted the current `from` chunk but not the `to` chunk. Since the chunk boundaries don't align on + // the same key, we need to continue into the next chunk. + err := jd.differ.from.advance(ctx) + if err != nil { + return JsonDiff{}, err + } + jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from) + if err != nil { + return JsonDiff{}, err + } + } else if jd.currentToCursor == nil { + // We exhausted the current `to` chunk but not the `from` chunk. Since the chunk boundaries don't align on + // the same key, we need to continue into the next chunk. + err := jd.differ.to.advance(ctx) + if err != nil { + return JsonDiff{}, err + } + jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to) + if err != nil { + return JsonDiff{}, err + } + } + // Both cursors point to chunks that are different between the two documents. + // We must be in one of the following states: + // 1) Both cursors have the JSON path with the same values: + // - This location has not changed, advance both cursors and continue. + // 2) Both cursors have the same JSON path but different values: + // - The value at that path has been modified. + // 3) Both cursors point to the start of a value, but the paths differ: + // - A value has been inserted or deleted in the beginning/middle of an object. + // 4) One cursor points to the start of a value, while the other cursor points to the end of that value's parent: + // - A value has been inserted or deleted at the end of an object or array. + // 5) One cursor points to the initial element of an object/array, while the other points to the end of that same path: + // - A value has been changed from an object/array to a scalar, or vice-versa. + // 6) One cursor points to the initial element of an object, while the other points to the initial element of an array: + // - The value has been change from an object to an array, or vice-versa. + + fromScanner := jd.currentFromCursor.jsonScanner + toScanner := jd.currentToCursor.jsonScanner + fromScannerAtStartOfValue := fromScanner.atStartOfValue() + toScannerAtStartOfValue := toScanner.atStartOfValue() + fromCurrentLocation := fromScanner.currentPath + toCurrentLocation := toScanner.currentPath + + // helper function to advance a JsonCursor and set it to nil if it reaches the end of a chunk + advanceCursor := func(jCur **JsonCursor) (err error) { + if (*jCur).jsonScanner.atEndOfChunk() { + *jCur = nil + } else { + err = (*jCur).jsonScanner.AdvanceToNextLocation() + if err != nil { + return err + } + } + return nil + } + + if !fromScannerAtStartOfValue && !toScannerAtStartOfValue { + // Neither cursor points to the start of a value. + // This should only be possible if they're at the same location. + // Do a sanity check, then continue. + if compareJsonLocations(fromCurrentLocation, toCurrentLocation) != 0 { + return JsonDiff{}, jsonParseError + } + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + continue + } + + if fromScannerAtStartOfValue && toScannerAtStartOfValue { + cmp := compareJsonLocations(fromCurrentLocation, toCurrentLocation) + switch cmp { + case 0: + key := fromCurrentLocation.Clone().key + + // Both sides have the same key. If they're both an object or both an array, continue. + // Otherwise, compare them and possibly return a modification. + if (fromScanner.current() == '{' && toScanner.current() == '{') || + (fromScanner.current() == '[' && toScanner.current() == '[') { + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + continue + } + + fromValue, err := jd.currentFromCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + toValue, err := jd.currentToCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + if !slices.Equal(fromValue, toValue) { + // Case 2: The value at this path has been modified + return JsonDiff{ + Key: key, + From: types.NewLazyJSONDocument(fromValue), + To: types.NewLazyJSONDocument(toValue), + Type: ModifiedDiff, + }, nil + } + // Case 1: This location has not changed + continue + + case -1: + key := fromCurrentLocation.Clone().key + // Case 3: A value has been removed from an object + removedValue, err := jd.currentFromCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + return JsonDiff{ + Key: key, + From: types.NewLazyJSONDocument(removedValue), + Type: RemovedDiff, + }, nil + case 1: + key := toCurrentLocation.Clone().key + // Case 3: A value has been added to an object + addedValue, err := jd.currentToCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + return JsonDiff{ + Key: key, + To: types.NewLazyJSONDocument(addedValue), + Type: AddedDiff, + }, nil + } + } + + if !fromScannerAtStartOfValue && toScannerAtStartOfValue { + if fromCurrentLocation.getScannerState() != endOfValue { + return JsonDiff{}, jsonParseError + } + key := toCurrentLocation.Clone().key + // Case 4: A value has been inserted at the end of an object or array. + addedValue, err := jd.currentToCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + return JsonDiff{ + Key: key, + To: types.NewLazyJSONDocument(addedValue), + Type: AddedDiff, + }, nil + } + + if fromScannerAtStartOfValue && !toScannerAtStartOfValue { + if toCurrentLocation.getScannerState() != endOfValue { + return JsonDiff{}, jsonParseError + } + key := fromCurrentLocation.Clone().key + // Case 4: A value has been removed from the end of an object or array. + addedValue, err := jd.currentFromCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + return JsonDiff{ + Key: key, + From: types.NewLazyJSONDocument(addedValue), + Type: RemovedDiff, + }, nil + } + } +} diff --git a/go/store/prolly/tree/json_cursor.go b/go/store/prolly/tree/json_cursor.go index e7d6be61c28..844015d629d 100644 --- a/go/store/prolly/tree/json_cursor.go +++ b/go/store/prolly/tree/json_cursor.go @@ -53,24 +53,33 @@ func getPreviousKey(ctx context.Context, cur *cursor) ([]byte, error) { // in the document. If the location does not exist in the document, the resulting JsonCursor // will be at the location where the value would be if it was inserted. func newJsonCursor(ctx context.Context, ns NodeStore, root Node, startKey jsonLocation, forRemoval bool) (jCur *JsonCursor, found bool, err error) { - cur, err := newCursorAtKey(ctx, ns, root, startKey.key, jsonLocationOrdering{}) + jcur, err := newJsonCursorAtStartOfChunk(ctx, ns, root, startKey.key) if err != nil { return nil, false, err } + found, err = jcur.AdvanceToLocation(ctx, startKey, forRemoval) + return jcur, found, err +} + +func newJsonCursorAtStartOfChunk(ctx context.Context, ns NodeStore, root Node, startKey []byte) (jCur *JsonCursor, err error) { + cur, err := newCursorAtKey(ctx, ns, root, startKey, jsonLocationOrdering{}) + if err != nil { + return nil, err + } + return newJsonCursorFromCursor(ctx, cur) +} + +func newJsonCursorFromCursor(ctx context.Context, cur *cursor) (*JsonCursor, error) { previousKey, err := getPreviousKey(ctx, cur) if err != nil { - return nil, false, err + return nil, err } jsonBytes := cur.currentValue() jsonDecoder := ScanJsonFromMiddleWithKey(jsonBytes, previousKey) jcur := JsonCursor{cur: cur, jsonScanner: jsonDecoder} - found, err = jcur.AdvanceToLocation(ctx, startKey, forRemoval) - if err != nil { - return nil, found, err - } - return &jcur, found, nil + return &jcur, nil } func (j JsonCursor) Valid() bool { diff --git a/go/store/prolly/tree/json_indexed_document.go b/go/store/prolly/tree/json_indexed_document.go index 2196157842e..a12dbc81992 100644 --- a/go/store/prolly/tree/json_indexed_document.go +++ b/go/store/prolly/tree/json_indexed_document.go @@ -19,9 +19,12 @@ import ( "database/sql/driver" "encoding/json" "fmt" + "github.com/shopspring/decimal" + "io" "sync" "github.com/dolthub/go-mysql-server/sql" + sqljson "github.com/dolthub/go-mysql-server/sql/expression/function/json" "github.com/dolthub/go-mysql-server/sql/types" ) @@ -205,7 +208,7 @@ func (i IndexedJsonDocument) tryInsert(ctx context.Context, path string, val sql return i.insertIntoCursor(ctx, keyPath, jsonCursor, val) } -func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (types.MutableJSON, bool, error) { +func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (IndexedJsonDocument, bool, error) { cursorPath := jsonCursor.GetCurrentPath() // If the inserted path is equivalent to "$" (which also includes "$[0]" on non-arrays), do nothing. @@ -241,7 +244,7 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } originalValue, err := jsonCursor.NextValue(ctx) @@ -251,7 +254,7 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL insertedValueBytes, err := types.MarshallJson(val) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } jsonChunker.appendJsonToBuffer([]byte(fmt.Sprintf("[%s,%s]", originalValue, insertedValueBytes))) @@ -259,7 +262,7 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL newRoot, err := jsonChunker.Done(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil @@ -285,14 +288,14 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL insertedValueBytes, err := types.MarshallJson(val) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // The key is guaranteed to not exist in the source doc. The cursor is pointing to the start of the subsequent object, // which will be the insertion point for the added value. jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // If required, adds a comma before writing the value. @@ -313,7 +316,7 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL newRoot, err := jsonChunker.Done(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil @@ -343,10 +346,17 @@ func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types. if err != nil { return nil, false, err } + return i.removeWithLocation(ctx, keyPath) +} +func (i IndexedJsonDocument) RemoveWithKey(ctx context.Context, key []byte) (IndexedJsonDocument, bool, error) { + return i.removeWithLocation(ctx, jsonPathFromKey(key)) +} + +func (i IndexedJsonDocument) removeWithLocation(ctx context.Context, keyPath jsonLocation) (IndexedJsonDocument, bool, error) { jsonCursor, found, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, keyPath, true) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } if !found { // The key does not exist in the document. @@ -356,7 +366,7 @@ func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types. // The cursor is now pointing to the end of the value prior to the one being removed. jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } startofRemovedLocation := jsonCursor.GetCurrentPath() @@ -367,7 +377,7 @@ func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types. keyPath.setScannerState(endOfValue) _, err = jsonCursor.AdvanceToLocation(ctx, keyPath, false) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // If removing the first element of an object/array, skip past the comma, and set the chunker as if it's @@ -379,7 +389,7 @@ func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types. newRoot, err := jsonChunker.Done(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil @@ -406,10 +416,17 @@ func (i IndexedJsonDocument) trySet(ctx context.Context, path string, val sql.JS if err != nil { return nil, false, err } + return i.setWithLocation(ctx, keyPath, val) +} +func (i IndexedJsonDocument) SetWithKey(ctx context.Context, key []byte, val sql.JSONWrapper) (IndexedJsonDocument, bool, error) { + return i.setWithLocation(ctx, jsonPathFromKey(key), val) +} + +func (i IndexedJsonDocument) setWithLocation(ctx context.Context, keyPath jsonLocation, val sql.JSONWrapper) (IndexedJsonDocument, bool, error) { jsonCursor, found, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, keyPath, false) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // The supplied path may be 0-indexing into a scalar, which is the same as referencing the scalar. Remove @@ -480,24 +497,24 @@ func (i IndexedJsonDocument) tryReplace(ctx context.Context, path string, val sq return i.replaceIntoCursor(ctx, keyPath, jsonCursor, val) } -func (i IndexedJsonDocument) replaceIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (types.MutableJSON, bool, error) { +func (i IndexedJsonDocument) replaceIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (IndexedJsonDocument, bool, error) { // The cursor is now pointing to the start of the value being replaced. jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // Advance the cursor to the end of the value being removed. keyPath.setScannerState(endOfValue) _, err = jsonCursor.AdvanceToLocation(ctx, keyPath, false) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } insertedValueBytes, err := types.MarshallJson(val) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } jsonChunker.appendJsonToBuffer(insertedValueBytes) @@ -505,7 +522,7 @@ func (i IndexedJsonDocument) replaceIntoCursor(ctx context.Context, keyPath json newRoot, err := jsonChunker.Done(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil diff --git a/go/store/prolly/tree/json_location.go b/go/store/prolly/tree/json_location.go index b6c8a413be7..3bd407d01d3 100644 --- a/go/store/prolly/tree/json_location.go +++ b/go/store/prolly/tree/json_location.go @@ -190,6 +190,23 @@ func unescapeKey(key []byte) []byte { return bytes.Replace(key, []byte(`\"`), []byte(`"`), -1) } +// IsJsonKeyPrefix computes whether one key encodes a json location that is a prefix of another. +// Example: $.a is a prefix of $.a.b, but not $.aa +func IsJsonKeyPrefix(path, prefix []byte) bool { + return bytes.HasPrefix(path, prefix) && (path[len(prefix)] == beginArrayKey || path[len(prefix)] == beginObjectKey) +} + +func JsonKeysModifySameArray(leftKey, rightKey []byte) bool { + i := 0 + for i < len(leftKey) && i < len(rightKey) && leftKey[i] == rightKey[i] { + if leftKey[i] == beginArrayKey { + return true + } + i++ + } + return false +} + func jsonPathElementsFromMySQLJsonPath(pathBytes []byte) (jsonLocation, error) { location := newRootLocation() state := lexStatePath From dbc4a2a69529eac6b70185036250db4e5f6888ed Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 25 Jul 2024 16:25:54 -0700 Subject: [PATCH 04/12] Ensure that merged documents are written as IndexedJsonDocument, not blobs. --- go/libraries/doltcore/merge/merge_prolly_rows.go | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go b/go/libraries/doltcore/merge/merge_prolly_rows.go index f86242ff2ac..2944136b674 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -1998,20 +1998,12 @@ func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAd return nil, true, nil } - mergedVal, err := mergedDoc.ToInterface() - if err != nil { - return nil, true, err - } - mergedBytes, err := json.Marshal(mergedVal) - if err != nil { - return nil, true, err - } - mergedAddr, err := tree.SerializeBytesToAddr(ctx, m.ns, bytes.NewReader(mergedBytes), len(mergedBytes)) + root, err := tree.SerializeJsonToAddr(ctx, m.ns, mergedDoc) if err != nil { return nil, true, err } + mergedAddr := root.HashOf() return mergedAddr[:], false, nil - } func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSONWrapper) (resultDoc sql.JSONWrapper, conflict bool, err error) { From 0908f6b74d666d4b62f9b8315e384901a2dacfc4 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 25 Jul 2024 16:26:31 -0700 Subject: [PATCH 05/12] Handle three-way merges of IndexedJsonDocuments when one or both of the documents fit in a single chunk. --- .../doltcore/merge/merge_prolly_rows.go | 3 + .../doltcore/merge/schema_merge_test.go | 207 +++++++++++++++++- .../doltcore/merge/three_way_json_differ.go | 11 +- go/store/prolly/tree/diff.go | 47 ++-- go/store/prolly/tree/indexed_json_diff.go | 100 ++++++--- go/store/prolly/tree/json_cursor.go | 12 +- go/store/prolly/tree/json_diff_test.go | 197 +++++++++++++---- 7 files changed, 484 insertions(+), 93 deletions(-) diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go b/go/libraries/doltcore/merge/merge_prolly_rows.go index 2944136b674..1b4c1332ff9 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -2110,6 +2110,9 @@ func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSO if err == io.EOF { return merged, false, nil } + if err != nil { + return types.JSONDocument{}, true, err + } switch threeWayDiff.Op { case tree.DiffOpRightAdd, tree.DiffOpConvergentAdd, tree.DiffOpRightModify, tree.DiffOpConvergentModify, tree.DiffOpDivergentModifyResolved: diff --git a/go/libraries/doltcore/merge/schema_merge_test.go b/go/libraries/doltcore/merge/schema_merge_test.go index bc79d977a22..3a0823c5826 100644 --- a/go/libraries/doltcore/merge/schema_merge_test.go +++ b/go/libraries/doltcore/merge/schema_merge_test.go @@ -16,6 +16,11 @@ package merge_test import ( "context" + "fmt" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/go-mysql-server/sql/expression" + "github.com/dolthub/go-mysql-server/sql/expression/function/json" + sqltypes "github.com/dolthub/go-mysql-server/sql/types" "testing" "github.com/dolthub/go-mysql-server/sql" @@ -105,6 +110,9 @@ func TestSchemaMerge(t *testing.T) { t.Run("json merge tests", func(t *testing.T) { testSchemaMerge(t, jsonMergeTests) }) + t.Run("large json merge tests", func(t *testing.T) { + testSchemaMerge(t, jsonMergeLargeDocumentTests(t)) + }) } var columnAddDropTests = []schemaMergeTest{ @@ -1392,13 +1400,12 @@ func createLargeDocumentForTesting(t *testing.T, ctx *sql.Context, ns tree.NodeS leafDoc := make(map[string]interface{}) leafDoc["number"] = float64(1.0) leafDoc["string"] = "dolt" - docExpression, err := json.NewJSONArray(expression.NewLiteral(newIndexedJsonDocumentFromValue(t, ctx, ns, leafDoc), sqltypes.JSON)) - require.NoError(t, err) + var docExpression sql.Expression = expression.NewLiteral(newIndexedJsonDocumentFromValue(t, ctx, ns, leafDoc), sqltypes.JSON) + // require.NoError(t, err) + var err error for level := 0; level < 8; level++ { - childObjectExpression, err := json.NewJSONObject(expression.NewLiteral("children", sqltypes.Text), docExpression) - require.NoError(t, err) - docExpression, err = json.NewJSONArrayAppend(docExpression, expression.NewLiteral("$", sqltypes.Text), childObjectExpression) + docExpression, err = json.NewJSONInsert(docExpression, expression.NewLiteral(fmt.Sprintf("$.level%d", level), sqltypes.Text), docExpression) require.NoError(t, err) } doc, err := docExpression.Eval(ctx, nil) @@ -1411,7 +1418,195 @@ func jsonMergeLargeDocumentTests(t *testing.T) []schemaMergeTest { // Test for multiple diffs in the same chunk, // multiple diffs in adjacent chunks (with a moved boundary) // and multiple diffs in non-adjacent chunks. - return nil + + ctx := sql.NewEmptyContext() + ns := tree.NewTestNodeStore() + + largeObject := createLargeDocumentForTesting(t, ctx, ns) + + insert := func(document sqltypes.MutableJSON, path string, val interface{}) sqltypes.MutableJSON { + jsonVal, inRange, err := sqltypes.JSON.Convert(val) + require.NoError(t, err) + require.True(t, (bool)(inRange)) + newDoc, changed, err := document.Insert(ctx, path, jsonVal.(sql.JSONWrapper)) + require.NoError(t, err) + require.True(t, changed) + return newDoc + } + + set := func(document sqltypes.MutableJSON, path string, val interface{}) sqltypes.MutableJSON { + jsonVal, inRange, err := sqltypes.JSON.Convert(val) + require.NoError(t, err) + require.True(t, (bool)(inRange)) + newDoc, changed, err := document.Replace(ctx, path, jsonVal.(sql.JSONWrapper)) + require.NoError(t, err) + require.True(t, changed) + return newDoc + } + + delete := func(document sqltypes.MutableJSON, path string) sqltypes.MutableJSON { + newDoc, changed, err := document.Remove(ctx, path) + require.True(t, changed) + require.NoError(t, err) + return newDoc + } + + var largeJsonMergeTests = []schemaMergeTest{ + { + name: "json merge", + ancestor: *tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int, b int, j json)")), + left: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int, b int, j json)")), + right: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int, b int, j json)")), + merged: *tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int, b int, j json)")), + dataTests: []dataTest{ + { + name: "parallel insertion", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(largeObject, "$.a", 1)), + right: singleRow(1, 1, 2, insert(largeObject, "$.a", 1)), + merged: singleRow(1, 2, 2, insert(largeObject, "$.a", 1)), + }, + { + name: "convergent insertion", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(largeObject, "$.a", 1)), + right: singleRow(1, 1, 2, insert(largeObject, "$.z", 2)), + merged: singleRow(1, 2, 2, insert(insert(largeObject, "$.a", 1), "$.z", 2)), + }, + { + name: "multiple insertions", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(insert(largeObject, "$.a1", 1), "$.z2", 2)), + right: singleRow(1, 1, 2, insert(insert(largeObject, "$.a2", 3), "$.z1", 4)), + merged: singleRow(1, 2, 2, insert(insert(insert(insert(largeObject, "$.z1", 4), "$.z2", 2), "$.a2", 3), "$.a1", 1)), + }, + { + name: "convergent insertion with escaped quotes in keys", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(largeObject, `$."\"a\""`, 1)), + right: singleRow(1, 1, 2, insert(largeObject, `$."\"z\""`, 2)), + merged: singleRow(1, 2, 2, insert(insert(largeObject, `$."\"a\""`, 1), `$."\"z\""`, 2)), + }, + { + name: "parallel modification", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level7", 1)), + right: singleRow(1, 1, 2, set(largeObject, "$.level7", 1)), + merged: singleRow(1, 2, 2, set(largeObject, "$.level7", 1)), + }, + { + name: "convergent modification", + ancestor: singleRow(1, 1, 1, insert(largeObject, "$.a", 1)), + left: singleRow(1, 2, 1, set(insert(largeObject, "$.a", 1), "$.level7", 2)), + right: singleRow(1, 1, 2, set(insert(largeObject, "$.a", 1), "$.a", 3)), + merged: singleRow(1, 2, 2, set(insert(largeObject, "$.a", 3), "$.level7", 2)), + skip: true, //doesn't terminate? + }, + { + name: `parallel deletion`, + ancestor: singleRow(1, 1, 1, insert(largeObject, "$.a", 1)), + left: singleRow(1, 2, 1, largeObject), + right: singleRow(1, 1, 2, largeObject), + merged: singleRow(1, 2, 2, largeObject), + }, + { + name: `convergent deletion`, + ancestor: singleRow(1, 1, 1, insert(insert(largeObject, "$.a", 1), "$.z", 2)), + left: singleRow(1, 2, 1, insert(largeObject, "$.a", 1)), + right: singleRow(1, 1, 2, insert(largeObject, "$.z", 2)), + merged: singleRow(1, 2, 2, largeObject), + }, + { + name: `divergent insertion`, + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(largeObject, "$.z", 1)), + right: singleRow(1, 1, 2, insert(largeObject, "$.z", 2)), + dataConflict: true, + }, + { + name: `divergent modification`, + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level7", 1)), + right: singleRow(1, 1, 2, set(largeObject, "$.level7", 2)), + dataConflict: true, + }, + { + name: `divergent modification and deletion`, + ancestor: singleRow(1, 1, 1, insert(largeObject, "$.a", 1)), + left: singleRow(1, 2, 1, insert(largeObject, "$.a", 2)), + right: singleRow(1, 1, 2, largeObject), + dataConflict: true, + }, + { + name: `nested insertion`, + ancestor: singleRow(1, 1, 1, insert(largeObject, "$.level7.level4.new", map[string]interface{}{})), + left: singleRow(1, 2, 1, insert(largeObject, "$.level7.level4.new", map[string]interface{}{"a": 1})), + right: singleRow(1, 1, 2, insert(largeObject, "$.level7.level4.new", map[string]interface{}{"b": 2})), + merged: singleRow(1, 2, 2, insert(largeObject, "$.level7.level4.new", map[string]interface{}{"a": 1, "b": 2})), + }, + { + name: `nested insertion with escaped quotes in keys`, + ancestor: singleRow(1, 1, 1, insert(largeObject, `$.level7.level4."\"new\""`, map[string]interface{}{})), + left: singleRow(1, 2, 1, insert(largeObject, `$.level7.level4."\"new\""`, map[string]interface{}{"a": 1})), + right: singleRow(1, 1, 2, insert(largeObject, `$.level7.level4."\"new\""`, map[string]interface{}{"b": 2})), + merged: singleRow(1, 2, 2, insert(largeObject, `$.level7.level4."\"new\""`, map[string]interface{}{"a": 1, "b": 2})), + }, + { + name: "nested convergent modification", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level7.level4", 1)), + right: singleRow(1, 1, 2, set(largeObject, "$.level7.level5", 2)), + merged: singleRow(1, 2, 2, set(set(largeObject, "$.level7.level5", 2), "$.level7.level4", 1)), + // skip: true, // panics + }, + { + name: `nested deletion`, + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, delete(largeObject, "$.level7")), + right: singleRow(1, 1, 2, delete(largeObject, "$.level6")), + merged: singleRow(1, 2, 2, delete(delete(largeObject, "$.level6"), "$.level7")), + skip: true, // doesn't terminate + }, + { + name: "complicated nested merge", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, delete(set(insert(largeObject, "$.added", 7), "$.level5.level1", 5), "$.level4")), + right: singleRow(1, 1, 2, delete(set(insert(largeObject, "$.level6.added", 8), "$.level1", 6), "$.level5.level2")), + merged: singleRow(1, 2, 2, delete(set(insert(delete(set(insert(largeObject, "$.added", 7), "$.level5.level1", 5), "$.level4"), "$.level6.added", 8), "$.level1", 6), "$.level5.level2")), + }, + { + name: "changing types", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level3.number", `"dolt"`)), + right: singleRow(1, 1, 2, set(largeObject, "$.level4.string", 4)), + merged: singleRow(1, 2, 2, set(set(largeObject, "$.level3.number", `"dolt"`), "$.level4.string", 4)), + }, + { + name: "changing types conflict", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level4.string", []interface{}{})), + right: singleRow(1, 1, 2, set(largeObject, "$.level4.string", 4)), + dataConflict: true, + }, + { + name: "object insert and modify conflict", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 1, 1, insert(largeObject, "$.level5.a", 5)), + right: singleRow(1, 1, 2, set(largeObject, "$.level5", 6)), + dataConflict: true, + }, + { + name: "object insert and delete conflict", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 1, 1, insert(largeObject, "$.level5.a", 5)), + right: singleRow(1, 1, 2, delete(largeObject, "$.level5")), + dataConflict: true, + }, + }, + }, + } + + return largeJsonMergeTests } func testSchemaMerge(t *testing.T, tests []schemaMergeTest) { diff --git a/go/libraries/doltcore/merge/three_way_json_differ.go b/go/libraries/doltcore/merge/three_way_json_differ.go index 80c78d72280..9d5784d550f 100644 --- a/go/libraries/doltcore/merge/three_way_json_differ.go +++ b/go/libraries/doltcore/merge/three_way_json_differ.go @@ -39,8 +39,8 @@ type ThreeWayJsonDiff struct { Op tree.DiffOp // a partial set of document values are set // depending on the diffOp - Key []byte - Base, Left, Right, Merged sql.JSONWrapper + Key []byte + Left, Right, Merged sql.JSONWrapper } func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, error) { @@ -175,9 +175,8 @@ func (differ *ThreeWayJsonDiffer) processRightSideOnlyDiff() ThreeWayJsonDiff { case tree.RemovedDiff: result := ThreeWayJsonDiff{ - Op: tree.DiffOpRightDelete, - Key: differ.rightCurrentDiff.Key, - Base: differ.rightCurrentDiff.From, + Op: tree.DiffOpRightDelete, + Key: differ.rightCurrentDiff.Key, } differ.rightCurrentDiff = nil return result @@ -186,7 +185,6 @@ func (differ *ThreeWayJsonDiffer) processRightSideOnlyDiff() ThreeWayJsonDiff { result := ThreeWayJsonDiff{ Op: tree.DiffOpRightModify, Key: differ.rightCurrentDiff.Key, - Base: differ.rightCurrentDiff.From, Right: differ.rightCurrentDiff.To, } differ.rightCurrentDiff = nil @@ -200,7 +198,6 @@ func (differ *ThreeWayJsonDiffer) processMergedDiff(op tree.DiffOp, merged sql.J result := ThreeWayJsonDiff{ Op: op, Key: differ.leftCurrentDiff.Key, - Base: differ.leftCurrentDiff.From, Left: differ.leftCurrentDiff.To, Right: differ.rightCurrentDiff.To, Merged: merged, diff --git a/go/store/prolly/tree/diff.go b/go/store/prolly/tree/diff.go index 2fc0a9956d2..9fcd841e701 100644 --- a/go/store/prolly/tree/diff.go +++ b/go/store/prolly/tree/diff.go @@ -128,6 +128,13 @@ func DifferFromCursors[K ~[]byte, O Ordering[K]]( } func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) { + return td.next(ctx, true) +} + +// next finds the next diff and then conditionally advances the cursors past the modified chunks. +// In most cases, we want to advance the cursors, but in some circumstances the caller may want to access the cursors +// and then advance them manually. +func (td Differ[K, O]) next(ctx context.Context, advanceCursors bool) (diff Diff, err error) { for td.from.Valid() && td.from.compare(td.fromStop) < 0 && td.to.Valid() && td.to.compare(td.toStop) < 0 { f := td.from.CurrentKey() @@ -136,16 +143,16 @@ func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) { switch { case cmp < 0: - return sendRemoved(ctx, td.from) + return sendRemoved(ctx, td.from, advanceCursors) case cmp > 0: - return sendAdded(ctx, td.to) + return sendAdded(ctx, td.to, advanceCursors) case cmp == 0: // If the cursor schema has changed, then all rows should be considered modified. // If the cursor schema hasn't changed, rows are modified iff their bytes have changed. if td.considerAllRowsModified || !equalcursorValues(td.from, td.to) { - return sendModified(ctx, td.from, td.to) + return sendModified(ctx, td.from, td.to, advanceCursors) } // advance both cursors since we have already determined that they are equal. This needs to be done because @@ -166,42 +173,46 @@ func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) { } if td.from.Valid() && td.from.compare(td.fromStop) < 0 { - return sendRemoved(ctx, td.from) + return sendRemoved(ctx, td.from, advanceCursors) } if td.to.Valid() && td.to.compare(td.toStop) < 0 { - return sendAdded(ctx, td.to) + return sendAdded(ctx, td.to, advanceCursors) } return Diff{}, io.EOF } -func sendRemoved(ctx context.Context, from *cursor) (diff Diff, err error) { +func sendRemoved(ctx context.Context, from *cursor, advanceCursors bool) (diff Diff, err error) { diff = Diff{ Type: RemovedDiff, Key: from.CurrentKey(), From: from.currentValue(), } - if err = from.advance(ctx); err != nil { - return Diff{}, err + if advanceCursors { + if err = from.advance(ctx); err != nil { + return Diff{}, err + } } return } -func sendAdded(ctx context.Context, to *cursor) (diff Diff, err error) { +func sendAdded(ctx context.Context, to *cursor, advanceCursors bool) (diff Diff, err error) { diff = Diff{ Type: AddedDiff, Key: to.CurrentKey(), To: to.currentValue(), } - if err = to.advance(ctx); err != nil { - return Diff{}, err + if advanceCursors { + if err = to.advance(ctx); err != nil { + return Diff{}, err + } } return } -func sendModified(ctx context.Context, from, to *cursor) (diff Diff, err error) { +func sendModified(ctx context.Context, from, to *cursor, advanceCursors bool) (diff Diff, err error) { diff = Diff{ Type: ModifiedDiff, Key: from.CurrentKey(), @@ -209,11 +220,13 @@ func sendModified(ctx context.Context, from, to *cursor) (diff Diff, err error) To: to.currentValue(), } - if err = from.advance(ctx); err != nil { - return Diff{}, err - } - if err = to.advance(ctx); err != nil { - return Diff{}, err + if advanceCursors { + if err = from.advance(ctx); err != nil { + return Diff{}, err + } + if err = to.advance(ctx); err != nil { + return Diff{}, err + } } return } diff --git a/go/store/prolly/tree/indexed_json_diff.go b/go/store/prolly/tree/indexed_json_diff.go index 3692fb07298..22b2634b486 100644 --- a/go/store/prolly/tree/indexed_json_diff.go +++ b/go/store/prolly/tree/indexed_json_diff.go @@ -18,12 +18,14 @@ import ( "context" "github.com/dolthub/go-mysql-server/sql/types" "golang.org/x/exp/slices" + "io" ) type IndexedJsonDiffer struct { differ Differ[jsonLocationKey, jsonLocationOrdering] currentFromCursor, currentToCursor *JsonCursor from, to IndexedJsonDocument + started bool } func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*IndexedJsonDiffer, error) { @@ -31,6 +33,35 @@ func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*I if err != nil { return nil, err } + // We want to diff the prolly tree as if it was an address map pointing to the individual blob fragments, rather + // than diffing the blob fragments themselves. We can accomplish this by just replacing the cursors in the differ + // with their parents. + differ.from = differ.from.parent + differ.to = differ.to.parent + differ.fromStop = differ.fromStop.parent + differ.toStop = differ.toStop.parent + + if differ.from == nil || differ.to == nil { + // This can happen when either document fits in a single chunk. + // We don't use the chunk differ in this case, and instead we create the cursors without it. + diffKey := []byte{byte(startOfValue)} + currentFromCursor, err := newJsonCursorAtStartOfChunk(ctx, from.m.NodeStore, from.m.Root, diffKey) + if err != nil { + return nil, err + } + currentToCursor, err := newJsonCursorAtStartOfChunk(ctx, to.m.NodeStore, to.m.Root, diffKey) + if err != nil { + return nil, err + } + return &IndexedJsonDiffer{ + differ: differ, + from: from, + to: to, + currentFromCursor: currentFromCursor, + currentToCursor: currentToCursor, + }, nil + } + return &IndexedJsonDiffer{ differ: differ, from: from, @@ -39,48 +70,78 @@ func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*I } // Next computes the next diff between the two JSON documents. +// To accomplish this, it uses the underlying Differ to find chunks that have changed, and uses a pair of JsonCursors +// to walk corresponding chunks. func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error) { + // helper function to advance a JsonCursor and set it to nil if it reaches the end of a chunk + advanceCursor := func(jCur **JsonCursor) (err error) { + if (*jCur).jsonScanner.atEndOfChunk() { + err = (*jCur).cur.advance(ctx) + if err != nil { + return err + } + *jCur = nil + } else { + err = (*jCur).jsonScanner.AdvanceToNextLocation() + if err != nil { + return err + } + } + return nil + } + for { if jd.currentFromCursor == nil && jd.currentToCursor == nil { + if jd.differ.from == nil || jd.differ.to == nil { + // One of the documents fits in a single chunk. We must have walked the entire document by now. + return JsonDiff{}, io.EOF + } + // Either this is the first iteration, or the last iteration exhausted both chunks at the same time. // (ie, both chunks ended at the same JSON path). We can use `Differ.Next` to seek to the next difference. - chunkDiff, err := jd.differ.Next(ctx) + // Passing advanceCursors=false means that instead of using the returned diff, we read the cursors out of + // the differ, and advance them manually once we've walked the chunk. + _, err := jd.differ.next(ctx, false) if err != nil { return JsonDiff{}, err } - jd.currentFromCursor, err = newJsonCursorAtStartOfChunk(ctx, jd.from.m.NodeStore, jd.from.m.Root, []byte(chunkDiff.Key)) - if err != nil { - return JsonDiff{}, err - } - jd.currentToCursor, err = newJsonCursorAtStartOfChunk(ctx, jd.to.m.NodeStore, jd.to.m.Root, []byte(chunkDiff.Key)) + + jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from) if err != nil { return JsonDiff{}, err } + jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to) if err != nil { return JsonDiff{}, err } } else if jd.currentFromCursor == nil { // We exhausted the current `from` chunk but not the `to` chunk. Since the chunk boundaries don't align on // the same key, we need to continue into the next chunk. - err := jd.differ.from.advance(ctx) + + jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from) if err != nil { return JsonDiff{}, err } - jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from) + + err = advanceCursor(&jd.currentFromCursor) if err != nil { return JsonDiff{}, err } + continue } else if jd.currentToCursor == nil { // We exhausted the current `to` chunk but not the `from` chunk. Since the chunk boundaries don't align on // the same key, we need to continue into the next chunk. - err := jd.differ.to.advance(ctx) + + jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to) if err != nil { return JsonDiff{}, err } - jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to) + + err = advanceCursor(&jd.currentToCursor) if err != nil { return JsonDiff{}, err } + continue } // Both cursors point to chunks that are different between the two documents. // We must be in one of the following states: @@ -92,31 +153,20 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error // - A value has been inserted or deleted in the beginning/middle of an object. // 4) One cursor points to the start of a value, while the other cursor points to the end of that value's parent: // - A value has been inserted or deleted at the end of an object or array. + // + // The following states aren't actually possible because we will encounter state 2 first. // 5) One cursor points to the initial element of an object/array, while the other points to the end of that same path: // - A value has been changed from an object/array to a scalar, or vice-versa. // 6) One cursor points to the initial element of an object, while the other points to the initial element of an array: // - The value has been change from an object to an array, or vice-versa. - fromScanner := jd.currentFromCursor.jsonScanner - toScanner := jd.currentToCursor.jsonScanner + fromScanner := &jd.currentFromCursor.jsonScanner + toScanner := &jd.currentToCursor.jsonScanner fromScannerAtStartOfValue := fromScanner.atStartOfValue() toScannerAtStartOfValue := toScanner.atStartOfValue() fromCurrentLocation := fromScanner.currentPath toCurrentLocation := toScanner.currentPath - // helper function to advance a JsonCursor and set it to nil if it reaches the end of a chunk - advanceCursor := func(jCur **JsonCursor) (err error) { - if (*jCur).jsonScanner.atEndOfChunk() { - *jCur = nil - } else { - err = (*jCur).jsonScanner.AdvanceToNextLocation() - if err != nil { - return err - } - } - return nil - } - if !fromScannerAtStartOfValue && !toScannerAtStartOfValue { // Neither cursor points to the start of a value. // This should only be possible if they're at the same location. diff --git a/go/store/prolly/tree/json_cursor.go b/go/store/prolly/tree/json_cursor.go index 844015d629d..b980fcd2ef6 100644 --- a/go/store/prolly/tree/json_cursor.go +++ b/go/store/prolly/tree/json_cursor.go @@ -40,7 +40,10 @@ func getPreviousKey(ctx context.Context, cur *cursor) ([]byte, error) { if !cur2.Valid() { return nil, nil } - key := cur2.parent.CurrentKey() + key := cur2.CurrentKey() + if len(key) == 0 { + key = cur2.parent.CurrentKey() + } err = errorIfNotSupportedLocation(key) if err != nil { return nil, err @@ -74,6 +77,13 @@ func newJsonCursorFromCursor(ctx context.Context, cur *cursor) (*JsonCursor, err if err != nil { return nil, err } + if !cur.isLeaf() { + nd, err := fetchChild(ctx, cur.nrw, cur.currentRef()) + if err != nil { + return nil, err + } + return newJsonCursorFromCursor(ctx, &cursor{nd: nd, parent: cur, nrw: cur.nrw}) + } jsonBytes := cur.currentValue() jsonDecoder := ScanJsonFromMiddleWithKey(jsonBytes, previousKey) diff --git a/go/store/prolly/tree/json_diff_test.go b/go/store/prolly/tree/json_diff_test.go index b59f7a87e2a..0dc8d05ebab 100644 --- a/go/store/prolly/tree/json_diff_test.go +++ b/go/store/prolly/tree/json_diff_test.go @@ -18,17 +18,19 @@ import ( "bytes" "context" "fmt" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression" + "github.com/dolthub/go-mysql-server/sql/expression/function/json" "io" "testing" "github.com/dolthub/go-mysql-server/sql/types" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) type jsonDiffTest struct { name string - from, to types.JsonObject + from, to sql.JSONWrapper expectedDiffs []JsonDiff } @@ -44,14 +46,14 @@ func makeJsonPathKey(parts ...string) []byte { var simpleJsonDiffTests = []jsonDiffTest{ { name: "empty object, no modifications", - from: types.JsonObject{}, - to: types.JsonObject{}, + from: types.JSONDocument{types.JsonObject{}}, + to: types.JSONDocument{types.JsonObject{}}, expectedDiffs: nil, }, { name: "insert into empty object", - from: types.JsonObject{}, - to: types.JsonObject{"a": 1}, + from: types.JSONDocument{types.JsonObject{}}, + to: types.JSONDocument{types.JsonObject{"a": 1}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -63,8 +65,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "delete from object", - from: types.JsonObject{"a": 1}, - to: types.JsonObject{}, + from: types.JSONDocument{types.JsonObject{"a": 1}}, + to: types.JSONDocument{types.JsonObject{}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -76,8 +78,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify object", - from: types.JsonObject{"a": 1}, - to: types.JsonObject{"a": 2}, + from: types.JSONDocument{types.JsonObject{"a": 1}}, + to: types.JSONDocument{types.JsonObject{"a": 2}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -89,8 +91,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested insert", - from: types.JsonObject{"a": types.JsonObject{}}, - to: types.JsonObject{"a": types.JsonObject{"b": 1}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 1}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -101,8 +103,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested delete", - from: types.JsonObject{"a": types.JsonObject{"b": 1}}, - to: types.JsonObject{"a": types.JsonObject{}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 1}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -113,8 +115,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested modify", - from: types.JsonObject{"a": types.JsonObject{"b": 1}}, - to: types.JsonObject{"a": types.JsonObject{"b": 2}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 1}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 2}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -126,8 +128,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "insert object", - from: types.JsonObject{"a": types.JsonObject{}}, - to: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -138,8 +140,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify to object", - from: types.JsonObject{"a": types.JsonObject{"b": 2}}, - to: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -151,8 +153,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify from object", - from: types.JsonObject{"a": types.JsonObject{"b": 2}}, - to: types.JsonObject{"a": 1}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{types.JsonObject{"a": 1}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -164,8 +166,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify to array", - from: types.JsonObject{"a": types.JsonObject{"b": "foo"}}, - to: types.JsonObject{"a": types.JsonObject{"b": types.JsonArray{1, 2}}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": "foo"}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonArray{1, 2}}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -177,8 +179,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify from array", - from: types.JsonObject{"a": types.JsonArray{1, 2}}, - to: types.JsonObject{"a": 1}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonArray{1, 2}}}, + to: types.JSONDocument{types.JsonObject{"a": 1}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -190,8 +192,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "array to object", - from: types.JsonObject{"a": types.JsonArray{1, 2}}, - to: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonArray{1, 2}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -203,8 +205,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "object to array", - from: types.JsonObject{"a": types.JsonObject{"b": 2}}, - to: types.JsonObject{"a": types.JsonArray{1, 2}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonArray{1, 2}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -216,8 +218,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "remove object", - from: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, - to: types.JsonObject{"a": types.JsonObject{}}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, + to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -228,8 +230,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "insert escaped double quotes", - from: types.JsonObject{"\"a\"": "1"}, - to: types.JsonObject{"b": "\"2\""}, + from: types.JSONDocument{types.JsonObject{"\"a\"": "1"}}, + to: types.JSONDocument{types.JsonObject{"b": "\"2\""}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`"a"`), @@ -247,8 +249,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modifications returned in lexographic order", - from: types.JsonObject{"a": types.JsonObject{"1": "i"}, "aa": 2, "b": 6}, - to: types.JsonObject{"": 1, "a": types.JsonObject{}, "aa": 3, "bb": 5}, + from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"1": "i"}, "aa": 2, "b": 6}}, + to: types.JSONDocument{types.JsonObject{"": 1, "a": types.JsonObject{}, "aa": 3, "bb": 5}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(``), @@ -280,10 +282,131 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, } +func makeLargeJsonDiffTests(t *testing.T) []jsonDiffTest { + ctx := sql.NewEmptyContext() + ns := NewTestNodeStore() + + _ = func(document types.MutableJSON, path string, val interface{}) types.MutableJSON { + jsonVal, inRange, err := types.JSON.Convert(val) + require.NoError(t, err) + require.True(t, (bool)(inRange)) + newDoc, changed, err := document.Insert(ctx, path, jsonVal.(sql.JSONWrapper)) + require.NoError(t, err) + require.True(t, changed) + return newDoc + } + + set := func(document types.MutableJSON, path string, val interface{}) types.MutableJSON { + jsonVal, inRange, err := types.JSON.Convert(val) + require.NoError(t, err) + require.True(t, (bool)(inRange)) + newDoc, changed, err := document.Replace(ctx, path, jsonVal.(sql.JSONWrapper)) + require.NoError(t, err) + require.True(t, changed) + return newDoc + } + + lookup := func(document types.SearchableJSON, path string) sql.JSONWrapper { + newDoc, err := document.Lookup(ctx, path) + require.NoError(t, err) + return newDoc + } + + _ = func(document types.MutableJSON, path string) types.MutableJSON { + newDoc, changed, err := document.Remove(ctx, path) + require.True(t, changed) + require.NoError(t, err) + return newDoc + } + + largeObject := createLargeDocumentForTesting2(t, ctx, ns) + return []jsonDiffTest{ + { + name: "nested insert 1", + from: largeObject, + to: set(largeObject, "$.level7.level5", 2), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level5`), + From: lookup(largeObject, "$.level7.level5"), + To: &types.JSONDocument{Val: 2}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "nested insert 2", + from: largeObject, + to: set(largeObject, "$.level7.level4", 1), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level4`), + From: lookup(largeObject, "$.level7.level4"), + To: &types.JSONDocument{Val: 1}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "convert object to array", + from: largeObject, + to: set(largeObject, "$.level7.level6", []interface{}{}), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level6`), + From: lookup(largeObject, "$.level7.level6"), + To: &types.JSONDocument{Val: []interface{}{}}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "convert array to object", + from: set(largeObject, "$.level7.level6", []interface{}{}), + to: largeObject, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level6`), + From: &types.JSONDocument{Val: []interface{}{}}, + To: lookup(largeObject, "$.level7.level6"), + Type: ModifiedDiff, + }, + }, + }, + } +} + +// createLargeDocumentForTesting creates a JSON document large enough to be split across multiple chunks. +// This is useful for testing mutation operations in large documents. +// Every different possible jsonPathType appears on a chunk boundary, for better test coverage: +// chunk 0 key: $[6].children[2].children[0].number(endOfValue) +// chunk 2 key: $[7].children[5].children[4].children[2].children(arrayInitialElement) +// chunk 5 key: $[8].children[6].children[4].children[3].children[0](startOfValue) +// chunk 8 key: $[8].children[7].children[6].children[5].children[3].children[2].children[1](objectInitialElement) +func createLargeDocumentForTesting2(t *testing.T, ctx *sql.Context, ns NodeStore) IndexedJsonDocument { + leafDoc := make(map[string]interface{}) + leafDoc["number"] = float64(1.0) + leafDoc["string"] = "dolt" + var docExpression sql.Expression = expression.NewLiteral(newIndexedJsonDocumentFromValue(t, ctx, ns, leafDoc), types.JSON) + // require.NoError(t, err) + var err error + + for level := 0; level < 8; level++ { + docExpression, err = json.NewJSONInsert(docExpression, expression.NewLiteral(fmt.Sprintf("$.level%d", level), types.Text), docExpression) + require.NoError(t, err) + } + doc, err := docExpression.Eval(ctx, nil) + require.NoError(t, err) + return newIndexedJsonDocumentFromValue(t, ctx, ns, doc) +} + func TestJsonDiff(t *testing.T) { t.Run("simple tests", func(t *testing.T) { runTestBatch(t, simpleJsonDiffTests) }) + t.Run("large document tests", func(t *testing.T) { + runTestBatch(t, makeLargeJsonDiffTests(t)) + }) } func runTestBatch(t *testing.T, tests []jsonDiffTest) { @@ -307,7 +430,7 @@ func runTest(t *testing.T, test jsonDiffTest) { if err == io.EOF { break } - assert.NoError(t, err) + require.NoError(t, err) actualDiffs = append(actualDiffs, diff) } From 79a64bdcd0cd55765dd665a87b84a2ac19a3a6c6 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 11 Jul 2024 20:10:39 -0700 Subject: [PATCH 06/12] Miscellanious bugs around key escaping and comparisons. --- go/store/prolly/tree/json_indexed_document.go | 7 +++++-- go/store/prolly/tree/json_location.go | 14 ++++++++++++++ go/store/prolly/tree/json_scanner.go | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/go/store/prolly/tree/json_indexed_document.go b/go/store/prolly/tree/json_indexed_document.go index a12dbc81992..054faded680 100644 --- a/go/store/prolly/tree/json_indexed_document.go +++ b/go/store/prolly/tree/json_indexed_document.go @@ -305,14 +305,17 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL // If the value is a newly inserted key, write the key. if !keyLastPathElement.isArrayIndex { - jsonChunker.appendJsonToBuffer([]byte(fmt.Sprintf(`"%s":`, keyLastPathElement.key))) + jsonChunker.appendJsonToBuffer([]byte(fmt.Sprintf(`"%s":`, escapeKey(keyLastPathElement.key)))) } // Manually set the chunker's path and offset to the start of the value we're about to insert. jsonChunker.jScanner.valueOffset = len(jsonChunker.jScanner.jsonBuffer) jsonChunker.jScanner.currentPath = keyPath jsonChunker.appendJsonToBuffer(insertedValueBytes) - jsonChunker.processBuffer(ctx) + err = jsonChunker.processBuffer(ctx) + if err != nil { + return nil, false, err + } newRoot, err := jsonChunker.Done(ctx) if err != nil { diff --git a/go/store/prolly/tree/json_location.go b/go/store/prolly/tree/json_location.go index 3bd407d01d3..c8986b491a4 100644 --- a/go/store/prolly/tree/json_location.go +++ b/go/store/prolly/tree/json_location.go @@ -186,6 +186,10 @@ const ( lexStateEscapedQuotedKey lexState = 5 ) +func escapeKey(key []byte) []byte { + return bytes.Replace(key, []byte(`"`), []byte(`\"`), -1) +} + func unescapeKey(key []byte) []byte { return bytes.Replace(key, []byte(`\"`), []byte(`"`), -1) } @@ -434,6 +438,16 @@ type jsonLocationOrdering struct{} var _ Ordering[[]byte] = jsonLocationOrdering{} func (jsonLocationOrdering) Compare(left, right []byte) int { + // A JSON document that fits entirely in a single chunk has no keys, + if len(left) == 0 { + if len(right) == 0 { + return 0 + } + return -1 + } + if len(right) == 0 { + return 1 + } leftPath := jsonPathFromKey(left) rightPath := jsonPathFromKey(right) return compareJsonLocations(leftPath, rightPath) diff --git a/go/store/prolly/tree/json_scanner.go b/go/store/prolly/tree/json_scanner.go index 413b44ca457..8a7ffcf28f7 100644 --- a/go/store/prolly/tree/json_scanner.go +++ b/go/store/prolly/tree/json_scanner.go @@ -160,7 +160,7 @@ func (s *JsonScanner) acceptValue() error { const endOfFile byte = 0xFF // current returns the current byte being parsed, or 0xFF if we've reached the end of the file. -// (Since the JSON is UTF-8, the 0xFF byte cannot otherwise appear within in.) +// (Since the JSON is UTF-8, the 0xFF byte cannot otherwise appear within it.) func (s JsonScanner) current() byte { if s.valueOffset >= len(s.jsonBuffer) { return endOfFile From c01fa60ab7cee0fd2435d1596b40d3569a63ab1b Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 30 Jul 2024 23:01:03 -0400 Subject: [PATCH 07/12] Bump GMS version. --- go/go.mod | 2 +- go/go.sum | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/go/go.mod b/go/go.mod index ab807586d0d..972d1de2811 100644 --- a/go/go.mod +++ b/go/go.mod @@ -57,7 +57,7 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 github.com/creasty/defaults v1.6.0 github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.18.2-0.20240730232246-9f1f6200964e + github.com/dolthub/go-mysql-server v0.18.2-0.20240731192831-1de53bb45f7b github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 github.com/dolthub/swiss v0.1.0 github.com/goccy/go-json v0.10.2 diff --git a/go/go.sum b/go/go.sum index 94163ed38e0..690082d9f03 100644 --- a/go/go.sum +++ b/go/go.sum @@ -183,8 +183,14 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168= +github.com/dolthub/go-mysql-server v0.18.2-0.20240718001922-c72df2b7864c h1:LddV4+ETycOwneaBYCc2N3MBlyuZpbF0PK3E/21DhTs= +github.com/dolthub/go-mysql-server v0.18.2-0.20240718001922-c72df2b7864c/go.mod h1:P6bG0p+3mH4LS4DLo3BySh10ZJTDqgWyfWBu8gGE3eU= github.com/dolthub/go-mysql-server v0.18.2-0.20240730232246-9f1f6200964e h1:kc3zbqUcLqEwUHFHb5cKnS6uDsgHWu8RdG3x9wpigWM= github.com/dolthub/go-mysql-server v0.18.2-0.20240730232246-9f1f6200964e/go.mod h1:IvXmiR+fvWQs27nNmp0XuTO7ze6j6ekrVkhWw+3qo2s= +github.com/dolthub/go-mysql-server v0.18.2-0.20240731025910-cbea6900c667 h1:SuzzM+nAdirWtkXrhWZ5O3KxfpfIoAM0jE4/huqIN70= +github.com/dolthub/go-mysql-server v0.18.2-0.20240731025910-cbea6900c667/go.mod h1:IvXmiR+fvWQs27nNmp0XuTO7ze6j6ekrVkhWw+3qo2s= +github.com/dolthub/go-mysql-server v0.18.2-0.20240731192831-1de53bb45f7b h1:OAxeKSqJZVgeEzE5Z1tPy0vekWSOV9SoIifL+r/h1EM= +github.com/dolthub/go-mysql-server v0.18.2-0.20240731192831-1de53bb45f7b/go.mod h1:IvXmiR+fvWQs27nNmp0XuTO7ze6j6ekrVkhWw+3qo2s= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q= github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE= From 1af849f2174a018ccb78c755d6bcc8bc64cf6b8b Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 12 Aug 2024 14:58:33 -0700 Subject: [PATCH 08/12] Respond to PR feedback for three-way JSON merging. --- .../doltcore/merge/merge_prolly_rows.go | 20 ++-- .../doltcore/merge/schema_merge_test.go | 6 +- .../doltcore/merge/three_way_json_differ.go | 6 +- go/store/prolly/tree/indexed_json_diff.go | 100 ++++++++---------- go/store/prolly/tree/json_diff.go | 10 +- go/store/prolly/tree/json_diff_test.go | 19 ++-- go/store/prolly/tree/json_indexed_document.go | 63 ++--------- .../prolly/tree/json_indexed_document_test.go | 6 -- go/store/prolly/tree/json_location.go | 10 +- go/store/prolly/tree/json_type_categories.go | 77 ++++++++++++++ 10 files changed, 159 insertions(+), 158 deletions(-) create mode 100644 go/store/prolly/tree/json_type_categories.go diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go b/go/libraries/doltcore/merge/merge_prolly_rows.go index 1b4c1332ff9..e4d22489884 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -2010,23 +2010,19 @@ func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSO // First, deserialize each value into JSON. // We can only merge if the value at all three commits is a JSON object. - baseTypeCategory, err := tree.GetTypeCategory(base) + baseIsObject, err := tree.IsJsonObject(base) if err != nil { return nil, true, err } - leftTypeCategory, err := tree.GetTypeCategory(left) + leftIsObject, err := tree.IsJsonObject(left) if err != nil { return nil, true, err } - rightTypeCategory, err := tree.GetTypeCategory(right) + rightIsObject, err := tree.IsJsonObject(right) if err != nil { return nil, true, err } - baseIsObject := baseTypeCategory == tree.JsonTypeObject - leftIsObject := leftTypeCategory == tree.JsonTypeObject - rightIsObject := rightTypeCategory == tree.JsonTypeObject - if !baseIsObject || !leftIsObject || !rightIsObject { // At least one of the commits does not have a JSON object. // If both left and right have the same value, use that value. @@ -2049,7 +2045,7 @@ func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSO // We only do three way merges on values read from tables right now, which are read in as tree.IndexedJsonDocument. - var leftDiffer IJsonDiffer + var leftDiffer tree.IJsonDiffer if isBaseIndexed && isLeftIndexed { leftDiffer, err = tree.NewIndexedJsonDiffer(ctx, indexedBase, indexedLeft) if err != nil { @@ -2064,11 +2060,10 @@ func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSO if err != nil { return nil, true, err } - leftDifferValue := tree.NewJsonDiffer(baseObject.(types.JsonObject), leftObject.(types.JsonObject)) - leftDiffer = &leftDifferValue + leftDiffer = tree.NewJsonDiffer(baseObject.(types.JsonObject), leftObject.(types.JsonObject)) } - var rightDiffer IJsonDiffer + var rightDiffer tree.IJsonDiffer if isBaseIndexed && isRightIndexed { rightDiffer, err = tree.NewIndexedJsonDiffer(ctx, indexedBase, indexedRight) if err != nil { @@ -2083,8 +2078,7 @@ func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSO if err != nil { return nil, true, err } - rightDifferValue := tree.NewJsonDiffer(baseObject.(types.JsonObject), rightObject.(types.JsonObject)) - rightDiffer = &rightDifferValue + rightDiffer = tree.NewJsonDiffer(baseObject.(types.JsonObject), rightObject.(types.JsonObject)) } threeWayDiffer := ThreeWayJsonDiffer{ diff --git a/go/libraries/doltcore/merge/schema_merge_test.go b/go/libraries/doltcore/merge/schema_merge_test.go index 3a0823c5826..3b8c1e74fb3 100644 --- a/go/libraries/doltcore/merge/schema_merge_test.go +++ b/go/libraries/doltcore/merge/schema_merge_test.go @@ -1370,6 +1370,8 @@ var jsonMergeTests = []schemaMergeTest{ dataConflict: true, }, { + // Regression test: Older versions of json documents could accidentally think that $.aa is a child + // of $.a and see this as a conflict, even though it isn't one. name: "false positive conflict", ancestor: singleRow(1, 1, 1, `{ "a": 1, "aa":2 }`), left: singleRow(1, 2, 1, `{ "aa":2 }`), @@ -1401,7 +1403,6 @@ func createLargeDocumentForTesting(t *testing.T, ctx *sql.Context, ns tree.NodeS leafDoc["number"] = float64(1.0) leafDoc["string"] = "dolt" var docExpression sql.Expression = expression.NewLiteral(newIndexedJsonDocumentFromValue(t, ctx, ns, leafDoc), sqltypes.JSON) - // require.NoError(t, err) var err error for level := 0; level < 8; level++ { @@ -1500,7 +1501,6 @@ func jsonMergeLargeDocumentTests(t *testing.T) []schemaMergeTest { left: singleRow(1, 2, 1, set(insert(largeObject, "$.a", 1), "$.level7", 2)), right: singleRow(1, 1, 2, set(insert(largeObject, "$.a", 1), "$.a", 3)), merged: singleRow(1, 2, 2, set(insert(largeObject, "$.a", 3), "$.level7", 2)), - skip: true, //doesn't terminate? }, { name: `parallel deletion`, @@ -1557,7 +1557,6 @@ func jsonMergeLargeDocumentTests(t *testing.T) []schemaMergeTest { left: singleRow(1, 2, 1, set(largeObject, "$.level7.level4", 1)), right: singleRow(1, 1, 2, set(largeObject, "$.level7.level5", 2)), merged: singleRow(1, 2, 2, set(set(largeObject, "$.level7.level5", 2), "$.level7.level4", 1)), - // skip: true, // panics }, { name: `nested deletion`, @@ -1565,7 +1564,6 @@ func jsonMergeLargeDocumentTests(t *testing.T) []schemaMergeTest { left: singleRow(1, 2, 1, delete(largeObject, "$.level7")), right: singleRow(1, 1, 2, delete(largeObject, "$.level6")), merged: singleRow(1, 2, 2, delete(delete(largeObject, "$.level6"), "$.level7")), - skip: true, // doesn't terminate }, { name: "complicated nested merge", diff --git a/go/libraries/doltcore/merge/three_way_json_differ.go b/go/libraries/doltcore/merge/three_way_json_differ.go index 9d5784d550f..4c35beaf99c 100644 --- a/go/libraries/doltcore/merge/three_way_json_differ.go +++ b/go/libraries/doltcore/merge/three_way_json_differ.go @@ -23,12 +23,8 @@ import ( "io" ) -type IJsonDiffer interface { - Next(ctx context.Context) (tree.JsonDiff, error) -} - type ThreeWayJsonDiffer struct { - leftDiffer, rightDiffer IJsonDiffer + leftDiffer, rightDiffer tree.IJsonDiffer leftCurrentDiff, rightCurrentDiff *tree.JsonDiff leftIsDone, rightIsDone bool ns tree.NodeStore diff --git a/go/store/prolly/tree/indexed_json_diff.go b/go/store/prolly/tree/indexed_json_diff.go index 22b2634b486..8a94caab77d 100644 --- a/go/store/prolly/tree/indexed_json_diff.go +++ b/go/store/prolly/tree/indexed_json_diff.go @@ -28,6 +28,8 @@ type IndexedJsonDiffer struct { started bool } +var _ IJsonDiffer = &IndexedJsonDiffer{} + func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*IndexedJsonDiffer, error) { differ, err := DifferFromRoots[jsonLocationKey, jsonLocationOrdering](ctx, from.m.NodeStore, to.m.NodeStore, from.m.Root, to.m.Root, jsonLocationOrdering{}, false) if err != nil { @@ -90,6 +92,38 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error return nil } + newAddedDiff := func(key []byte) (JsonDiff, error) { + addedValue, err := jd.currentToCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + return JsonDiff{ + Key: key, + To: types.NewLazyJSONDocument(addedValue), + Type: AddedDiff, + }, nil + } + + newRemovedDiff := func(key []byte) (JsonDiff, error) { + removedValue, err := jd.currentFromCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + return JsonDiff{ + Key: key, + From: types.NewLazyJSONDocument(removedValue), + Type: RemovedDiff, + }, nil + } + for { if jd.currentFromCursor == nil && jd.currentToCursor == nil { if jd.differ.from == nil || jd.differ.to == nil { @@ -158,7 +192,7 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error // 5) One cursor points to the initial element of an object/array, while the other points to the end of that same path: // - A value has been changed from an object/array to a scalar, or vice-versa. // 6) One cursor points to the initial element of an object, while the other points to the initial element of an array: - // - The value has been change from an object to an array, or vice-versa. + // - The value has been changed from an object to an array, or vice-versa. fromScanner := &jd.currentFromCursor.jsonScanner toScanner := &jd.currentToCursor.jsonScanner @@ -227,37 +261,13 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error continue case -1: - key := fromCurrentLocation.Clone().key // Case 3: A value has been removed from an object - removedValue, err := jd.currentFromCursor.NextValue(ctx) - if err != nil { - return JsonDiff{}, err - } - err = advanceCursor(&jd.currentFromCursor) - if err != nil { - return JsonDiff{}, err - } - return JsonDiff{ - Key: key, - From: types.NewLazyJSONDocument(removedValue), - Type: RemovedDiff, - }, nil + key := fromCurrentLocation.Clone().key + return newRemovedDiff(key) case 1: - key := toCurrentLocation.Clone().key // Case 3: A value has been added to an object - addedValue, err := jd.currentToCursor.NextValue(ctx) - if err != nil { - return JsonDiff{}, err - } - err = advanceCursor(&jd.currentToCursor) - if err != nil { - return JsonDiff{}, err - } - return JsonDiff{ - Key: key, - To: types.NewLazyJSONDocument(addedValue), - Type: AddedDiff, - }, nil + key := toCurrentLocation.Clone().key + return newAddedDiff(key) } } @@ -265,42 +275,18 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error if fromCurrentLocation.getScannerState() != endOfValue { return JsonDiff{}, jsonParseError } - key := toCurrentLocation.Clone().key // Case 4: A value has been inserted at the end of an object or array. - addedValue, err := jd.currentToCursor.NextValue(ctx) - if err != nil { - return JsonDiff{}, err - } - err = advanceCursor(&jd.currentToCursor) - if err != nil { - return JsonDiff{}, err - } - return JsonDiff{ - Key: key, - To: types.NewLazyJSONDocument(addedValue), - Type: AddedDiff, - }, nil + key := toCurrentLocation.Clone().key + return newAddedDiff(key) } if fromScannerAtStartOfValue && !toScannerAtStartOfValue { if toCurrentLocation.getScannerState() != endOfValue { return JsonDiff{}, jsonParseError } - key := fromCurrentLocation.Clone().key // Case 4: A value has been removed from the end of an object or array. - addedValue, err := jd.currentFromCursor.NextValue(ctx) - if err != nil { - return JsonDiff{}, err - } - err = advanceCursor(&jd.currentFromCursor) - if err != nil { - return JsonDiff{}, err - } - return JsonDiff{ - Key: key, - From: types.NewLazyJSONDocument(addedValue), - Type: RemovedDiff, - }, nil + key := fromCurrentLocation.Clone().key + return newRemovedDiff(key) } } } diff --git a/go/store/prolly/tree/json_diff.go b/go/store/prolly/tree/json_diff.go index f59bd6bcab3..8ef4839d01b 100644 --- a/go/store/prolly/tree/json_diff.go +++ b/go/store/prolly/tree/json_diff.go @@ -24,6 +24,10 @@ import ( "strings" ) +type IJsonDiffer interface { + Next(ctx context.Context) (JsonDiff, error) +} + type JsonDiff struct { Key []byte From, To sql.JSONWrapper @@ -43,10 +47,12 @@ type JsonDiffer struct { subDiffer *JsonDiffer } -func NewJsonDiffer(from, to types.JsonObject) JsonDiffer { +var _ IJsonDiffer = &JsonDiffer{} + +func NewJsonDiffer(from, to types.JsonObject) *JsonDiffer { fromIter := types.NewJSONIter(from) toIter := types.NewJSONIter(to) - return JsonDiffer{ + return &JsonDiffer{ root: []byte{byte(startOfValue)}, from: fromIter, to: toIter, diff --git a/go/store/prolly/tree/json_diff_test.go b/go/store/prolly/tree/json_diff_test.go index 0dc8d05ebab..7bd04d98a7a 100644 --- a/go/store/prolly/tree/json_diff_test.go +++ b/go/store/prolly/tree/json_diff_test.go @@ -282,7 +282,7 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, } -func makeLargeJsonDiffTests(t *testing.T) []jsonDiffTest { +func largeJsonDiffTests(t *testing.T) []jsonDiffTest { ctx := sql.NewEmptyContext() ns := NewTestNodeStore() @@ -319,7 +319,7 @@ func makeLargeJsonDiffTests(t *testing.T) []jsonDiffTest { return newDoc } - largeObject := createLargeDocumentForTesting2(t, ctx, ns) + largeObject := createLargeArraylessDocumentForTesting(t, ctx, ns) return []jsonDiffTest{ { name: "nested insert 1", @@ -376,19 +376,14 @@ func makeLargeJsonDiffTests(t *testing.T) []jsonDiffTest { } } -// createLargeDocumentForTesting creates a JSON document large enough to be split across multiple chunks. -// This is useful for testing mutation operations in large documents. -// Every different possible jsonPathType appears on a chunk boundary, for better test coverage: -// chunk 0 key: $[6].children[2].children[0].number(endOfValue) -// chunk 2 key: $[7].children[5].children[4].children[2].children(arrayInitialElement) -// chunk 5 key: $[8].children[6].children[4].children[3].children[0](startOfValue) -// chunk 8 key: $[8].children[7].children[6].children[5].children[3].children[2].children[1](objectInitialElement) -func createLargeDocumentForTesting2(t *testing.T, ctx *sql.Context, ns NodeStore) IndexedJsonDocument { +// createLargeArraylessDocumentForTesting creates a JSON document large enough to be split across multiple chunks that +// does not contain arrays. This makes it easier to write tests for three-way merging, since we cant't currently merge +// concurrent changes to arrays. +func createLargeArraylessDocumentForTesting(t *testing.T, ctx *sql.Context, ns NodeStore) IndexedJsonDocument { leafDoc := make(map[string]interface{}) leafDoc["number"] = float64(1.0) leafDoc["string"] = "dolt" var docExpression sql.Expression = expression.NewLiteral(newIndexedJsonDocumentFromValue(t, ctx, ns, leafDoc), types.JSON) - // require.NoError(t, err) var err error for level := 0; level < 8; level++ { @@ -405,7 +400,7 @@ func TestJsonDiff(t *testing.T) { runTestBatch(t, simpleJsonDiffTests) }) t.Run("large document tests", func(t *testing.T) { - runTestBatch(t, makeLargeJsonDiffTests(t)) + runTestBatch(t, largeJsonDiffTests(t)) }) } diff --git a/go/store/prolly/tree/json_indexed_document.go b/go/store/prolly/tree/json_indexed_document.go index 054faded680..b0ac82f43b1 100644 --- a/go/store/prolly/tree/json_indexed_document.go +++ b/go/store/prolly/tree/json_indexed_document.go @@ -19,7 +19,6 @@ import ( "database/sql/driver" "encoding/json" "fmt" - "github.com/shopspring/decimal" "io" "sync" @@ -258,7 +257,10 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL } jsonChunker.appendJsonToBuffer([]byte(fmt.Sprintf("[%s,%s]", originalValue, insertedValueBytes))) - jsonChunker.processBuffer(ctx) + err = jsonChunker.processBuffer(ctx) + if err != nil { + return IndexedJsonDocument{}, false, err + } newRoot, err := jsonChunker.Done(ctx) if err != nil { @@ -314,7 +316,7 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL jsonChunker.appendJsonToBuffer(insertedValueBytes) err = jsonChunker.processBuffer(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } newRoot, err := jsonChunker.Done(ctx) @@ -521,7 +523,10 @@ func (i IndexedJsonDocument) replaceIntoCursor(ctx context.Context, keyPath json } jsonChunker.appendJsonToBuffer(insertedValueBytes) - jsonChunker.processBuffer(ctx) + err = jsonChunker.processBuffer(ctx) + if err != nil { + return IndexedJsonDocument{}, false, err + } newRoot, err := jsonChunker.Done(ctx) if err != nil { @@ -585,54 +590,6 @@ func (i IndexedJsonDocument) getFirstCharacter(ctx context.Context) (byte, error return firstCharacter, nil } -type jsonTypeCategory int - -const ( - jsonTypeNull jsonTypeCategory = iota - jsonTypeNumber - jsonTypeString - JsonTypeObject - jsonTypeArray - jsonTypeBoolean -) - -func getTypeCategoryOfValue(val interface{}) (jsonTypeCategory, error) { - if val == nil { - return jsonTypeNull, nil - } - switch val.(type) { - case map[string]interface{}: - return JsonTypeObject, nil - case []interface{}: - return jsonTypeArray, nil - case bool: - return jsonTypeBoolean, nil - case string: - return jsonTypeString, nil - case decimal.Decimal, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64: - return jsonTypeNumber, nil - } - return 0, fmt.Errorf("expected json value, got %v", val) -} - -// getTypeCategoryFromFirstCharacter returns the type of a JSON object by inspecting its first byte. -func getTypeCategoryFromFirstCharacter(c byte) jsonTypeCategory { - switch c { - case '{': - return JsonTypeObject - case '[': - return jsonTypeArray - case 'n': - return jsonTypeNull - case 't', 'f': - return jsonTypeBoolean - case '"': - return jsonTypeString - default: - return jsonTypeNumber - } -} - func (i IndexedJsonDocument) getTypeCategory() (jsonTypeCategory, error) { firstCharacter, err := i.getFirstCharacter(i.ctx) if err != nil { @@ -713,7 +670,7 @@ func (i IndexedJsonDocument) Compare(other interface{}) (int, error) { switch thisTypeCategory { case jsonTypeNull: return 0, nil - case jsonTypeArray, JsonTypeObject: + case jsonTypeArray, jsonTypeObject: // To compare two values that are both arrays or both objects, we must locate the first location // where they differ. diff --git a/go/store/prolly/tree/json_indexed_document_test.go b/go/store/prolly/tree/json_indexed_document_test.go index da1d51da304..7a7adc5637b 100644 --- a/go/store/prolly/tree/json_indexed_document_test.go +++ b/go/store/prolly/tree/json_indexed_document_test.go @@ -418,12 +418,6 @@ func TestJsonCompare(t *testing.T) { Right: `null`, Cmp: 1, }, - { - Name: "large array > null", - Left: largeArray, - Right: `null`, - Cmp: 1, - }, { Name: "inserting into end of array makes it greater", Left: largeArray, diff --git a/go/store/prolly/tree/json_location.go b/go/store/prolly/tree/json_location.go index c8986b491a4..c532f07ac9b 100644 --- a/go/store/prolly/tree/json_location.go +++ b/go/store/prolly/tree/json_location.go @@ -439,13 +439,11 @@ var _ Ordering[[]byte] = jsonLocationOrdering{} func (jsonLocationOrdering) Compare(left, right []byte) int { // A JSON document that fits entirely in a single chunk has no keys, - if len(left) == 0 { - if len(right) == 0 { - return 0 - } + if len(left) == 0 && len(right) == 0 { + return 0 + } else if len(left) == 0 { return -1 - } - if len(right) == 0 { + } else if len(right) == 0 { return 1 } leftPath := jsonPathFromKey(left) diff --git a/go/store/prolly/tree/json_type_categories.go b/go/store/prolly/tree/json_type_categories.go new file mode 100644 index 00000000000..016643e290f --- /dev/null +++ b/go/store/prolly/tree/json_type_categories.go @@ -0,0 +1,77 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tree + +import ( + "fmt" + "github.com/dolthub/go-mysql-server/sql" + "github.com/shopspring/decimal" +) + +type jsonTypeCategory int + +const ( + jsonTypeNull jsonTypeCategory = iota + jsonTypeNumber + jsonTypeString + jsonTypeObject + jsonTypeArray + jsonTypeBoolean +) + +func getTypeCategoryOfValue(val interface{}) (jsonTypeCategory, error) { + if val == nil { + return jsonTypeNull, nil + } + switch val.(type) { + case map[string]interface{}: + return jsonTypeObject, nil + case []interface{}: + return jsonTypeArray, nil + case bool: + return jsonTypeBoolean, nil + case string: + return jsonTypeString, nil + case decimal.Decimal, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64: + return jsonTypeNumber, nil + } + return 0, fmt.Errorf("expected json value, got %v", val) +} + +// getTypeCategoryFromFirstCharacter returns the type of a JSON object by inspecting its first byte. +func getTypeCategoryFromFirstCharacter(c byte) jsonTypeCategory { + switch c { + case '{': + return jsonTypeObject + case '[': + return jsonTypeArray + case 'n': + return jsonTypeNull + case 't', 'f': + return jsonTypeBoolean + case '"': + return jsonTypeString + default: + return jsonTypeNumber + } +} + +func IsJsonObject(json sql.JSONWrapper) (bool, error) { + valType, err := GetTypeCategory(json) + if err != nil { + return false, err + } + return valType == jsonTypeObject, nil +} From 51444a34d0900e0f71a71334596d2fc883285d5a Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 12 Aug 2024 15:08:31 -0700 Subject: [PATCH 09/12] Add additional tests to json_diff_test.go --- go/store/prolly/tree/json_diff_test.go | 34 +++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/go/store/prolly/tree/json_diff_test.go b/go/store/prolly/tree/json_diff_test.go index 7bd04d98a7a..d24647bf824 100644 --- a/go/store/prolly/tree/json_diff_test.go +++ b/go/store/prolly/tree/json_diff_test.go @@ -286,7 +286,7 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest { ctx := sql.NewEmptyContext() ns := NewTestNodeStore() - _ = func(document types.MutableJSON, path string, val interface{}) types.MutableJSON { + insert := func(document types.MutableJSON, path string, val interface{}) types.MutableJSON { jsonVal, inRange, err := types.JSON.Convert(val) require.NoError(t, err) require.True(t, (bool)(inRange)) @@ -312,7 +312,7 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest { return newDoc } - _ = func(document types.MutableJSON, path string) types.MutableJSON { + remove := func(document types.MutableJSON, path string) types.MutableJSON { newDoc, changed, err := document.Remove(ctx, path) require.True(t, changed) require.NoError(t, err) @@ -322,7 +322,33 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest { largeObject := createLargeArraylessDocumentForTesting(t, ctx, ns) return []jsonDiffTest{ { - name: "nested insert 1", + name: "nested insert", + from: largeObject, + to: insert(largeObject, "$.level7.newKey", 2), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `newKey`), + From: nil, + To: &types.JSONDocument{Val: 2}, + Type: AddedDiff, + }, + }, + }, + { + name: "nested remove", + from: largeObject, + to: remove(largeObject, "$.level7.level6"), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level6`), + From: lookup(largeObject, "$.level7.level6"), + To: nil, + Type: RemovedDiff, + }, + }, + }, + { + name: "nested modification 1", from: largeObject, to: set(largeObject, "$.level7.level5", 2), expectedDiffs: []JsonDiff{ @@ -335,7 +361,7 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest { }, }, { - name: "nested insert 2", + name: "nested modification 2", from: largeObject, to: set(largeObject, "$.level7.level4", 1), expectedDiffs: []JsonDiff{ From e4bcf5e56aae4a9aa87d9677119e549f83c8c732 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 12 Aug 2024 15:16:17 -0700 Subject: [PATCH 10/12] Bump GMS version. --- go/go.mod | 4 ++-- go/go.sum | 14 ++++---------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/go/go.mod b/go/go.mod index 972d1de2811..6e758c21850 100644 --- a/go/go.mod +++ b/go/go.mod @@ -15,7 +15,7 @@ require ( github.com/dolthub/fslock v0.0.3 github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 - github.com/dolthub/vitess v0.0.0-20240730224954-d707fadb2e04 + github.com/dolthub/vitess v0.0.0-20240807181005-71d735078e24 github.com/dustin/go-humanize v1.0.1 github.com/fatih/color v1.13.0 github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568 @@ -57,7 +57,7 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 github.com/creasty/defaults v1.6.0 github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.18.2-0.20240731192831-1de53bb45f7b + github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178 github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 github.com/dolthub/swiss v0.1.0 github.com/goccy/go-json v0.10.2 diff --git a/go/go.sum b/go/go.sum index 690082d9f03..ae15aacd1e6 100644 --- a/go/go.sum +++ b/go/go.sum @@ -183,14 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168= -github.com/dolthub/go-mysql-server v0.18.2-0.20240718001922-c72df2b7864c h1:LddV4+ETycOwneaBYCc2N3MBlyuZpbF0PK3E/21DhTs= -github.com/dolthub/go-mysql-server v0.18.2-0.20240718001922-c72df2b7864c/go.mod h1:P6bG0p+3mH4LS4DLo3BySh10ZJTDqgWyfWBu8gGE3eU= -github.com/dolthub/go-mysql-server v0.18.2-0.20240730232246-9f1f6200964e h1:kc3zbqUcLqEwUHFHb5cKnS6uDsgHWu8RdG3x9wpigWM= -github.com/dolthub/go-mysql-server v0.18.2-0.20240730232246-9f1f6200964e/go.mod h1:IvXmiR+fvWQs27nNmp0XuTO7ze6j6ekrVkhWw+3qo2s= -github.com/dolthub/go-mysql-server v0.18.2-0.20240731025910-cbea6900c667 h1:SuzzM+nAdirWtkXrhWZ5O3KxfpfIoAM0jE4/huqIN70= -github.com/dolthub/go-mysql-server v0.18.2-0.20240731025910-cbea6900c667/go.mod h1:IvXmiR+fvWQs27nNmp0XuTO7ze6j6ekrVkhWw+3qo2s= -github.com/dolthub/go-mysql-server v0.18.2-0.20240731192831-1de53bb45f7b h1:OAxeKSqJZVgeEzE5Z1tPy0vekWSOV9SoIifL+r/h1EM= -github.com/dolthub/go-mysql-server v0.18.2-0.20240731192831-1de53bb45f7b/go.mod h1:IvXmiR+fvWQs27nNmp0XuTO7ze6j6ekrVkhWw+3qo2s= +github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178 h1:aCckOhs6UuESaVFEWMKaYbRtE1ht6qqs++pzUGOu9NM= +github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178/go.mod h1:PwuemL+YK+YiWcUFhknixeqNLjJNfCx7KDsHNajx9fM= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q= github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE= @@ -203,8 +197,8 @@ github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9X github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY= github.com/dolthub/swiss v0.1.0 h1:EaGQct3AqeP/MjASHLiH6i4TAmgbG/c4rA6a1bzCOPc= github.com/dolthub/swiss v0.1.0/go.mod h1:BeucyB08Vb1G9tumVN3Vp/pyY4AMUnr9p7Rz7wJ7kAQ= -github.com/dolthub/vitess v0.0.0-20240730224954-d707fadb2e04 h1:2uejd/7c93uwExCGeZDzU5uiOaGqfJ1Tlse7mxziXzQ= -github.com/dolthub/vitess v0.0.0-20240730224954-d707fadb2e04/go.mod h1:uBvlRluuL+SbEWTCZ68o0xvsdYZER3CEG/35INdzfJM= +github.com/dolthub/vitess v0.0.0-20240807181005-71d735078e24 h1:/zCd98CLZURqK85jQ+qRmEMx/dpXz85F1/Et7gqMGkk= +github.com/dolthub/vitess v0.0.0-20240807181005-71d735078e24/go.mod h1:uBvlRluuL+SbEWTCZ68o0xvsdYZER3CEG/35INdzfJM= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= From 5e0990fd0b17cc11f0a605f99b379efdbf744179 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 12 Aug 2024 16:22:13 -0700 Subject: [PATCH 11/12] Lint `json_diff.test.go` --- go/store/prolly/tree/json_diff_test.go | 75 +++++++++++++------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/go/store/prolly/tree/json_diff_test.go b/go/store/prolly/tree/json_diff_test.go index d24647bf824..e14cf442a3b 100644 --- a/go/store/prolly/tree/json_diff_test.go +++ b/go/store/prolly/tree/json_diff_test.go @@ -46,14 +46,14 @@ func makeJsonPathKey(parts ...string) []byte { var simpleJsonDiffTests = []jsonDiffTest{ { name: "empty object, no modifications", - from: types.JSONDocument{types.JsonObject{}}, - to: types.JSONDocument{types.JsonObject{}}, + from: types.JSONDocument{Val: types.JsonObject{}}, + to: types.JSONDocument{Val: types.JsonObject{}}, expectedDiffs: nil, }, { name: "insert into empty object", - from: types.JSONDocument{types.JsonObject{}}, - to: types.JSONDocument{types.JsonObject{"a": 1}}, + from: types.JSONDocument{Val: types.JsonObject{}}, + to: types.JSONDocument{Val: types.JsonObject{"a": 1}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -65,8 +65,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "delete from object", - from: types.JSONDocument{types.JsonObject{"a": 1}}, - to: types.JSONDocument{types.JsonObject{}}, + from: types.JSONDocument{Val: types.JsonObject{"a": 1}}, + to: types.JSONDocument{Val: types.JsonObject{}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -78,8 +78,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify object", - from: types.JSONDocument{types.JsonObject{"a": 1}}, - to: types.JSONDocument{types.JsonObject{"a": 2}}, + from: types.JSONDocument{Val: types.JsonObject{"a": 1}}, + to: types.JSONDocument{Val: types.JsonObject{"a": 2}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -91,8 +91,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested insert", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 1}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 1}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -103,8 +103,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested delete", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 1}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 1}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -115,8 +115,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested modify", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 1}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 2}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 1}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 2}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -128,8 +128,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "insert object", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -140,8 +140,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify to object", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 2}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -153,8 +153,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify from object", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 2}}}, - to: types.JSONDocument{types.JsonObject{"a": 1}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": 1}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -166,8 +166,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify to array", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": "foo"}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonArray{1, 2}}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": "foo"}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonArray{1, 2}}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -179,8 +179,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify from array", - from: types.JSONDocument{types.JsonObject{"a": types.JsonArray{1, 2}}}, - to: types.JSONDocument{types.JsonObject{"a": 1}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonArray{1, 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": 1}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -192,8 +192,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "array to object", - from: types.JSONDocument{types.JsonObject{"a": types.JsonArray{1, 2}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonArray{1, 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -205,8 +205,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "object to array", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": 2}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonArray{1, 2}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonArray{1, 2}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`), @@ -218,8 +218,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "remove object", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, - to: types.JSONDocument{types.JsonObject{"a": types.JsonObject{}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{}}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`a`, `b`), @@ -230,8 +230,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "insert escaped double quotes", - from: types.JSONDocument{types.JsonObject{"\"a\"": "1"}}, - to: types.JSONDocument{types.JsonObject{"b": "\"2\""}}, + from: types.JSONDocument{Val: types.JsonObject{"\"a\"": "1"}}, + to: types.JSONDocument{Val: types.JsonObject{"b": "\"2\""}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(`"a"`), @@ -249,8 +249,8 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modifications returned in lexographic order", - from: types.JSONDocument{types.JsonObject{"a": types.JsonObject{"1": "i"}, "aa": 2, "b": 6}}, - to: types.JSONDocument{types.JsonObject{"": 1, "a": types.JsonObject{}, "aa": 3, "bb": 5}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"1": "i"}, "aa": 2, "b": 6}}, + to: types.JSONDocument{Val: types.JsonObject{"": 1, "a": types.JsonObject{}, "aa": 3, "bb": 5}}, expectedDiffs: []JsonDiff{ { Key: makeJsonPathKey(``), @@ -469,15 +469,12 @@ func runTest(t *testing.T, test jsonDiffTest) { } cmp, err = types.CompareJSON(expected.To, actual.To) require.NoError(t, err) - if cmp != 0 { - return false - } - return true + + return cmp == 0 } require.Equal(t, len(test.expectedDiffs), len(actualDiffs)) for i, expected := range test.expectedDiffs { actual := actualDiffs[i] require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual)) - } } From 1399c9f8889023df81651152c8cce0df1d794eff Mon Sep 17 00:00:00 2001 From: nicktobey Date: Mon, 12 Aug 2024 23:29:51 +0000 Subject: [PATCH 12/12] [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh --- go/libraries/doltcore/merge/schema_merge_test.go | 8 ++++---- go/libraries/doltcore/merge/three_way_json_differ.go | 6 ++++-- go/store/prolly/tree/indexed_json_diff.go | 3 ++- go/store/prolly/tree/json_diff.go | 5 +++-- go/store/prolly/tree/json_diff_test.go | 5 +++-- go/store/prolly/tree/json_type_categories.go | 1 + 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/go/libraries/doltcore/merge/schema_merge_test.go b/go/libraries/doltcore/merge/schema_merge_test.go index 3b8c1e74fb3..f92d1947979 100644 --- a/go/libraries/doltcore/merge/schema_merge_test.go +++ b/go/libraries/doltcore/merge/schema_merge_test.go @@ -17,13 +17,12 @@ package merge_test import ( "context" "fmt" - "github.com/dolthub/dolt/go/store/prolly/tree" - "github.com/dolthub/go-mysql-server/sql/expression" - "github.com/dolthub/go-mysql-server/sql/expression/function/json" - sqltypes "github.com/dolthub/go-mysql-server/sql/types" "testing" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression" + "github.com/dolthub/go-mysql-server/sql/expression/function/json" + sqltypes "github.com/dolthub/go-mysql-server/sql/types" "github.com/shopspring/decimal" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -40,6 +39,7 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer" "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" ) diff --git a/go/libraries/doltcore/merge/three_way_json_differ.go b/go/libraries/doltcore/merge/three_way_json_differ.go index 4c35beaf99c..d406b27596b 100644 --- a/go/libraries/doltcore/merge/three_way_json_differ.go +++ b/go/libraries/doltcore/merge/three_way_json_differ.go @@ -17,10 +17,12 @@ package merge import ( "bytes" "context" - "github.com/dolthub/dolt/go/store/prolly/tree" + "io" + "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/types" - "io" + + "github.com/dolthub/dolt/go/store/prolly/tree" ) type ThreeWayJsonDiffer struct { diff --git a/go/store/prolly/tree/indexed_json_diff.go b/go/store/prolly/tree/indexed_json_diff.go index 8a94caab77d..bf4bc2e427a 100644 --- a/go/store/prolly/tree/indexed_json_diff.go +++ b/go/store/prolly/tree/indexed_json_diff.go @@ -16,9 +16,10 @@ package tree import ( "context" + "io" + "github.com/dolthub/go-mysql-server/sql/types" "golang.org/x/exp/slices" - "io" ) type IndexedJsonDiffer struct { diff --git a/go/store/prolly/tree/json_diff.go b/go/store/prolly/tree/json_diff.go index 8ef4839d01b..a52856c47ac 100644 --- a/go/store/prolly/tree/json_diff.go +++ b/go/store/prolly/tree/json_diff.go @@ -17,11 +17,12 @@ package tree import ( "bytes" "context" - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/types" "io" "reflect" "strings" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/types" ) type IJsonDiffer interface { diff --git a/go/store/prolly/tree/json_diff_test.go b/go/store/prolly/tree/json_diff_test.go index e14cf442a3b..4d95573175f 100644 --- a/go/store/prolly/tree/json_diff_test.go +++ b/go/store/prolly/tree/json_diff_test.go @@ -18,11 +18,12 @@ import ( "bytes" "context" "fmt" + "io" + "testing" + "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/expression" "github.com/dolthub/go-mysql-server/sql/expression/function/json" - "io" - "testing" "github.com/dolthub/go-mysql-server/sql/types" "github.com/stretchr/testify/require" diff --git a/go/store/prolly/tree/json_type_categories.go b/go/store/prolly/tree/json_type_categories.go index 016643e290f..ac640e2c1f6 100644 --- a/go/store/prolly/tree/json_type_categories.go +++ b/go/store/prolly/tree/json_type_categories.go @@ -16,6 +16,7 @@ package tree import ( "fmt" + "github.com/dolthub/go-mysql-server/sql" "github.com/shopspring/decimal" )