diff --git a/go/go.mod b/go/go.mod index 57ea7d3db0e..6e758c21850 100644 --- a/go/go.mod +++ b/go/go.mod @@ -57,7 +57,7 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 github.com/creasty/defaults v1.6.0 github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.18.2-0.20240812215633-0627bdf00f58 + github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178 github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 github.com/dolthub/swiss v0.1.0 github.com/goccy/go-json v0.10.2 diff --git a/go/go.sum b/go/go.sum index b5c638ded15..ae15aacd1e6 100644 --- a/go/go.sum +++ b/go/go.sum @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168= -github.com/dolthub/go-mysql-server v0.18.2-0.20240812215633-0627bdf00f58 h1:ysak2V/gc2YXJYAh1H/qphoDUOpgaSO0CuI+Lr/811U= -github.com/dolthub/go-mysql-server v0.18.2-0.20240812215633-0627bdf00f58/go.mod h1:PwuemL+YK+YiWcUFhknixeqNLjJNfCx7KDsHNajx9fM= +github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178 h1:aCckOhs6UuESaVFEWMKaYbRtE1ht6qqs++pzUGOu9NM= +github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178/go.mod h1:PwuemL+YK+YiWcUFhknixeqNLjJNfCx7KDsHNajx9fM= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI= github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q= github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE= diff --git a/go/libraries/doltcore/merge/merge_prolly_rows.go b/go/libraries/doltcore/merge/merge_prolly_rows.go index 374fef0e0ca..e4d22489884 100644 --- a/go/libraries/doltcore/merge/merge_prolly_rows.go +++ b/go/libraries/doltcore/merge/merge_prolly_rows.go @@ -26,7 +26,6 @@ import ( "github.com/dolthub/go-mysql-server/sql/expression" "github.com/dolthub/go-mysql-server/sql/transform" "github.com/dolthub/go-mysql-server/sql/types" - "golang.org/x/exp/maps" errorkinds "gopkg.in/src-d/go-errors.v1" "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" @@ -1978,20 +1977,20 @@ func (m *valueMerger) processColumn(ctx *sql.Context, i int, left, right, base v } func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAddr []byte, rightAddr []byte) (resultAddr []byte, conflict bool, err error) { - baseDoc, err := tree.NewJSONDoc(hash.New(baseAddr), m.ns).ToJSONDocument(ctx) + baseDoc, err := tree.NewJSONDoc(hash.New(baseAddr), m.ns).ToIndexedJSONDocument(ctx) if err != nil { return nil, true, err } - leftDoc, err := tree.NewJSONDoc(hash.New(leftAddr), m.ns).ToJSONDocument(ctx) + leftDoc, err := tree.NewJSONDoc(hash.New(leftAddr), m.ns).ToIndexedJSONDocument(ctx) if err != nil { return nil, true, err } - rightDoc, err := tree.NewJSONDoc(hash.New(rightAddr), m.ns).ToJSONDocument(ctx) + rightDoc, err := tree.NewJSONDoc(hash.New(rightAddr), m.ns).ToIndexedJSONDocument(ctx) if err != nil { return nil, true, err } - mergedDoc, conflict, err := mergeJSON(ctx, baseDoc, leftDoc, rightDoc) + mergedDoc, conflict, err := mergeJSON(ctx, m.ns, baseDoc, leftDoc, rightDoc) if err != nil { return nil, true, err } @@ -1999,35 +1998,36 @@ func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAd return nil, true, nil } - mergedVal, err := mergedDoc.ToInterface() - if err != nil { - return nil, true, err - } - mergedBytes, err := json.Marshal(mergedVal) - if err != nil { - return nil, true, err - } - mergedAddr, err := tree.SerializeBytesToAddr(ctx, m.ns, bytes.NewReader(mergedBytes), len(mergedBytes)) + root, err := tree.SerializeJsonToAddr(ctx, m.ns, mergedDoc) if err != nil { return nil, true, err } + mergedAddr := root.HashOf() return mergedAddr[:], false, nil - } -func mergeJSON(ctx context.Context, base types.JSONDocument, left types.JSONDocument, right types.JSONDocument) (resultDoc types.JSONDocument, conflict bool, err error) { +func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSONWrapper) (resultDoc sql.JSONWrapper, conflict bool, err error) { // First, deserialize each value into JSON. // We can only merge if the value at all three commits is a JSON object. - baseObject, baseIsObject := base.Val.(types.JsonObject) - leftObject, leftIsObject := left.Val.(types.JsonObject) - rightObject, rightIsObject := right.Val.(types.JsonObject) + baseIsObject, err := tree.IsJsonObject(base) + if err != nil { + return nil, true, err + } + leftIsObject, err := tree.IsJsonObject(left) + if err != nil { + return nil, true, err + } + rightIsObject, err := tree.IsJsonObject(right) + if err != nil { + return nil, true, err + } if !baseIsObject || !leftIsObject || !rightIsObject { // At least one of the commits does not have a JSON object. // If both left and right have the same value, use that value. // But if they differ, this is an unresolvable merge conflict. - cmp, err := left.Compare(right) + cmp, err := types.CompareJSON(left, right) if err != nil { return types.JSONDocument{}, true, err } @@ -2039,26 +2039,83 @@ func mergeJSON(ctx context.Context, base types.JSONDocument, left types.JSONDocu } } - mergedObject := maps.Clone(leftObject) - merged := types.JSONDocument{Val: mergedObject} + indexedBase, isBaseIndexed := base.(tree.IndexedJsonDocument) + indexedLeft, isLeftIndexed := left.(tree.IndexedJsonDocument) + indexedRight, isRightIndexed := right.(tree.IndexedJsonDocument) + + // We only do three way merges on values read from tables right now, which are read in as tree.IndexedJsonDocument. + + var leftDiffer tree.IJsonDiffer + if isBaseIndexed && isLeftIndexed { + leftDiffer, err = tree.NewIndexedJsonDiffer(ctx, indexedBase, indexedLeft) + if err != nil { + return nil, true, err + } + } else { + baseObject, err := base.ToInterface() + if err != nil { + return nil, true, err + } + leftObject, err := left.ToInterface() + if err != nil { + return nil, true, err + } + leftDiffer = tree.NewJsonDiffer(baseObject.(types.JsonObject), leftObject.(types.JsonObject)) + } + + var rightDiffer tree.IJsonDiffer + if isBaseIndexed && isRightIndexed { + rightDiffer, err = tree.NewIndexedJsonDiffer(ctx, indexedBase, indexedRight) + if err != nil { + return nil, true, err + } + } else { + baseObject, err := base.ToInterface() + if err != nil { + return nil, true, err + } + rightObject, err := right.ToInterface() + if err != nil { + return nil, true, err + } + rightDiffer = tree.NewJsonDiffer(baseObject.(types.JsonObject), rightObject.(types.JsonObject)) + } - threeWayDiffer := NewThreeWayJsonDiffer(baseObject, leftObject, rightObject) + threeWayDiffer := ThreeWayJsonDiffer{ + leftDiffer: leftDiffer, + rightDiffer: rightDiffer, + ns: ns, + } // Compute the merged object by applying diffs to the left object as needed. + // If the left object isn't an IndexedJsonDocument, we make one. + var ok bool + var merged tree.IndexedJsonDocument + if merged, ok = left.(tree.IndexedJsonDocument); !ok { + root, err := tree.SerializeJsonToAddr(ctx, ns, left) + if err != nil { + return types.JSONDocument{}, true, err + } + merged = tree.NewIndexedJsonDocument(ctx, root, ns) + } + for { threeWayDiff, err := threeWayDiffer.Next(ctx) if err == io.EOF { return merged, false, nil } + if err != nil { + return types.JSONDocument{}, true, err + } switch threeWayDiff.Op { - case tree.DiffOpRightAdd, tree.DiffOpConvergentAdd, tree.DiffOpRightModify, tree.DiffOpConvergentModify: - _, _, err := merged.Set(ctx, threeWayDiff.Key, threeWayDiff.Right) + case tree.DiffOpRightAdd, tree.DiffOpConvergentAdd, tree.DiffOpRightModify, tree.DiffOpConvergentModify, tree.DiffOpDivergentModifyResolved: + merged, _, err = merged.SetWithKey(ctx, threeWayDiff.Key, threeWayDiff.Right) if err != nil { return types.JSONDocument{}, true, err } case tree.DiffOpRightDelete, tree.DiffOpConvergentDelete: - _, _, err := merged.Remove(ctx, threeWayDiff.Key) + merged, _, err = merged.RemoveWithKey(ctx, threeWayDiff.Key) if err != nil { return types.JSONDocument{}, true, err } diff --git a/go/libraries/doltcore/merge/schema_merge_test.go b/go/libraries/doltcore/merge/schema_merge_test.go index 9f0acbd0680..f92d1947979 100644 --- a/go/libraries/doltcore/merge/schema_merge_test.go +++ b/go/libraries/doltcore/merge/schema_merge_test.go @@ -16,9 +16,13 @@ package merge_test import ( "context" + "fmt" "testing" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression" + "github.com/dolthub/go-mysql-server/sql/expression/function/json" + sqltypes "github.com/dolthub/go-mysql-server/sql/types" "github.com/shopspring/decimal" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -35,6 +39,7 @@ import ( "github.com/dolthub/dolt/go/libraries/doltcore/sqle/writer" "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/prolly/tree" "github.com/dolthub/dolt/go/store/types" ) @@ -105,6 +110,9 @@ func TestSchemaMerge(t *testing.T) { t.Run("json merge tests", func(t *testing.T) { testSchemaMerge(t, jsonMergeTests) }) + t.Run("large json merge tests", func(t *testing.T) { + testSchemaMerge(t, jsonMergeLargeDocumentTests(t)) + }) } var columnAddDropTests = []schemaMergeTest{ @@ -1207,6 +1215,13 @@ var jsonMergeTests = []schemaMergeTest{ right: singleRow(1, 1, 2, `{ "key1": "value1", "key2": "value4" }`), merged: singleRow(1, 2, 2, `{ "key1": "value3", "key2": "value4" }`), }, + { + name: `parallel array modification`, + ancestor: singleRow(1, 1, 1, `{"a": [1, 2, 1], "b":0, "c":0}`), + left: singleRow(1, 2, 1, `{"a": [2, 1, 2], "b":1, "c":0}`), + right: singleRow(1, 1, 2, `{"a": [2, 1, 2], "b":0, "c":1}`), + merged: singleRow(1, 2, 2, `{"a": [2, 1, 2], "b":1, "c":1}`), + }, { name: `parallel deletion`, ancestor: singleRow(1, 1, 1, `{ "key1": "value1" }`), @@ -1337,7 +1352,7 @@ var jsonMergeTests = []schemaMergeTest{ // Which array element should go first? // We avoid making assumptions and flag this as a conflict. name: "object inside array conflict", - ancestor: singleRow(1, 1, 1, `{ "key1": [ { } ] }`), + ancestor: singleRow(1, 1, 1, `{ "key1": [ ] }`), left: singleRow(1, 2, 1, `{ "key1": [ { "key2": "value2" } ] }`), right: singleRow(1, 1, 2, `{ "key1": [ { "key3": "value3" } ] }`), dataConflict: true, @@ -1354,10 +1369,244 @@ var jsonMergeTests = []schemaMergeTest{ right: singleRow(1, 1, 2, `{ "key1": [ 1, 2 ] }`), dataConflict: true, }, + { + // Regression test: Older versions of json documents could accidentally think that $.aa is a child + // of $.a and see this as a conflict, even though it isn't one. + name: "false positive conflict", + ancestor: singleRow(1, 1, 1, `{ "a": 1, "aa":2 }`), + left: singleRow(1, 2, 1, `{ "aa":2 }`), + right: singleRow(1, 1, 2, `{ "a": 1, "aa": 3 }`), + merged: singleRow(1, 2, 2, `{ "aa": 3 }`), + }, }, }, } +// newIndexedJsonDocumentFromValue creates an IndexedJsonDocument from a provided value. +func newIndexedJsonDocumentFromValue(t *testing.T, ctx context.Context, ns tree.NodeStore, v interface{}) tree.IndexedJsonDocument { + doc, _, err := sqltypes.JSON.Convert(v) + require.NoError(t, err) + root, err := tree.SerializeJsonToAddr(ctx, ns, doc.(sql.JSONWrapper)) + require.NoError(t, err) + return tree.NewIndexedJsonDocument(ctx, root, ns) +} + +// createLargeDocumentForTesting creates a JSON document large enough to be split across multiple chunks. +// This is useful for testing mutation operations in large documents. +// Every different possible jsonPathType appears on a chunk boundary, for better test coverage: +// chunk 0 key: $[6].children[2].children[0].number(endOfValue) +// chunk 2 key: $[7].children[5].children[4].children[2].children(arrayInitialElement) +// chunk 5 key: $[8].children[6].children[4].children[3].children[0](startOfValue) +// chunk 8 key: $[8].children[7].children[6].children[5].children[3].children[2].children[1](objectInitialElement) +func createLargeDocumentForTesting(t *testing.T, ctx *sql.Context, ns tree.NodeStore) tree.IndexedJsonDocument { + leafDoc := make(map[string]interface{}) + leafDoc["number"] = float64(1.0) + leafDoc["string"] = "dolt" + var docExpression sql.Expression = expression.NewLiteral(newIndexedJsonDocumentFromValue(t, ctx, ns, leafDoc), sqltypes.JSON) + var err error + + for level := 0; level < 8; level++ { + docExpression, err = json.NewJSONInsert(docExpression, expression.NewLiteral(fmt.Sprintf("$.level%d", level), sqltypes.Text), docExpression) + require.NoError(t, err) + } + doc, err := docExpression.Eval(ctx, nil) + require.NoError(t, err) + return newIndexedJsonDocumentFromValue(t, ctx, ns, doc) +} + +func jsonMergeLargeDocumentTests(t *testing.T) []schemaMergeTest { + // Test for each possible case in the three-way merge code. + // Test for multiple diffs in the same chunk, + // multiple diffs in adjacent chunks (with a moved boundary) + // and multiple diffs in non-adjacent chunks. + + ctx := sql.NewEmptyContext() + ns := tree.NewTestNodeStore() + + largeObject := createLargeDocumentForTesting(t, ctx, ns) + + insert := func(document sqltypes.MutableJSON, path string, val interface{}) sqltypes.MutableJSON { + jsonVal, inRange, err := sqltypes.JSON.Convert(val) + require.NoError(t, err) + require.True(t, (bool)(inRange)) + newDoc, changed, err := document.Insert(ctx, path, jsonVal.(sql.JSONWrapper)) + require.NoError(t, err) + require.True(t, changed) + return newDoc + } + + set := func(document sqltypes.MutableJSON, path string, val interface{}) sqltypes.MutableJSON { + jsonVal, inRange, err := sqltypes.JSON.Convert(val) + require.NoError(t, err) + require.True(t, (bool)(inRange)) + newDoc, changed, err := document.Replace(ctx, path, jsonVal.(sql.JSONWrapper)) + require.NoError(t, err) + require.True(t, changed) + return newDoc + } + + delete := func(document sqltypes.MutableJSON, path string) sqltypes.MutableJSON { + newDoc, changed, err := document.Remove(ctx, path) + require.True(t, changed) + require.NoError(t, err) + return newDoc + } + + var largeJsonMergeTests = []schemaMergeTest{ + { + name: "json merge", + ancestor: *tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int, b int, j json)")), + left: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int, b int, j json)")), + right: tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int, b int, j json)")), + merged: *tbl(sch("CREATE TABLE t (id int PRIMARY KEY, a int, b int, j json)")), + dataTests: []dataTest{ + { + name: "parallel insertion", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(largeObject, "$.a", 1)), + right: singleRow(1, 1, 2, insert(largeObject, "$.a", 1)), + merged: singleRow(1, 2, 2, insert(largeObject, "$.a", 1)), + }, + { + name: "convergent insertion", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(largeObject, "$.a", 1)), + right: singleRow(1, 1, 2, insert(largeObject, "$.z", 2)), + merged: singleRow(1, 2, 2, insert(insert(largeObject, "$.a", 1), "$.z", 2)), + }, + { + name: "multiple insertions", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(insert(largeObject, "$.a1", 1), "$.z2", 2)), + right: singleRow(1, 1, 2, insert(insert(largeObject, "$.a2", 3), "$.z1", 4)), + merged: singleRow(1, 2, 2, insert(insert(insert(insert(largeObject, "$.z1", 4), "$.z2", 2), "$.a2", 3), "$.a1", 1)), + }, + { + name: "convergent insertion with escaped quotes in keys", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(largeObject, `$."\"a\""`, 1)), + right: singleRow(1, 1, 2, insert(largeObject, `$."\"z\""`, 2)), + merged: singleRow(1, 2, 2, insert(insert(largeObject, `$."\"a\""`, 1), `$."\"z\""`, 2)), + }, + { + name: "parallel modification", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level7", 1)), + right: singleRow(1, 1, 2, set(largeObject, "$.level7", 1)), + merged: singleRow(1, 2, 2, set(largeObject, "$.level7", 1)), + }, + { + name: "convergent modification", + ancestor: singleRow(1, 1, 1, insert(largeObject, "$.a", 1)), + left: singleRow(1, 2, 1, set(insert(largeObject, "$.a", 1), "$.level7", 2)), + right: singleRow(1, 1, 2, set(insert(largeObject, "$.a", 1), "$.a", 3)), + merged: singleRow(1, 2, 2, set(insert(largeObject, "$.a", 3), "$.level7", 2)), + }, + { + name: `parallel deletion`, + ancestor: singleRow(1, 1, 1, insert(largeObject, "$.a", 1)), + left: singleRow(1, 2, 1, largeObject), + right: singleRow(1, 1, 2, largeObject), + merged: singleRow(1, 2, 2, largeObject), + }, + { + name: `convergent deletion`, + ancestor: singleRow(1, 1, 1, insert(insert(largeObject, "$.a", 1), "$.z", 2)), + left: singleRow(1, 2, 1, insert(largeObject, "$.a", 1)), + right: singleRow(1, 1, 2, insert(largeObject, "$.z", 2)), + merged: singleRow(1, 2, 2, largeObject), + }, + { + name: `divergent insertion`, + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, insert(largeObject, "$.z", 1)), + right: singleRow(1, 1, 2, insert(largeObject, "$.z", 2)), + dataConflict: true, + }, + { + name: `divergent modification`, + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level7", 1)), + right: singleRow(1, 1, 2, set(largeObject, "$.level7", 2)), + dataConflict: true, + }, + { + name: `divergent modification and deletion`, + ancestor: singleRow(1, 1, 1, insert(largeObject, "$.a", 1)), + left: singleRow(1, 2, 1, insert(largeObject, "$.a", 2)), + right: singleRow(1, 1, 2, largeObject), + dataConflict: true, + }, + { + name: `nested insertion`, + ancestor: singleRow(1, 1, 1, insert(largeObject, "$.level7.level4.new", map[string]interface{}{})), + left: singleRow(1, 2, 1, insert(largeObject, "$.level7.level4.new", map[string]interface{}{"a": 1})), + right: singleRow(1, 1, 2, insert(largeObject, "$.level7.level4.new", map[string]interface{}{"b": 2})), + merged: singleRow(1, 2, 2, insert(largeObject, "$.level7.level4.new", map[string]interface{}{"a": 1, "b": 2})), + }, + { + name: `nested insertion with escaped quotes in keys`, + ancestor: singleRow(1, 1, 1, insert(largeObject, `$.level7.level4."\"new\""`, map[string]interface{}{})), + left: singleRow(1, 2, 1, insert(largeObject, `$.level7.level4."\"new\""`, map[string]interface{}{"a": 1})), + right: singleRow(1, 1, 2, insert(largeObject, `$.level7.level4."\"new\""`, map[string]interface{}{"b": 2})), + merged: singleRow(1, 2, 2, insert(largeObject, `$.level7.level4."\"new\""`, map[string]interface{}{"a": 1, "b": 2})), + }, + { + name: "nested convergent modification", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level7.level4", 1)), + right: singleRow(1, 1, 2, set(largeObject, "$.level7.level5", 2)), + merged: singleRow(1, 2, 2, set(set(largeObject, "$.level7.level5", 2), "$.level7.level4", 1)), + }, + { + name: `nested deletion`, + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, delete(largeObject, "$.level7")), + right: singleRow(1, 1, 2, delete(largeObject, "$.level6")), + merged: singleRow(1, 2, 2, delete(delete(largeObject, "$.level6"), "$.level7")), + }, + { + name: "complicated nested merge", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, delete(set(insert(largeObject, "$.added", 7), "$.level5.level1", 5), "$.level4")), + right: singleRow(1, 1, 2, delete(set(insert(largeObject, "$.level6.added", 8), "$.level1", 6), "$.level5.level2")), + merged: singleRow(1, 2, 2, delete(set(insert(delete(set(insert(largeObject, "$.added", 7), "$.level5.level1", 5), "$.level4"), "$.level6.added", 8), "$.level1", 6), "$.level5.level2")), + }, + { + name: "changing types", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level3.number", `"dolt"`)), + right: singleRow(1, 1, 2, set(largeObject, "$.level4.string", 4)), + merged: singleRow(1, 2, 2, set(set(largeObject, "$.level3.number", `"dolt"`), "$.level4.string", 4)), + }, + { + name: "changing types conflict", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 2, 1, set(largeObject, "$.level4.string", []interface{}{})), + right: singleRow(1, 1, 2, set(largeObject, "$.level4.string", 4)), + dataConflict: true, + }, + { + name: "object insert and modify conflict", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 1, 1, insert(largeObject, "$.level5.a", 5)), + right: singleRow(1, 1, 2, set(largeObject, "$.level5", 6)), + dataConflict: true, + }, + { + name: "object insert and delete conflict", + ancestor: singleRow(1, 1, 1, largeObject), + left: singleRow(1, 1, 1, insert(largeObject, "$.level5.a", 5)), + right: singleRow(1, 1, 2, delete(largeObject, "$.level5")), + dataConflict: true, + }, + }, + }, + } + + return largeJsonMergeTests +} + func testSchemaMerge(t *testing.T, tests []schemaMergeTest) { t.Run("merge left to right", func(t *testing.T) { testSchemaMergeHelper(t, tests, false) diff --git a/go/libraries/doltcore/merge/three_way_json_differ.go b/go/libraries/doltcore/merge/three_way_json_differ.go index 5bb89dff3e4..d406b27596b 100644 --- a/go/libraries/doltcore/merge/three_way_json_differ.go +++ b/go/libraries/doltcore/merge/three_way_json_differ.go @@ -18,24 +18,18 @@ import ( "bytes" "context" "io" - "strings" + "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/types" "github.com/dolthub/dolt/go/store/prolly/tree" ) type ThreeWayJsonDiffer struct { - leftDiffer, rightDiffer tree.JsonDiffer + leftDiffer, rightDiffer tree.IJsonDiffer leftCurrentDiff, rightCurrentDiff *tree.JsonDiff leftIsDone, rightIsDone bool -} - -func NewThreeWayJsonDiffer(base, left, right types.JsonObject) ThreeWayJsonDiffer { - return ThreeWayJsonDiffer{ - leftDiffer: tree.NewJsonDiffer("$", base, left), - rightDiffer: tree.NewJsonDiffer("$", base, right), - } + ns tree.NodeStore } type ThreeWayJsonDiff struct { @@ -43,13 +37,13 @@ type ThreeWayJsonDiff struct { Op tree.DiffOp // a partial set of document values are set // depending on the diffOp - Key string - Base, Left, Right, Merged *types.JSONDocument + Key []byte + Left, Right, Merged sql.JSONWrapper } func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, error) { for { - err := differ.loadNextDiff() + err := differ.loadNextDiff(ctx) if err != nil { return ThreeWayJsonDiff{}, err } @@ -66,13 +60,22 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e // !differ.rightIsDone && !differ.leftIsDone leftDiff := differ.leftCurrentDiff rightDiff := differ.rightCurrentDiff - cmp := bytes.Compare([]byte(leftDiff.Key), []byte(rightDiff.Key)) + leftKey := leftDiff.Key + rightKey := rightDiff.Key + + cmp := bytes.Compare(leftKey, rightKey) + // If both sides modify the same array to different values, we currently consider that to be a conflict. + // This may be relaxed in the future. + if cmp != 0 && tree.JsonKeysModifySameArray(leftKey, rightKey) { + result := ThreeWayJsonDiff{ + Op: tree.DiffOpDivergentModifyConflict, + } + return result, nil + } if cmp > 0 { - if strings.HasPrefix(leftDiff.Key, rightDiff.Key) { - // The left diff must be replacing or deleting an object, - // and the right diff makes changes to that object. - // Note the fact that all keys in these paths are quoted means we don't have to worry about - // one key being a prefix of the other and triggering a false positive here. + if tree.IsJsonKeyPrefix(leftKey, rightKey) { + // The right diff must be replacing or deleting an object, + // and the left diff makes changes to that object. result := ThreeWayJsonDiff{ Op: tree.DiffOpDivergentModifyConflict, } @@ -82,11 +85,9 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e // key only changed on right return differ.processRightSideOnlyDiff(), nil } else if cmp < 0 { - if strings.HasPrefix(rightDiff.Key, leftDiff.Key) { + if tree.IsJsonKeyPrefix(rightKey, leftKey) { // The right diff must be replacing or deleting an object, // and the left diff makes changes to that object. - // Note the fact that all keys in these paths are quoted means we don't have to worry about - // one key being a prefix of the other and triggering a false positive here. result := ThreeWayJsonDiff{ Op: tree.DiffOpDivergentModifyConflict, } @@ -101,12 +102,12 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e if differ.leftCurrentDiff.From == nil { // Key did not exist at base, so both sides are inserts. // Check that they're inserting the same value. - valueCmp, err := differ.leftCurrentDiff.To.Compare(differ.rightCurrentDiff.To) + valueCmp, err := types.CompareJSON(differ.leftCurrentDiff.To, differ.rightCurrentDiff.To) if err != nil { return ThreeWayJsonDiff{}, err } if valueCmp == 0 { - return differ.processMergedDiff(tree.DiffOpConvergentModify, differ.leftCurrentDiff.To), nil + return differ.processMergedDiff(tree.DiffOpConvergentAdd, differ.leftCurrentDiff.To), nil } else { return differ.processMergedDiff(tree.DiffOpDivergentModifyConflict, nil), nil } @@ -120,24 +121,24 @@ func (differ *ThreeWayJsonDiffer) Next(ctx context.Context) (ThreeWayJsonDiff, e // If the key existed at base, we can do a recursive three-way merge to resolve // changes to the values. // This shouldn't be necessary: if its an object on all three branches, the original diff is recursive. - mergedValue, conflict, err := mergeJSON(ctx, *differ.leftCurrentDiff.From, - *differ.leftCurrentDiff.To, - *differ.rightCurrentDiff.To) + mergedValue, conflict, err := mergeJSON(ctx, differ.ns, differ.leftCurrentDiff.From, + differ.leftCurrentDiff.To, + differ.rightCurrentDiff.To) if err != nil { return ThreeWayJsonDiff{}, err } if conflict { return differ.processMergedDiff(tree.DiffOpDivergentModifyConflict, nil), nil } else { - return differ.processMergedDiff(tree.DiffOpDivergentModifyResolved, &mergedValue), nil + return differ.processMergedDiff(tree.DiffOpDivergentModifyResolved, mergedValue), nil } } } } -func (differ *ThreeWayJsonDiffer) loadNextDiff() error { +func (differ *ThreeWayJsonDiffer) loadNextDiff(ctx context.Context) error { if differ.leftCurrentDiff == nil && !differ.leftIsDone { - newLeftDiff, err := differ.leftDiffer.Next() + newLeftDiff, err := differ.leftDiffer.Next(ctx) if err == io.EOF { differ.leftIsDone = true } else if err != nil { @@ -147,7 +148,7 @@ func (differ *ThreeWayJsonDiffer) loadNextDiff() error { } } if differ.rightCurrentDiff == nil && !differ.rightIsDone { - newRightDiff, err := differ.rightDiffer.Next() + newRightDiff, err := differ.rightDiffer.Next(ctx) if err == io.EOF { differ.rightIsDone = true } else if err != nil { @@ -172,9 +173,8 @@ func (differ *ThreeWayJsonDiffer) processRightSideOnlyDiff() ThreeWayJsonDiff { case tree.RemovedDiff: result := ThreeWayJsonDiff{ - Op: tree.DiffOpRightDelete, - Key: differ.rightCurrentDiff.Key, - Base: differ.rightCurrentDiff.From, + Op: tree.DiffOpRightDelete, + Key: differ.rightCurrentDiff.Key, } differ.rightCurrentDiff = nil return result @@ -183,7 +183,6 @@ func (differ *ThreeWayJsonDiffer) processRightSideOnlyDiff() ThreeWayJsonDiff { result := ThreeWayJsonDiff{ Op: tree.DiffOpRightModify, Key: differ.rightCurrentDiff.Key, - Base: differ.rightCurrentDiff.From, Right: differ.rightCurrentDiff.To, } differ.rightCurrentDiff = nil @@ -193,11 +192,10 @@ func (differ *ThreeWayJsonDiffer) processRightSideOnlyDiff() ThreeWayJsonDiff { } } -func (differ *ThreeWayJsonDiffer) processMergedDiff(op tree.DiffOp, merged *types.JSONDocument) ThreeWayJsonDiff { +func (differ *ThreeWayJsonDiffer) processMergedDiff(op tree.DiffOp, merged sql.JSONWrapper) ThreeWayJsonDiff { result := ThreeWayJsonDiff{ Op: op, Key: differ.leftCurrentDiff.Key, - Base: differ.leftCurrentDiff.From, Left: differ.leftCurrentDiff.To, Right: differ.rightCurrentDiff.To, Merged: merged, diff --git a/go/store/prolly/tree/diff.go b/go/store/prolly/tree/diff.go index 2fc0a9956d2..9fcd841e701 100644 --- a/go/store/prolly/tree/diff.go +++ b/go/store/prolly/tree/diff.go @@ -128,6 +128,13 @@ func DifferFromCursors[K ~[]byte, O Ordering[K]]( } func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) { + return td.next(ctx, true) +} + +// next finds the next diff and then conditionally advances the cursors past the modified chunks. +// In most cases, we want to advance the cursors, but in some circumstances the caller may want to access the cursors +// and then advance them manually. +func (td Differ[K, O]) next(ctx context.Context, advanceCursors bool) (diff Diff, err error) { for td.from.Valid() && td.from.compare(td.fromStop) < 0 && td.to.Valid() && td.to.compare(td.toStop) < 0 { f := td.from.CurrentKey() @@ -136,16 +143,16 @@ func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) { switch { case cmp < 0: - return sendRemoved(ctx, td.from) + return sendRemoved(ctx, td.from, advanceCursors) case cmp > 0: - return sendAdded(ctx, td.to) + return sendAdded(ctx, td.to, advanceCursors) case cmp == 0: // If the cursor schema has changed, then all rows should be considered modified. // If the cursor schema hasn't changed, rows are modified iff their bytes have changed. if td.considerAllRowsModified || !equalcursorValues(td.from, td.to) { - return sendModified(ctx, td.from, td.to) + return sendModified(ctx, td.from, td.to, advanceCursors) } // advance both cursors since we have already determined that they are equal. This needs to be done because @@ -166,42 +173,46 @@ func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) { } if td.from.Valid() && td.from.compare(td.fromStop) < 0 { - return sendRemoved(ctx, td.from) + return sendRemoved(ctx, td.from, advanceCursors) } if td.to.Valid() && td.to.compare(td.toStop) < 0 { - return sendAdded(ctx, td.to) + return sendAdded(ctx, td.to, advanceCursors) } return Diff{}, io.EOF } -func sendRemoved(ctx context.Context, from *cursor) (diff Diff, err error) { +func sendRemoved(ctx context.Context, from *cursor, advanceCursors bool) (diff Diff, err error) { diff = Diff{ Type: RemovedDiff, Key: from.CurrentKey(), From: from.currentValue(), } - if err = from.advance(ctx); err != nil { - return Diff{}, err + if advanceCursors { + if err = from.advance(ctx); err != nil { + return Diff{}, err + } } return } -func sendAdded(ctx context.Context, to *cursor) (diff Diff, err error) { +func sendAdded(ctx context.Context, to *cursor, advanceCursors bool) (diff Diff, err error) { diff = Diff{ Type: AddedDiff, Key: to.CurrentKey(), To: to.currentValue(), } - if err = to.advance(ctx); err != nil { - return Diff{}, err + if advanceCursors { + if err = to.advance(ctx); err != nil { + return Diff{}, err + } } return } -func sendModified(ctx context.Context, from, to *cursor) (diff Diff, err error) { +func sendModified(ctx context.Context, from, to *cursor, advanceCursors bool) (diff Diff, err error) { diff = Diff{ Type: ModifiedDiff, Key: from.CurrentKey(), @@ -209,11 +220,13 @@ func sendModified(ctx context.Context, from, to *cursor) (diff Diff, err error) To: to.currentValue(), } - if err = from.advance(ctx); err != nil { - return Diff{}, err - } - if err = to.advance(ctx); err != nil { - return Diff{}, err + if advanceCursors { + if err = from.advance(ctx); err != nil { + return Diff{}, err + } + if err = to.advance(ctx); err != nil { + return Diff{}, err + } } return } diff --git a/go/store/prolly/tree/indexed_json_diff.go b/go/store/prolly/tree/indexed_json_diff.go new file mode 100644 index 00000000000..bf4bc2e427a --- /dev/null +++ b/go/store/prolly/tree/indexed_json_diff.go @@ -0,0 +1,293 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tree + +import ( + "context" + "io" + + "github.com/dolthub/go-mysql-server/sql/types" + "golang.org/x/exp/slices" +) + +type IndexedJsonDiffer struct { + differ Differ[jsonLocationKey, jsonLocationOrdering] + currentFromCursor, currentToCursor *JsonCursor + from, to IndexedJsonDocument + started bool +} + +var _ IJsonDiffer = &IndexedJsonDiffer{} + +func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*IndexedJsonDiffer, error) { + differ, err := DifferFromRoots[jsonLocationKey, jsonLocationOrdering](ctx, from.m.NodeStore, to.m.NodeStore, from.m.Root, to.m.Root, jsonLocationOrdering{}, false) + if err != nil { + return nil, err + } + // We want to diff the prolly tree as if it was an address map pointing to the individual blob fragments, rather + // than diffing the blob fragments themselves. We can accomplish this by just replacing the cursors in the differ + // with their parents. + differ.from = differ.from.parent + differ.to = differ.to.parent + differ.fromStop = differ.fromStop.parent + differ.toStop = differ.toStop.parent + + if differ.from == nil || differ.to == nil { + // This can happen when either document fits in a single chunk. + // We don't use the chunk differ in this case, and instead we create the cursors without it. + diffKey := []byte{byte(startOfValue)} + currentFromCursor, err := newJsonCursorAtStartOfChunk(ctx, from.m.NodeStore, from.m.Root, diffKey) + if err != nil { + return nil, err + } + currentToCursor, err := newJsonCursorAtStartOfChunk(ctx, to.m.NodeStore, to.m.Root, diffKey) + if err != nil { + return nil, err + } + return &IndexedJsonDiffer{ + differ: differ, + from: from, + to: to, + currentFromCursor: currentFromCursor, + currentToCursor: currentToCursor, + }, nil + } + + return &IndexedJsonDiffer{ + differ: differ, + from: from, + to: to, + }, nil +} + +// Next computes the next diff between the two JSON documents. +// To accomplish this, it uses the underlying Differ to find chunks that have changed, and uses a pair of JsonCursors +// to walk corresponding chunks. +func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error) { + // helper function to advance a JsonCursor and set it to nil if it reaches the end of a chunk + advanceCursor := func(jCur **JsonCursor) (err error) { + if (*jCur).jsonScanner.atEndOfChunk() { + err = (*jCur).cur.advance(ctx) + if err != nil { + return err + } + *jCur = nil + } else { + err = (*jCur).jsonScanner.AdvanceToNextLocation() + if err != nil { + return err + } + } + return nil + } + + newAddedDiff := func(key []byte) (JsonDiff, error) { + addedValue, err := jd.currentToCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + return JsonDiff{ + Key: key, + To: types.NewLazyJSONDocument(addedValue), + Type: AddedDiff, + }, nil + } + + newRemovedDiff := func(key []byte) (JsonDiff, error) { + removedValue, err := jd.currentFromCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + return JsonDiff{ + Key: key, + From: types.NewLazyJSONDocument(removedValue), + Type: RemovedDiff, + }, nil + } + + for { + if jd.currentFromCursor == nil && jd.currentToCursor == nil { + if jd.differ.from == nil || jd.differ.to == nil { + // One of the documents fits in a single chunk. We must have walked the entire document by now. + return JsonDiff{}, io.EOF + } + + // Either this is the first iteration, or the last iteration exhausted both chunks at the same time. + // (ie, both chunks ended at the same JSON path). We can use `Differ.Next` to seek to the next difference. + // Passing advanceCursors=false means that instead of using the returned diff, we read the cursors out of + // the differ, and advance them manually once we've walked the chunk. + _, err := jd.differ.next(ctx, false) + if err != nil { + return JsonDiff{}, err + } + + jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from) + if err != nil { + return JsonDiff{}, err + } + jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to) + if err != nil { + return JsonDiff{}, err + } + } else if jd.currentFromCursor == nil { + // We exhausted the current `from` chunk but not the `to` chunk. Since the chunk boundaries don't align on + // the same key, we need to continue into the next chunk. + + jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from) + if err != nil { + return JsonDiff{}, err + } + + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + continue + } else if jd.currentToCursor == nil { + // We exhausted the current `to` chunk but not the `from` chunk. Since the chunk boundaries don't align on + // the same key, we need to continue into the next chunk. + + jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to) + if err != nil { + return JsonDiff{}, err + } + + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + continue + } + // Both cursors point to chunks that are different between the two documents. + // We must be in one of the following states: + // 1) Both cursors have the JSON path with the same values: + // - This location has not changed, advance both cursors and continue. + // 2) Both cursors have the same JSON path but different values: + // - The value at that path has been modified. + // 3) Both cursors point to the start of a value, but the paths differ: + // - A value has been inserted or deleted in the beginning/middle of an object. + // 4) One cursor points to the start of a value, while the other cursor points to the end of that value's parent: + // - A value has been inserted or deleted at the end of an object or array. + // + // The following states aren't actually possible because we will encounter state 2 first. + // 5) One cursor points to the initial element of an object/array, while the other points to the end of that same path: + // - A value has been changed from an object/array to a scalar, or vice-versa. + // 6) One cursor points to the initial element of an object, while the other points to the initial element of an array: + // - The value has been changed from an object to an array, or vice-versa. + + fromScanner := &jd.currentFromCursor.jsonScanner + toScanner := &jd.currentToCursor.jsonScanner + fromScannerAtStartOfValue := fromScanner.atStartOfValue() + toScannerAtStartOfValue := toScanner.atStartOfValue() + fromCurrentLocation := fromScanner.currentPath + toCurrentLocation := toScanner.currentPath + + if !fromScannerAtStartOfValue && !toScannerAtStartOfValue { + // Neither cursor points to the start of a value. + // This should only be possible if they're at the same location. + // Do a sanity check, then continue. + if compareJsonLocations(fromCurrentLocation, toCurrentLocation) != 0 { + return JsonDiff{}, jsonParseError + } + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + continue + } + + if fromScannerAtStartOfValue && toScannerAtStartOfValue { + cmp := compareJsonLocations(fromCurrentLocation, toCurrentLocation) + switch cmp { + case 0: + key := fromCurrentLocation.Clone().key + + // Both sides have the same key. If they're both an object or both an array, continue. + // Otherwise, compare them and possibly return a modification. + if (fromScanner.current() == '{' && toScanner.current() == '{') || + (fromScanner.current() == '[' && toScanner.current() == '[') { + err = advanceCursor(&jd.currentFromCursor) + if err != nil { + return JsonDiff{}, err + } + err = advanceCursor(&jd.currentToCursor) + if err != nil { + return JsonDiff{}, err + } + continue + } + + fromValue, err := jd.currentFromCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + toValue, err := jd.currentToCursor.NextValue(ctx) + if err != nil { + return JsonDiff{}, err + } + if !slices.Equal(fromValue, toValue) { + // Case 2: The value at this path has been modified + return JsonDiff{ + Key: key, + From: types.NewLazyJSONDocument(fromValue), + To: types.NewLazyJSONDocument(toValue), + Type: ModifiedDiff, + }, nil + } + // Case 1: This location has not changed + continue + + case -1: + // Case 3: A value has been removed from an object + key := fromCurrentLocation.Clone().key + return newRemovedDiff(key) + case 1: + // Case 3: A value has been added to an object + key := toCurrentLocation.Clone().key + return newAddedDiff(key) + } + } + + if !fromScannerAtStartOfValue && toScannerAtStartOfValue { + if fromCurrentLocation.getScannerState() != endOfValue { + return JsonDiff{}, jsonParseError + } + // Case 4: A value has been inserted at the end of an object or array. + key := toCurrentLocation.Clone().key + return newAddedDiff(key) + } + + if fromScannerAtStartOfValue && !toScannerAtStartOfValue { + if toCurrentLocation.getScannerState() != endOfValue { + return JsonDiff{}, jsonParseError + } + // Case 4: A value has been removed from the end of an object or array. + key := fromCurrentLocation.Clone().key + return newRemovedDiff(key) + } + } +} diff --git a/go/store/prolly/tree/json_cursor.go b/go/store/prolly/tree/json_cursor.go index e7d6be61c28..b980fcd2ef6 100644 --- a/go/store/prolly/tree/json_cursor.go +++ b/go/store/prolly/tree/json_cursor.go @@ -40,7 +40,10 @@ func getPreviousKey(ctx context.Context, cur *cursor) ([]byte, error) { if !cur2.Valid() { return nil, nil } - key := cur2.parent.CurrentKey() + key := cur2.CurrentKey() + if len(key) == 0 { + key = cur2.parent.CurrentKey() + } err = errorIfNotSupportedLocation(key) if err != nil { return nil, err @@ -53,24 +56,40 @@ func getPreviousKey(ctx context.Context, cur *cursor) ([]byte, error) { // in the document. If the location does not exist in the document, the resulting JsonCursor // will be at the location where the value would be if it was inserted. func newJsonCursor(ctx context.Context, ns NodeStore, root Node, startKey jsonLocation, forRemoval bool) (jCur *JsonCursor, found bool, err error) { - cur, err := newCursorAtKey(ctx, ns, root, startKey.key, jsonLocationOrdering{}) + jcur, err := newJsonCursorAtStartOfChunk(ctx, ns, root, startKey.key) if err != nil { return nil, false, err } + found, err = jcur.AdvanceToLocation(ctx, startKey, forRemoval) + return jcur, found, err +} + +func newJsonCursorAtStartOfChunk(ctx context.Context, ns NodeStore, root Node, startKey []byte) (jCur *JsonCursor, err error) { + cur, err := newCursorAtKey(ctx, ns, root, startKey, jsonLocationOrdering{}) + if err != nil { + return nil, err + } + return newJsonCursorFromCursor(ctx, cur) +} + +func newJsonCursorFromCursor(ctx context.Context, cur *cursor) (*JsonCursor, error) { previousKey, err := getPreviousKey(ctx, cur) if err != nil { - return nil, false, err + return nil, err + } + if !cur.isLeaf() { + nd, err := fetchChild(ctx, cur.nrw, cur.currentRef()) + if err != nil { + return nil, err + } + return newJsonCursorFromCursor(ctx, &cursor{nd: nd, parent: cur, nrw: cur.nrw}) } jsonBytes := cur.currentValue() jsonDecoder := ScanJsonFromMiddleWithKey(jsonBytes, previousKey) jcur := JsonCursor{cur: cur, jsonScanner: jsonDecoder} - found, err = jcur.AdvanceToLocation(ctx, startKey, forRemoval) - if err != nil { - return nil, found, err - } - return &jcur, found, nil + return &jcur, nil } func (j JsonCursor) Valid() bool { diff --git a/go/store/prolly/tree/json_diff.go b/go/store/prolly/tree/json_diff.go index 61f716d3be8..a52856c47ac 100644 --- a/go/store/prolly/tree/json_diff.go +++ b/go/store/prolly/tree/json_diff.go @@ -16,17 +16,22 @@ package tree import ( "bytes" - "fmt" + "context" "io" "reflect" "strings" + "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/types" ) +type IJsonDiffer interface { + Next(ctx context.Context) (JsonDiff, error) +} + type JsonDiff struct { - Key string - From, To *types.JSONDocument + Key []byte + From, To sql.JSONWrapper Type DiffType } @@ -37,31 +42,44 @@ type jsonKeyPair struct { // JsonDiffer computes the diff between two JSON objects. type JsonDiffer struct { - root string + root []byte currentFromPair, currentToPair *jsonKeyPair from, to types.JSONIter subDiffer *JsonDiffer } -func NewJsonDiffer(root string, from, to types.JsonObject) JsonDiffer { +var _ IJsonDiffer = &JsonDiffer{} + +func NewJsonDiffer(from, to types.JsonObject) *JsonDiffer { + fromIter := types.NewJSONIter(from) + toIter := types.NewJSONIter(to) + return &JsonDiffer{ + root: []byte{byte(startOfValue)}, + from: fromIter, + to: toIter, + } +} + +func (differ *JsonDiffer) newSubDiffer(key string, from, to types.JsonObject) JsonDiffer { fromIter := types.NewJSONIter(from) toIter := types.NewJSONIter(to) + newRoot := differ.appendKey(key) return JsonDiffer{ - root: root, + root: newRoot, from: fromIter, to: toIter, } } -func (differ *JsonDiffer) appendKey(key string) string { +func (differ *JsonDiffer) appendKey(key string) []byte { escapedKey := strings.Replace(key, "\"", "\\\"", -1) - return fmt.Sprintf("%s.\"%s\"", differ.root, escapedKey) + return append(append(differ.root, beginObjectKey), []byte(escapedKey)...) } -func (differ *JsonDiffer) Next() (diff JsonDiff, err error) { +func (differ *JsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error) { for { if differ.subDiffer != nil { - diff, err := differ.subDiffer.Next() + diff, err := differ.subDiffer.Next(ctx) if err == io.EOF { differ.subDiffer = nil differ.currentFromPair = nil @@ -116,7 +134,7 @@ func (differ *JsonDiffer) Next() (diff JsonDiff, err error) { switch from := fromValue.(type) { case types.JsonObject: // Recursively compare the objects to generate diffs. - subDiffer := NewJsonDiffer(differ.appendKey(key), from, toValue.(types.JsonObject)) + subDiffer := differ.newSubDiffer(key, from, toValue.(types.JsonObject)) differ.subDiffer = &subDiffer continue case types.JsonArray: diff --git a/go/store/prolly/tree/json_diff_test.go b/go/store/prolly/tree/json_diff_test.go index 4fa4528825a..4d95573175f 100644 --- a/go/store/prolly/tree/json_diff_test.go +++ b/go/store/prolly/tree/json_diff_test.go @@ -15,34 +15,49 @@ package tree import ( + "bytes" + "context" + "fmt" "io" "testing" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression" + "github.com/dolthub/go-mysql-server/sql/expression/function/json" + "github.com/dolthub/go-mysql-server/sql/types" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) type jsonDiffTest struct { name string - from, to types.JsonObject + from, to sql.JSONWrapper expectedDiffs []JsonDiff } +func makeJsonPathKey(parts ...string) []byte { + result := []byte{byte(startOfValue)} + for _, part := range parts { + result = append(result, beginObjectKey) + result = append(result, []byte(part)...) + } + return result +} + var simpleJsonDiffTests = []jsonDiffTest{ { name: "empty object, no modifications", - from: types.JsonObject{}, - to: types.JsonObject{}, + from: types.JSONDocument{Val: types.JsonObject{}}, + to: types.JSONDocument{Val: types.JsonObject{}}, expectedDiffs: nil, }, { name: "insert into empty object", - from: types.JsonObject{}, - to: types.JsonObject{"a": 1}, + from: types.JSONDocument{Val: types.JsonObject{}}, + to: types.JSONDocument{Val: types.JsonObject{"a": 1}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\"", + Key: makeJsonPathKey(`a`), From: nil, To: &types.JSONDocument{Val: 1}, Type: AddedDiff, @@ -51,11 +66,11 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "delete from object", - from: types.JsonObject{"a": 1}, - to: types.JsonObject{}, + from: types.JSONDocument{Val: types.JsonObject{"a": 1}}, + to: types.JSONDocument{Val: types.JsonObject{}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\"", + Key: makeJsonPathKey(`a`), From: &types.JSONDocument{Val: 1}, To: nil, Type: RemovedDiff, @@ -64,11 +79,11 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify object", - from: types.JsonObject{"a": 1}, - to: types.JsonObject{"a": 2}, + from: types.JSONDocument{Val: types.JsonObject{"a": 1}}, + to: types.JSONDocument{Val: types.JsonObject{"a": 2}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\"", + Key: makeJsonPathKey(`a`), From: &types.JSONDocument{Val: 1}, To: &types.JSONDocument{Val: 2}, Type: ModifiedDiff, @@ -77,11 +92,11 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested insert", - from: types.JsonObject{"a": types.JsonObject{}}, - to: types.JsonObject{"a": types.JsonObject{"b": 1}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 1}}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), To: &types.JSONDocument{Val: 1}, Type: AddedDiff, }, @@ -89,11 +104,11 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested delete", - from: types.JsonObject{"a": types.JsonObject{"b": 1}}, - to: types.JsonObject{"a": types.JsonObject{}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 1}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{}}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), From: &types.JSONDocument{Val: 1}, Type: RemovedDiff, }, @@ -101,11 +116,11 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "nested modify", - from: types.JsonObject{"a": types.JsonObject{"b": 1}}, - to: types.JsonObject{"a": types.JsonObject{"b": 2}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 1}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 2}}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), From: &types.JSONDocument{Val: 1}, To: &types.JSONDocument{Val: 2}, Type: ModifiedDiff, @@ -114,11 +129,11 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "insert object", - from: types.JsonObject{"a": types.JsonObject{}}, - to: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), To: &types.JSONDocument{Val: types.JsonObject{"c": 3}}, Type: AddedDiff, }, @@ -126,11 +141,11 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify to object", - from: types.JsonObject{"a": types.JsonObject{"b": 2}}, - to: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), From: &types.JSONDocument{Val: 2}, To: &types.JSONDocument{Val: types.JsonObject{"c": 3}}, Type: ModifiedDiff, @@ -139,24 +154,76 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modify from object", - from: types.JsonObject{"a": types.JsonObject{"b": 2}}, - to: types.JsonObject{"a": 1}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": 1}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\"", + Key: makeJsonPathKey(`a`), From: &types.JSONDocument{Val: types.JsonObject{"b": 2}}, To: &types.JSONDocument{Val: 1}, Type: ModifiedDiff, }, }, }, + { + name: "modify to array", + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": "foo"}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonArray{1, 2}}}}, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`a`, `b`), + From: &types.JSONDocument{Val: "foo"}, + To: &types.JSONDocument{Val: types.JsonArray{1, 2}}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "modify from array", + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonArray{1, 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": 1}}, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`a`), + From: &types.JSONDocument{Val: types.JsonArray{1, 2}}, + To: &types.JSONDocument{Val: 1}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "array to object", + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonArray{1, 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`a`), + From: &types.JSONDocument{Val: types.JsonArray{1, 2}}, + To: &types.JSONDocument{Val: types.JsonObject{"b": types.JsonObject{"c": 3}}}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "object to array", + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": 2}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonArray{1, 2}}}, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`a`), + From: &types.JSONDocument{Val: types.JsonObject{"b": 2}}, + To: &types.JSONDocument{Val: types.JsonArray{1, 2}}, + Type: ModifiedDiff, + }, + }, + }, { name: "remove object", - from: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}, - to: types.JsonObject{"a": types.JsonObject{}}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"b": types.JsonObject{"c": 3}}}}, + to: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{}}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"a\".\"b\"", + Key: makeJsonPathKey(`a`, `b`), From: &types.JSONDocument{Val: types.JsonObject{"c": 3}}, Type: RemovedDiff, }, @@ -164,17 +231,17 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "insert escaped double quotes", - from: types.JsonObject{"\"a\"": "1"}, - to: types.JsonObject{"b": "\"2\""}, + from: types.JSONDocument{Val: types.JsonObject{"\"a\"": "1"}}, + to: types.JSONDocument{Val: types.JsonObject{"b": "\"2\""}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"\\\"a\\\"\"", + Key: makeJsonPathKey(`"a"`), From: &types.JSONDocument{Val: "1"}, To: nil, Type: RemovedDiff, }, { - Key: "$.\"b\"", + Key: makeJsonPathKey(`b`), From: nil, To: &types.JSONDocument{Val: "\"2\""}, Type: AddedDiff, @@ -183,32 +250,32 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, { name: "modifications returned in lexographic order", - from: types.JsonObject{"a": types.JsonObject{"1": "i"}, "aa": 2, "b": 6}, - to: types.JsonObject{"": 1, "a": types.JsonObject{}, "aa": 3, "bb": 5}, + from: types.JSONDocument{Val: types.JsonObject{"a": types.JsonObject{"1": "i"}, "aa": 2, "b": 6}}, + to: types.JSONDocument{Val: types.JsonObject{"": 1, "a": types.JsonObject{}, "aa": 3, "bb": 5}}, expectedDiffs: []JsonDiff{ { - Key: "$.\"\"", + Key: makeJsonPathKey(``), To: &types.JSONDocument{Val: 1}, Type: AddedDiff, }, { - Key: "$.\"a\".\"1\"", + Key: makeJsonPathKey(`a`, `1`), From: &types.JSONDocument{Val: "i"}, Type: RemovedDiff, }, { - Key: "$.\"aa\"", + Key: makeJsonPathKey(`aa`), From: &types.JSONDocument{Val: 2}, To: &types.JSONDocument{Val: 3}, Type: ModifiedDiff, }, { - Key: "$.\"b\"", + Key: makeJsonPathKey(`b`), From: &types.JSONDocument{Val: 6}, Type: RemovedDiff, }, { - Key: "$.\"bb\"", + Key: makeJsonPathKey(`bb`), To: &types.JSONDocument{Val: 5}, Type: AddedDiff, }, @@ -216,10 +283,152 @@ var simpleJsonDiffTests = []jsonDiffTest{ }, } +func largeJsonDiffTests(t *testing.T) []jsonDiffTest { + ctx := sql.NewEmptyContext() + ns := NewTestNodeStore() + + insert := func(document types.MutableJSON, path string, val interface{}) types.MutableJSON { + jsonVal, inRange, err := types.JSON.Convert(val) + require.NoError(t, err) + require.True(t, (bool)(inRange)) + newDoc, changed, err := document.Insert(ctx, path, jsonVal.(sql.JSONWrapper)) + require.NoError(t, err) + require.True(t, changed) + return newDoc + } + + set := func(document types.MutableJSON, path string, val interface{}) types.MutableJSON { + jsonVal, inRange, err := types.JSON.Convert(val) + require.NoError(t, err) + require.True(t, (bool)(inRange)) + newDoc, changed, err := document.Replace(ctx, path, jsonVal.(sql.JSONWrapper)) + require.NoError(t, err) + require.True(t, changed) + return newDoc + } + + lookup := func(document types.SearchableJSON, path string) sql.JSONWrapper { + newDoc, err := document.Lookup(ctx, path) + require.NoError(t, err) + return newDoc + } + + remove := func(document types.MutableJSON, path string) types.MutableJSON { + newDoc, changed, err := document.Remove(ctx, path) + require.True(t, changed) + require.NoError(t, err) + return newDoc + } + + largeObject := createLargeArraylessDocumentForTesting(t, ctx, ns) + return []jsonDiffTest{ + { + name: "nested insert", + from: largeObject, + to: insert(largeObject, "$.level7.newKey", 2), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `newKey`), + From: nil, + To: &types.JSONDocument{Val: 2}, + Type: AddedDiff, + }, + }, + }, + { + name: "nested remove", + from: largeObject, + to: remove(largeObject, "$.level7.level6"), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level6`), + From: lookup(largeObject, "$.level7.level6"), + To: nil, + Type: RemovedDiff, + }, + }, + }, + { + name: "nested modification 1", + from: largeObject, + to: set(largeObject, "$.level7.level5", 2), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level5`), + From: lookup(largeObject, "$.level7.level5"), + To: &types.JSONDocument{Val: 2}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "nested modification 2", + from: largeObject, + to: set(largeObject, "$.level7.level4", 1), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level4`), + From: lookup(largeObject, "$.level7.level4"), + To: &types.JSONDocument{Val: 1}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "convert object to array", + from: largeObject, + to: set(largeObject, "$.level7.level6", []interface{}{}), + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level6`), + From: lookup(largeObject, "$.level7.level6"), + To: &types.JSONDocument{Val: []interface{}{}}, + Type: ModifiedDiff, + }, + }, + }, + { + name: "convert array to object", + from: set(largeObject, "$.level7.level6", []interface{}{}), + to: largeObject, + expectedDiffs: []JsonDiff{ + { + Key: makeJsonPathKey(`level7`, `level6`), + From: &types.JSONDocument{Val: []interface{}{}}, + To: lookup(largeObject, "$.level7.level6"), + Type: ModifiedDiff, + }, + }, + }, + } +} + +// createLargeArraylessDocumentForTesting creates a JSON document large enough to be split across multiple chunks that +// does not contain arrays. This makes it easier to write tests for three-way merging, since we cant't currently merge +// concurrent changes to arrays. +func createLargeArraylessDocumentForTesting(t *testing.T, ctx *sql.Context, ns NodeStore) IndexedJsonDocument { + leafDoc := make(map[string]interface{}) + leafDoc["number"] = float64(1.0) + leafDoc["string"] = "dolt" + var docExpression sql.Expression = expression.NewLiteral(newIndexedJsonDocumentFromValue(t, ctx, ns, leafDoc), types.JSON) + var err error + + for level := 0; level < 8; level++ { + docExpression, err = json.NewJSONInsert(docExpression, expression.NewLiteral(fmt.Sprintf("$.level%d", level), types.Text), docExpression) + require.NoError(t, err) + } + doc, err := docExpression.Eval(ctx, nil) + require.NoError(t, err) + return newIndexedJsonDocumentFromValue(t, ctx, ns, doc) +} + func TestJsonDiff(t *testing.T) { t.Run("simple tests", func(t *testing.T) { runTestBatch(t, simpleJsonDiffTests) }) + t.Run("large document tests", func(t *testing.T) { + runTestBatch(t, largeJsonDiffTests(t)) + }) } func runTestBatch(t *testing.T, tests []jsonDiffTest) { @@ -231,16 +440,42 @@ func runTestBatch(t *testing.T, tests []jsonDiffTest) { } func runTest(t *testing.T, test jsonDiffTest) { - differ := NewJsonDiffer("$", test.from, test.to) + ctx := context.Background() + ns := NewTestNodeStore() + from := newIndexedJsonDocumentFromValue(t, ctx, ns, test.from) + to := newIndexedJsonDocumentFromValue(t, ctx, ns, test.to) + differ, err := NewIndexedJsonDiffer(ctx, from, to) + require.NoError(t, err) var actualDiffs []JsonDiff for { - diff, err := differ.Next() + diff, err := differ.Next(ctx) if err == io.EOF { break } - assert.NoError(t, err) + require.NoError(t, err) actualDiffs = append(actualDiffs, diff) } - require.Equal(t, test.expectedDiffs, actualDiffs) + diffsEqual := func(expected, actual JsonDiff) bool { + if expected.Type != actual.Type { + return false + } + if !bytes.Equal(expected.Key, actual.Key) { + return false + } + cmp, err := types.CompareJSON(expected.From, actual.From) + require.NoError(t, err) + if cmp != 0 { + return false + } + cmp, err = types.CompareJSON(expected.To, actual.To) + require.NoError(t, err) + + return cmp == 0 + } + require.Equal(t, len(test.expectedDiffs), len(actualDiffs)) + for i, expected := range test.expectedDiffs { + actual := actualDiffs[i] + require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual)) + } } diff --git a/go/store/prolly/tree/json_indexed_document.go b/go/store/prolly/tree/json_indexed_document.go index 0c8c3eb38c2..b0ac82f43b1 100644 --- a/go/store/prolly/tree/json_indexed_document.go +++ b/go/store/prolly/tree/json_indexed_document.go @@ -19,9 +19,11 @@ import ( "database/sql/driver" "encoding/json" "fmt" + "io" "sync" "github.com/dolthub/go-mysql-server/sql" + sqljson "github.com/dolthub/go-mysql-server/sql/expression/function/json" "github.com/dolthub/go-mysql-server/sql/types" ) @@ -205,7 +207,7 @@ func (i IndexedJsonDocument) tryInsert(ctx context.Context, path string, val sql return i.insertIntoCursor(ctx, keyPath, jsonCursor, val) } -func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (types.MutableJSON, bool, error) { +func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (IndexedJsonDocument, bool, error) { cursorPath := jsonCursor.GetCurrentPath() // If the inserted path is equivalent to "$" (which also includes "$[0]" on non-arrays), do nothing. @@ -241,7 +243,7 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } originalValue, err := jsonCursor.NextValue(ctx) @@ -251,15 +253,18 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL insertedValueBytes, err := types.MarshallJson(val) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } jsonChunker.appendJsonToBuffer([]byte(fmt.Sprintf("[%s,%s]", originalValue, insertedValueBytes))) - jsonChunker.processBuffer(ctx) + err = jsonChunker.processBuffer(ctx) + if err != nil { + return IndexedJsonDocument{}, false, err + } newRoot, err := jsonChunker.Done(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil @@ -285,14 +290,14 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL insertedValueBytes, err := types.MarshallJson(val) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // The key is guaranteed to not exist in the source doc. The cursor is pointing to the start of the subsequent object, // which will be the insertion point for the added value. jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // If required, adds a comma before writing the value. @@ -302,18 +307,21 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL // If the value is a newly inserted key, write the key. if !keyLastPathElement.isArrayIndex { - jsonChunker.appendJsonToBuffer([]byte(fmt.Sprintf(`"%s":`, keyLastPathElement.key))) + jsonChunker.appendJsonToBuffer([]byte(fmt.Sprintf(`"%s":`, escapeKey(keyLastPathElement.key)))) } // Manually set the chunker's path and offset to the start of the value we're about to insert. jsonChunker.jScanner.valueOffset = len(jsonChunker.jScanner.jsonBuffer) jsonChunker.jScanner.currentPath = keyPath jsonChunker.appendJsonToBuffer(insertedValueBytes) - jsonChunker.processBuffer(ctx) + err = jsonChunker.processBuffer(ctx) + if err != nil { + return IndexedJsonDocument{}, false, err + } newRoot, err := jsonChunker.Done(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil @@ -343,10 +351,17 @@ func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types. if err != nil { return nil, false, err } + return i.removeWithLocation(ctx, keyPath) +} + +func (i IndexedJsonDocument) RemoveWithKey(ctx context.Context, key []byte) (IndexedJsonDocument, bool, error) { + return i.removeWithLocation(ctx, jsonPathFromKey(key)) +} +func (i IndexedJsonDocument) removeWithLocation(ctx context.Context, keyPath jsonLocation) (IndexedJsonDocument, bool, error) { jsonCursor, found, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, keyPath, true) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } if !found { // The key does not exist in the document. @@ -356,7 +371,7 @@ func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types. // The cursor is now pointing to the end of the value prior to the one being removed. jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } startofRemovedLocation := jsonCursor.GetCurrentPath() @@ -367,7 +382,7 @@ func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types. keyPath.setScannerState(endOfValue) _, err = jsonCursor.AdvanceToLocation(ctx, keyPath, false) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // If removing the first element of an object/array, skip past the comma, and set the chunker as if it's @@ -379,7 +394,7 @@ func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types. newRoot, err := jsonChunker.Done(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil @@ -406,10 +421,17 @@ func (i IndexedJsonDocument) trySet(ctx context.Context, path string, val sql.JS if err != nil { return nil, false, err } + return i.setWithLocation(ctx, keyPath, val) +} +func (i IndexedJsonDocument) SetWithKey(ctx context.Context, key []byte, val sql.JSONWrapper) (IndexedJsonDocument, bool, error) { + return i.setWithLocation(ctx, jsonPathFromKey(key), val) +} + +func (i IndexedJsonDocument) setWithLocation(ctx context.Context, keyPath jsonLocation, val sql.JSONWrapper) (IndexedJsonDocument, bool, error) { jsonCursor, found, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, keyPath, false) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // The supplied path may be 0-indexing into a scalar, which is the same as referencing the scalar. Remove @@ -480,32 +502,35 @@ func (i IndexedJsonDocument) tryReplace(ctx context.Context, path string, val sq return i.replaceIntoCursor(ctx, keyPath, jsonCursor, val) } -func (i IndexedJsonDocument) replaceIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (types.MutableJSON, bool, error) { +func (i IndexedJsonDocument) replaceIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (IndexedJsonDocument, bool, error) { // The cursor is now pointing to the start of the value being replaced. jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } // Advance the cursor to the end of the value being removed. keyPath.setScannerState(endOfValue) _, err = jsonCursor.AdvanceToLocation(ctx, keyPath, false) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } insertedValueBytes, err := types.MarshallJson(val) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } jsonChunker.appendJsonToBuffer(insertedValueBytes) - jsonChunker.processBuffer(ctx) + err = jsonChunker.processBuffer(ctx) + if err != nil { + return IndexedJsonDocument{}, false, err + } newRoot, err := jsonChunker.Done(ctx) if err != nil { - return nil, false, err + return IndexedJsonDocument{}, false, err } return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil @@ -548,3 +573,137 @@ func (i IndexedJsonDocument) GetBytes() (bytes []byte, err error) { // TODO: Add context parameter to JSONBytes.GetBytes return getBytesFromIndexedJsonMap(i.ctx, i.m) } + +func (i IndexedJsonDocument) getFirstCharacter(ctx context.Context) (byte, error) { + stopIterationError := fmt.Errorf("stop") + var firstCharacter byte + err := i.m.WalkNodes(ctx, func(ctx context.Context, nd Node) error { + if nd.IsLeaf() { + firstCharacter = nd.GetValue(0)[0] + return stopIterationError + } + return nil + }) + if err != stopIterationError { + return 0, err + } + return firstCharacter, nil +} + +func (i IndexedJsonDocument) getTypeCategory() (jsonTypeCategory, error) { + firstCharacter, err := i.getFirstCharacter(i.ctx) + if err != nil { + return 0, err + } + return getTypeCategoryFromFirstCharacter(firstCharacter), nil +} + +func GetTypeCategory(wrapper sql.JSONWrapper) (jsonTypeCategory, error) { + switch doc := wrapper.(type) { + case IndexedJsonDocument: + return doc.getTypeCategory() + case *types.LazyJSONDocument: + return getTypeCategoryFromFirstCharacter(doc.Bytes[0]), nil + default: + val, err := doc.ToInterface() + if err != nil { + return 0, err + } + return getTypeCategoryOfValue(val) + } +} + +// Type implements types.ComparableJson +func (i IndexedJsonDocument) Type(ctx context.Context) (string, error) { + firstCharacter, err := i.getFirstCharacter(ctx) + if err != nil { + return "", err + } + + switch firstCharacter { + case '{': + return "OBJECT", nil + case '[': + return "ARRAY", nil + } + // At this point the value must be a scalar, so it's okay to just load the whole thing. + val, err := i.ToInterface() + if err != nil { + return "", err + } + return sqljson.TypeOfJsonValue(val), nil +} + +// Compare implements types.ComparableJson +func (i IndexedJsonDocument) Compare(other interface{}) (int, error) { + thisTypeCategory, err := i.getTypeCategory() + if err != nil { + return 0, err + } + + otherIndexedDocument, ok := other.(IndexedJsonDocument) + if !ok { + val, err := i.ToInterface() + if err != nil { + return 0, err + } + otherVal := other + if otherWrapper, ok := other.(sql.JSONWrapper); ok { + otherVal, err = otherWrapper.ToInterface() + if err != nil { + return 0, err + } + } + return types.CompareJSON(val, otherVal) + } + + otherTypeCategory, err := otherIndexedDocument.getTypeCategory() + if err != nil { + return 0, err + } + if thisTypeCategory < otherTypeCategory { + return -1, nil + } + if thisTypeCategory > otherTypeCategory { + return 1, nil + } + switch thisTypeCategory { + case jsonTypeNull: + return 0, nil + case jsonTypeArray, jsonTypeObject: + // To compare two values that are both arrays or both objects, we must locate the first location + // where they differ. + + jsonDiffer, err := NewIndexedJsonDiffer(i.ctx, i, otherIndexedDocument) + if err != nil { + return 0, err + } + firstDiff, err := jsonDiffer.Next(i.ctx) + if err == io.EOF { + // The two documents have no differences. + return 0, nil + } + if err != nil { + return 0, err + } + switch firstDiff.Type { + case AddedDiff: + // A key is present in other but not this. + return -1, nil + case RemovedDiff: + return 1, nil + case ModifiedDiff: + // Since both modified values have already been loaded into memory, + // We can just compare them. + return types.JSON.Compare(firstDiff.From, firstDiff.To) + default: + panic("Impossible diff type") + } + default: + val, err := i.ToInterface() + if err != nil { + return 0, err + } + return types.CompareJSON(val, other) + } +} diff --git a/go/store/prolly/tree/json_indexed_document_test.go b/go/store/prolly/tree/json_indexed_document_test.go index 5a9b7e0e0a0..7a7adc5637b 100644 --- a/go/store/prolly/tree/json_indexed_document_test.go +++ b/go/store/prolly/tree/json_indexed_document_test.go @@ -27,6 +27,7 @@ import ( "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/expression/function/json/jsontests" "github.com/dolthub/go-mysql-server/sql/types" + typetests "github.com/dolthub/go-mysql-server/sql/types/jsontests" "github.com/stretchr/testify/require" ) @@ -301,3 +302,156 @@ func TestIndexedJsonDocument_ContainsPath(t *testing.T) { testCases := jsontests.JsonContainsPathTestCases(t, convertToIndexedJsonDocument) jsontests.RunJsonTests(t, testCases) } + +func TestJsonCompare(t *testing.T) { + ctx := sql.NewEmptyContext() + ns := NewTestNodeStore() + convertToIndexedJsonDocument := func(t *testing.T, left, right interface{}) (interface{}, interface{}) { + if left != nil { + left = newIndexedJsonDocumentFromValue(t, ctx, ns, left) + } + if right != nil { + right = newIndexedJsonDocumentFromValue(t, ctx, ns, right) + } + return left, right + } + convertOnlyLeftToIndexedJsonDocument := func(t *testing.T, left, right interface{}) (interface{}, interface{}) { + if left != nil { + left = newIndexedJsonDocumentFromValue(t, ctx, ns, left) + } + if right != nil { + rightJSON, inRange, err := types.JSON.Convert(right) + require.NoError(t, err) + require.True(t, bool(inRange)) + rightInterface, err := rightJSON.(sql.JSONWrapper).ToInterface() + require.NoError(t, err) + right = types.JSONDocument{Val: rightInterface} + } + return left, right + } + convertOnlyRightToIndexedJsonDocument := func(t *testing.T, left, right interface{}) (interface{}, interface{}) { + right, left = convertOnlyLeftToIndexedJsonDocument(t, right, left) + return left, right + } + + t.Run("small documents", func(t *testing.T) { + tests := append(typetests.JsonCompareTests, typetests.JsonCompareNullsTests...) + t.Run("compare two indexed json documents", func(t *testing.T) { + typetests.RunJsonCompareTests(t, tests, convertToIndexedJsonDocument) + }) + t.Run("compare indexed json document with non-indexed", func(t *testing.T) { + typetests.RunJsonCompareTests(t, tests, convertOnlyLeftToIndexedJsonDocument) + }) + t.Run("compare non-indexed json document with indexed", func(t *testing.T) { + typetests.RunJsonCompareTests(t, tests, convertOnlyRightToIndexedJsonDocument) + }) + }) + + noError := func(j types.MutableJSON, changed bool, err error) types.MutableJSON { + require.NoError(t, err) + require.True(t, changed) + return j + } + + largeArray := createLargeDocumentForTesting(t, ctx, ns) + largeObjectWrapper, err := largeArray.Lookup(ctx, "$[7]") + largeObject := newIndexedJsonDocumentFromValue(t, ctx, ns, largeObjectWrapper) + require.NoError(t, err) + largeDocTests := []typetests.JsonCompareTest{ + { + Name: "large object < boolean", + Left: largeObject, + Right: true, + Cmp: -1, + }, + { + Name: "large object > string", + Left: largeObject, + Right: `"test"`, + Cmp: 1, + }, + { + Name: "large object > number", + Left: largeObject, + Right: 1, + Cmp: 1, + }, + { + Name: "large object > null", + Left: largeObject, + Right: `null`, + Cmp: 1, + }, + { + Name: "inserting into beginning of object makes it greater", + Left: largeObject, + Right: noError(largeObject.Insert(ctx, "$.a", types.MustJSON("1"))), + Cmp: -1, + }, + { + Name: "inserting into end of object makes it greater", + Left: largeObject, + Right: noError(largeObject.Insert(ctx, "$.z", types.MustJSON("1"))), + Cmp: -1, + }, + { + Name: "large array < boolean", + Left: largeArray, + Right: true, + Cmp: -1, + }, + { + Name: "large array > string", + Left: largeArray, + Right: `"test"`, + Cmp: 1, + }, + { + Name: "large array > number", + Left: largeArray, + Right: 1, + Cmp: 1, + }, + { + Name: "large array > null", + Left: largeArray, + Right: `null`, + Cmp: 1, + }, + { + Name: "inserting into end of array makes it greater", + Left: largeArray, + Right: noError(largeArray.ArrayAppend("$", types.MustJSON("1"))), + Cmp: -1, + }, + { + Name: "inserting high value into beginning of array makes it greater", + Left: largeArray, + Right: noError(largeArray.ArrayInsert("$[0]", types.MustJSON("true"))), + Cmp: -1, + }, + { + Name: "inserting low value into beginning of array makes it less", + Left: largeArray, + Right: noError(largeArray.ArrayInsert("$[0]", types.MustJSON("1"))), + Cmp: 1, + }, + { + Name: "large array > large object", + Left: largeArray, + Right: largeObject, + Cmp: 1, + }, + } + t.Run("large documents", func(t *testing.T) { + t.Run("compare two indexed json documents", func(t *testing.T) { + typetests.RunJsonCompareTests(t, largeDocTests, convertToIndexedJsonDocument) + }) + t.Run("compare indexed json document with non-indexed", func(t *testing.T) { + typetests.RunJsonCompareTests(t, largeDocTests, convertOnlyLeftToIndexedJsonDocument) + }) + t.Run("compare non-indexed json document with indexed", func(t *testing.T) { + typetests.RunJsonCompareTests(t, largeDocTests, convertOnlyRightToIndexedJsonDocument) + }) + }) +} diff --git a/go/store/prolly/tree/json_location.go b/go/store/prolly/tree/json_location.go index b6c8a413be7..c532f07ac9b 100644 --- a/go/store/prolly/tree/json_location.go +++ b/go/store/prolly/tree/json_location.go @@ -186,10 +186,31 @@ const ( lexStateEscapedQuotedKey lexState = 5 ) +func escapeKey(key []byte) []byte { + return bytes.Replace(key, []byte(`"`), []byte(`\"`), -1) +} + func unescapeKey(key []byte) []byte { return bytes.Replace(key, []byte(`\"`), []byte(`"`), -1) } +// IsJsonKeyPrefix computes whether one key encodes a json location that is a prefix of another. +// Example: $.a is a prefix of $.a.b, but not $.aa +func IsJsonKeyPrefix(path, prefix []byte) bool { + return bytes.HasPrefix(path, prefix) && (path[len(prefix)] == beginArrayKey || path[len(prefix)] == beginObjectKey) +} + +func JsonKeysModifySameArray(leftKey, rightKey []byte) bool { + i := 0 + for i < len(leftKey) && i < len(rightKey) && leftKey[i] == rightKey[i] { + if leftKey[i] == beginArrayKey { + return true + } + i++ + } + return false +} + func jsonPathElementsFromMySQLJsonPath(pathBytes []byte) (jsonLocation, error) { location := newRootLocation() state := lexStatePath @@ -417,6 +438,14 @@ type jsonLocationOrdering struct{} var _ Ordering[[]byte] = jsonLocationOrdering{} func (jsonLocationOrdering) Compare(left, right []byte) int { + // A JSON document that fits entirely in a single chunk has no keys, + if len(left) == 0 && len(right) == 0 { + return 0 + } else if len(left) == 0 { + return -1 + } else if len(right) == 0 { + return 1 + } leftPath := jsonPathFromKey(left) rightPath := jsonPathFromKey(right) return compareJsonLocations(leftPath, rightPath) diff --git a/go/store/prolly/tree/json_scanner.go b/go/store/prolly/tree/json_scanner.go index 413b44ca457..8a7ffcf28f7 100644 --- a/go/store/prolly/tree/json_scanner.go +++ b/go/store/prolly/tree/json_scanner.go @@ -160,7 +160,7 @@ func (s *JsonScanner) acceptValue() error { const endOfFile byte = 0xFF // current returns the current byte being parsed, or 0xFF if we've reached the end of the file. -// (Since the JSON is UTF-8, the 0xFF byte cannot otherwise appear within in.) +// (Since the JSON is UTF-8, the 0xFF byte cannot otherwise appear within it.) func (s JsonScanner) current() byte { if s.valueOffset >= len(s.jsonBuffer) { return endOfFile diff --git a/go/store/prolly/tree/json_type_categories.go b/go/store/prolly/tree/json_type_categories.go new file mode 100644 index 00000000000..ac640e2c1f6 --- /dev/null +++ b/go/store/prolly/tree/json_type_categories.go @@ -0,0 +1,78 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tree + +import ( + "fmt" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/shopspring/decimal" +) + +type jsonTypeCategory int + +const ( + jsonTypeNull jsonTypeCategory = iota + jsonTypeNumber + jsonTypeString + jsonTypeObject + jsonTypeArray + jsonTypeBoolean +) + +func getTypeCategoryOfValue(val interface{}) (jsonTypeCategory, error) { + if val == nil { + return jsonTypeNull, nil + } + switch val.(type) { + case map[string]interface{}: + return jsonTypeObject, nil + case []interface{}: + return jsonTypeArray, nil + case bool: + return jsonTypeBoolean, nil + case string: + return jsonTypeString, nil + case decimal.Decimal, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float32, float64: + return jsonTypeNumber, nil + } + return 0, fmt.Errorf("expected json value, got %v", val) +} + +// getTypeCategoryFromFirstCharacter returns the type of a JSON object by inspecting its first byte. +func getTypeCategoryFromFirstCharacter(c byte) jsonTypeCategory { + switch c { + case '{': + return jsonTypeObject + case '[': + return jsonTypeArray + case 'n': + return jsonTypeNull + case 't', 'f': + return jsonTypeBoolean + case '"': + return jsonTypeString + default: + return jsonTypeNumber + } +} + +func IsJsonObject(json sql.JSONWrapper) (bool, error) { + valType, err := GetTypeCategory(json) + if err != nil { + return false, err + } + return valType == jsonTypeObject, nil +}