Skip to content

Commit

Permalink
Merge pull request #8129 from dolthub/nicktobey/json-funcs
Browse files Browse the repository at this point in the history
Add optimized diffing and three-way merge of indexed JSON Documents.
  • Loading branch information
nicktobey authored Aug 13, 2024
2 parents a0d0708 + 1399c9f commit 2489c4a
Show file tree
Hide file tree
Showing 15 changed files with 1,475 additions and 173 deletions.
2 changes: 1 addition & 1 deletion go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ require (
github.com/cespare/xxhash/v2 v2.2.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
github.com/dolthub/go-mysql-server v0.18.2-0.20240812215633-0627bdf00f58
github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63
github.com/dolthub/swiss v0.1.0
github.com/goccy/go-json v0.10.2
Expand Down
4 changes: 2 additions & 2 deletions go/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168=
github.com/dolthub/go-mysql-server v0.18.2-0.20240812215633-0627bdf00f58 h1:ysak2V/gc2YXJYAh1H/qphoDUOpgaSO0CuI+Lr/811U=
github.com/dolthub/go-mysql-server v0.18.2-0.20240812215633-0627bdf00f58/go.mod h1:PwuemL+YK+YiWcUFhknixeqNLjJNfCx7KDsHNajx9fM=
github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178 h1:aCckOhs6UuESaVFEWMKaYbRtE1ht6qqs++pzUGOu9NM=
github.com/dolthub/go-mysql-server v0.18.2-0.20240812221236-ebc28713f178/go.mod h1:PwuemL+YK+YiWcUFhknixeqNLjJNfCx7KDsHNajx9fM=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q=
github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE=
Expand Down
109 changes: 83 additions & 26 deletions go/libraries/doltcore/merge/merge_prolly_rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"github.com/dolthub/go-mysql-server/sql/expression"
"github.com/dolthub/go-mysql-server/sql/transform"
"github.com/dolthub/go-mysql-server/sql/types"
"golang.org/x/exp/maps"
errorkinds "gopkg.in/src-d/go-errors.v1"

"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
Expand Down Expand Up @@ -1978,56 +1977,57 @@ func (m *valueMerger) processColumn(ctx *sql.Context, i int, left, right, base v
}

// mergeJSONAddr performs a three-way merge of JSON documents that are identified by address.
// It builds the base, left and right documents from baseAddr, leftAddr and rightAddr using m.ns,
// merges them with mergeJSON, and returns the address bytes of the merged document.
//
// Returns (nil, true, nil) when mergeJSON reports a conflict, and (nil, true, err) when any
// load, merge or serialization step returns an error. On success conflict is false.
//
// NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped, so a
// replaced line is immediately followed by its replacement (e.g. ToJSONDocument followed by
// ToIndexedJSONDocument) — confirm against the real file before relying on statement order.
func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAddr []byte, rightAddr []byte) (resultAddr []byte, conflict bool, err error) {
// Build the three documents from their addresses; any failure is reported with conflict=true.
baseDoc, err := tree.NewJSONDoc(hash.New(baseAddr), m.ns).ToJSONDocument(ctx)
baseDoc, err := tree.NewJSONDoc(hash.New(baseAddr), m.ns).ToIndexedJSONDocument(ctx)
if err != nil {
return nil, true, err
}
leftDoc, err := tree.NewJSONDoc(hash.New(leftAddr), m.ns).ToJSONDocument(ctx)
leftDoc, err := tree.NewJSONDoc(hash.New(leftAddr), m.ns).ToIndexedJSONDocument(ctx)
if err != nil {
return nil, true, err
}
rightDoc, err := tree.NewJSONDoc(hash.New(rightAddr), m.ns).ToJSONDocument(ctx)
rightDoc, err := tree.NewJSONDoc(hash.New(rightAddr), m.ns).ToIndexedJSONDocument(ctx)
if err != nil {
return nil, true, err
}

// Delegate the actual three-way merge; the second form also passes the node store m.ns.
mergedDoc, conflict, err := mergeJSON(ctx, baseDoc, leftDoc, rightDoc)
mergedDoc, conflict, err := mergeJSON(ctx, m.ns, baseDoc, leftDoc, rightDoc)
if err != nil {
return nil, true, err
}
if conflict {
// A conflict is not an error: the caller gets no address and conflict=true.
return nil, true, nil
}

// One serialization path converts the merged document with ToInterface, marshals it with
// json.Marshal, and writes the bytes via tree.SerializeBytesToAddr.
mergedVal, err := mergedDoc.ToInterface()
if err != nil {
return nil, true, err
}
mergedBytes, err := json.Marshal(mergedVal)
if err != nil {
return nil, true, err
}
mergedAddr, err := tree.SerializeBytesToAddr(ctx, m.ns, bytes.NewReader(mergedBytes), len(mergedBytes))
// The other path hands the merged document to tree.SerializeJsonToAddr and uses the hash of
// the returned root as the result address.
root, err := tree.SerializeJsonToAddr(ctx, m.ns, mergedDoc)
if err != nil {
return nil, true, err
}
mergedAddr := root.HashOf()
return mergedAddr[:], false, nil

}

func mergeJSON(ctx context.Context, base types.JSONDocument, left types.JSONDocument, right types.JSONDocument) (resultDoc types.JSONDocument, conflict bool, err error) {
func mergeJSON(ctx context.Context, ns tree.NodeStore, base, left, right sql.JSONWrapper) (resultDoc sql.JSONWrapper, conflict bool, err error) {
// First, deserialize each value into JSON.
// We can only merge if the value at all three commits is a JSON object.

baseObject, baseIsObject := base.Val.(types.JsonObject)
leftObject, leftIsObject := left.Val.(types.JsonObject)
rightObject, rightIsObject := right.Val.(types.JsonObject)
baseIsObject, err := tree.IsJsonObject(base)
if err != nil {
return nil, true, err
}
leftIsObject, err := tree.IsJsonObject(left)
if err != nil {
return nil, true, err
}
rightIsObject, err := tree.IsJsonObject(right)
if err != nil {
return nil, true, err
}

if !baseIsObject || !leftIsObject || !rightIsObject {
// At least one of the commits does not have a JSON object.
// If both left and right have the same value, use that value.
// But if they differ, this is an unresolvable merge conflict.
cmp, err := left.Compare(right)
cmp, err := types.CompareJSON(left, right)
if err != nil {
return types.JSONDocument{}, true, err
}
Expand All @@ -2039,26 +2039,83 @@ func mergeJSON(ctx context.Context, base types.JSONDocument, left types.JSONDocu
}
}

mergedObject := maps.Clone(leftObject)
merged := types.JSONDocument{Val: mergedObject}
indexedBase, isBaseIndexed := base.(tree.IndexedJsonDocument)
indexedLeft, isLeftIndexed := left.(tree.IndexedJsonDocument)
indexedRight, isRightIndexed := right.(tree.IndexedJsonDocument)

// We only do three way merges on values read from tables right now, which are read in as tree.IndexedJsonDocument.

var leftDiffer tree.IJsonDiffer
if isBaseIndexed && isLeftIndexed {
leftDiffer, err = tree.NewIndexedJsonDiffer(ctx, indexedBase, indexedLeft)
if err != nil {
return nil, true, err
}
} else {
baseObject, err := base.ToInterface()
if err != nil {
return nil, true, err
}
leftObject, err := left.ToInterface()
if err != nil {
return nil, true, err
}
leftDiffer = tree.NewJsonDiffer(baseObject.(types.JsonObject), leftObject.(types.JsonObject))
}

var rightDiffer tree.IJsonDiffer
if isBaseIndexed && isRightIndexed {
rightDiffer, err = tree.NewIndexedJsonDiffer(ctx, indexedBase, indexedRight)
if err != nil {
return nil, true, err
}
} else {
baseObject, err := base.ToInterface()
if err != nil {
return nil, true, err
}
rightObject, err := right.ToInterface()
if err != nil {
return nil, true, err
}
rightDiffer = tree.NewJsonDiffer(baseObject.(types.JsonObject), rightObject.(types.JsonObject))
}

threeWayDiffer := NewThreeWayJsonDiffer(baseObject, leftObject, rightObject)
threeWayDiffer := ThreeWayJsonDiffer{
leftDiffer: leftDiffer,
rightDiffer: rightDiffer,
ns: ns,
}

// Compute the merged object by applying diffs to the left object as needed.
// If the left object isn't an IndexedJsonDocument, we make one.
var ok bool
var merged tree.IndexedJsonDocument
if merged, ok = left.(tree.IndexedJsonDocument); !ok {
root, err := tree.SerializeJsonToAddr(ctx, ns, left)
if err != nil {
return types.JSONDocument{}, true, err
}
merged = tree.NewIndexedJsonDocument(ctx, root, ns)
}

for {
threeWayDiff, err := threeWayDiffer.Next(ctx)
if err == io.EOF {
return merged, false, nil
}
if err != nil {
return types.JSONDocument{}, true, err
}

switch threeWayDiff.Op {
case tree.DiffOpRightAdd, tree.DiffOpConvergentAdd, tree.DiffOpRightModify, tree.DiffOpConvergentModify:
_, _, err := merged.Set(ctx, threeWayDiff.Key, threeWayDiff.Right)
case tree.DiffOpRightAdd, tree.DiffOpConvergentAdd, tree.DiffOpRightModify, tree.DiffOpConvergentModify, tree.DiffOpDivergentModifyResolved:
merged, _, err = merged.SetWithKey(ctx, threeWayDiff.Key, threeWayDiff.Right)
if err != nil {
return types.JSONDocument{}, true, err
}
case tree.DiffOpRightDelete, tree.DiffOpConvergentDelete:
_, _, err := merged.Remove(ctx, threeWayDiff.Key)
merged, _, err = merged.RemoveWithKey(ctx, threeWayDiff.Key)
if err != nil {
return types.JSONDocument{}, true, err
}
Expand Down
Loading

0 comments on commit 2489c4a

Please sign in to comment.