Skip to content

Commit

Permalink
Merge pull request #7785 from dolthub/nicktobey/json-serialize
Browse files (browse the repository at this point in the history)
Use `LazyJSONDocument` when reading from a JSON column.
  • Loading branch information
nicktobey authored May 1, 2024
2 parents 4249386 + 78ec207 commit cd0b5ac
Show file tree
Hide file tree
Showing 19 changed files with 344 additions and 189 deletions.
2 changes: 1 addition & 1 deletion go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ require (
github.com/cespare/xxhash v1.1.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
github.com/dolthub/go-mysql-server v0.18.2-0.20240429214844-6feb67867355
github.com/dolthub/go-mysql-server v0.18.2-0.20240430015631-3d60d20186c8
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63
github.com/dolthub/swiss v0.1.0
github.com/goccy/go-json v0.10.2
Expand Down
4 changes: 2 additions & 2 deletions go/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168=
github.com/dolthub/go-mysql-server v0.18.2-0.20240429214844-6feb67867355 h1:Dylx0T0J40z3momZ0pDlUm0PWEvPWrcOVkeZ9jFXtVQ=
github.com/dolthub/go-mysql-server v0.18.2-0.20240429214844-6feb67867355/go.mod h1:T6EEu2iQoasR13Ovtp44yDn+rXQOBgh3BACPZMxSF/8=
github.com/dolthub/go-mysql-server v0.18.2-0.20240430015631-3d60d20186c8 h1:Xm6syv6978frTakO8OAvmcwXDEKq1Eij7rJFr6F+BNQ=
github.com/dolthub/go-mysql-server v0.18.2-0.20240430015631-3d60d20186c8/go.mod h1:T6EEu2iQoasR13Ovtp44yDn+rXQOBgh3BACPZMxSF/8=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514=
Expand Down
6 changes: 5 additions & 1 deletion go/libraries/doltcore/merge/merge_prolly_rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -1998,7 +1998,11 @@ func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAd
return nil, true, nil
}

mergedBytes, err := json.Marshal(mergedDoc.ToInterface())
mergedVal, err := mergedDoc.ToInterface()
if err != nil {
return nil, true, err
}
mergedBytes, err := json.Marshal(mergedVal)
if err != nil {
return nil, true, err
}
Expand Down
4 changes: 2 additions & 2 deletions go/libraries/doltcore/merge/violations_fk_prolly.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ type FkCVMeta struct {

var _ sql.JSONWrapper = FkCVMeta{}

func (m FkCVMeta) ToInterface() interface{} {
func (m FkCVMeta) ToInterface() (interface{}, error) {
return map[string]interface{}{
"Columns": m.Columns,
"ForeignKey": m.ForeignKey,
Expand All @@ -407,7 +407,7 @@ func (m FkCVMeta) ToInterface() interface{} {
"ReferencedIndex": m.ReferencedIndex,
"ReferencedTable": m.ReferencedTable,
"Table": m.Table,
}
}, nil
}

// PrettyPrint is a custom pretty print function to match the old format's
Expand Down
12 changes: 6 additions & 6 deletions go/libraries/doltcore/merge/violations_unique_prolly.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ type UniqCVMeta struct {
Name string `json:"Name"`
}

func (m UniqCVMeta) ToInterface() interface{} {
func (m UniqCVMeta) ToInterface() (interface{}, error) {
return map[string]interface{}{
"Columns": m.Columns,
"Name": m.Name,
}
}, nil
}

var _ sql.JSONWrapper = UniqCVMeta{}
Expand Down Expand Up @@ -149,10 +149,10 @@ func newNotNullViolationMeta(violations []string, value val.Tuple) (prolly.Const
}, nil
}

func (m NullViolationMeta) ToInterface() interface{} {
func (m NullViolationMeta) ToInterface() (interface{}, error) {
return map[string]interface{}{
"Columns": m.Columns,
}
}, nil
}

func (m NullViolationMeta) Unmarshall(ctx *sql.Context) (val types.JSONDocument, err error) {
Expand Down Expand Up @@ -193,9 +193,9 @@ func (m CheckCVMeta) Unmarshall(_ *sql.Context) (val types.JSONDocument, err err
return types.JSONDocument{Val: m}, nil
}

func (m CheckCVMeta) ToInterface() interface{} {
func (m CheckCVMeta) ToInterface() (interface{}, error) {
return map[string]interface{}{
"Name": m.Name,
"Expression": m.Expression,
}
}, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package binlogreplication

import (
"encoding/json"
"fmt"
"math/rand"
"strings"
Expand Down Expand Up @@ -521,11 +522,16 @@ func assertValues(t *testing.T, assertionIndex int, row map[string]interface{})
if typeDesc.TypeDefinition == "json" {
// LD_1 and DOLT storage formats return JSON strings slightly differently; DOLT removes spaces
// while LD_1 adds whitespace, so for json comparison, we sanitize by removing whitespace.
actualValue = strings.ReplaceAll(actualValue, " ", "")
var actual interface{}
json.Unmarshal([]byte(actualValue), &actual)
var expected interface{}
json.Unmarshal([]byte(expectedValue.(string)), &expected)
require.EqualValues(t, expected, actual,
"Failed on assertion %d for for column %q", assertionIndex, typeDesc.ColumnName())
} else {
require.EqualValues(t, expectedValue, actualValue,
"Failed on assertion %d for for column %q", assertionIndex, typeDesc.ColumnName())
}

require.EqualValues(t, expectedValue, actualValue,
"Failed on assertion %d for for column %q", assertionIndex, typeDesc.ColumnName())
}
}

Expand Down
13 changes: 8 additions & 5 deletions go/libraries/doltcore/sqle/json/noms_json_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ func NomsJSONFromJSONValue(ctx context.Context, vrw types.ValueReadWriter, val s
return noms, nil
}

sqlVal := val.ToInterface()
sqlVal, err := val.ToInterface()
if err != nil {
return NomsJSON{}, err
}

v, err := marshalJSON(ctx, vrw, sqlVal)
if err != nil {
Expand Down Expand Up @@ -133,17 +136,17 @@ func marshalJSONObject(ctx context.Context, vrw types.ValueReadWriter, obj map[s
return types.NewMap(ctx, vrw, vals...)
}

func (v NomsJSON) ToInterface() interface{} {
func (v NomsJSON) ToInterface() (interface{}, error) {
nomsVal, err := types.JSON(v).Inner()
if err != nil {
panic(err)
return nil, err
}

val, err := unmarshalJSON(context.Background(), nomsVal)
if err != nil {
panic(err)
return nil, err
}
return val
return val, nil
}

// Unmarshall implements the sql.JSONValue interface.
Expand Down
10 changes: 8 additions & 2 deletions go/libraries/doltcore/sqle/schema_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ func getSchemaFragmentsOfType(ctx *sql.Context, tbl *WritableDoltTable, fragType

// Extract Created Time from JSON column
createdTime, err := getCreatedTime(ctx, sqlRow[extraIdx].(sql.JSONWrapper))
if err != nil {
return nil, err
}

frags = append(frags, schemaFragment{
name: sqlRow[nameIdx].(string),
Expand Down Expand Up @@ -327,9 +330,12 @@ func loadDefaultSqlMode() (string, error) {
}

func getCreatedTime(ctx *sql.Context, extraCol sql.JSONWrapper) (int64, error) {
doc := extraCol.ToInterface()
doc, err := extraCol.ToInterface()
if err != nil {
return 0, err
}

err := fmt.Errorf("value %v does not contain creation time", doc)
err = fmt.Errorf("value %v does not contain creation time", doc)

obj, ok := doc.(map[string]interface{})
if !ok {
Expand Down
12 changes: 2 additions & 10 deletions go/libraries/doltcore/sqle/schema_table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (

"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
"github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
"github.com/dolthub/dolt/go/libraries/doltcore/sqle/json"
"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
)

Expand Down Expand Up @@ -137,16 +136,9 @@ func TestSchemaTableMigrationV1(t *testing.T) {
require.NoError(t, err)
// convert the JSONDocument to a string for comparison
if row[3] != nil {
// Annoying difference in representation between storage versions here
jsonDoc, ok := row[3].(gmstypes.JSONDocument)
jsonDoc, ok := row[3].(sql.JSONWrapper)
if ok {
row[3], err = jsonDoc.JSONString()
row[3] = strings.ReplaceAll(row[3].(string), " ", "") // remove spaces
}

nomsJson, ok := row[3].(json.NomsJSON)
if ok {
row[3], err = nomsJson.JSONString()
row[3], err = gmstypes.StringifyJSON(jsonDoc)
row[3] = strings.ReplaceAll(row[3].(string), " ", "") // remove spaces
}

Expand Down
16 changes: 12 additions & 4 deletions go/libraries/doltcore/sqle/statspro/dolt_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,15 +136,23 @@ func NewDoltStats() *DoltStats {
return &DoltStats{mu: &sync.Mutex{}, Active: make(map[hash.Hash]int), Statistic: &stats.Statistic{}}
}

func (s *DoltStats) ToInterface() interface{} {
ret := s.Statistic.ToInterface().(map[string]interface{})
func (s *DoltStats) ToInterface() (interface{}, error) {
statVal, err := s.Statistic.ToInterface()
if err != nil {
return nil, err
}
ret := statVal.(map[string]interface{})

var hist sql.Histogram
for _, b := range s.Hist {
hist = append(hist, b)
}
ret["statistic"].(map[string]interface{})["buckets"] = hist.ToInterface()
return ret
histVal, err := hist.ToInterface()
if err != nil {
return nil, err
}
ret["statistic"].(map[string]interface{})["buckets"] = histVal
return ret, nil
}

func (s *DoltStats) WithHistogram(h sql.Histogram) (sql.Statistic, error) {
Expand Down
2 changes: 2 additions & 0 deletions go/libraries/doltcore/table/typed/json/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ func (j *RowWriter) jsonDataForSqlSchema(row sql.Row) ([]byte, error) {

// This is kind of silly: we are unmarshalling JSON just to marshall it back again
// But it makes marshalling much simpler
// Reset val so we don't unmarshall into the old value.
val = nil
err = json.Unmarshal([]byte(str), &val)
if err != nil {
return nil, err
Expand Down
21 changes: 16 additions & 5 deletions go/store/prolly/tree/blob_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ import (
"errors"
"io"

"github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/go-mysql-server/sql"
sqltypes "github.com/dolthub/go-mysql-server/sql/types"
"github.com/goccy/go-json"

"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/prolly/message"
"github.com/dolthub/dolt/go/store/types"
)

const DefaultFixedChunkLength = 4000
Expand Down Expand Up @@ -272,19 +274,28 @@ func NewJSONDoc(addr hash.Hash, ns NodeStore) *JSONDoc {
return &JSONDoc{ImmutableTree{Addr: addr, ns: ns}}
}

func (b *JSONDoc) ToJSONDocument(ctx context.Context) (types.JSONDocument, error) {
func (b *JSONDoc) ToJSONDocument(ctx context.Context) (sqltypes.JSONDocument, error) {
buf, err := b.bytes(ctx)
if err != nil {
return types.JSONDocument{}, err
return sqltypes.JSONDocument{}, err
}
var doc types.JSONDocument
var doc sqltypes.JSONDocument
err = json.Unmarshal(buf, &doc.Val)
if err != nil {
return types.JSONDocument{}, err
return sqltypes.JSONDocument{}, err
}
return doc, err
}

func (b *JSONDoc) ToLazyJSONDocument(ctx context.Context) (sql.JSONWrapper, error) {
buf, err := b.bytes(ctx)
if err != nil {
return sqltypes.JSONDocument{}, err
}
buf = types.UnescapeHTMLCodepoints(buf)
return sqltypes.NewLazyJSONDocument(buf), nil
}

func (b *JSONDoc) ToString(ctx context.Context) (string, error) {
buf, err := b.bytes(ctx)
if err != nil {
Expand Down
5 changes: 3 additions & 2 deletions go/store/prolly/tree/prolly_fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"math"
"time"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/types"
"github.com/shopspring/decimal"

Expand Down Expand Up @@ -127,7 +128,7 @@ func GetField(ctx context.Context, td val.TupleDesc, i int, tup val.Tuple, ns No
var h hash.Hash
h, ok = td.GetJSONAddr(i, tup)
if ok {
v, err = NewJSONDoc(h, ns).ToJSONDocument(ctx)
v, err = NewJSONDoc(h, ns).ToLazyJSONDocument(ctx)
}
case val.StringAddrEnc:
var h hash.Hash
Expand Down Expand Up @@ -409,5 +410,5 @@ func convJson(v interface{}) (buf []byte, err error) {
if err != nil {
return nil, err
}
return json.Marshal(v.(types.JSONDocument).Val)
return types.MarshallJson(v.(sql.JSONWrapper))
}
15 changes: 14 additions & 1 deletion go/store/prolly/tree/prolly_fields_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"testing"
"time"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/expression/function/spatial"
"github.com/dolthub/go-mysql-server/sql/types"
"github.com/shopspring/decimal"
Expand Down Expand Up @@ -195,7 +196,19 @@ func testRoundTripProllyFields(t *testing.T, test prollyFieldTest) {

v, err := GetField(context.Background(), desc, 0, tup, ns)
assert.NoError(t, err)
assert.Equal(t, test.value, v)
jsonType := val.Type{Enc: val.JSONAddrEnc}
if test.typ == jsonType {
getJson := func(field interface{}) interface{} {
jsonWrapper, ok := field.(sql.JSONWrapper)
require.Equal(t, ok, true)
val, err := jsonWrapper.ToInterface()
require.NoError(t, err)
return val
}
assert.Equal(t, getJson(test.value), getJson(v))
} else {
assert.Equal(t, test.value, v)
}
}

func mustParseGeometryType(t *testing.T, s string) (v interface{}) {
Expand Down
Loading

0 comments on commit cd0b5ac

Please sign in to comment.