From 2231f5bd6fdd6245cd3f34b95a5f11d7943d6103 Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Sat, 29 Jun 2024 18:55:24 +0800 Subject: [PATCH 1/4] feat:(ast) `node` support concurrently-read --- README.md | 13 ++ api.go | 6 + ast/encode.go | 15 +- ast/encode_test.go | 4 +- ast/iterator.go | 7 +- ast/node.go | 289 +++++++++++++++++---------------- ast/node_test.go | 73 +++++---- ast/parser.go | 118 +++++++++++++- ast/parser_test.go | 58 ++++++- ast/search.go | 31 +++- ast/search_test.go | 101 ++++++++++++ ast/visitor_test.go | 36 ++-- internal/native/types/types.go | 2 +- 13 files changed, 526 insertions(+), 227 deletions(-) diff --git a/README.md b/README.md index 62a2fdfaa..a6c8314f9 100644 --- a/README.md +++ b/README.md @@ -301,6 +301,19 @@ exist, err := root.UnsetByIndex(1) // exist == true println(root.Get("key4").Check()) // "value not exist" ``` +#### SearchOption +```go +opts := ast.SearchOption{ CopyReturn: true ... } +val, err := ast.GetWithOption(JSON, opts, "key"...) +``` +`Searcher` provides some options for use to meet different needs: +- CopyReturn +Indicate the searcher to copy the result JSON string instead of refer from the input. This can help to reduce memory usage if you cache the results +- ConcurentRead +Since `ast.Node` use `Lazy-Load` design, it doesn't support Concurrently-Read by default. If you want to read it concurrently, please specify it. +- ValidateJSON +Indicate the searcher to validate the entire JSON. This option is enabled by default. + #### Serialize To encode `ast.Node` as json, use `MarshalJson()` or `json.Marshal()` (MUST pass the node's pointer) diff --git a/api.go b/api.go index 2691e9945..33e9b0e16 100644 --- a/api.go +++ b/api.go @@ -207,6 +207,12 @@ func Get(src []byte, path ...interface{}) (ast.Node, error) { return GetCopyFromString(rt.Mem2Str(src), path...) } +func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) { + s := ast.NewSearcher(rt.Mem2Str(src)) + s.SearchOptions = opts + return s.GetByPath(path...) +} + // GetFromString is same with Get except src is string. // // WARNING: The returned JSON is **Referenced** from the input. diff --git a/ast/encode.go b/ast/encode.go index 564b96305..345f91fe8 100644 --- a/ast/encode.go +++ b/ast/encode.go @@ -126,10 +126,10 @@ func freeBuffer(buf *[]byte) { } func (self *Node) encode(buf *[]byte) error { - if self.IsRaw() { + if self.isRaw() { return self.encodeRaw(buf) } - switch self.Type() { + switch int(self.itype()) { case V_NONE : return ErrNotExist case V_ERROR : return self.Check() case V_NULL : return self.encodeNull(buf) @@ -145,9 +145,14 @@ func (self *Node) encode(buf *[]byte) error { } func (self *Node) encodeRaw(buf *[]byte) error { - raw, err := self.Raw() - if err != nil { - return err + lock := self.rlock() + if !self.isRaw() { + self.runlock() + return self.encode(buf) + } + raw := self.toString() + if lock { + self.runlock() } *buf = append(*buf, raw...) 
return nil diff --git a/ast/encode_test.go b/ast/encode_test.go index 250f82b30..9b07e81b4 100644 --- a/ast/encode_test.go +++ b/ast/encode_test.go @@ -141,7 +141,7 @@ func TestEncodeNode(t *testing.T) { if string(ret) != data { t.Fatal(string(ret)) } - root.loadAllKey() + root.Load() ret, err = root.MarshalJSON() if err != nil { t.Fatal(err) @@ -228,7 +228,7 @@ func BenchmarkEncodeLoad_Sonic(b *testing.B) { if e != 0 { b.Fatal(root) } - root.loadAllKey() + root.Load() _, err := root.MarshalJSON() if err != nil { b.Fatal(err) diff --git a/ast/iterator.go b/ast/iterator.go index 64e1e5a90..13001119e 100644 --- a/ast/iterator.go +++ b/ast/iterator.go @@ -29,7 +29,7 @@ type Pair struct { // Values returns iterator for array's children traversal func (self *Node) Values() (ListIterator, error) { - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return ListIterator{}, err } return self.values(), nil @@ -41,7 +41,7 @@ func (self *Node) values() ListIterator { // Properties returns iterator for object's children traversal func (self *Node) Properties() (ObjectIterator, error) { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return ObjectIterator{}, err } return self.properties(), nil @@ -168,6 +168,9 @@ type Scanner func(path Sequence, node *Node) bool // // NOTICE: A unsetted node WON'T trigger sc, but its index still counts into Path.Index func (self *Node) ForEach(sc Scanner) error { + if err := self.checkRaw(); err != nil { + return err + } switch self.itype() { case types.V_ARRAY: iter, err := self.Values() diff --git a/ast/node.go b/ast/node.go index 1a77a1fa1..fe67be62a 100644 --- a/ast/node.go +++ b/ast/node.go @@ -17,13 +17,15 @@ package ast import ( - `encoding/json` - `fmt` - `strconv` - `unsafe` - - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` + "encoding/json" + "fmt" + "strconv" + "sync" + "sync/atomic" + "unsafe" + + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" ) const ( @@ -36,7 +38,7 @@ const ( _V_ARRAY_LAZY = _V_LAZY | types.V_ARRAY _V_OBJECT_LAZY = _V_LAZY | types.V_OBJECT _MASK_LAZY = _V_LAZY - 1 - _MASK_RAW = _V_RAW - 1 + _MASK_RAW = _V_RAW - 1 ) const ( @@ -56,6 +58,7 @@ type Node struct { t types.ValueType l uint p unsafe.Pointer + m *sync.RWMutex } // UnmarshalJSON is just an adapter to json.Unmarshaler. @@ -79,17 +82,39 @@ func (self *Node) UnmarshalJSON(data []byte) (err error) { // V_STRING = 7 (json value string) // V_NUMBER = 33 (json value number ) // V_ANY = 34 (golang interface{}) +// +// Deprecated: not concurrent safe. 
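Since `encodeRaw` now takes the node's read lock before copying out the raw segment, serialization joins the set of operations that may run while other goroutines read the same node. A minimal sketch of concurrent `MarshalJSON` calls; the sample JSON, goroutine count, and the use of the `ConcurrentRead` option added to `ast.Searcher` later in this patch are illustrative assumptions, not part of the change itself:

```go
package main

import (
	"sync"

	"github.com/bytedance/sonic/ast"
)

func main() {
	s := ast.NewSearcher(`{"statuses":[{"id":1},{"id":2},{"id":3}]}`)
	s.ConcurrentRead = true // nodes returned below carry an RWMutex

	node, err := s.GetByPath("statuses")
	if err != nil {
		panic(err)
	}

	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// encodeRaw re-emits the raw JSON under the read lock,
			// so concurrent marshalling does not race with other readers.
			if _, err := node.MarshalJSON(); err != nil {
				panic(err)
			}
		}()
	}
	wg.Wait()
}
```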
Use TypeSafe instead func (self Node) Type() int { return int(self.t & _MASK_LAZY & _MASK_RAW) } -func (self Node) itype() types.ValueType { +// Type returns json type represented by the node +// It will be one of belows: +// V_NONE = 0 (empty node, key not exists) +// V_ERROR = 1 (error node) +// V_NULL = 2 (json value `null`, key exists) +// V_TRUE = 3 (json value `true`) +// V_FALSE = 4 (json value `false`) +// V_ARRAY = 5 (json value array) +// V_OBJECT = 6 (json value object) +// V_STRING = 7 (json value string) +// V_NUMBER = 33 (json value number ) +// V_ANY = 34 (golang interface{}) +func (self *Node) TypeSafe() int { + return int(self.loadt() & _MASK_LAZY & _MASK_RAW) +} + +func (self *Node) itype() types.ValueType { return self.t & _MASK_LAZY & _MASK_RAW } // Exists returns false only if the self is nil or empty node V_NONE func (self *Node) Exists() bool { - return self.Valid() && self.t != _V_NONE + if self == nil { + return false + } + t := self.loadt() + return t != V_ERROR && t != _V_NONE } // Valid reports if self is NOT V_ERROR or nil @@ -97,7 +122,7 @@ func (self *Node) Valid() bool { if self == nil { return false } - return self.t != V_ERROR + return self.loadt() != V_ERROR } // Check checks if the node itself is valid, and return: @@ -106,20 +131,26 @@ func (self *Node) Valid() bool { func (self *Node) Check() error { if self == nil { return ErrNotExist - } else if self.t != V_ERROR { + } else if self.loadt() != V_ERROR { return nil } else { return self } } -// IsRaw returns true if node's underlying value is raw json +// isRaw returns true if node's underlying value is raw json +// Deprecated: not concurent safe func (self Node) IsRaw() bool { - return self.t&_V_RAW != 0 + return self.t & _V_RAW != 0 +} + +// IsRaw returns true if node's underlying value is raw json +func (self *Node) isRaw() bool { + return self.loadt() & _V_RAW != 0 } func (self *Node) isLazy() bool { - return self != nil && self.t&_V_LAZY != 0 + return self != nil && self.t & _V_LAZY != 0 } func (self *Node) isAny() bool { @@ -133,18 +164,26 @@ func (self *Node) Raw() (string, error) { if self == nil { return "", ErrNotExist } - if !self.IsRaw() { + lock := self.rlock() + if !self.isRaw() { + if lock { + self.runlock() + } buf, err := self.MarshalJSON() return rt.Mem2Str(buf), err } - return self.toString(), nil + ret := self.toString() + if lock { + self.runlock() + } + return ret, nil } func (self *Node) checkRaw() error { if err := self.Check(); err != nil { return err } - if self.IsRaw() { + if self.isRaw() { self.parseRaw(false) } return self.Check() @@ -504,7 +543,7 @@ func (self *Node) Len() (int, error) { } } -func (self Node) len() int { +func (self *Node) len() int { return int(self.l) } @@ -527,7 +566,7 @@ func (self *Node) Cap() (int, error) { // // If self is V_NONE or V_NULL, it becomes V_OBJECT and sets the node at the key. func (self *Node) Set(key string, node Node) (bool, error) { - if err := self.Check(); err != nil { + if err := self.checkRaw(); err != nil { return false, err } if err := node.Check(); err != nil { @@ -568,7 +607,7 @@ func (self *Node) SetAny(key string, val interface{}) (bool, error) { // Unset REMOVE (soft) the node of given key under object parent, and reports if the key has existed. 
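The accessors above now split into a legacy lock-free path (`Type`, `IsRaw`) and concurrency-aware variants (`TypeSafe`, `isRaw`, `Raw` under `rlock`). A sketch of how a caller is expected to use the safe variants on a shared node; it assumes the node came from a searcher with `ConcurrentRead` enabled, and the input and goroutine count are arbitrary:

```go
package main

import (
	"fmt"
	"sync"

	"github.com/bytedance/sonic/ast"
)

func main() {
	s := ast.NewSearcher(`{"a":{"b":[1,2,3]}}`)
	s.ConcurrentRead = true

	node, err := s.GetByPath("a", "b")
	if err != nil {
		panic(err)
	}

	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			_ = node.TypeSafe()    // atomic load of the type word
			raw, err := node.Raw() // copies the raw JSON under the read lock
			if err != nil {
				panic(err)
			}
			if v, err := node.Index(2).Int64(); err != nil || v != 3 {
				panic(fmt.Sprintf("raw=%q v=%d err=%v", raw, v, err))
			}
		}()
	}
	wg.Wait()
}
```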
func (self *Node) Unset(key string) (bool, error) { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return false, err } // NOTICE: must get acurate length before deduct @@ -589,7 +628,7 @@ func (self *Node) Unset(key string) (bool, error) { // // The index must be within self's children. func (self *Node) SetByIndex(index int, node Node) (bool, error) { - if err := self.Check(); err != nil { + if err := self.checkRaw(); err != nil { return false, err } if err := node.Check(); err != nil { @@ -669,7 +708,7 @@ func (self *Node) UnsetByIndex(index int) (bool, error) { // // If self is V_NONE or V_NULL, it becomes V_ARRAY and sets the node at index 0. func (self *Node) Add(node Node) error { - if err := self.Check(); err != nil { + if err := self.checkRaw(); err != nil { return err } @@ -677,7 +716,7 @@ func (self *Node) Add(node Node) error { *self = NewArray([]Node{node}) return nil } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return err } @@ -740,7 +779,7 @@ func (self *Node) Pop() error { // // WARN: this will change address of elements, which is a dangerous action. func (self *Node) Move(dst, src int) error { - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return err } @@ -812,7 +851,7 @@ func (self *Node) GetByPath(path ...interface{}) *Node { // Get loads given key of an object node on demands func (self *Node) Get(key string) *Node { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return unwrapError(err) } n, _ := self.skipKey(key) @@ -845,14 +884,14 @@ func (self *Node) Index(idx int) *Node { // IndexPair indexies pair at given idx, // node type MUST be either V_OBJECT func (self *Node) IndexPair(idx int) *Pair { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil } return self.skipIndexPair(idx) } func (self *Node) indexOrGet(idx int, key string) (*Node, int) { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return unwrapError(err), idx } @@ -889,10 +928,10 @@ func (self *Node) Map() (map[string]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObject() @@ -908,10 +947,10 @@ func (self *Node) MapUseNumber() (map[string]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObjectUseNumber() @@ -928,7 +967,7 @@ func (self *Node) MapUseNode() (map[string]Node, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } if err := self.skipAllKey(); err != nil { @@ -1034,10 +1073,10 @@ func (self *Node) Array() ([]interface{}, error) { return nil, ErrUnsupportType } } - if err := 
self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArray() @@ -1053,10 +1092,10 @@ func (self *Node) ArrayUseNumber() ([]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArrayUseNumber() @@ -1073,7 +1112,7 @@ func (self *Node) ArrayUseNode() ([]Node, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } if err := self.skipAllIndex(); err != nil { @@ -1129,12 +1168,12 @@ func (self *Node) Interface() (interface{}, error) { } return v, nil case _V_ARRAY_LAZY : - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArray() case _V_OBJECT_LAZY : - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObject() @@ -1168,12 +1207,12 @@ func (self *Node) InterfaceUseNumber() (interface{}, error) { case types.V_STRING : return self.toString(), nil case _V_NUMBER : return self.toNumber(), nil case _V_ARRAY_LAZY : - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArrayUseNumber() case _V_OBJECT_LAZY : - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObjectUseNumber() @@ -1205,70 +1244,30 @@ func (self *Node) InterfaceUseNode() (interface{}, error) { } } -// LoadAll loads all the node's children and children's children as parsed. -// After calling it, the node can be safely used on concurrency +// LoadAll loads the node's children +// and ensure all its children can be READ concurrently (include its children's children) func (self *Node) LoadAll() error { - if self.IsRaw() { - self.parseRaw(true) - return self.Check() - } - - switch self.itype() { - case types.V_ARRAY: - e := self.len() - if err := self.loadAllIndex(); err != nil { - return err - } - for i := 0; i < e; i++ { - n := self.nodeAt(i) - if n.IsRaw() { - n.parseRaw(true) - } - if err := n.Check(); err != nil { - return err - } - } - return nil - case types.V_OBJECT: - e := self.len() - if err := self.loadAllKey(); err != nil { - return err - } - for i := 0; i < e; i++ { - n := self.pairAt(i) - if n.Value.IsRaw() { - n.Value.parseRaw(true) - } - if err := n.Value.Check(); err != nil { - return err - } - } - return nil - default: - return self.Check() - } + return self.Load() } // Load loads the node's children as parsed. 
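With `LoadAll` collapsing into `Load`, both now mean "parse this subtree so it can be read concurrently afterwards". A sketch of the intended pattern, parse once and fan readers out, assuming only read accessors are used after loading; the document and the checks are illustrative:

```go
package main

import (
	"sync"

	"github.com/bytedance/sonic/ast"
)

func main() {
	root := ast.NewRaw(`{"a":{"1":[1]},"b":[{"1":1}],"c":[1,2]}`)
	// LoadAll parses the node and makes the subtree safe for concurrent
	// READ access (writes still need external synchronization).
	if err := root.LoadAll(); err != nil {
		panic(err)
	}

	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			if v, err := root.GetByPath("c", 1).Int64(); err != nil || v != 2 {
				panic("unexpected value under \"c\"")
			}
			if l, err := root.Get("b").Len(); err != nil || l != 1 {
				panic("unexpected length of \"b\"")
			}
		}()
	}
	wg.Wait()
}
```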
-// After calling it, only the node itself can be used on concurrency (not include its children) +// and ensure all its children can be READ concurrently (include its children's children) func (self *Node) Load() error { - if err := self.checkRaw(); err != nil { - return err - } - switch self.t { - case _V_ARRAY_LAZY: - return self.skipAllIndex() - case _V_OBJECT_LAZY: - return self.skipAllKey() - default: - return self.Check() + case _V_ARRAY_LAZY: self.loadAllIndex(true) + case _V_OBJECT_LAZY: self.loadAllKey(true) + case V_ERROR: return self + case V_NONE: return nil + } + if self.m == nil { + self.m = new(sync.RWMutex) } + return self.checkRaw() } /**---------------------------------- Internal Helper Methods ----------------------------------**/ -func (self *Node) should(t types.ValueType, s string) error { +func (self *Node) should(t types.ValueType) error { if err := self.checkRaw(); err != nil { return err } @@ -1439,28 +1438,38 @@ func (self *Node) skipIndexPair(index int) *Pair { return nil } -func (self *Node) loadAllIndex() error { +func (self *Node) loadAllIndex(loadOnce bool) error { if !self.isLazy() { return nil } var err types.ParsingError parser, stack := self.getParserAndArrayStack() - parser.noLazy = true - *self, err = parser.decodeArray(&stack.v) + if !loadOnce { + parser.noLazy = true + *self, err = parser.decodeArray(&stack.v) + } else { + parser.loadOnce = true + *self, err = parser.decodeArray(&stack.v) + } if err != 0 { return parser.ExportError(err) } return nil } -func (self *Node) loadAllKey() error { +func (self *Node) loadAllKey(loadOnce bool) error { if !self.isLazy() { return nil } var err types.ParsingError parser, stack := self.getParserAndObjectStack() - parser.noLazy = true - *self, err = parser.decodeObject(&stack.v) + if !loadOnce { + parser.noLazy = true + *self, err = parser.decodeObject(&stack.v) + } else { + parser.loadOnce = true + *self, err = parser.decodeObject(&stack.v) + } if err != 0 { return parser.ExportError(err) } @@ -1629,7 +1638,23 @@ func NewRaw(json string) Node { if it == _V_NONE { return Node{} } - return newRawNode(parser.s[start:parser.p], it) + return newRawNode(parser.s[start:parser.p], it, false) +} + +// NewRawConcurrentRead creates a node of raw json, which can be READ +// (GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON) concurrently. +// If the input json is invalid, NewRaw returns a error Node. 
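For contrast with `NewRaw`, a sketch of how the constructor documented above is meant to be used; the only behavioural difference assumed here is that the returned node carries a lock and therefore tolerates concurrent readers:

```go
package main

import (
	"sync"

	"github.com/bytedance/sonic/ast"
)

func main() {
	// Not safe to share: lazy, lock-free node.
	local := ast.NewRaw(`{"k":[1,2,3]}`)
	if v, _ := local.Get("k").Index(0).Int64(); v != 1 {
		panic("unexpected")
	}

	// Safe to share for reads: the raw payload is guarded by an RWMutex.
	shared := ast.NewRawConcurrentRead(`{"k":[1,2,3]}`)
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			if v, _ := shared.GetByPath("k", 2).Int64(); v != 3 {
				panic("unexpected")
			}
		}()
	}
	wg.Wait()
}
```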
+func NewRawConcurrentRead(json string) Node { + parser := NewParserObj(json) + start, err := parser.skip() + if err != 0 { + return *newError(err, err.Message()) + } + it := switchRawType(parser.s[start]) + if it == _V_NONE { + return Node{} + } + return newRawNode(parser.s[start:parser.p], it, true) } // NewAny creates a node of type V_ANY if any's type isn't Node or *Node, @@ -1689,15 +1714,15 @@ func NewNumber(v string) Node { } } -func (node Node) toNumber() json.Number { +func (node *Node) toNumber() json.Number { return json.Number(rt.StrFrom(node.p, int64(node.l))) } -func (self Node) toString() string { +func (self *Node) toString() string { return rt.StrFrom(self.p, int64(self.l)) } -func (node Node) toFloat64() (float64, error) { +func (node *Node) toFloat64() (float64, error) { ret, err := node.toNumber().Float64() if err != nil { return 0, err @@ -1705,7 +1730,7 @@ func (node Node) toFloat64() (float64, error) { return ret, nil } -func (node Node) toInt64() (int64, error) { +func (node *Node) toInt64() (int64, error) { ret,err := node.toNumber().Int64() if err != nil { return 0, err @@ -1777,48 +1802,34 @@ func (self *Node) setObject(v *linkedPairs) { self.p = unsafe.Pointer(v) } -func newRawNode(str string, typ types.ValueType) Node { - return Node{ - t: _V_RAW | typ, - p: rt.StrPtr(str), - l: uint(len(str)), - } -} - func (self *Node) parseRaw(full bool) { + lock := self.lock() + defer self.unlock() + if !self.isRaw() { + return + } raw := self.toString() parser := NewParserObj(raw) + var e types.ParsingError if full { parser.noLazy = true - parser.skipValue = false + *self, e = parser.Parse() + } else if lock { + var n Node + parser.noLazy = true + parser.loadOnce = true + n, e = parser.Parse() + self.assign(n) + } else { + *self, e = parser.Parse() } - var e types.ParsingError - *self, e = parser.Parse() if e != 0 { *self = *newSyntaxError(parser.syntaxError(e)) } } -var typeJumpTable = [256]types.ValueType{ - '"' : types.V_STRING, - '-' : _V_NUMBER, - '0' : _V_NUMBER, - '1' : _V_NUMBER, - '2' : _V_NUMBER, - '3' : _V_NUMBER, - '4' : _V_NUMBER, - '5' : _V_NUMBER, - '6' : _V_NUMBER, - '7' : _V_NUMBER, - '8' : _V_NUMBER, - '9' : _V_NUMBER, - '[' : types.V_ARRAY, - 'f' : types.V_FALSE, - 'n' : types.V_NULL, - 't' : types.V_TRUE, - '{' : types.V_OBJECT, -} - -func switchRawType(c byte) types.ValueType { - return typeJumpTable[c] +func (self *Node) assign(n Node) { + self.l = n.l + self.p = n.p + atomic.StoreInt64(&self.t, n.t) } diff --git a/ast/node_test.go b/ast/node_test.go index ae40c18c4..60a2aadeb 100644 --- a/ast/node_test.go +++ b/ast/node_test.go @@ -17,18 +17,18 @@ package ast import ( - `bytes` - `encoding/json` - `errors` - `fmt` - `reflect` - `strconv` - `testing` - - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` - `github.com/stretchr/testify/assert` - `github.com/stretchr/testify/require` + "bytes" + "encoding/json" + "errors" + "fmt" + "reflect" + "strconv" + "testing" + + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestNodeSortKeys(t *testing.T) { @@ -151,20 +151,21 @@ func TestLoadAll(t *testing.T) { if err = root.Load(); err != nil { t.Fatal(err) } - if root.len() != 3 { - t.Fatal(root.len()) + + if l, _ := root.Len(); l != 3 { + t.Fatal(root.Len()) } c := root.Get("c") - if !c.IsRaw() { + if !c.isRaw() { t.Fatal(err) } err = c.LoadAll() if err != nil { t.Fatal(err) } - if 
c.len() != 2 { - t.Fatal(c.len()) + if l, _ := c.Len(); l != 2 { + t.Fatal(c.Len()) } c1 := c.nodeAt(0) if n, err := c1.Int64(); err != nil || n != 1 { @@ -174,47 +175,47 @@ func TestLoadAll(t *testing.T) { a := root.pairAt(0) if a.Key != "a" { t.Fatal(a.Key) - } else if !a.Value.IsRaw() { + } else if !a.Value.isRaw() { t.Fatal(a.Value.itype()) - } else if n, err := a.Value.Len(); n != 0 || err != nil { + } else if n, err := a.Value.Len(); n != 2 || err != nil { t.Fatal(n, err) } if err := a.Value.Load(); err != nil { t.Fatal(err) } - if a.Value.len() != 2 { - t.Fatal(a.Value.len()) + if l, _ := a.Value.Len(); l != 2 { + t.Fatal(a.Value.Len()) } a1 := a.Value.Get("1") - if !a1.IsRaw() { + if !a1.isRaw() { t.Fatal(a1) } a.Value.LoadAll() - if a1.t != types.V_ARRAY || a1.len() != 1 { - t.Fatal(a1.t, a1.len()) + if l, _ := a1.Len(); a1.t != types.V_ARRAY || l != 1 { + t.Fatal(a1.t) } b := root.pairAt(1) if b.Key != "b" { t.Fatal(b.Key) - } else if !b.Value.IsRaw() { + } else if !b.Value.isRaw() { t.Fatal(b.Value.itype()) - } else if n, err := b.Value.Len(); n != 0 || err != nil { + } else if n, err := b.Value.Len(); n != 2 || err != nil { t.Fatal(n, err) } if err := b.Value.Load(); err != nil { t.Fatal(err) } - if b.Value.len() != 2 { - t.Fatal(b.Value.len()) + if l, _ := b.Value.Len(); l != 2 { + t.Fatal(b.Value.Len()) } b1 := b.Value.Index(0) - if !b1.IsRaw() { + if !b1.isRaw() { t.Fatal(b1) } b.Value.LoadAll() - if b1.t != types.V_OBJECT || b1.len() != 1 { - t.Fatal(a1.t, a1.len()) + if l, _ := b1.Len(); b1.t != types.V_OBJECT || l != 1 { + t.Fatal(a1.Len()) } } @@ -699,12 +700,12 @@ func TestCheckError_Empty(t *testing.T) { t.Fatal() } - n := newRawNode("[hello]", types.V_ARRAY) + n := newRawNode("[hello]", types.V_ARRAY, false) n.parseRaw(false) if n.Check() != nil { t.Fatal(n.Check()) } - n = newRawNode("[hello]", types.V_ARRAY) + n = newRawNode("[hello]", types.V_ARRAY, false) n.parseRaw(true) p := NewParser("[hello]") p.noLazy = true @@ -735,7 +736,7 @@ func TestCheckError_Empty(t *testing.T) { if e != nil { t.Fatal(e) } - exist, e := a.Set("d", newRawNode("x", types.V_OBJECT)) + exist, e := a.Set("d", newRawNode("x", types.V_OBJECT, false)) if exist || e != nil { t.Fatal(err) } @@ -746,7 +747,7 @@ func TestCheckError_Empty(t *testing.T) { if d.Check() == nil { t.Fatal(d) } - exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY)) + exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY, false)) if e != nil { t.Fatal(e) } @@ -839,7 +840,7 @@ func TestUnset(t *testing.T) { *entities = NewRaw(string(out)) hashtags := entities.Get("hashtags").Index(0) - hashtags.Set("text2", newRawNode(`{}`, types.V_OBJECT)) + hashtags.Set("text2", NewRaw(`{}`)) exist, err = hashtags.Unset("indices") // NOTICE: Unset() won't change node.Len() here if !exist || err != nil || hashtags.len() != 2 { t.Fatal(hashtags.len()) diff --git a/ast/parser.go b/ast/parser.go index 506f9d86c..6f95ed2e6 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -17,10 +17,12 @@ package ast import ( - `fmt` + "fmt" + "sync" + "sync/atomic" - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" ) const ( @@ -45,10 +47,13 @@ type Parser struct { p int s string noLazy bool + loadOnce bool skipValue bool dbuf *byte } +// var noLazy = option.AstSafeConcurrentRead + /** Parser Private Methods **/ func (self *Parser) delim() types.ParsingError { @@ -152,7 +157,7 @@ func (self *Parser) decodeArray(ret 
*linkedNodes) (Node, types.ParsingError) { if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } - val = newRawNode(self.s[start:self.p], t) + val = newRawNode(self.s[start:self.p], t, false) }else{ /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -238,7 +243,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } - val = newRawNode(self.s[start:self.p], t) + val = newRawNode(self.s[start:self.p], t, false) } else { /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -295,6 +300,10 @@ func (self *Parser) Pos() int { return self.p } + +// Parse returns a ast.Node representing the parser's JSON. +// NOTICE: the specific parsing lazy dependens parser's option +// It only parse first layer and first child for Object or Array be default func (self *Parser) Parse() (Node, types.ParsingError) { switch val := self.decodeValue(); val.Vt { case types.V_EOF : return Node{}, types.ERR_EOF @@ -303,22 +312,48 @@ func (self *Parser) Parse() (Node, types.ParsingError) { case types.V_FALSE : return falseNode, 0 case types.V_STRING : return self.decodeString(val.Iv, val.Ep) case types.V_ARRAY: + s := self.p - 1; if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' { self.p = p + 1 return Node{t: types.V_ARRAY}, 0 } if self.noLazy { + if self.loadOnce { + self.noLazy = false + } return self.decodeArray(new(linkedNodes)) } + // NOTICE: loadOnce always keep raw json for object or array + if self.loadOnce { + self.p = s + s, e := self.skipFast() + if e != 0 { + return Node{}, e + } + return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0 + } return newLazyArray(self), 0 case types.V_OBJECT: + s := self.p - 1; if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' { self.p = p + 1 return Node{t: types.V_OBJECT}, 0 } + // NOTICE: loadOnce always keep raw json for object or array if self.noLazy { + if self.loadOnce { + self.noLazy = false + } return self.decodeObject(new(linkedPairs)) } + if self.loadOnce { + self.p = s + s, e := self.skipFast() + if e != 0 { + return Node{}, e + } + return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0 + } return newLazyObject(self), 0 case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0 case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0 @@ -475,7 +510,7 @@ func (self *Node) skipNextNode() *Node { if t == _V_NONE { return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) } - val = newRawNode(parser.s[start:parser.p], t) + val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ @@ -558,7 +593,7 @@ func (self *Node) skipNextPair() (*Pair) { if t == _V_NONE { return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} } - val = newRawNode(parser.s[start:parser.p], t) + val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ @@ -662,3 +697,72 @@ func backward(src string, i int) int { for ; i>=0 && isSpace(src[i]); i-- {} return i } + + +func newRawNode(str string, typ types.ValueType, lock bool) Node { + ret := Node{ + t: typ | _V_RAW, + p: rt.StrPtr(str), + l: uint(len(str)), + } + if lock { + ret.m = new(sync.RWMutex) + } + return ret +} + +var typeJumpTable = [256]types.ValueType{ + '"' : types.V_STRING, + '-' : _V_NUMBER, + '0' : _V_NUMBER, + '1' : _V_NUMBER, + '2' : _V_NUMBER, + '3' : _V_NUMBER, + '4' : _V_NUMBER, + '5' : _V_NUMBER, + '6' : _V_NUMBER, + '7' : _V_NUMBER, + '8' : _V_NUMBER, + '9' : _V_NUMBER, + '[' 
: types.V_ARRAY, + 'f' : types.V_FALSE, + 'n' : types.V_NULL, + 't' : types.V_TRUE, + '{' : types.V_OBJECT, +} + +func switchRawType(c byte) types.ValueType { + return typeJumpTable[c] +} + +func (self *Node) loadt() types.ValueType { + return (types.ValueType)(atomic.LoadInt64(&self.t)) +} + +func (self *Node) lock() bool { + if m := self.m; m != nil { + m.Lock() + return true + } + return false +} + +func (self *Node) unlock() { + if m := self.m; m != nil { + m.Unlock() + } +} + +func (self *Node) rlock() bool { + if m := self.m; m != nil { + m.RLock() + return true + } + return false +} + +func (self *Node) runlock() { + if m := self.m; m != nil { + m.RUnlock() + } +} diff --git a/ast/parser_test.go b/ast/parser_test.go index 2469bc411..594f3437d 100644 --- a/ast/parser_test.go +++ b/ast/parser_test.go @@ -17,16 +17,16 @@ package ast import ( - `encoding/json` - `os` - `runtime` - `runtime/debug` - `sync` - `testing` - `time` + "encoding/json" + "os" + "runtime" + "runtime/debug" + "sync" + "testing" + "time" - `github.com/stretchr/testify/assert` - `github.com/stretchr/testify/require` + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var ( @@ -316,6 +316,46 @@ func BenchmarkParseOne_Parallel_Sonic(b *testing.B) { }) } +func BenchmarkParseNoLazy_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + for i := 0; i < b.N; i++ { + node := ast.GetByPath("statuses", 3) + if node.Check() != nil { + b.Fail() + } + } +} + +func BenchmarkParseNoLazy_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + b.RunParallel(func(p *testing.PB) { + for p.Next() { + node := ast.GetByPath("statuses", 3) + if node.Check() != nil { + b.Fail() + } + } + }) +} + +func BenchmarkNodeRaw_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + b.RunParallel(func(p *testing.PB) { + for p.Next() { + node := ast.GetByPath("statuses", 3) + if _, e := node.Raw(); e != nil { + b.Fatal(e) + } + } + }) +} + func BenchmarkParseSeven_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() diff --git a/ast/search.go b/ast/search.go index a8d1e76f6..9a5fb9420 100644 --- a/ast/search.go +++ b/ast/search.go @@ -21,8 +21,23 @@ import ( `github.com/bytedance/sonic/internal/native/types` ) +// SearchOptions controls Searcher's behavior +type SearchOptions struct { + // ValidateJSON indicates the searcher to validate the entire JSON + ValidateJSON bool + + // CopyReturn indicates the searcher to copy the result JSON instead of refer from the input + // This can help to reduce memory usage if you cache the results + CopyReturn bool + + // ConcurrentRead indicates the searcher to return a concurrently-READ-safe node, + // including: GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON + ConcurrentRead bool +} + type Searcher struct { parser Parser + SearchOptions } func NewSearcher(str string) *Searcher { @@ -31,12 +46,16 @@ func NewSearcher(str string) *Searcher { s: str, noLazy: false, }, + SearchOptions: SearchOptions{ + ValidateJSON: true, + }, } } // GetByPathCopy search in depth from top json and returns a **Copied** json node at the path location func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) { - return self.getByPath(true, true, path...) 
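`NewSearcher` now seeds `SearchOptions` with `ValidateJSON: true`, so the validating behaviour stays the default. When the input is already known to be valid, switching validation off trades safety for a little speed; a sketch under that assumption (the "trusted" document is illustrative):

```go
package main

import (
	"fmt"

	"github.com/bytedance/sonic/ast"
)

func main() {
	trusted := `{"user":{"id":42,"name":"alice"}}` // e.g. produced by our own encoder

	s := ast.NewSearcher(trusted)
	s.ValidateJSON = false // skip whole-document validation for trusted input

	node, err := s.GetByPath("user", "id")
	if err != nil {
		panic(err)
	}
	id, _ := node.Int64()
	fmt.Println(id) // 42
}
```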
+ self.CopyReturn = true + return self.getByPath(path...) } // GetByPathNoCopy search in depth from top json and returns a **Referenced** json node at the path location @@ -44,15 +63,15 @@ func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) { // WARN: this search directly refer partial json from top json, which has faster speed, // may consumes more memory. func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { - return self.getByPath(false, true, path...) + return self.getByPath(path...) } -func (self *Searcher) getByPath(copystring bool, validate bool, path ...interface{}) (Node, error) { +func (self *Searcher) getByPath(path ...interface{}) (Node, error) { var err types.ParsingError var start int self.parser.p = 0 - start, err = self.parser.getByPath(validate, path...) + start, err = self.parser.getByPath(self.ValidateJSON, path...) if err != 0 { // for compatibility with old version if err == types.ERR_NOT_FOUND { @@ -71,12 +90,12 @@ func (self *Searcher) getByPath(copystring bool, validate bool, path ...interfac // copy string to reducing memory usage var raw string - if copystring { + if self.CopyReturn { raw = rt.Mem2Str([]byte(self.parser.s[start:self.parser.p])) } else { raw = self.parser.s[start:self.parser.p] } - return newRawNode(raw, t), nil + return newRawNode(raw, t, self.ConcurrentRead), nil } // GetByPath searches a path and returns relaction and types of target diff --git a/ast/search_test.go b/ast/search_test.go index 6cbacc500..5a264c7b0 100644 --- a/ast/search_test.go +++ b/ast/search_test.go @@ -25,6 +25,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestGC_Search(t *testing.T) { @@ -54,6 +55,72 @@ func TestGC_Search(t *testing.T) { wg.Wait() } + +func TestNodeRace(t *testing.T) { + + src := `{"1":1,"2": [ 1 , 1 , { "3" : 1 , "4" : [] } ] }` + s := NewSearcher(src) + s.ConcurrentRead = true + node, _ := s.GetByPath() + + cases := []struct{ + path []interface{} + exp []string + scalar bool + lv int + }{ + {[]interface{}{"1"}, []string{`1`}, true, 0}, + {[]interface{}{"2"}, []string{`[ 1 , 1 , { "3" : 1 , "4" : [] } ]`, `[1,1,{ "3" : 1 , "4" : [] }]`, `[1,1,{"3":1,"4":[]}]`}, false, 3}, + {[]interface{}{"2", 1}, []string{`1`}, true, 1}, + {[]interface{}{"2", 2}, []string{`{ "3" : 1 , "4" : [] }`, `{"3":1,"4":[]}`}, false, 2}, + {[]interface{}{"2", 2, "3"}, []string{`1`}, true, 0}, + {[]interface{}{"2", 2, "4"}, []string{`[]`}, false, 0}, + } + + wg := sync.WaitGroup{} + start := sync.RWMutex{} + start.Lock() + + P := 100 + for i := range cases { + // println(i) + c := cases[i] + for j := 0; j < P; j++ { + wg.Add(1) + go func () { + defer wg.Done() + start.RLock() + n := node.GetByPath(c.path...) 
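As `getByPath` above shows, `CopyReturn` detaches the returned raw JSON from the input string, which is what makes caching results practical. A sketch of that caching scenario; `cache` and `lookup` are hypothetical helper names for illustration only:

```go
package main

import (
	"github.com/bytedance/sonic/ast"
)

// cache keeps only the matched fragments alive; with CopyReturn the large
// input string is not pinned by the cached nodes.
var cache = map[string]ast.Node{}

func lookup(doc, key string) (ast.Node, error) {
	if n, ok := cache[key]; ok {
		return n, nil
	}
	s := ast.NewSearcher(doc)
	s.CopyReturn = true // copy the matched JSON out of doc before caching
	n, err := s.GetByPath(key)
	if err != nil {
		return ast.Node{}, err
	}
	cache[key] = n
	return n, nil
}

func main() {
	doc := `{"small":1,"huge":[1,2,3]}`
	n, err := lookup(doc, "small")
	if err != nil {
		panic(err)
	}
	if v, _ := n.Int64(); v != 1 {
		panic("unexpected")
	}
}
```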
+ _ = n.TypeSafe() + v, err := n.Raw() + iv, _ := n.Int64() + lv, _ := n.Len() + _, e := n.Interface() + e2 := n.SortKeys(false) + require.NoError(t, err) + require.NoError(t, e) + require.NoError(t, e2) + if c.scalar { + require.Equal(t, int64(1), iv) + } else { + require.Equal(t, c.lv, lv) + } + eq := false + for _, exp := range c.exp { + if exp == v { + eq = true + break + } + } + require.True(t, eq) + }() + } + } + + start.Unlock() + wg.Wait() +} + func TestExportErrorInvalidChar(t *testing.T) { data := `{"a":]` p := NewSearcher(data) @@ -325,6 +392,22 @@ func BenchmarkGetOne_Sonic(b *testing.B) { } } +func BenchmarkGetOneSafe_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + ast := NewSearcher(_TwitterJson) + ast.ConcurrentRead = true + for i := 0; i < b.N; i++ { + node, err := ast.GetByPath("statuses", 3, "id") + if err != nil { + b.Fatal(err) + } + x, _ := node.Int64() + if x != 249279667666817024 { + b.Fatal(node.Interface()) + } + } +} + func BenchmarkGetFull_Sonic(b *testing.B) { ast := NewSearcher(_TwitterJson) b.SetBytes(int64(len(_TwitterJson))) @@ -370,6 +453,24 @@ func BenchmarkGetOne_Parallel_Sonic(b *testing.B) { }) } +func BenchmarkGetOneSafe_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.RunParallel(func(pb *testing.PB) { + ast := NewSearcher(_TwitterJson) + ast.ConcurrentRead = true + for pb.Next() { + node, err := ast.GetByPath("statuses", 3, "id") + if err != nil { + b.Fatal(err) + } + x, _ := node.Int64() + if x != 249279667666817024 { + b.Fatal(node.Interface()) + } + } + }) +} + func BenchmarkSetOne_Sonic(b *testing.B) { node, err := NewSearcher(_TwitterJson).GetByPath("statuses", 3) if err != nil { diff --git a/ast/visitor_test.go b/ast/visitor_test.go index 2618c5460..221996dfa 100644 --- a/ast/visitor_test.go +++ b/ast/visitor_test.go @@ -225,9 +225,7 @@ func (self *visitorNodeDiffTest) OnObjectEnd() error { require.NotNil(self.t, object) node := self.stk[self.sp-1].Node - ps, err := node.unsafeMap() - var pairs = make([]Pair, ps.Len()) - ps.ToSlice(pairs) + pairs, err := node.MapUseNode() require.NoError(self.t, err) keysGot := make([]string, 0, len(object)) @@ -235,16 +233,16 @@ func (self *visitorNodeDiffTest) OnObjectEnd() error { keysGot = append(keysGot, key) } keysWant := make([]string, 0, len(pairs)) - for _, pair := range pairs { - keysWant = append(keysWant, pair.Key) + for key := range pairs { + keysWant = append(keysWant, key) } sort.Strings(keysGot) sort.Strings(keysWant) require.EqualValues(self.t, keysWant, keysGot) - for _, pair := range pairs { - typeGot := object[pair.Key].Type() - typeWant := pair.Value.Type() + for key, pair := range pairs { + typeGot := object[key].Type() + typeWant := pair.Type() require.EqualValues(self.t, typeWant, typeGot) } @@ -278,10 +276,8 @@ func (self *visitorNodeDiffTest) OnArrayEnd() error { require.NotNil(self.t, array) node := self.stk[self.sp-1].Node - vs, err := node.unsafeArray() + values, err := node.ArrayUseNode() require.NoError(self.t, err) - var values = make([]Node, vs.Len()) - vs.ToSlice(values) require.EqualValues(self.t, len(values), len(array)) @@ -470,13 +466,13 @@ func (self *visitorUserNodeASTDecoder) decodeValue(root *Node) (visitorUserNode, value, ierr, ferr) case V_ARRAY: - nodes, err := root.unsafeArray() + nodes, err := root.ArrayUseNode() if err != nil { return nil, err } - values := make([]visitorUserNode, nodes.Len()) - for i := 0; i Date: Sat, 29 Jun 2024 21:39:32 +0800 Subject: [PATCH 2/4] opt:(ast) use hash map to speed up searching object key 
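The object key index introduced by this commit is internal (`linkedPairs` gains an `index` map, built once an object grows past `_Threshold_Index`), so the public API is unchanged; large objects simply resolve key lookups through a hash instead of a linear scan. A sketch of the access pattern that benefits, with the key count chosen arbitrarily to stay above the threshold:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/bytedance/sonic/ast"
)

func main() {
	// Build an object with enough keys to cross the indexing threshold.
	var b strings.Builder
	b.WriteByte('{')
	for i := 0; i < 64; i++ {
		if i > 0 {
			b.WriteByte(',')
		}
		fmt.Fprintf(&b, `"k%d":%d`, i, i)
	}
	b.WriteByte('}')

	root := ast.NewRaw(b.String())
	// Parses the object; objects above the internal threshold also get a key index.
	if err := root.Load(); err != nil {
		panic(err)
	}
	for i := 0; i < 64; i++ {
		if v, _ := root.Get(fmt.Sprintf("k%d", i)).Int64(); v != int64(i) {
			panic("lookup mismatch")
		}
	}
}
```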
--- ast/buffer.go | 89 ++++++++++++++++++++++++++++++++++++++-------- ast/buffer_test.go | 14 ++++---- ast/encode_test.go | 4 +-- ast/error.go | 4 +++ ast/iterator.go | 14 ++++++-- ast/node.go | 12 +++++-- ast/node_test.go | 14 ++++---- ast/parser.go | 20 +++++------ 8 files changed, 127 insertions(+), 44 deletions(-) diff --git a/ast/buffer.go b/ast/buffer.go index bccbf4814..04701ef5b 100644 --- a/ast/buffer.go +++ b/ast/buffer.go @@ -17,8 +17,10 @@ package ast import ( - `sort` - `unsafe` + "sort" + "unsafe" + + "github.com/bytedance/sonic/internal/caching" ) type nodeChunk [_DEFAULT_NODE_CAP]Node @@ -90,18 +92,11 @@ func (self *linkedNodes) Pop() { self.size-- } -func (self *linkedPairs) Pop() { - if self == nil || self.size == 0 { - return - } - self.Set(self.size-1, Pair{}) - self.size-- -} - func (self *linkedNodes) Push(v Node) { self.Set(self.size, v) } + func (self *linkedNodes) Set(i int, v Node) { if i < _DEFAULT_NODE_CAP { self.head[i] = v @@ -195,11 +190,22 @@ func (self *linkedNodes) FromSlice(con []Node) { type pairChunk [_DEFAULT_NODE_CAP]Pair type linkedPairs struct { + index map[uint64]int head pairChunk tail []*pairChunk size int } +func (self *linkedPairs) BuildIndex() { + if self.index == nil { + self.index = make(map[uint64]int, self.size) + } + for i:=0; i _Threshold_Index { + v.BuildIndex() + } return Node{ t: types.V_OBJECT, l: uint(v.Len()), @@ -1797,6 +1802,9 @@ func newObject(v *linkedPairs) Node { } func (self *Node) setObject(v *linkedPairs) { + if v.size > _Threshold_Index { + v.BuildIndex() + } self.t = types.V_OBJECT self.l = uint(v.Len()) self.p = unsafe.Pointer(v) diff --git a/ast/node_test.go b/ast/node_test.go index 60a2aadeb..002403ffe 100644 --- a/ast/node_test.go +++ b/ast/node_test.go @@ -287,14 +287,14 @@ func TestTypeCast(t *testing.T) { {"Map", Node{}, map[string]interface{}(nil), ErrUnsupportType}, {"Map", NewAny(map[string]Node{"a": NewNumber("1")}), map[string]interface{}(nil), ErrUnsupportType}, {"Map", NewAny(map[string]interface{}{"a": 1}), map[string]interface{}{"a": 1}, nil}, - {"Map", NewObject([]Pair{{"a", NewNumber("1")}}), map[string]interface{}{"a": float64(1.0)}, nil}, + {"Map", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]interface{}{"a": float64(1.0)}, nil}, {"MapUseNode", Node{}, map[string]Node(nil), ErrUnsupportType}, {"MapUseNode", NewAny(map[string]interface{}{"a": 1}), map[string]Node(nil), ErrUnsupportType}, {"MapUseNode", NewAny(map[string]Node{"a": NewNumber("1")}), map[string]Node{"a": NewNumber("1")}, nil}, - {"MapUseNode", NewObject([]Pair{{"a", NewNumber("1")}}), map[string]Node{"a": NewNumber("1")}, nil}, + {"MapUseNode", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]Node{"a": NewNumber("1")}, nil}, {"MapUseNumber", Node{}, map[string]interface{}(nil), ErrUnsupportType}, {"MapUseNumber", NewAny(map[string]interface{}{"a": 1}), map[string]interface{}{"a": 1}, nil}, - {"MapUseNumber", NewObject([]Pair{{"a", NewNumber("1")}}), map[string]interface{}{"a": json.Number("1")}, nil}, + {"MapUseNumber", NewObject([]Pair{NewPair("a", NewNumber("1"))}), map[string]interface{}{"a": json.Number("1")}, nil}, {"Array", Node{}, []interface{}(nil), ErrUnsupportType}, {"Array", NewAny([]interface{}{1}), []interface{}{1}, nil}, {"Array", NewArray([]Node{NewNumber("1")}), []interface{}{float64(1.0)}, nil}, @@ -506,7 +506,7 @@ func TestTypeCast(t *testing.T) { {"Cap", NewAny(0), 0, ErrUnsupportType}, {"Cap", NewNull(), 0, nil}, {"Cap", NewRaw(`[1]`), _DEFAULT_NODE_CAP, nil}, - {"Cap", NewObject([]Pair{{"", 
NewNull()}}), _DEFAULT_NODE_CAP, nil}, + {"Cap", NewObject([]Pair{NewPair("", NewNull())}), _DEFAULT_NODE_CAP, nil}, {"Cap", NewRaw(`{"a":1}`), _DEFAULT_NODE_CAP, nil}, } lazyArray.skipAllIndex() @@ -1762,7 +1762,7 @@ func BenchmarkSliceUnsetByIndex(b *testing.B) { } func BenchmarkNodeAdd(b *testing.B) { - n := NewObject([]Pair{{"test", NewNumber("1")}}) + n := NewObject([]Pair{NewPair("test", NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := NewArray([]Node{}) @@ -1771,7 +1771,7 @@ func BenchmarkNodeAdd(b *testing.B) { } func BenchmarkSliceAdd(b *testing.B) { - n := NewObject([]Pair{{"test", NewNumber("1")}}) + n := NewObject([]Pair{NewPair("test", NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := []Node{} @@ -1780,7 +1780,7 @@ func BenchmarkSliceAdd(b *testing.B) { } func BenchmarkMapAdd(b *testing.B) { - n := NewObject([]Pair{{"test", NewNumber("1")}}) + n := NewObject([]Pair{NewPair("test", NewNumber("1"))}) b.ResetTimer() for i := 0; i < b.N; i++ { node := map[string]Node{} diff --git a/ast/parser.go b/ast/parser.go index 6f95ed2e6..85ea4d9b9 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -253,7 +253,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { /* add the value to result */ // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !! - ret.Push(Pair{Key: key, Value: val}) + ret.Push(NewPair(key, val)) self.p = self.lspace(self.p) /* check for EOF */ @@ -549,7 +549,7 @@ func (self *Node) skipNextPair() (*Pair) { /* check for EOF */ if parser.p = parser.lspace(sp); parser.p >= ns { - return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))} + return newErrorPair(parser.syntaxError(types.ERR_EOF)) } /* check for empty object */ @@ -566,7 +566,7 @@ func (self *Node) skipNextPair() (*Pair) { /* decode the key */ if njs = parser.decodeValue(); njs.Vt != types.V_STRING { - return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } /* extract the key */ @@ -576,34 +576,34 @@ func (self *Node) skipNextPair() (*Pair) { /* check for escape sequence */ if njs.Ep != -1 { if key, err = unquote(key); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } } /* expect a ':' delimiter */ if err = parser.delim(); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } /* skip the value */ if start, err := parser.skipFast(); err != 0 { - return &Pair{key, *newSyntaxError(parser.syntaxError(err))} + return newErrorPair(parser.syntaxError(err)) } else { t := switchRawType(parser.s[start]) if t == _V_NONE { - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ - ret.Push(Pair{Key: key, Value: val}) + ret.Push(NewPair(key, val)) self.l++ parser.p = parser.lspace(parser.p) /* check for EOF */ if parser.p >= ns { - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))} + return newErrorPair(parser.syntaxError(types.ERR_EOF)) } /* check for the next character */ @@ -616,7 +616,7 @@ func (self *Node) skipNextPair() (*Pair) { self.setObject(ret) return ret.At(ret.Len()-1) default: - return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} + return 
newErrorPair(parser.syntaxError(types.ERR_INVALID_CHAR)) } } From 1aaf54a3eafaf9b43a5f93e9a7ab35655d8cab4c Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Sun, 30 Jun 2024 13:36:21 +0800 Subject: [PATCH 3/4] opt: enlarge `_Threshold_Index` and `_DEFAULT_NODE_CAP` --- ast/node.go | 2 +- ast/node_test.go | 9 +++++---- ast/parser.go | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ast/node.go b/ast/node.go index 5ff5b4142..2d62f8f1d 100644 --- a/ast/node.go +++ b/ast/node.go @@ -1766,7 +1766,7 @@ func NewArray(v []Node) Node { return newArray(s) } -const _Threshold_Index = _DEFAULT_NODE_CAP +const _Threshold_Index = 16 func newArray(v *linkedNodes) Node { return Node{ diff --git a/ast/node_test.go b/ast/node_test.go index 002403ffe..dc1027597 100644 --- a/ast/node_test.go +++ b/ast/node_test.go @@ -23,6 +23,7 @@ import ( "fmt" "reflect" "strconv" + "strings" "testing" "github.com/bytedance/sonic/internal/native/types" @@ -271,7 +272,7 @@ func TestTypeCast(t *testing.T) { } var nonEmptyErr error = errors.New("") a1 := NewAny(1) - lazyArray, _ := NewParser("[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]").Parse() + lazyArray, _ := NewParser("["+strings.Repeat("1,", _DEFAULT_NODE_CAP)+"1]").Parse() lazyObject, _ := NewParser(`{"0":0,"1":1,"2":2,"3":3,"4":4,"5":5,"6":6,"7":7,"8":8,"9":9,"10":10,"11":11,"12":12,"13":13,"14":14,"15":15,"16":16}`).Parse() var cases = []tcase{ {"Interface", Node{}, interface{}(nil), ErrUnsupportType}, @@ -512,10 +513,10 @@ func TestTypeCast(t *testing.T) { lazyArray.skipAllIndex() lazyObject.skipAllKey() cases = append(cases, - tcase{"Len", lazyArray, 17, nil}, + tcase{"Len", lazyArray, _DEFAULT_NODE_CAP+1, nil}, tcase{"Len", lazyObject, 17, nil}, - tcase{"Cap", lazyArray, _DEFAULT_NODE_CAP * 3, nil}, - tcase{"Cap", lazyObject, _DEFAULT_NODE_CAP * 3, nil}, + tcase{"Cap", lazyArray, _DEFAULT_NODE_CAP*2, nil}, + tcase{"Cap", lazyObject, _DEFAULT_NODE_CAP*2, nil}, ) for i, c := range cases { diff --git a/ast/parser.go b/ast/parser.go index 85ea4d9b9..7e7991c26 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -26,7 +26,7 @@ import ( ) const ( - _DEFAULT_NODE_CAP int = 8 + _DEFAULT_NODE_CAP int = 16 _APPEND_GROW_SHIFT = 1 ) From 2155f343da7ccffe91e9cf621b4ec559c00ac295 Mon Sep 17 00:00:00 2001 From: "duanyi.aster" Date: Fri, 19 Jul 2024 16:36:23 +0800 Subject: [PATCH 4/4] chore: api comment chore --- README.md | 28 ++++++++++++++-------------- README_ZH_CN.md | 15 ++++++++++++++- api.go | 2 ++ ast/node.go | 8 ++++---- ast/parser.go | 2 -- ast/search_test.go | 1 + 6 files changed, 35 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index a6c8314f9..64c2bc90d 100644 --- a/README.md +++ b/README.md @@ -282,6 +282,20 @@ sub := root.Get("key3").Index(2).Int64() // == 3 **Tip**: since `Index()` uses offset to locate data, which is much faster than scanning like `Get()`, we suggest you use it as much as possible. And sonic also provides another API `IndexOrGet()` to underlying use offset as well as ensure the key is matched. +#### SearchOption +`Searcher` provides some options for user to meet different needs: +```go +opts := ast.SearchOption{ CopyReturn: true ... } +val, err := sonic.GetWithOptions(JSON, opts, "key") +``` +- CopyReturn +Indicate the searcher to copy the result JSON string instead of refer from the input. This can help to reduce memory usage if you cache the results +- ConcurentRead +Since `ast.Node` use `Lazy-Load` design, it doesn't support Concurrently-Read by default. 
If you want to read it concurrently, please specify it. +- ValidateJSON +Indicate the searcher to validate the entire JSON. This option is enabled by default, which slow down the search speed a little. + + #### Set/Unset Modify the json content by Set()/Unset() @@ -300,20 +314,6 @@ println(alias1 == alias2) // true exist, err := root.UnsetByIndex(1) // exist == true println(root.Get("key4").Check()) // "value not exist" ``` - -#### SearchOption -```go -opts := ast.SearchOption{ CopyReturn: true ... } -val, err := ast.GetWithOption(JSON, opts, "key"...) -``` -`Searcher` provides some options for use to meet different needs: -- CopyReturn -Indicate the searcher to copy the result JSON string instead of refer from the input. This can help to reduce memory usage if you cache the results -- ConcurentRead -Since `ast.Node` use `Lazy-Load` design, it doesn't support Concurrently-Read by default. If you want to read it concurrently, please specify it. -- ValidateJSON -Indicate the searcher to validate the entire JSON. This option is enabled by default. - #### Serialize To encode `ast.Node` as json, use `MarshalJson()` or `json.Marshal()` (MUST pass the node's pointer) diff --git a/README_ZH_CN.md b/README_ZH_CN.md index 2f7a4b6a4..329ffc0e9 100644 --- a/README_ZH_CN.md +++ b/README_ZH_CN.md @@ -260,7 +260,7 @@ fmt.Printf("%+v", data) // {A:0 B:1} ### `Ast.Node` -Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改通用数据的鲁棒的 API。 +Sonic/ast.Node 是完全独立的 JSON 抽象语法树库。它实现了序列化和反序列化,并提供了获取和修改JSON数据的鲁棒的 API。 #### 查找/索引 @@ -282,6 +282,19 @@ sub := root.Get("key3").Index(2).Int64() // == 3 **注意**:由于 `Index()` 使用偏移量来定位数据,比使用扫描的 `Get()` 要快的多,建议尽可能的使用 `Index` 。 Sonic 也提供了另一个 API, `IndexOrGet()` ,以偏移量为基础并且也确保键的匹配。 +#### 查找选项 +`ast.Searcher`提供了一些选项,以满足用户的不同需求: +``` +opts:= ast.SearchOption{CopyReturn: true…} +Val, err:= sonic。gettwithoptions (JSON, opts, "key") +``` +- CopyReturn +指示搜索器复制结果JSON字符串,而不是从输入引用。如果用户缓存结果,这有助于减少内存使用 +- ConcurentRead +因为`ast.Node`使用`Lazy-Load`设计,默认不支持并发读取。如果您想同时读取,请指定它。 +- ValidateJSON +指示搜索器来验证整个JSON。默认情况下启用该选项, 但是对于查找速度有一定影响。 + #### 修改 使用 `Set()` / `Unset()` 修改 json 的内容 diff --git a/api.go b/api.go index 33e9b0e16..919a6d125 100644 --- a/api.go +++ b/api.go @@ -207,6 +207,8 @@ func Get(src []byte, path ...interface{}) (ast.Node, error) { return GetCopyFromString(rt.Mem2Str(src), path...) 
} +//GetWithOptions searches and locates the given path from src json, +// with specific options of ast.Searcher func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) { s := ast.NewSearcher(rt.Mem2Str(src)) s.SearchOptions = opts diff --git a/ast/node.go b/ast/node.go index 2d62f8f1d..0fbcf7835 100644 --- a/ast/node.go +++ b/ast/node.go @@ -88,7 +88,7 @@ func (self Node) Type() int { return int(self.t & _MASK_LAZY & _MASK_RAW) } -// Type returns json type represented by the node +// Type concurrently-safe returns json type represented by the node // It will be one of belows: // V_NONE = 0 (empty node, key not exists) // V_ERROR = 1 (error node) @@ -139,6 +139,7 @@ func (self *Node) Check() error { } // isRaw returns true if node's underlying value is raw json +// // Deprecated: not concurent safe func (self Node) IsRaw() bool { return self.t & _V_RAW != 0 @@ -154,7 +155,7 @@ func (self *Node) isLazy() bool { } func (self *Node) isAny() bool { - return self != nil && self.t == _V_ANY + return self != nil && self.loadt() == _V_ANY } /** Simple Value Methods **/ @@ -1446,11 +1447,10 @@ func (self *Node) loadAllIndex(loadOnce bool) error { parser, stack := self.getParserAndArrayStack() if !loadOnce { parser.noLazy = true - *self, err = parser.decodeArray(&stack.v) } else { parser.loadOnce = true - *self, err = parser.decodeArray(&stack.v) } + *self, err = parser.decodeArray(&stack.v) if err != 0 { return parser.ExportError(err) } diff --git a/ast/parser.go b/ast/parser.go index 7e7991c26..30bd1f451 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -52,8 +52,6 @@ type Parser struct { dbuf *byte } -// var noLazy = option.AstSafeConcurrentRead - /** Parser Private Methods **/ func (self *Parser) delim() types.ParsingError { diff --git a/ast/search_test.go b/ast/search_test.go index 5a264c7b0..af2160fbc 100644 --- a/ast/search_test.go +++ b/ast/search_test.go @@ -92,6 +92,7 @@ func TestNodeRace(t *testing.T) { start.RLock() n := node.GetByPath(c.path...) _ = n.TypeSafe() + _ = n.isAny() v, err := n.Raw() iv, _ := n.Int64() lv, _ := n.Len()
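To close the series, a sketch contrasting the classic `sonic.Get` entry point (which, as shown above, always copies via `GetCopyFromString`) with the new `GetWithOptions`, where copying, validation and concurrent readability are chosen by the caller; the sample document and path are illustrative:

```go
package main

import (
	"fmt"

	"github.com/bytedance/sonic"
	"github.com/bytedance/sonic/ast"
)

func main() {
	src := []byte(`{"statuses":[{"id":249279667666817024}]}`)

	// Classic entry point: always copies the matched JSON, since src may be reused.
	n1, err := sonic.Get(src, "statuses", 0, "id")
	if err != nil {
		panic(err)
	}

	// New entry point: the caller decides about copying, validation and
	// concurrent readability.
	n2, err := sonic.GetWithOptions(src, ast.SearchOptions{
		ValidateJSON:   true,
		CopyReturn:     true,
		ConcurrentRead: true,
	}, "statuses", 0, "id")
	if err != nil {
		panic(err)
	}

	v1, _ := n1.Int64()
	v2, _ := n2.Int64()
	fmt.Println(v1 == v2) // true
}
```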