From fbe9b5b5da28a37f39484f113d7b40e7bd277eff Mon Sep 17 00:00:00 2001 From: ChrisTrenkamp Date: Sat, 26 Aug 2023 14:26:24 -0400 Subject: [PATCH] Added support for unmarshalling results into structs and slices. --- README.md | 78 ++++++++++++++++ exec/doc_test.go | 61 ++++++++++++ exec/exec_test.go | 78 ++++++++++++++++ exec/unmarshal.go | 229 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 446 insertions(+) create mode 100644 exec/unmarshal.go diff --git a/README.md b/README.md index 34f2499..90a9e4f 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,84 @@ func main() { } ``` +## Unmarshal result into a struct + +```go +package main + +import ( + "bytes" + "fmt" + + "github.com/ChrisTrenkamp/xsel/exec" + "github.com/ChrisTrenkamp/xsel/grammar" + "github.com/ChrisTrenkamp/xsel/node" + "github.com/ChrisTrenkamp/xsel/parser" + "github.com/ChrisTrenkamp/xsel/store" +) + +func main() { + xml := ` + + + + Great Lakes Food Market + Howard Snyder + Marketing Manager + +
2732 Baker Blvd.
+ Eugene + OR +
+
+ + Hungry Coyote Import Store + Yoshi Latimer + +
City Center Plaza 516 Main St.
+ Walla Walla + WA +
+
+
+
+` + + type Address struct { + Address string `xsel:"NS:Address"` + City string `xsel:"NS:City"` + Region string `xsel:"NS:Region"` + } + + type Customer struct { + Id string `xsel:"@CustomerID"` + Name string `xsel:"NS:CompanyName"` + ContactName string `xsel:"NS:ContactName"` + Address Address `xsel:"NS:FullAddress"` + } + + type Customers struct { + Customers []Customer `xsel:"NS:Customers/NS:Customer"` + } + + contextSettings := func(c *exec.ContextSettings) { + c.NamespaceDecls["NS"] = "http://www.adventure-works.com" + } + + xpath := grammar.MustBuild(`/NS:Root`) + parser := parser.ReadXml(bytes.NewBufferString(xml)) + cursor, _ := store.CreateInMemory(parser) + result, _ := exec.Exec(cursor, &xpath, contextSettings) + + customers := Customers{} + exec.Unmarshal(result, &customers, contextSettings) // Remember to check for errors + + fmt.Printf("%+v\n", customers) + //{Customers:[{Id:GREAL Name:Great Lakes Food Market ContactName:Howard Snyder Address:{Address:2732 Baker Blvd. City:Eugene Region:OR}} + // {Id:HUNGC Name:Hungry Coyote Import Store ContactName:Yoshi Latimer Address:{Address:City Center Plaza 516 Main St. City:Walla Walla Region:WA}}]} +} +``` + ## Extensible `xsel` supplies an XML parser (using the `encoding/xml` package) out of the box, but the XPath logic does not depend directly on XML. It instead depends on the interfaces defined in the [node](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/node) and [store](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/store) packages. This means it's possible to use `xsel` for querying against non-XML documents. The [parser](https://pkg.go.dev/github.com/ChrisTrenkamp/xsel/parser) package supplies methods for parsing XML, HTML, and JSON documents. diff --git a/exec/doc_test.go b/exec/doc_test.go index 72fea8a..6f1ce23 100644 --- a/exec/doc_test.go +++ b/exec/doc_test.go @@ -97,3 +97,64 @@ func ExampleExec_custom_variables() { fmt.Println(result) } + +func ExampleExec_unmarshal() { + xml := ` + + + + Great Lakes Food Market + Howard Snyder + Marketing Manager + +
2732 Baker Blvd.
+ Eugene + OR +
+
+ + Hungry Coyote Import Store + Yoshi Latimer + +
City Center Plaza 516 Main St.
+ Walla Walla + WA +
+
+
+
+` + + type Address struct { + Address string `xsel:"NS:Address"` + City string `xsel:"NS:City"` + Region string `xsel:"NS:Region"` + } + + type Customer struct { + Id string `xsel:"@CustomerID"` + Name string `xsel:"NS:CompanyName"` + ContactName string `xsel:"NS:ContactName"` + Address Address `xsel:"NS:FullAddress"` + } + + type Customers struct { + Customers []Customer `xsel:"NS:Customers/NS:Customer"` + } + + contextSettings := func(c *exec.ContextSettings) { + c.NamespaceDecls["NS"] = "http://www.adventure-works.com" + } + + xpath := grammar.MustBuild(`/NS:Root`) + parser := parser.ReadXml(bytes.NewBufferString(xml)) + cursor, _ := store.CreateInMemory(parser) + result, _ := exec.Exec(cursor, &xpath, contextSettings) + + customers := Customers{} + exec.Unmarshal(result, &customers, contextSettings) // Remember to check for errors + + fmt.Printf("%+v\n", customers) + //{Customers:[{Id:GREAL Name:Great Lakes Food Market ContactName:Howard Snyder Address:{Address:2732 Baker Blvd. City:Eugene Region:OR}} + // {Id:HUNGC Name:Hungry Coyote Import Store ContactName:Yoshi Latimer Address:{Address:City Center Plaza 516 Main St. City:Walla Walla Region:WA}}]} +} diff --git a/exec/exec_test.go b/exec/exec_test.go index 0138bb7..9e969eb 100644 --- a/exec/exec_test.go +++ b/exec/exec_test.go @@ -3,6 +3,7 @@ package exec import ( "bytes" "math" + "reflect" "testing" "github.com/ChrisTrenkamp/xsel/grammar" @@ -1550,3 +1551,80 @@ func TestHtmlDocument(t *testing.T) { t.Error("bad href value") } } + +func TestUnmarshal(t *testing.T) { + type SubUnmarshalTarget struct { + A *string `xsel:"a"` + Battr bool `xsel:"b/@attr"` + Ignore string + } + + type SliceUnmarshalTarget struct { + Elem string `xsel:"."` + } + + type UnmarshalTarget struct { + Text string `xsel:"normalize-space(text())"` + Attr float32 `xsel:"node/@attr"` + Attr64 float64 `xsel:"node/@attr"` + Subfield **SubUnmarshalTarget `xsel:"node"` + Slice *[]*SliceUnmarshalTarget `xsel:"slice/elem"` + StringSlice []string `xsel:"slice/elem"` + Uint8 uint8 `xsel:"slice/elem[1]"` + Int8 int8 `xsel:"slice/elem[1]"` + Uint16 uint16 `xsel:"slice/elem[1]"` + Int16 int16 `xsel:"slice/elem[1]"` + Uint32 uint32 `xsel:"slice/elem[1]"` + Int32 int32 `xsel:"slice/elem[1]"` + Uint64 uint64 `xsel:"slice/elem[1]"` + Int64 int64 `xsel:"slice/elem[1]"` + } + + xml := ` + + foo + + a + + + + 1 + 2 + 3 + + +` + nodes := execXmlNodes(t, "/root", xml) + target := UnmarshalTarget{} + + if err := Unmarshal(nodes, &target); err != nil { + t.Error(err) + } + + a := "a" + subExpected := &SubUnmarshalTarget{ + A: &a, + Battr: true, + } + sliceResults := []*SliceUnmarshalTarget{{"1"}, {"2"}, {"3"}} + expected := UnmarshalTarget{ + Text: "foo", + Attr: 3.14, + Attr64: 3.14, + Subfield: &subExpected, + Slice: &sliceResults, + StringSlice: []string{"1", "2", "3"}, + Uint8: 1, + Int8: 1, + Uint16: 1, + Int16: 1, + Uint32: 1, + Int32: 1, + Uint64: 1, + Int64: 1, + } + + if !reflect.DeepEqual(expected, target) { + t.Error("incorrect result") + } +} diff --git a/exec/unmarshal.go b/exec/unmarshal.go new file mode 100644 index 0000000..1ff139d --- /dev/null +++ b/exec/unmarshal.go @@ -0,0 +1,229 @@ +package exec + +import ( + "fmt" + "reflect" + + "github.com/ChrisTrenkamp/xsel/grammar" +) + +// Unmarshal maps a XPath result to a struct or slice. +// When unmarshaling a slice, the result must be a NodeSet. When unmarshaling +// a struct, the result must be a NodeSet with one result. To unmarshal a +// value to a struct field, give it a "xsel" tag name, and a XPath expression +// for its value (e.g. `xsel:"//my-struct[@my-id = 'my-value']"`). + +// For struct fields, Unmarshal can set fields that are ints and uints, bools, +// strings, slices, and nested structs. + +// For slice elements, Unmarshal can set ints and uints, bools, strings, and +// structs. It cannot Unmarshal multidimensional slices. + +// Arrays, maps, and channels are not supported. +func Unmarshal(result Result, value any, settings ...ContextApply) error { + return unmarshal(result, value, settings...) +} + +func unmarshal(result Result, value any, settings ...ContextApply) error { + val := reflect.ValueOf(value) + typ := val.Type() + + for typ.Kind() == reflect.Pointer { + val = val.Elem() + typ = typ.Elem() + } + + kind := typ.Kind() + + if kind == reflect.Struct { + return unmarshalStruct(result, val.Addr(), settings...) + } + + if kind == reflect.Slice { + return unmarshalSlice(result, val, settings...) + } + + return fmt.Errorf("unsupported data type") +} + +func unmarshalStruct(result Result, val reflect.Value, settings ...ContextApply) error { + cursor, ok := result.(NodeSet) + + if !ok || len(cursor) != 1 { + return fmt.Errorf("struct unmarshals must operate on a NodeSet with one result") + } + + val = val.Elem() + + numField := val.NumField() + valType := val.Type() + + for i := 0; i < numField; i++ { + fieldValType := valType.Field(i) + name := fieldValType.Name + tag := fieldValType.Tag.Get("xsel") + + if tag == "" { + continue + } + + xselExec, err := grammar.Build(tag) + if err != nil { + return err + } + + result, err := Exec(cursor[0], &xselExec, settings...) + if err != nil { + return err + } + + field := val.Field(i) + fieldType := field.Type() + for fieldType.Kind() == reflect.Pointer { + fieldType = fieldType.Elem() + } + + fieldVal, ok := createValue(fieldType.Kind(), result) + + if ok { + err = setField(name, field, fieldVal, false) + } else { + ptr := reflect.New(fieldType) + ptr.Elem().Set(reflect.Zero(fieldType)) + err = unmarshal(result, ptr.Interface(), settings...) + if err != nil { + return err + } + + err = setField(name, field, ptr.Elem(), false) + } + + if err != nil { + return err + } + } + + return nil +} + +func unmarshalSlice(result Result, val reflect.Value, settings ...ContextApply) error { + nodeset, ok := result.(NodeSet) + + if !ok { + return fmt.Errorf("slice unmarshals must operate on a NodeSet") + } + + sliceType := reflect.TypeOf(val.Interface()) + sliceElement := sliceType.Elem() + sliceElementKind := sliceElement.Kind() + + for sliceElementKind == reflect.Pointer { + sliceElement = sliceElement.Elem() + sliceElementKind = sliceElement.Kind() + } + + for _, i := range nodeset { + var err error + var sliceValue reflect.Value + + if sliceElementKind == reflect.Slice { + return fmt.Errorf("slice unmarshals can only operate on 1-dimensional slices") + } else if sliceElementKind == reflect.Struct { + ptr := reflect.New(sliceElement) + ptr.Elem().Set(reflect.Zero(sliceElement)) + err = unmarshal(NodeSet{i}, ptr.Interface(), settings...) + sliceValue = ptr.Elem() + } else { + val, ok := createValue(sliceElementKind, NodeSet{i}) + + if !ok { + return fmt.Errorf("invalid slice element type") + } + + sliceValue = val + } + + setField("", val, sliceValue, true) + + if err != nil { + return err + } + } + + return nil +} + +func setField(name string, field reflect.Value, val reflect.Value, checkSlice bool) error { + isSlice := false + dereferences := 0 + typ := field.Type() + assignableType := typ + + if checkSlice && typ.Kind() == reflect.Slice { + isSlice = true + typ = typ.Elem() + assignableType = typ + } + + elemKind := typ.Kind() + + for elemKind == reflect.Pointer { + typ = typ.Elem() + elemKind = typ.Kind() + dereferences++ + } + + ptrVal := val + + for dereferences != 0 { + ptr := reflect.New(ptrVal.Type()) + ptr.Elem().Set(ptrVal) + ptrVal = ptr + dereferences-- + } + + if !assignableType.AssignableTo(ptrVal.Type()) { + return fmt.Errorf("could not set field, %s", name) + } + + if isSlice { + field.Set(reflect.Append(field, ptrVal)) + } else { + field.Set(ptrVal) + } + + return nil +} + +func createValue(kind reflect.Kind, result Result) (reflect.Value, bool) { + switch kind { + case reflect.String: + return reflect.ValueOf(result.String()), true + + case reflect.Bool: + return reflect.ValueOf(result.Bool()), true + + case reflect.Uint8: + return reflect.ValueOf(uint8(result.Number())), true + case reflect.Int8: + return reflect.ValueOf(int8(result.Number())), true + case reflect.Uint16: + return reflect.ValueOf(uint16(result.Number())), true + case reflect.Int16: + return reflect.ValueOf(int16(result.Number())), true + case reflect.Uint32: + return reflect.ValueOf(uint32(result.Number())), true + case reflect.Int32: + return reflect.ValueOf(int32(result.Number())), true + case reflect.Uint64: + return reflect.ValueOf(uint64(result.Number())), true + case reflect.Int64: + return reflect.ValueOf(int64(result.Number())), true + case reflect.Float32: + return reflect.ValueOf(float32(result.Number())), true + case reflect.Float64: + return reflect.ValueOf(result.Number()), true + } + + return reflect.ValueOf(0), false +}