Skip to content

Commit

Permalink
added avro_test
Browse files Browse the repository at this point in the history
bump arrow to v13
  • Loading branch information
loicalleyne committed Jul 21, 2023
1 parent 1aa8480 commit bbf3485
Show file tree
Hide file tree
Showing 4 changed files with 309 additions and 10 deletions.
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
# arrow_schemagen
Generate an Apache Arrow schema from an Avro schema or an arbitrary map.
Use with Apache Arrow Go package v12 or higher
Use with Apache Arrow Go package v13

How to use:

Pass in an Avro schema JSON and receive a *arrow.Schema
```golang
import (
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v13/arrow"
asg "github.com/loicalleyne/arrow_schemagen"
)

func main() {
var avroSchema map[string]interface{}
json.Unmarshal([]byte(avroSchemaJSON), &avroSchema)
schema, err := asg.ArrowSchemaFromAvro(avroSchema)
//
// ArrowSchemaFromAvro returns a new Arrow schema built from an Avro schema JSON.
// If the top level is a record type, includeTopLevel controls whether its fields
// become top-level fields of the resulting schema or are nested in a single field.
//
schema, err := asg.ArrowSchemaFromAvro(avroSchema, false)
if err != nil {
// deal with error
}
Expand Down
269 changes: 269 additions & 0 deletions avro_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
package arrow_schemagen

import (
"fmt"
"testing"

"github.com/apache/arrow/go/v13/arrow"
)

// TestSchemaStringEqual converts each Avro schema in the table with
// ArrowSchemaFromAvro and checks that the String() rendering of the result
// equals the String() rendering of a schema built from the expected fields.
//
// NOTE(review): the comparison is purely textual; whether nullability and
// field metadata take part in it depends on what arrow.Schema.String prints —
// confirm before relying on this test for those properties.
func TestSchemaStringEqual(t *testing.T) {
	tests := []struct {
		avroSchema  string
		arrowSchema []arrow.Field
	}{
		{
			avroSchema: `{
"type": "record",
"name": "Example",
"doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.",
"fields": [{
"name": "inheritNull",
"type": {
"type": "enum",
"name": "Simple",
"doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.",
"symbols": ["a", "b"]
}
}, {
"name": "explicitNamespace",
"type": {
"type": "fixed",
"name": "Simple",
"namespace": "explicit",
"doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).",
"size": 12
}
}, {
"name": "fullName",
"type": {
"type": "record",
"name": "a.full.Name",
"namespace": "ignored",
"doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.",
"fields": [{
"name": "inheritNamespace",
"type": {
"type": "enum",
"name": "Understanding",
"doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.",
"symbols": ["d", "e"]
}
}, {
"type": "fixed",
"size": 16,
"name": "md5"
}
]
}
}, {
"name": "id",
"type": "int"
}, {
"name": "bigId",
"type": "long"
}, {
"name": "temperature",
"type": [
"null",
"float"
]
}, {
"name": "fraction",
"type": [
"null",
"double"
]
}, {
"name": "is_emergency",
"type": "boolean"
}, {
"name": "remote_ip",
"type": [
"null",
"bytes"
]
}, {
"name": "person",
"type": "record",
"fields": [{
"name": "lastname",
"type": "string"
}, {
"name": "address",
"type": {
"type": "record",
"name": "AddressUSRecord",
"fields": [{
"name": "streetaddress",
"type": "string"
}, {
"name": "city",
"type": "string"
}
]
}
}, {
"name": "mapfield",
"type": {
"type": "map",
"values": "long",
"default": {}
}
}, {
"name": "arrayField",
"type": {
"type": "array",
"items": "string",
"default": []
}
}
]
}, {
"name": "decimalField",
"type": {
"type": "bytes",
"logicalType": "decimal",
"precision": 4,
"scale": 2
}
}, {
"name": "uuidField",
"type": "string",
"logicalType": "uuid"
}, {
"name": "time-millis",
"type": "int",
"logicalType": "time-millis"
}, {
"name": "time-micros",
"type": "long",
"logicalType": "time-micros"
}, {
"name": "timestamp-millis",
"type": "long",
"logicalType": "timestamp-millis"
}, {
"name": "timestamp-micros",
"type": "long",
"logicalType": "timestamp-micros"
}, {
"name": "local-timestamp-millis",
"type": "long",
"logicalType": "local-timestamp-millis"
}, {
"type": "fixed",
"size": 12,
"logicalType": "duration",
"name": "duration"
}, {
"name": "date",
"type": {
"type": "int",
"logicalType": "date"
}
}
]
}
`,
			// Expected Arrow fields, listed in the same order as the Avro
			// fields above.
			arrowSchema: []arrow.Field{
				{
					Name:     "inheritNull",
					Type:     &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false},
					Nullable: true,
					Metadata: arrow.MetadataFrom(map[string]string{"0": "a", "1": "b"}),
				},
				{Name: "explicitNamespace", Type: &arrow.FixedSizeBinaryType{ByteWidth: 12}},
				{
					Name: "fullName",
					Type: arrow.StructOf(
						arrow.Field{
							Name: "inheritNamespace",
							Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false},
						},
						arrow.Field{Name: "md5", Type: &arrow.FixedSizeBinaryType{ByteWidth: 16}},
					),
				},
				{Name: "id", Type: arrow.PrimitiveTypes.Int32},
				{Name: "bigId", Type: arrow.PrimitiveTypes.Int64},
				{Name: "temperature", Type: arrow.PrimitiveTypes.Float32},
				{Name: "fraction", Type: arrow.PrimitiveTypes.Float64},
				{Name: "is_emergency", Type: arrow.FixedWidthTypes.Boolean},
				{Name: "remote_ip", Type: &arrow.FixedSizeBinaryType{ByteWidth: 8}},
				{
					Name: "person",
					Type: arrow.StructOf(
						arrow.Field{Name: "lastname", Type: arrow.BinaryTypes.String},
						arrow.Field{
							Name: "address",
							Type: arrow.StructOf(
								arrow.Field{Name: "streetaddress", Type: arrow.BinaryTypes.String},
								arrow.Field{Name: "city", Type: arrow.BinaryTypes.String},
							),
						},
						arrow.Field{
							Name: "mapfield",
							Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64),
						},
						arrow.Field{
							Name: "arrayField",
							Type: arrow.ListOf(arrow.BinaryTypes.String),
						},
					),
				},
				{Name: "decimalField", Type: &arrow.Decimal128Type{Precision: 4, Scale: 2}},
				{Name: "uuidField", Type: arrow.BinaryTypes.String},
				{Name: "time-millis", Type: arrow.FixedWidthTypes.Time32ms},
				{Name: "time-micros", Type: arrow.FixedWidthTypes.Time64us},
				{Name: "timestamp-millis", Type: arrow.FixedWidthTypes.Timestamp_ms},
				{Name: "timestamp-micros", Type: arrow.FixedWidthTypes.Timestamp_us},
				{Name: "local-timestamp-millis", Type: arrow.FixedWidthTypes.Timestamp_ms},
				{Name: "duration", Type: arrow.FixedWidthTypes.MonthDayNanoInterval},
				{Name: "date", Type: arrow.FixedWidthTypes.Date32},
			},
		},
	}

	for i, test := range tests {
		// Before Go 1.22 the loop variable is shared across iterations; the
		// copy keeps the closure correct if the subtests ever run in parallel.
		test := test
		// Name each subtest by its table index so a failure identifies the case.
		t.Run(fmt.Sprintf("case_%d", i), func(t *testing.T) {
			want := arrow.NewSchema(test.arrowSchema, nil)
			got, err := ArrowSchemaFromAvro([]byte(test.avroSchema), false)
			if err != nil {
				t.Fatalf("ArrowSchemaFromAvro returned error: %v", err)
			}
			// String() already yields a string, so compare the values directly.
			if gotStr, wantStr := got.String(), want.String(); gotStr != wantStr {
				t.Fatalf("got=%v,\n want=%v", gotStr, wantStr)
			}
			t.Logf("schema.String() comparison passed")
		})
	}
}
11 changes: 8 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@ module github.com/loicalleyne/arrow_schemagen
go 1.20

require (
github.com/apache/arrow/go/v12 v12.0.1 // indirect
github.com/google/flatbuffers v2.0.8+incompatible // indirect
github.com/apache/arrow/go/v12 v12.0.1
github.com/apache/arrow/go/v13 v13.0.0-20230720205829-d98b3a18a198
)

require (
github.com/goccy/go-json v0.10.0 // indirect
github.com/google/flatbuffers v23.1.21+incompatible // indirect
golang.org/x/mod v0.8.0 // indirect
golang.org/x/sys v0.5.0 // indirect
golang.org/x/tools v0.6.0 // indirect
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
)
28 changes: 24 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,12 +1,32 @@
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/apache/arrow/go/v12 v12.0.1 h1:JsR2+hzYYjgSUkBSaahpqCetqZMr76djX80fF/DiJbg=
github.com/apache/arrow/go/v12 v12.0.1/go.mod h1:weuTY7JvTG/HDPtMQxEUp7pU73vkLWMLpY67QwZ/WWw=
github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM=
github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/apache/arrow/go/v13 v13.0.0-20230720205829-d98b3a18a198 h1:ns9hLCFdg1KDuC9I/5gQoPLCeMbkr57wimo4Most0fc=
github.com/apache/arrow/go/v13 v13.0.0-20230720205829-d98b3a18a198/go.mod h1:W69eByFNO0ZR30q1/7Sr9d83zcVZmF2MiP3fFYAWJOc=
github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/goccy/go-json v0.10.0 h1:mXKd9Qw4NuzShiRlOXKews24ufknHO7gx30lsDyokKA=
github.com/goccy/go-json v0.10.0/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/google/flatbuffers v23.1.21+incompatible h1:bUqzx/MXCDxuS0hRJL2EfjyZL3uQrPbMocUa8zGqsTA=
github.com/google/flatbuffers v23.1.21+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4=
github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw=
github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
golang.org/x/exp v0.0.0-20230206171751-46f607a40771 h1:xP7rWLUr1e1n2xkK5YB4LI0hPEy3LJC6Wk+D4pGlOJg=
golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f h1:uF6paiQQebLeSXkrTqHqz0MXhXXS1KgF41eUdBNvxK0=
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk=
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=

0 comments on commit bbf3485

Please sign in to comment.