diff --git a/yaml.go b/yaml.go index d57dfb1..0375ebb 100644 --- a/yaml.go +++ b/yaml.go @@ -15,6 +15,7 @@ import ( "io" "reflect" "strconv" + "strings" "gopkg.in/yaml.v3" ) @@ -77,20 +78,78 @@ func jsonUnmarshal(r io.Reader, o interface{}, opts ...JSONOpt) error { // JSONToYAML converts JSON to YAML. func JSONToYAML(j []byte) ([]byte, error) { - // Convert the JSON to an object. - var jsonObj interface{} + var n yaml.Node // We are using yaml.Unmarshal here (instead of json.Unmarshal) because the // Go JSON library doesn't try to pick the right number type (int, float, // etc.) when unmarshalling to interface{}, it just picks float64 // universally. go-yaml does go through the effort of picking the right // number type, so we can preserve number type throughout this process. - err := yaml.Unmarshal(j, &jsonObj) + err := yaml.Unmarshal(j, &n) if err != nil { return nil, err } + // Force yaml.Node to be marshaled as formatted YAML. + enforceNodeStyle(&n) + // Marshal this object into YAML. - return yaml.Marshal(jsonObj) + return yaml.Marshal(&n) +} + +func enforceNodeStyle(n *yaml.Node) { + if n == nil { + return + } + + switch n.Kind { + case yaml.SequenceNode, yaml.MappingNode: + n.Style = yaml.LiteralStyle + case yaml.ScalarNode: + // Special case: if node is a string, then there are special styling + // rules that we must abide by to conform to yaml.v3. Some of the logic + // has been copied out, because the other way would've been to re-encode + // the string which causes a ~2x performance hit! + // + // Ideally, we wouldn't need to copy this at all! + // https://github.com/go-yaml/yaml/pull/574 implements a fix for this + // issue that included code for the internal node() marshaling function, + // except https://github.com/go-yaml/yaml/pull/583 was merged instead, + // and it completely left out the fix for this issue! + // + // Instead of trying to make a pull request to a repository which hasn't + // received any commit in over 2 years and has 125+ open pull requests, + // I've decided to just copy the code here. + // + // There is one case that has been omitted from this code, though: the + // code makes no attempt at checking for isBase64Float(). The README of + // the YAML package says that it doesn't support this either (but it is + // in the code). + if n.ShortTag() == "!!str" { + switch { + case strings.Contains(n.Value, "\n"): + n.Style = yaml.LiteralStyle + case isOldBool(n.Value): + n.Style = yaml.DoubleQuotedStyle + default: + n.Style = yaml.FlowStyle + } + } + } + + for _, c := range n.Content { + enforceNodeStyle(c) + } +} + +// isOldBool is copied from yaml.v3. +func isOldBool(s string) (result bool) { + switch s { + case "y", "Y", "yes", "Yes", "YES", "on", "On", "ON", + "n", "N", "no", "No", "NO", "off", "Off", "OFF": + return true + default: + return false + } } // YAMLToJSON converts YAML to JSON. Since JSON is a subset of YAML, @@ -109,9 +168,8 @@ func YAMLToJSON(y []byte) ([]byte, error) { //nolint:revive } func yamlToJSON(dec *yaml.Decoder, jsonTarget *reflect.Value) ([]byte, error) { - // Convert the YAML to an object. - var yamlObj interface{} - if err := dec.Decode(&yamlObj); err != nil { + var n yaml.Node + if err := dec.Decode(&n); err != nil { // Functionality changed in v3 which means we need to ignore EOF error. // See https://github.com/go-yaml/yaml/issues/639 if !errors.Is(err, io.EOF) { @@ -123,7 +181,7 @@ func yamlToJSON(dec *yaml.Decoder, jsonTarget *reflect.Value) ([]byte, error) { // can have non-string keys in YAML). So, convert the YAML-compatible object // to a JSON-compatible object, failing with an error if irrecoverable // incompatibilities happen along the way. - jsonObj, err := convertToJSONableObject(yamlObj, jsonTarget) + jsonObj, err := convertToJSONableObject(&n, jsonTarget) if err != nil { return nil, err } @@ -132,7 +190,7 @@ func yamlToJSON(dec *yaml.Decoder, jsonTarget *reflect.Value) ([]byte, error) { return json.Marshal(jsonObj) } -func convertToJSONableObject(yamlObj interface{}, jsonTarget *reflect.Value) (interface{}, error) { //nolint:gocyclo +func convertToJSONableObject(n *yaml.Node, jsonTarget *reflect.Value) (json.RawMessage, error) { //nolint:gocyclo var err error // Resolve jsonTarget to a concrete value (i.e. not a pointer or an @@ -150,57 +208,52 @@ func convertToJSONableObject(yamlObj interface{}, jsonTarget *reflect.Value) (in } } - // go-yaml v3 changed from v2 and now will provide map[string]interface{} by - // default and map[interface{}]interface{} when none of the keys strings. - // To get around this, we run a pre-loop to convert the map. - // JSON only supports strings as keys, so we must convert. - - switch typedYAMLObj := yamlObj.(type) { - case map[interface{}]interface{}: - // From my reading of go-yaml v2 (specifically the resolve function), - // keys can only have the types string, int, int64, float64, binary - // (unsupported), or null (unsupported). - strMap := make(map[string]interface{}) - for k, v := range typedYAMLObj { + switch n.Kind { + case yaml.DocumentNode: + return convertToJSONableObject(n.Content[0], jsonTarget) + + case yaml.MappingNode: + jsonMap := make(orderedMap, 0, len(n.Content)/2) + keyNodes := make(map[string]*yaml.Node, len(n.Content)/2) + for i := 0; i < len(n.Content); i += 2 { + kNode := n.Content[i] + vNode := n.Content[i+1] + + var anyKey interface{} + if err := kNode.Decode(&anyKey); err != nil { + return nil, fmt.Errorf("error decoding yaml map key %s: %v", kNode.Tag, err) + } + // Resolve the key to a string first. - var keyString string - switch typedKey := k.(type) { + var key string + switch typedKey := anyKey.(type) { case string: - keyString = typedKey + key = typedKey case int: - keyString = strconv.Itoa(typedKey) + key = strconv.Itoa(typedKey) case int64: // go-yaml will only return an int64 as a key if the system // architecture is 32-bit and the key's value is between 32-bit // and 64-bit. Otherwise the key type will simply be int. - keyString = strconv.FormatInt(typedKey, 10) + key = strconv.FormatInt(typedKey, 10) case float64: // Float64 is now supported in keys - keyString = strconv.FormatFloat(typedKey, 'g', -1, 64) + key = strconv.FormatFloat(typedKey, 'g', -1, 64) case bool: if typedKey { - keyString = "true" + key = "true" } else { - keyString = "false" + key = "false" } default: return nil, fmt.Errorf("unsupported map key of type: %s, key: %+#v, value: %+#v", - reflect.TypeOf(k), k, v) + reflect.TypeOf(kNode), kNode, vNode) } - strMap[keyString] = v - } - // replace yamlObj with our new string map - yamlObj = strMap - } - // If yamlObj is a number or a boolean, check if jsonTarget is a string - - // if so, coerce. Else return normal. - // If yamlObj is a map or array, find the field that each key is - // unmarshaling to, and when you recurse pass the reflect.Value for that - // field back into this function. - switch typedYAMLObj := yamlObj.(type) { - case map[string]interface{}: - for k, v := range typedYAMLObj { + if otherNode, ok := keyNodes[key]; ok { + return nil, fmt.Errorf("mapping key %q already defined at line %d", key, otherNode.Line) + } + keyNodes[key] = kNode // jsonTarget should be a struct or a map. If it's a struct, find // the field it's going to map to and pass its reflect.Value. If @@ -210,7 +263,7 @@ func convertToJSONableObject(yamlObj interface{}, jsonTarget *reflect.Value) (in if jsonTarget != nil { t := *jsonTarget if t.Kind() == reflect.Struct { - keyBytes := []byte(k) + keyBytes := []byte(key) // Find the field that the JSON library would use. var f *field fields := cachedTypeFields(t.Type()) @@ -229,8 +282,7 @@ func convertToJSONableObject(yamlObj interface{}, jsonTarget *reflect.Value) (in // Find the reflect.Value of the most preferential // struct field. jtf := t.Field(f.index[0]) - typedYAMLObj[k], err = convertToJSONableObject(v, &jtf) - if err != nil { + if err := jsonMap.AppendYAML(f.name, vNode, &jtf); err != nil { return nil, err } continue @@ -239,20 +291,21 @@ func convertToJSONableObject(yamlObj interface{}, jsonTarget *reflect.Value) (in // Create a zero value of the map's element type to use as // the JSON target. jtv := reflect.Zero(t.Type().Elem()) - typedYAMLObj[k], err = convertToJSONableObject(v, &jtv) - if err != nil { + if err := jsonMap.AppendYAML(key, vNode, &jtv); err != nil { return nil, err } continue } } - typedYAMLObj[k], err = convertToJSONableObject(v, nil) - if err != nil { + + if err := jsonMap.AppendYAML(key, vNode, nil); err != nil { return nil, err } } - return typedYAMLObj, nil - case []interface{}: + + return jsonMap.MarshalJSON() + + case yaml.SequenceNode: // We need to recurse into arrays in case there are any // map[interface{}]interface{}'s inside and to convert any // numbers to strings. @@ -272,22 +325,28 @@ func convertToJSONableObject(yamlObj interface{}, jsonTarget *reflect.Value) (in } // Make and use a new array. - arr := make([]interface{}, len(typedYAMLObj)) - for i, v := range typedYAMLObj { + arr := make([]json.RawMessage, len(n.Content)) + for i, v := range n.Content { arr[i], err = convertToJSONableObject(v, jsonSliceElemValue) if err != nil { return nil, err } } - return arr, nil + return json.Marshal(arr) + default: + var rawObject interface{} + if err := n.Decode(&rawObject); err != nil { + return nil, fmt.Errorf("error decoding yaml object %s: %v", n.Tag, err) + } + // If the target type is a string and the YAML type is a number, // convert the YAML type to a string. if jsonTarget != nil && (*jsonTarget).Kind() == reflect.String { // Based on my reading of go-yaml, it may return int, int64, // float64, or uint64. var s string - switch typedVal := typedYAMLObj.(type) { + switch typedVal := rawObject.(type) { case int: s = strconv.FormatInt(int64(typedVal), 10) case int64: @@ -304,9 +363,49 @@ func convertToJSONableObject(yamlObj interface{}, jsonTarget *reflect.Value) (in } } if len(s) > 0 { - yamlObj = interface{}(s) + rawObject = s } } - return yamlObj, nil + + return json.Marshal(rawObject) + } +} + +type orderedMap []orderedPair + +type orderedPair struct { + K string + V interface{} +} + +func (m *orderedMap) AppendYAML(k string, v *yaml.Node, jsonTarget *reflect.Value) error { + r, err := convertToJSONableObject(v, jsonTarget) + if err != nil { + return fmt.Errorf("%q: %w", k, err) + } + *m = append(*m, orderedPair{K: k, V: r}) + return nil +} + +func (m orderedMap) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + buf.WriteByte('{') + for i, p := range m { + if i > 0 { + buf.WriteByte(',') + } + k, err := json.Marshal(p.K) + if err != nil { + return nil, fmt.Errorf("key %q error: %w", p.K, err) + } + buf.Write(k) + buf.WriteByte(':') + b, err := json.Marshal(p.V) + if err != nil { + return nil, fmt.Errorf("value %q error: %w", p.K, err) + } + buf.Write(b) } + buf.WriteByte('}') + return buf.Bytes(), nil } diff --git a/yaml_test.go b/yaml_test.go index dbec2c6..926f159 100644 --- a/yaml_test.go +++ b/yaml_test.go @@ -323,6 +323,12 @@ func TestYAMLToJSON(t *testing.T) { "- t: null\n", `[{"t":null}]`, nil, + }, { + "obj:\n" + + " z_hello: hello\n" + + " a_world: world\n", + `{"obj":{"z_hello":"hello","a_world":"world"}}`, + nil, }, } @@ -359,40 +365,41 @@ func runCases(t *testing.T, runType RunType, cases []Case) { invMsg = "JSON back to YAML" } - for _, c := range cases { - // Convert the string. - t.Logf("converting %s\n", c.input) - output, err := f([]byte(c.input)) - if err != nil { - t.Errorf("Failed to convert %s, input: `%s`, err: %v", msg, c.input, err) - } - - // Check it against the expected output. - if string(output) != c.output { - t.Errorf("Failed to convert %s, input: `%s`, expected `%s`, got `%s`", - msg, c.input, c.output, string(output)) - } - - // Set the string that we will compare the reversed output to. - reverse := c.input - // If a special reverse string was specified, use that instead. - if c.reverse != nil { - reverse = *c.reverse - } - - // Reverse the output. - input, err := invF(output) - if err != nil { - t.Errorf("Failed to convert %s, input: `%s`, err: %v", invMsg, string(output), err) - } - - // Check the reverse is equal to the input (or to *c.reverse). - if string(input) != reverse { - t.Errorf("Failed to convert %s, input: `%s`, expected `%s`, got `%s`", - invMsg, string(output), reverse, string(input)) - } + for i, c := range cases { + t.Run(fmt.Sprintf("%s_%d", msg, i), func(t *testing.T) { + // Convert the string. + t.Logf("converting %s\n", c.input) + output, err := f([]byte(c.input)) + if err != nil { + t.Errorf("Failed to convert %s, input: `%s`, err: %v", msg, c.input, err) + } + + // Check it against the expected output. + if string(output) != c.output { + t.Errorf("Failed to convert %s, input: `%s`, expected `%s`, got `%s`", + msg, c.input, c.output, string(output)) + } + + // Set the string that we will compare the reversed output to. + reverse := c.input + // If a special reverse string was specified, use that instead. + if c.reverse != nil { + reverse = *c.reverse + } + + // Reverse the output. + input, err := invF(output) + if err != nil { + t.Errorf("Failed to convert %s, input: `%s`, err: %v", invMsg, string(output), err) + } + + // Check the reverse is equal to the input (or to *c.reverse). + if string(input) != reverse { + t.Errorf("Failed to convert %s, input: `%s`, expected `%s`, got `%s`", + invMsg, string(output), reverse, string(input)) + } + }) } - } // To be able to easily fill in the *Case.reverse string above. @@ -406,3 +413,17 @@ func TestYAMLToJSONDuplicateFields(t *testing.T) { t.Error("expected YAMLtoJSON to fail on duplicate field names") } } + +func BenchmarkJSONToYAML(b *testing.B) { + data := []byte(`{"t":"a"}`) + for i := 0; i < b.N; i++ { + JSONToYAML(data) + } +} + +func BenchmarkYAMLToJSON(b *testing.B) { + data := []byte("t: a\n") + for i := 0; i < b.N; i++ { + YAMLToJSON(data) + } +}