From d0e138cd05cf168bb9d8ec5008061e63e8c23edf Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Wed, 6 Nov 2024 11:43:50 +0900 Subject: [PATCH] fix trailing new line chars handling --- decode_test.go | 6 ------ lexer/lexer_test.go | 25 +++++++++++++++++++++++++ scanner/context.go | 38 +++++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/decode_test.go b/decode_test.go index 718ca2e..cac2b81 100644 --- a/decode_test.go +++ b/decode_test.go @@ -2763,18 +2763,12 @@ func TestDecoder_LiteralWithNewLine(t *testing.T) { { Node: "hello\nworld\n", }, - { - Node: "hello\nworld\n\n", - }, { LastNode: "hello\nworld", }, { LastNode: "hello\nworld\n", }, - { - LastNode: "hello\nworld\n\n", - }, } // struct(want) -> Marshal -> Unmarchal -> struct(got) for _, want := range tests { diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 81b8be9..810d2d2 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -2172,6 +2172,31 @@ s: >-3 }, }, }, + { + YAML: ` +| + a + + + +`, + Tokens: token.Tokens{ + { + Type: token.LiteralType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: "|", + Origin: "\n|\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "a\n", + Origin: " a\n\n\n\n", + }, + }, + }, } for _, test := range tests { t.Run(test.YAML, func(t *testing.T) { diff --git a/scanner/context.go b/scanner/context.go index 415cf07..5263f3e 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -273,27 +273,39 @@ func (c *Context) existsBuffer() bool { func (c *Context) bufferedSrc() []rune { src := c.buf[:c.notSpaceCharPos] - if c.isDocument() && (strings.HasPrefix(c.docOpt, "-") || strings.HasSuffix(c.docOpt, "-")) { - // remove end '\n' character and trailing empty lines + if c.isDocument() { + // remove end '\n' character and trailing empty lines. // https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator - for { - if len(src) > 0 && src[len(src)-1] == '\n' { - src = src[:len(src)-1] - continue + if c.hasTrimAllEndNewlineOpt() { + // If the '-' flag is specified, all trailing newline characters will be removed. + src = []rune(strings.TrimRight(string(src), "\n")) + } else { + // Normally, all but one of the trailing newline characters are removed. + var newLineCharCount int + for i := len(src) - 1; i >= 0; i-- { + if src[i] == '\n' { + newLineCharCount++ + continue + } + break } - break - } - for { - if len(src) > 0 && src[len(src)-1] == ' ' { - src = src[:len(src)-1] - continue + removedNewLineCharCount := newLineCharCount - 1 + for removedNewLineCharCount > 0 { + src = []rune(strings.TrimSuffix(string(src), "\n")) + removedNewLineCharCount-- } - break } + + // If the text ends with a space character, remove all of them. + src = []rune(strings.TrimRight(string(src), " ")) } return src } +func (c *Context) hasTrimAllEndNewlineOpt() bool { + return strings.HasPrefix(c.docOpt, "-") || strings.HasSuffix(c.docOpt, "-") +} + func (c *Context) bufferedToken(pos *token.Position) *token.Token { if c.idx == 0 { return nil