From 03adbc490195acbdea3d8a0d42cdbe8f9ed0ff7d Mon Sep 17 00:00:00 2001 From: Masaaki Goshima Date: Sun, 3 Nov 2024 01:57:21 +0900 Subject: [PATCH] fix parsing of document --- lexer/lexer_test.go | 304 ++++++++++++++++++++++++++++++++++++++++++ parser/parser_test.go | 9 ++ scanner/context.go | 132 +++++++++++++++--- scanner/scanner.go | 157 +++++++++++++--------- 4 files changed, 519 insertions(+), 83 deletions(-) diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 309403f1..cb1f9ef2 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -1850,10 +1850,310 @@ a: > }, }, }, + { + YAML: ` +s: > + 1s +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "s", + Origin: "\ns", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.FoldedType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: ">", + Origin: " >\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "1s\n", + Origin: " 1s\n", + }, + }, + }, + { + YAML: ` +s: >1 + 1s +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "s", + Origin: "\ns", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.FoldedType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: ">1", + Origin: " >1\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: " 1s\n", + Origin: " 1s\n", + }, + }, + }, + { + YAML: ` +s: >+2 + 1s +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "s", + Origin: "\ns", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.FoldedType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: ">+2", + Origin: " >+2\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: " 1s\n", + Origin: " 1s\n", + }, + }, + }, + { + YAML: ` +s: >-3 + 1s +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "s", + Origin: "\ns", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.FoldedType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: ">-3", + Origin: " >-3\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: " 1s", + Origin: " 1s\n", + }, + }, + }, + { + YAML: ` +s: > + 1s + 2s +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "s", + Origin: "\ns", + }, 
+ { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.FoldedType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: ">", + Origin: " >\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "1s 2s\n", + Origin: " 1s\n 2s\n", + }, + }, + }, + { + YAML: ` +s: > + 1s + 2s + 3s +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "s", + Origin: "\ns", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.FoldedType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: ">", + Origin: " >\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "1s\n 2s\n3s\n", + Origin: " 1s\n 2s\n 3s\n", + }, + }, + }, + { + YAML: ` +s: > + 1s + 2s + 3s + 4s + 5s +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "s", + Origin: "\ns", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.FoldedType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: ">", + Origin: " >\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "1s\n 2s\n 3s\n4s 5s\n", + Origin: " 1s\n 2s\n 3s\n 4s\n 5s\n", + }, + }, + }, + { + YAML: ` +s: >-3 + 1s + 2s + 3s + 4s + 5s +`, + Tokens: token.Tokens{ + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: "s", + Origin: "\ns", + }, + { + Type: token.MappingValueType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockStructureIndicator, + Value: ":", + Origin: ":", + }, + { + Type: token.FoldedType, + CharacterType: token.CharacterTypeIndicator, + Indicator: token.BlockScalarIndicator, + Value: ">-3", + Origin: " >-3\n", + }, + { + Type: token.StringType, + CharacterType: token.CharacterTypeMiscellaneous, + Indicator: token.NotIndicator, + Value: " 1s\n 2s\n 3s\n 4s\n 5s", + Origin: " 1s\n 2s\n 3s\n 4s\n 5s\n", + }, + }, + }, } for _, test := range tests { t.Run(test.YAML, func(t *testing.T) { tokens := lexer.Tokenize(test.YAML) + tokens.Dump() if len(tokens) != len(test.Tokens) { t.Fatalf("Tokenize(%q) token count mismatch, expected: %d got: %d", test.YAML, len(test.Tokens), len(tokens)) } @@ -2464,6 +2764,10 @@ a: |invalid`, name: "invalid document number", src: ">\n1", }, + { + name: "invalid document header option number", + src: "a: >3\n 1", + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { diff --git a/parser/parser_test.go b/parser/parser_test.go index cf77aa2f..6c824a00 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1126,6 +1126,15 @@ b: - 2 1 | | > 2 | 1 ^ +`, + }, + { + "a: >3\n 1", + ` +[2:3] found invalid token + 1 | a: >3 +> 2 | 1 + ^ `, }, } diff --git a/scanner/context.go b/scanner/context.go index 
54dc0b79..24b96c4d 100644 --- a/scanner/context.go +++ b/scanner/context.go @@ -1,6 +1,9 @@ package scanner import ( + "fmt" + "strconv" + "strings" "sync" "github.com/goccy/go-yaml/token" @@ -8,18 +11,22 @@ import ( // Context context at scanning type Context struct { - idx int - size int - notSpaceCharPos int - notSpaceOrgCharPos int - src []rune - buf []rune - obuf []rune - tokens token.Tokens - isRawFolded bool - isLiteral bool - isFolded bool - literalOpt string + idx int + size int + notSpaceCharPos int + notSpaceOrgCharPos int + src []rune + buf []rune + obuf []rune + tokens token.Tokens + isRawFolded bool + isLiteral bool + isFolded bool + docOpt string + docFirstLineIndentColumn int + docPrevLineIndentColumn int + docLineIndentColumn int + docFoldedNewLine bool } var ( @@ -52,7 +59,11 @@ func (c *Context) clear() { c.isRawFolded = false c.isLiteral = false c.isFolded = false - c.literalOpt = "" + c.docOpt = "" + c.docFirstLineIndentColumn = 0 + c.docLineIndentColumn = 0 + c.docPrevLineIndentColumn = 0 + c.docFoldedNewLine = false } func (c *Context) reset(src []rune) { @@ -64,7 +75,7 @@ func (c *Context) reset(src []rune) { c.isRawFolded = false c.isLiteral = false c.isFolded = false - c.literalOpt = "" + c.docOpt = "" } func (c *Context) resetBuffer() { @@ -74,11 +85,91 @@ func (c *Context) resetBuffer() { c.notSpaceOrgCharPos = 0 } -func (c *Context) breakLiteral() { +func (c *Context) breakDocument() { c.isLiteral = false c.isRawFolded = false c.isFolded = false - c.literalOpt = "" + c.docOpt = "" + c.docFirstLineIndentColumn = 0 + c.docLineIndentColumn = 0 + c.docPrevLineIndentColumn = 0 + c.docFoldedNewLine = false +} + +func (c *Context) updateDocumentIndentColumn() { + indent := c.docFirstLineIndentColumnByDocOpt() + if indent > 0 { + c.docFirstLineIndentColumn = indent + 1 + } +} + +func (c *Context) docFirstLineIndentColumnByDocOpt() int { + trimmed := strings.TrimPrefix(c.docOpt, "-") + trimmed = strings.TrimPrefix(trimmed, "+") + i, _ := strconv.ParseInt(trimmed, 10, 64) + return int(i) +} + +func (c *Context) updateDocumentLineIndentColumn(column int) { + if c.docFirstLineIndentColumn == 0 { + c.docFirstLineIndentColumn = column + } + if c.docLineIndentColumn == 0 { + c.docLineIndentColumn = column + } +} + +func (c *Context) validateDocumentLineIndentColumn() error { + if c.docFirstLineIndentColumnByDocOpt() == 0 { + return nil + } + if c.docFirstLineIndentColumn > c.docLineIndentColumn { + return fmt.Errorf("invalid number of indent is specified in the document header") + } + return nil +} + +func (c *Context) updateDocumentNewLineState() { + c.docPrevLineIndentColumn = c.docLineIndentColumn + c.docFoldedNewLine = true + c.docLineIndentColumn = 0 +} + +func (c *Context) addDocumentIndent(column int) { + if c.docFirstLineIndentColumn == 0 { + return + } + + // If the first line of the document has already been evaluated, the number is treated as the threshold, since the `docFirstLineIndentColumn` is a positive number. + if c.docFirstLineIndentColumn <= column { + // In the folded state, new-line-char is normally treated as space, + // but if the number of indents is different from the number of indents in the first line, + // new-line-char is used as is instead of space. + // Therefore, it is necessary to replace the space already added to buf. + // `c.docFoldedNewLine` is a variable that is set to true for every newline. 
+ if c.isFolded && c.docFoldedNewLine { + c.buf[len(c.buf)-1] = '\n' + c.docFoldedNewLine = false + } + // Since addBuf ignore space character, add to the buffer directly. + c.buf = append(c.buf, ' ') + } +} + +func (c *Context) addDocumentNewLineInFolded(column int) { + if !c.isFolded { + return + } + if !c.docFoldedNewLine { + return + } + if c.docFirstLineIndentColumn == c.docLineIndentColumn && + c.docLineIndentColumn == c.docPrevLineIndentColumn { + // use space as a new line delimiter. + return + } + c.buf[len(c.buf)-1] = '\n' + c.docFoldedNewLine = false } func (c *Context) addToken(tk *token.Token) { @@ -179,7 +270,7 @@ func (c *Context) existsBuffer() bool { func (c *Context) bufferedSrc() []rune { src := c.buf[:c.notSpaceCharPos] - if c.isDocument() && c.literalOpt == "-" { + if c.isDocument() && strings.HasPrefix(c.docOpt, "-") { // remove end '\n' character and trailing empty lines // https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator for { @@ -189,6 +280,13 @@ func (c *Context) bufferedSrc() []rune { } break } + for { + if len(src) > 0 && src[len(src)-1] == ' ' { + src = src[:len(src)-1] + continue + } + break + } } return src } diff --git a/scanner/scanner.go b/scanner/scanner.go index 1b21a462..c6b5d4cc 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "io" + "strconv" "strings" "github.com/goccy/go-yaml/token" @@ -43,7 +44,6 @@ type Scanner struct { prevLineIndentNum int // indentLevel indicates the level of indent depth. This value does not match the column value. indentLevel int - docStartColumn int isFirstCharAtLine bool isAnchor bool startedFlowSequenceNum int @@ -204,9 +204,8 @@ func (s *Scanner) addBufferedTokenIfExists(ctx *Context) { ctx.addToken(s.bufferedToken(ctx)) } -func (s *Scanner) breakLiteral(ctx *Context) { - s.docStartColumn = 0 - ctx.breakLiteral() +func (s *Scanner) breakDocument(ctx *Context) { + ctx.breakDocument() } func (s *Scanner) scanSingleQuote(ctx *Context) (*token.Token, error) { @@ -535,26 +534,32 @@ func (s *Scanner) scanComment(ctx *Context) bool { return true } -func (s *Scanner) trimCommentFromLiteralOpt(text string, header rune) (string, error) { +func (s *Scanner) trimCommentFromDocumentOpt(text string, header rune) (string, error) { idx := strings.Index(text, "#") if idx < 0 { return text, nil } if idx == 0 { return "", ErrInvalidToken( - fmt.Sprintf("invalid literal header %s", text), + fmt.Sprintf("invalid document header %s", text), token.Invalid(string(header)+text, s.pos()), ) } return text[:idx-1], nil } -func (s *Scanner) scanLiteral(ctx *Context, c rune) { +func (s *Scanner) scanDocument(ctx *Context, c rune) error { ctx.addOriginBuf(c) if ctx.isEOS() { + ctx.updateDocumentLineIndentColumn(s.column) + if err := ctx.validateDocumentLineIndentColumn(); err != nil { + invalidTk := token.Invalid(string(ctx.obuf), s.pos()) + s.progressColumn(ctx, 1) + return ErrInvalidToken(err.Error(), invalidTk) + } if ctx.isLiteral { ctx.addBuf(c) - } else if ctx.isFolded && !s.isNewLineChar(c) { + } else if ctx.isFolded { ctx.addBuf(c) } value := ctx.bufferedSrc() @@ -567,19 +572,23 @@ func (s *Scanner) scanLiteral(ctx *Context, c rune) { } else { ctx.addBuf(' ') } + ctx.updateDocumentNewLineState() s.progressLine(ctx) } else if s.isFirstCharAtLine && c == ' ' { - if 0 < s.docStartColumn && s.docStartColumn <= s.column { - ctx.addBuf(c) - } + ctx.addDocumentIndent(s.column) s.progressColumn(ctx, 1) } else { - if s.docStartColumn == 0 { - s.docStartColumn = s.column + 
ctx.updateDocumentLineIndentColumn(s.column) + if err := ctx.validateDocumentLineIndentColumn(); err != nil { + invalidTk := token.Invalid(string(ctx.obuf), s.pos()) + s.progressColumn(ctx, 1) + return ErrInvalidToken(err.Error(), invalidTk) } + ctx.addDocumentNewLineInFolded(s.column) ctx.addBuf(c) s.progressColumn(ctx, 1) } + return nil } func (s *Scanner) scanNewLine(ctx *Context, c rune) { @@ -807,19 +816,36 @@ func (s *Scanner) scanSequence(ctx *Context) bool { return true } -func (s *Scanner) scanLiteralHeader(ctx *Context) (bool, error) { +func (s *Scanner) scanDocumentHeader(ctx *Context) (bool, error) { if ctx.existsBuffer() { return false, nil } - if err := s.scanLiteralHeaderOption(ctx); err != nil { + if err := s.scanDocumentHeaderOption(ctx); err != nil { return false, err } + ctx.updateDocumentIndentColumn() s.progressLine(ctx) return true, nil } -func (s *Scanner) scanLiteralHeaderOption(ctx *Context) error { +func (s *Scanner) validateDocumentHeaderOption(opt string) error { + if len(opt) == 0 { + return nil + } + if opt[0] == '+' || opt[0] == '-' { + opt = opt[1:] + } + if len(opt) == 0 { + return nil + } + if _, err := strconv.ParseInt(opt, 10, 64); err != nil { + return fmt.Errorf("invalid header option: %q", opt) + } + return nil +} + +func (s *Scanner) scanDocumentHeaderOption(ctx *Context) error { header := ctx.currentChar() ctx.addOriginBuf(header) s.progress(ctx, 1) // skip '|' or '>' character @@ -831,64 +857,61 @@ func (s *Scanner) scanLiteralHeaderOption(ctx *Context) error { value := ctx.source(ctx.idx, ctx.idx+idx) opt := strings.TrimRight(value, " ") orgOptLen := len(opt) - opt, err := s.trimCommentFromLiteralOpt(opt, header) + opt, err := s.trimCommentFromDocumentOpt(opt, header) if err != nil { return err } - switch opt { - case "", "+", "-", - "0", "1", "2", "3", "4", "5", "6", "7", "8", "9": - hasComment := len(opt) < orgOptLen - if s.column == 1 { - s.lastDelimColumn = 1 - } - if header == '|' { - if hasComment { - commentLen := orgOptLen - len(opt) - headerPos := strings.Index(string(ctx.obuf), "|") - litBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] - commentBuf := ctx.obuf[len(litBuf):] - ctx.addToken(token.Literal("|"+opt, string(litBuf), s.pos())) - s.column += len(litBuf) - s.offset += len(litBuf) - commentHeader := strings.Index(value, "#") - ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos())) - } else { - ctx.addToken(token.Literal("|"+opt, string(ctx.obuf), s.pos())) - } - ctx.isLiteral = true - } else if header == '>' { - if hasComment { - commentLen := orgOptLen - len(opt) - headerPos := strings.Index(string(ctx.obuf), ">") - foldedBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] - commentBuf := ctx.obuf[len(foldedBuf):] - ctx.addToken(token.Folded(">"+opt, string(foldedBuf), s.pos())) - s.column += len(foldedBuf) - s.offset += len(foldedBuf) - commentHeader := strings.Index(value, "#") - ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos())) - } else { - ctx.addToken(token.Folded(">"+opt, string(ctx.obuf), s.pos())) - } - ctx.isFolded = true - } - s.indentState = IndentStateKeep - ctx.resetBuffer() - ctx.literalOpt = opt - s.progressColumn(ctx, progress) - return nil - default: + if err := s.validateDocumentHeaderOption(opt); err != nil { invalidTk := token.Invalid(string(ctx.obuf), s.pos()) s.progressColumn(ctx, progress) - return ErrInvalidToken(fmt.Sprintf("invalid literal header: %q", opt), invalidTk) + return ErrInvalidToken(err.Error(), invalidTk) + } + 
hasComment := len(opt) < orgOptLen + if s.column == 1 { + s.lastDelimColumn = 1 } + if header == '|' { + if hasComment { + commentLen := orgOptLen - len(opt) + headerPos := strings.Index(string(ctx.obuf), "|") + litBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] + commentBuf := ctx.obuf[len(litBuf):] + ctx.addToken(token.Literal("|"+opt, string(litBuf), s.pos())) + s.column += len(litBuf) + s.offset += len(litBuf) + commentHeader := strings.Index(value, "#") + ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos())) + } else { + ctx.addToken(token.Literal("|"+opt, string(ctx.obuf), s.pos())) + } + ctx.isLiteral = true + } else if header == '>' { + if hasComment { + commentLen := orgOptLen - len(opt) + headerPos := strings.Index(string(ctx.obuf), ">") + foldedBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] + commentBuf := ctx.obuf[len(foldedBuf):] + ctx.addToken(token.Folded(">"+opt, string(foldedBuf), s.pos())) + s.column += len(foldedBuf) + s.offset += len(foldedBuf) + commentHeader := strings.Index(value, "#") + ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos())) + } else { + ctx.addToken(token.Folded(">"+opt, string(ctx.obuf), s.pos())) + } + ctx.isFolded = true + } + s.indentState = IndentStateKeep + ctx.resetBuffer() + ctx.docOpt = opt + s.progressColumn(ctx, progress) + return nil } } text := string(ctx.src[ctx.idx:]) invalidTk := token.Invalid(string(ctx.obuf), s.pos()) s.progressColumn(ctx, len(text)) - return ErrInvalidToken(fmt.Sprintf("invalid literal header: %q", text), invalidTk) + return ErrInvalidToken(fmt.Sprintf("invalid document header: %q", text), invalidTk) } func (s *Scanner) scanMapKey(ctx *Context) bool { @@ -977,9 +1000,11 @@ func (s *Scanner) scan(ctx *Context) error { ctx.addToken(token.String("", "", s.pos())) } } - s.breakLiteral(ctx) + s.breakDocument(ctx) } else { - s.scanLiteral(ctx, c) + if err := s.scanDocument(ctx, c); err != nil { + return err + } continue } } @@ -1027,7 +1052,7 @@ func (s *Scanner) scan(ctx *Context) error { continue } case '|', '>': - scanned, err := s.scanLiteralHeader(ctx) + scanned, err := s.scanDocumentHeader(ctx) if err != nil { return err }
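
Reviewer note (not part of the patch): a minimal sketch of the folded-scalar behavior the new lexer test cases pin down. It reuses the `lexer.Tokenize` / `Tokens.Dump` calls that appear in the test file above plus the top-level `yaml.Unmarshal` API, and the expected folded value is taken directly from the `s: >` / `1s` / `2s` / `3s` case added to lexer_test.go.

package main

import (
	"fmt"

	"github.com/goccy/go-yaml"
	"github.com/goccy/go-yaml/lexer"
)

func main() {
	// Same shape as the new lexer test case: the middle content line is
	// indented one column deeper than the first line of the folded scalar.
	src := "s: >\n 1s\n  2s\n 3s\n"

	// Per the test above, the folded string token's value is "1s\n 2s\n3s\n":
	// the more-indented line keeps its surrounding newlines instead of being
	// folded into spaces.
	lexer.Tokenize(src).Dump()

	var v map[string]string
	if err := yaml.Unmarshal([]byte(src), &v); err != nil {
		panic(err)
	}
	fmt.Printf("%q\n", v["s"])
}

This is the behavior that `docFoldedNewLine`, `addDocumentIndent`, and `addDocumentNewLineInFolded` cooperate on: folding normally replaces a line break with a space, but when a line's indent column differs from the first line's (or the previous line's), the space already written to the buffer is swapped back to a real newline.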
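
A second sketch, also not part of the patch, for the new indent validation (`updateDocumentIndentColumn` plus `validateDocumentLineIndentColumn`): an explicit indentation indicator larger than the actual indentation of the block-scalar body is now reported as a parse error, matching the `a: >3\n 1` cases added to lexer_test.go and parser_test.go. `yaml.Unmarshal` is only assumed here as a convenient entry point; the rendered error is whatever the parser produces (the parser test expects a "found invalid token" message pointing at line 2).

package main

import (
	"fmt"

	"github.com/goccy/go-yaml"
)

func main() {
	// The header declares an indentation indicator of 3, but the body line
	// " 1" is indented by a single space, so scanning is expected to fail.
	var v map[string]string
	err := yaml.Unmarshal([]byte("a: >3\n 1"), &v)
	fmt.Println(err) // expect an "invalid token" style error at line 2
}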