From 146b5e7e9d95d95af157979f0ef9884cb0c89609 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wro=C5=84ski?= <46607934+lwronski@users.noreply.github.com> Date: Tue, 7 Sep 2021 09:57:18 +0200 Subject: [PATCH] Parse new lines for plain scalar style (#66) * Parse new lines for plain scalar style --- .../yaml/internal/load/reader/ReaderCtx.scala | 25 +++---- .../internal/load/reader/ReaderState.scala | 15 +++-- .../yaml/internal/load/reader/Tokenizer.scala | 65 +++++++++---------- .../yaml/internal/load/parse/ScalarSpec.scala | 55 ++++++++++++++++ 4 files changed, 107 insertions(+), 53 deletions(-) diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala index 8ae13cb7e..8285d196b 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderCtx.scala @@ -6,7 +6,6 @@ import org.virtuslab.yaml.internal.load.reader.StringReader import scala.annotation.tailrec import scala.collection.mutable - import token.Token case class ReaderCtx( stateStack: mutable.Stack[ReaderState], @@ -38,10 +37,13 @@ case class ReaderCtx( closeOpenedCollectionMapping(indent) case _ => () + def getIndentOfLatestCollection(): Option[Int] = + stateStack.headOption.map(_.indent) + def appendState(state: ReaderState): Unit = stateStack.push(state) def closeOpenedFlowMapping(): List[Token] = stateStack.headOption match - case Some(ReaderState.FlowMapping) => + case Some(ReaderState.FlowMapping(_)) => stateStack.pop() List(Token.FlowMappingEnd(reader.pos())) case _ => @@ -52,7 +54,7 @@ case class ReaderCtx( case Some(ReaderState.Sequence(_)) => stateStack.pop() List(Token.SequenceEnd(reader.pos())) - case Some(ReaderState.FlowSequence) => + case Some(ReaderState.FlowSequence(_)) => stateStack.pop() List(Token.FlowSequenceEnd(reader.pos())) case _ => @@ -70,15 
+72,16 @@ case class ReaderCtx( def isAllowedSpecialCharacter(char: Char): Boolean = stateStack.headOption match - case Some(ReaderState.FlowMapping) if char == '}' => false - case Some(ReaderState.FlowMapping) | Some(ReaderState.FlowSequence) if char == ',' => false - case Some(ReaderState.FlowSequence) if char == ']' => false - case _ => true + case Some(ReaderState.FlowMapping(_)) if char == '}' => false + case Some(ReaderState.FlowMapping(_)) | Some(ReaderState.FlowSequence(_)) if char == ',' => + false + case Some(ReaderState.FlowSequence(_)) if char == ']' => false + case _ => true def isFlowMapping(): Boolean = stateStack.headOption match - case Some(ReaderState.FlowMapping) => true - case _ => false + case Some(ReaderState.FlowMapping(_)) => true + case _ => false def closeOpenedScopes(): List[Token] = @tailrec @@ -94,9 +97,9 @@ case class ReaderCtx( loop(Nil) - def parseDocumentStart(): List[Token] = + def parseDocumentStart(indent: Int): List[Token] = val closedScopes = closeOpenedScopes() - stateStack.push(ReaderState.Document) + stateStack.push(ReaderState.Document(indent)) closedScopes :+ Token.DocumentStart(reader.pos()) def parseDocumentEnd(): List[Token] = diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderState.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderState.scala index 1c15a8da0..9a8360213 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderState.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/ReaderState.scala @@ -1,10 +1,11 @@ package org.virtuslab.yaml.internal.load.reader -sealed trait ReaderState +sealed trait ReaderState: + def indent: Int + case object ReaderState: - case object Stream extends ReaderState - case object Document extends ReaderState - final case class Mapping(indent: Int) extends ReaderState - final case class Sequence(indent: Int) extends ReaderState - case object FlowMapping extends 
ReaderState - case object FlowSequence extends ReaderState + final case class Document(indent: Int) extends ReaderState + final case class Mapping(indent: Int) extends ReaderState + final case class Sequence(indent: Int) extends ReaderState + final case class FlowMapping(indent: Int) extends ReaderState + final case class FlowSequence(indent: Int) extends ReaderState diff --git a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala index 000912dcd..e204508a8 100644 --- a/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala +++ b/yaml/shared/src/main/scala/org/virtuslab/yaml/internal/load/reader/Tokenizer.scala @@ -13,9 +13,8 @@ trait Tokenizer: private[yaml] class Scanner(str: String) extends Tokenizer { - private val ctx = ReaderCtx.init(str) - private val in = ctx.reader - private var indent = 0 + private val ctx = ReaderCtx.init(str) + private val in = ctx.reader override def peekToken(): Token = ctx.tokens.headOption match case Some(token) => token @@ -47,7 +46,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { private def parseDocumentStart(): List[Token] = in.skipN(4) - ctx.parseDocumentStart() + ctx.parseDocumentStart(in.column) private def isDocumentEnd = in.peekN(3) == "..." 
&& in.peek(3).exists(_.isWhitespace) @@ -58,7 +57,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { private def parseFlowSequenceStart() = in.skipCharacter() - ctx.appendState(ReaderState.FlowSequence) + ctx.appendState(ReaderState.FlowSequence(in.column)) List(FlowSequenceStart(in.pos())) private def parseFlowSequenceEnd() = @@ -67,7 +66,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { private def parseFlowMappingStart() = in.skipCharacter() - ctx.appendState(ReaderState.FlowMapping) + ctx.appendState(ReaderState.FlowMapping(in.column)) List(FlowMappingStart(in.pos())) private def parseFlowMappingEnd() = @@ -75,13 +74,12 @@ private[yaml] class Scanner(str: String) extends Tokenizer { ctx.closeOpenedFlowMapping() private def parseBlockSequence() = - ctx.closeOpenedCollectionSequences(indent) - if (ctx.shouldParseSequenceEntry(indent)) then + ctx.closeOpenedCollectionSequences(in.column) + if (ctx.shouldParseSequenceEntry(in.column)) then in.skipCharacter() - indent += 1 getNextTokens() else - ctx.appendState(ReaderState.Sequence(indent)) + ctx.appendState(ReaderState.Sequence(in.column)) List(SequenceStart(in.pos())) private def parseDoubleQuoteValue(): Token = @@ -111,13 +109,11 @@ private[yaml] class Scanner(str: String) extends Tokenizer { */ private def parseBlockHeader(): Unit = while (in.peek() == Some(' ')) { - indent += 1 in.skipCharacter() } if in.isNewline then in.skipCharacter() - indent = 0 parseBlockHeader() /** @@ -142,7 +138,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { parseBlockHeader() - val foldedIndent = indent + val foldedIndent = in.column skipUntilNextIndent(foldedIndent) @tailrec @@ -151,7 +147,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { case Some('\n') => sb.append(in.read()) skipUntilNextIndent(foldedIndent) - if (!in.isWhitespace && indent != foldedIndent) then sb.result() + if (!in.isWhitespace && in.column != foldedIndent) then sb.result() else readLiteral() case 
Some(char) => sb.append(in.read()) @@ -170,7 +166,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { val chompingIndicator = parseChompingIndicator() parseBlockHeader() - val foldedIndent = indent + val foldedIndent = in.column skipUntilNextIndent(foldedIndent) def chompedEmptyLines() = @@ -192,7 +188,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer { } else { in.skipCharacter() skipUntilNextIndent(foldedIndent) - if (!in.isWhitespace && indent != foldedIndent) then sb.result() + if (!in.isWhitespace && in.column != foldedIndent) then sb.result() else sb.append(" ") readFolded() @@ -235,22 +231,27 @@ private[yaml] class Scanner(str: String) extends Tokenizer { } private def parseScalarValue(): Token = { - val sb = new StringBuilder + val sb = new StringBuilder + val scalarIndent = in.column + def readScalar(): String = in.peek() match - case Some(':') - if in.peekNext() == Some(' ') || in.peekNext() == Some('\n') || in - .peekNext() == Some('\r') => - sb.result() + case Some(':') if in.isNextWhitespace => sb.result() case Some(char) if !ctx.isAllowedSpecialCharacter(char) => sb.result() case Some(' ') if in.peekNext() == Some('#') => sb.result() - case Some('\n') | Some('\r') | None => sb.result() + case _ if in.isNewline => + skipUntilNextChar() + sb.append(' ') + if (ctx.getIndentOfLatestCollection().exists(in.column > _)) readScalar() + else sb.result() case Some(char) => sb.append(in.read()) readScalar() + case None => sb.result() - val pos = in.pos() - Scalar(readScalar().trim, ScalarStyle.Plain, pos) + val pos = in.pos() + val scalar = readScalar() + Scalar(scalar.trim, ScalarStyle.Plain, pos) } private def fetchValue(): List[Token] = @@ -264,35 +265,29 @@ private[yaml] class Scanner(str: String) extends Tokenizer { in.peek() match case Some(':') => - ctx.closeOpenedCollectionMapping(indent) + ctx.closeOpenedCollectionMapping(scalar.pos.column) in.skipCharacter() - if (ctx.shouldParseMappingEntry(indent)) then + if 
(ctx.shouldParseMappingEntry(scalar.pos.column)) then List(Token.Key(scalar.pos), scalar, Token.Value(scalar.pos)) else if (!ctx.isFlowMapping()) then - ctx.appendState(ReaderState.Mapping(indent)) + ctx.appendState(ReaderState.Mapping(scalar.pos.column)) List(MappingStart(scalar.pos), Token.Key(scalar.pos), scalar, Token.Value(scalar.pos)) else List(scalar) case _ => List(scalar) def skipUntilNextToken(): Unit = - while (in.peek() == Some(' ')) do - indent += 1 - in.skipCharacter() + while (in.peek() == Some(' ')) do in.skipCharacter() if in.peek() == Some('#') then skipComment() if (in.isNewline) then { in.skipCharacter() - indent = 0 skipUntilNextToken() } def skipUntilNextIndent(indentBlock: Int): Unit = - indent = 0 - while (in.peek() == Some(' ') && indent < indentBlock) do - indent += 1 - in.skipCharacter() + while (in.peek() == Some(' ') && in.column < indentBlock) do in.skipCharacter() def skipUntilNextChar() = while (in.isWhitespace) do in.skipCharacter() diff --git a/yaml/shared/src/test/scala/org/virtuslab/yaml/internal/load/parse/ScalarSpec.scala b/yaml/shared/src/test/scala/org/virtuslab/yaml/internal/load/parse/ScalarSpec.scala index 7576dd9a1..b1088ba4b 100644 --- a/yaml/shared/src/test/scala/org/virtuslab/yaml/internal/load/parse/ScalarSpec.scala +++ b/yaml/shared/src/test/scala/org/virtuslab/yaml/internal/load/parse/ScalarSpec.scala @@ -48,6 +48,61 @@ class ScalarSpec extends BaseParseSuite: assertEventsEquals(events, expectedEvents) } + test("should parse plain scalar with new lines") { + val yaml = + s"""description: new lines + | rest.
+ |properties: object + |""".stripMargin + + val reader = Scanner(yaml) + val events = ParserImpl.getEvents(reader) + + val expectedEvents = List( + StreamStart, + DocumentStart(), + MappingStart(), + Scalar("description", ScalarStyle.Plain), + Scalar( + "new lines rest.", + ScalarStyle.Plain + ), + Scalar("properties", ScalarStyle.Plain), + Scalar("object", ScalarStyle.Plain), + MappingEnd(), + DocumentEnd(), + StreamEnd + ) + + assertEventsEquals(events, expectedEvents) + } + + test("should parse multiline plain scalar value") { + val yaml = + s"""|description: multiline + | plain + | scalar + |type: string + |""".stripMargin + + val reader = Scanner(yaml) + val events = ParserImpl.getEvents(reader) + + val expectedEvents = List( + StreamStart, + DocumentStart(), + MappingStart(), + Scalar("description", ScalarStyle.Plain), + Scalar("multiline plain scalar", ScalarStyle.Plain), + Scalar("type", ScalarStyle.Plain), + Scalar("string", ScalarStyle.Plain), + MappingEnd(), + DocumentEnd(), + StreamEnd + ) + assertEventsEquals(events, expectedEvents) + } + test("should parse single quote scalar value with multiline") { val yaml = s"""description: 'multiline