From b7d0451b56fde1951811498cf6c94cdf230db3fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Bia=C5=82y?=
Date: Wed, 17 Jul 2024 14:00:40 +0200
Subject: [PATCH] added escaping of all the non-printable chars as required by spec

Well-formed surrogate pairs are left untouched: together they encode a code
point above U+FFFF, which the YAML printable set allows. Only unpaired
surrogates are escaped.

---
 .../org/virtuslab/yaml/YamlEncoder.scala      | 54 ++++++++++++++++++-
 .../virtuslab/yaml/BaseYamlEncoderSuite.scala | 11 ++++
 2 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/core/shared/src/main/scala/org/virtuslab/yaml/YamlEncoder.scala b/core/shared/src/main/scala/org/virtuslab/yaml/YamlEncoder.scala
index 3f4cf8da6..a3860a010 100644
--- a/core/shared/src/main/scala/org/virtuslab/yaml/YamlEncoder.scala
+++ b/core/shared/src/main/scala/org/virtuslab/yaml/YamlEncoder.scala
@@ -12,6 +12,58 @@ trait YamlEncoder[T] { self =>
 }
 
 object YamlEncoder extends YamlEncoderCrossCompanionCompat {
+  // Characters inside the otherwise disallowed control ranges that YAML still treats as
+  // printable: TAB, LF, CR and NEL.
+  private val allowedExceptions = Set('\u0009', '\u000A', '\u000D', '\u0085')
+
+  /**
+   * Tells whether a single UTF-16 code unit lies outside the YAML printable character set.
+   *
+   * Surrogates are reported as non-printable here because one code unit alone cannot show
+   * whether it belongs to a well-formed pair.
+   *
+   * @param c the UTF-16 code unit to classify
+   * @return `true` when `c` is not printable on its own
+   */
+  def isCharNonPrintable(c: Char): Boolean = {
+    if (allowedExceptions.contains(c)) false
+    else {
+      (c >= '\u0000' && c <= '\u001F') || // C0 control block (except allowed exceptions above)
+      c == '\u007F' || // DEL
+      (c >= '\u0080' && c <= '\u009F') || // C1 control block (except for NEL \u0085)
+      (c >= '\uD800' && c <= '\uDFFF') || // Surrogate block
+      c == '\uFFFE' || c == '\uFFFF' // Disallowed specific characters
+    }
+  }
+
+  // True when the code unit at `index` is one half of a well-formed high/low surrogate pair.
+  private def isPairedSurrogate(scalar: String, index: Int): Boolean = {
+    val c = scalar.charAt(index)
+    val opensPair = Character.isHighSurrogate(c) &&
+      index + 1 < scalar.length && Character.isLowSurrogate(scalar.charAt(index + 1))
+    val closesPair = Character.isLowSurrogate(c) &&
+      index > 0 && Character.isHighSurrogate(scalar.charAt(index - 1))
+    opensPair || closesPair
+  }
+
+  /**
+   * Replaces every non-printable code unit of `scalar` with a backslash-u escape made of four
+   * uppercase hexadecimal digits.
+   *
+   * Well-formed surrogate pairs are copied unchanged: together they encode a code point above
+   * U+FFFF, which YAML accepts as printable. Only unpaired surrogates are escaped.
+   *
+   * @param scalar the text to escape
+   * @return `scalar` with every non-printable code unit replaced by its escape sequence
+   */
+  def escapeSpecialCharacters(scalar: String): String =
+    scalar.indices.map { index =>
+      val char = scalar.charAt(index)
+      if (isCharNonPrintable(char) && !isPairedSurrogate(scalar, index))
+        f"\\u${char.toInt}%04X"
+      else
+        char.toString
+    }.mkString
 
   def apply[T](implicit self: YamlEncoder[T]): YamlEncoder[T] = self
 
@@ -23,7 +75,7 @@ object YamlEncoder extends YamlEncoderCrossCompanionCompat {
   implicit def forFloat: YamlEncoder[Float] = v => Node.ScalarNode(v.toString)
   implicit def forDouble: YamlEncoder[Double] = v => Node.ScalarNode(v.toString)
   implicit def forBoolean: YamlEncoder[Boolean] = v => Node.ScalarNode(v.toString)
-  implicit def forString: YamlEncoder[String] = v => Node.ScalarNode(v)
+  implicit def forString: YamlEncoder[String] = v => Node.ScalarNode(escapeSpecialCharacters(v))
 
   implicit def forOption[T](implicit encoder: YamlEncoder[T]): YamlEncoder[Option[T]] = {
     case Some(t) => encoder.asNode(t)
diff --git a/core/shared/src/test/scala-3/org/virtuslab/yaml/BaseYamlEncoderSuite.scala b/core/shared/src/test/scala-3/org/virtuslab/yaml/BaseYamlEncoderSuite.scala
index 5fdf991d8..4ac1a6e0b 100644
--- a/core/shared/src/test/scala-3/org/virtuslab/yaml/BaseYamlEncoderSuite.scala
+++ b/core/shared/src/test/scala-3/org/virtuslab/yaml/BaseYamlEncoderSuite.scala
@@ -169,3 +169,14 @@ class YamlEncoderSuite extends munit.FunSuite:
 
     assertEquals(data.asYaml, expected)
   }
+
+  test("encoding of non-printable characters") {
+    // yaml ends with newline
+    assertEquals(Char.MinValue.toString.asYaml, "\\u0000\n")
+    assertEquals(Char.MaxValue.toString.asYaml, "\\uFFFF\n")
+  }
+
+  test("well-formed surrogate pairs are kept, lone surrogates are escaped") {
+    assertEquals(YamlEncoder.escapeSpecialCharacters("\uD83D\uDE00"), "\uD83D\uDE00")
+    assertEquals(YamlEncoder.escapeSpecialCharacters("\uD83D"), "\\uD83D")
+  }