From e8b0e13efcb232b6aaed61bd302bdb65ecbd98f1 Mon Sep 17 00:00:00 2001 From: Jamie Willis Date: Sun, 25 Jun 2023 00:27:04 +0100 Subject: [PATCH] Exposed `codePoint` combinator --- .../src/main/scala/parsley/character.scala | 17 ++++++++--------- .../src/test/scala/parsley/CharTests.scala | 8 ++++---- .../parsley/token/symbol/OriginalSymbol.scala | 4 ++-- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/parsley/shared/src/main/scala/parsley/character.scala b/parsley/shared/src/main/scala/parsley/character.scala index aaaed4081..bbea8ce18 100644 --- a/parsley/shared/src/main/scala/parsley/character.scala +++ b/parsley/shared/src/main/scala/parsley/character.scala @@ -78,7 +78,6 @@ import parsley.internal.deepembedding.singletons * [[https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedGeneralCategory.txt Unicode Character Database]]''. */ object character { - // TODO: in @note: or [[charUtf16 `charUtf16`]] /** This combinator tries to parse a single specific character `c` from the input. * * Attempts to read the given character `c` from the input stream at the current @@ -97,7 +96,7 @@ object character { * * @param c the character to parse * @return a parser that tries to read a single `c`, or fails. - * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[string `string`]]. + * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[string `string`]] or [[codePoint `codePoint`]]. * @group core */ def char(c: Char): Parsley[Char] = char(c, NotConfigured) @@ -107,25 +106,25 @@ object character { /** This combinator tries to parse a single specific codepoint `c` from the input. * * Like [[char `char`]], except it may consume two characters from the input, - * in the case where the codepoint is greater than `0xffff`. This is parsed ''atomically'' + * in the case where the code-point is greater than `0xffff`. 
This is parsed ''atomically'' * so that no input is consumed if the first half of the codepoint is parsed and the second * is not. * * @example {{{ - * scala> import parsley.character.charUtf16 - * scala> char(0x1F643).parse("") + * scala> import parsley.character.codePoint + * scala> codePoint(0x1F643).parse("") * val res0 = Failure(..) - * scala> char(0x1F643).parse("🙂") + * scala> codePoint(0x1F643).parse("🙂") * val res1 = Success(0x1F643) - * scala> char(0x1F643).parse("b🙂") + * scala> codePoint(0x1F643).parse("b🙂") * val res2 = Failure(..) * }}} * - * @param c the codepoint to parse + * @param c the code-point to parse * @return * @group core */ - private [parsley] def charUtf16(c: Int): Parsley[Int] = { //TODO: release along with the utf combinators + def codePoint(c: Int): Parsley[Int] = { if (Character.isBmpCodePoint(c)) char(c.toChar) #> c else new Parsley(new singletons.SupplementaryCharTok(c, NotConfigured)) } diff --git a/parsley/shared/src/test/scala/parsley/CharTests.scala b/parsley/shared/src/test/scala/parsley/CharTests.scala index 4bb3af767..becb6f32c 100644 --- a/parsley/shared/src/test/scala/parsley/CharTests.scala +++ b/parsley/shared/src/test/scala/parsley/CharTests.scala @@ -119,15 +119,15 @@ class CharTests extends ParsleyTest { } "charUtf16" should "handle BMP characters" in { - cases(charUtf16('a'))("a" -> Some('a')) - cases(charUtf16('λ'))("λ" -> Some('λ')) + cases(codePoint('a'))("a" -> Some('a')) + cases(codePoint('λ'))("λ" -> Some('λ')) } it should "handle multi-character codepoints" in { - cases(charUtf16(0x1F642))("🙂" -> Some(0x1F642)) + cases(codePoint(0x1F642))("🙂" -> Some(0x1F642)) } it should "handle multi-character codepoints atomically on fail" in { - cases(charUtf16(0x1F642) <|> charUtf16(0x1F643))("🙃" -> Some(0x1F643)) + cases(codePoint(0x1F642) <|> codePoint(0x1F643))("🙃" -> Some(0x1F643)) } } diff --git a/parsley/shared/src/test/scala/parsley/token/symbol/OriginalSymbol.scala 
b/parsley/shared/src/test/scala/parsley/token/symbol/OriginalSymbol.scala index a92340732..8abe88939 100644 --- a/parsley/shared/src/test/scala/parsley/token/symbol/OriginalSymbol.scala +++ b/parsley/shared/src/test/scala/parsley/token/symbol/OriginalSymbol.scala @@ -4,7 +4,7 @@ package parsley.token.symbol import parsley.Parsley, Parsley.{attempt, notFollowedBy, unit} -import parsley.character.{char, charUtf16, string, strings} +import parsley.character.{char, codePoint, string, strings} import parsley.errors.combinator.{ErrorMethods, empty, amend} import parsley.token.descriptions.{NameDesc, SymbolDesc} import parsley.token.errors.ErrorConfig @@ -22,7 +22,7 @@ private [token] class OriginalSymbol(nameDesc: NameDesc, symbolDesc: SymbolDesc, override def apply(name: Char): Parsley[Unit] = char(name).void private lazy val identLetter = nameDesc.identifierLetter.toNative - private def caseChar(c: Int) = if (Character.isLetter(c)) charUtf16(Character.toLowerCase(c)) <|> charUtf16(Character.toUpperCase(c)) else charUtf16(c) + private def caseChar(c: Int) = if (Character.isLetter(c)) codePoint(Character.toLowerCase(c)) <|> codePoint(Character.toUpperCase(c)) else codePoint(c) private def caseString(name: String): Parsley[Unit] = { if (symbolDesc.caseSensitive) string(name).void else {