Skip to content

Commit

Permalink
Exposed codePoint combinator
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mie6 committed Jun 24, 2023
1 parent c6ee6aa commit e8b0e13
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 15 deletions.
17 changes: 8 additions & 9 deletions parsley/shared/src/main/scala/parsley/character.scala
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ import parsley.internal.deepembedding.singletons
* [[https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedGeneralCategory.txt Unicode Character Database]]''.
*/
object character {
// TODO: in @note: or [[charUtf16 `charUtf16`]]
/** This combinator tries to parse a single specific character `c` from the input.
*
* Attempts to read the given character `c` from the input stream at the current
Expand All @@ -97,7 +96,7 @@ object character {
*
* @param c the character to parse
* @return a parser that tries to read a single `c`, or fails.
* @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[string `string`]].
* @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[string `string`]] or [[codePoint `codePoint`]].
* @group core
*/
def char(c: Char): Parsley[Char] = char(c, NotConfigured)
Expand All @@ -107,25 +106,25 @@ object character {
/** This combinator tries to parse a single specific codepoint `c` from the input.
*
* Like [[char `char`]], except it may consume two characters from the input,
* in the case where the codepoint is greater than `0xffff`. This is parsed ''atomically''
* in the case where the code-point is greater than `0xffff`. This is parsed ''atomically''
* so that no input is consumed if the first half of the codepoint is parsed and the second
* is not.
*
* @example {{{
* scala> import parsley.character.charUtf16
* scala> char(0x1F643).parse("")
* scala> import parsley.character.codePoint
* scala> codePoint(0x1F643).parse("")
* val res0 = Failure(..)
* scala> char(0x1F643).parse("🙂")
* scala> codePoint(0x1F643).parse("🙃")
* val res1 = Success(0x1F643)
* scala> char(0x1F643).parse("b🙂")
* scala> codePoint(0x1F643).parse("b🙃")
* val res2 = Failure(..)
* }}}
*
* @param c the codepoint to parse
* @param c the code-point to parse
* @return a parser that tries to read the single code-point `c`, or fails.
* @group core
*/
private [parsley] def charUtf16(c: Int): Parsley[Int] = { //TODO: release along with the utf combinators
def codePoint(c: Int): Parsley[Int] = {
if (Character.isBmpCodePoint(c)) char(c.toChar) #> c
else new Parsley(new singletons.SupplementaryCharTok(c, NotConfigured))
}
Expand Down
8 changes: 4 additions & 4 deletions parsley/shared/src/test/scala/parsley/CharTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,15 @@ class CharTests extends ParsleyTest {
}

"charUtf16" should "handle BMP characters" in {
cases(charUtf16('a'))("a" -> Some('a'))
cases(charUtf16('λ'))("λ" -> Some('λ'))
cases(codePoint('a'))("a" -> Some('a'))
cases(codePoint('λ'))("λ" -> Some('λ'))
}

it should "handle multi-character codepoints" in {
cases(charUtf16(0x1F642))("🙂" -> Some(0x1F642))
cases(codePoint(0x1F642))("🙂" -> Some(0x1F642))
}

it should "handle multi-character codepoints atomically on fail" in {
cases(charUtf16(0x1F642) <|> charUtf16(0x1F643))("🙃" -> Some(0x1F643))
cases(codePoint(0x1F642) <|> codePoint(0x1F643))("🙃" -> Some(0x1F643))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
package parsley.token.symbol

import parsley.Parsley, Parsley.{attempt, notFollowedBy, unit}
import parsley.character.{char, charUtf16, string, strings}
import parsley.character.{char, codePoint, string, strings}
import parsley.errors.combinator.{ErrorMethods, empty, amend}
import parsley.token.descriptions.{NameDesc, SymbolDesc}
import parsley.token.errors.ErrorConfig
Expand All @@ -22,7 +22,7 @@ private [token] class OriginalSymbol(nameDesc: NameDesc, symbolDesc: SymbolDesc,
override def apply(name: Char): Parsley[Unit] = char(name).void

private lazy val identLetter = nameDesc.identifierLetter.toNative
private def caseChar(c: Int) = if (Character.isLetter(c)) charUtf16(Character.toLowerCase(c)) <|> charUtf16(Character.toUpperCase(c)) else charUtf16(c)
private def caseChar(c: Int) = if (Character.isLetter(c)) codePoint(Character.toLowerCase(c)) <|> codePoint(Character.toUpperCase(c)) else codePoint(c)
private def caseString(name: String): Parsley[Unit] = {
if (symbolDesc.caseSensitive) string(name).void
else {
Expand Down

0 comments on commit e8b0e13

Please sign in to comment.