Skip to content

Commit

Permalink
Position improvements (#102)
Browse files Browse the repository at this point in the history
* Initial work, uncovered bug in string adjusters

* Fixed a bug with offset adjustment compression in strings

* Factored out build
  • Loading branch information
j-mie6 authored Nov 26, 2021
1 parent e6cbe87 commit 2686a55
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 21 deletions.
5 changes: 2 additions & 3 deletions src/main/scala/parsley/internal/machine/Context.scala
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ private [parsley] final class Context(private [machine] var instrs: Array[Instr]
val exchange = new Array[Instr](preserve.size)
preserveInstrs(preserve, exchange, preserve.size - 1)
calls = new CallStack(pc + 1, instrs, preserve, exchange, at, calls)
for (idx <- preserve) instrs(idx) = instrs(idx).copy
pc = at
depth += 1
}
Expand Down Expand Up @@ -255,8 +254,8 @@ private [parsley] final class Context(private [machine] var instrs: Array[Instr]
private [machine] def moreInput: Boolean = offset < inputsz
// Advances the tracked position (`line`, `col`) past the character `c`:
//   * a newline moves to column 1 of the following line;
//   * a tab moves to the next tab stop, i.e. the next column of the form 4k + 1;
//   * any other character moves one column to the right.
private [machine] def updatePos(c: Char): Unit = c match {
    case '\n' => line += 1; col = 1
    // Round `col` up to a multiple of 4, then set the low bit (+1). This yields the same
    // column as `col += 4 - ((col - 1) & 3)` and as `((col - 1) | 3) + 2`; only one arm per
    // pattern is kept, since a repeated pattern further down the match can never be reached.
    case '\t' => col = ((col + 3) & -4) | 1
    case _    => col += 1
}
private [machine] def consumeChar(): Char = {
val c = nextChar
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,19 @@ private [internal] final class StringTok private [instructions] (s: String, x: A
})
private [this] val cs = s.toCharArray
private [this] val sz = cs.length
// Produces the pair (column adjuster, line adjuster) from the totals gathered while scanning
// the string: `col` is the accumulated column value, `line` the number of lines to add and
// `tabprefix`, when defined, the offset used to re-align the incoming column.
def makeAdjusters(col: Int, line: Int, tabprefix: Option[Int]): (Int => Int, Int => Int) = {
    if (line > 0) {
        // The column no longer depends on where parsing started; the line is shifted by `line`.
        val fixedColumn: Int => Int = _ => col
        val shiftedLine: Int => Int = current => current + line
        (fixedColumn, shiftedLine)
    }
    else {
        // No line change: the line adjuster is the identity and only the column moves.
        val adjustColumn: Int => Int = tabprefix.fold[Int => Int](current => current + col) { prefix =>
            val outer = 4 + col + prefix
            val inner = prefix - 1
            current => outer + current - ((current + inner) & 3)
        }
        (adjustColumn, (current: Int) => current)
    }
}
// TODO: This could be improved by traversing back to front?
@tailrec def compute(i: Int, col: Int, line: Int)(implicit tabprefix: Option[Int]): (Int => Int, Int => Int) = {

@tailrec private [this] def compute(i: Int, lineAdjust: Int, colAdjust: StringTok.Adjust): (Int => Int, Int => Int) = {
if (i < cs.length) cs(i) match {
case '\n' => compute(i + 1, 1, line + 1)(Some(0))
case '\t' if tabprefix.isEmpty => compute(i + 1, 0, line)(Some(col))
case '\t' => compute(i + 1, col + 4 - ((col - 1) & 3), line)
case _ => compute(i + 1, col + 1, line)
case '\n' => compute(i + 1, lineAdjust + 1, new StringTok.Set)
case '\t' => compute(i + 1, lineAdjust, colAdjust.tab)
case _ => colAdjust.next; compute(i + 1, lineAdjust, colAdjust)
}
else makeAdjusters(col, line, tabprefix)
else build(lineAdjust, colAdjust)
}
// Turns the scan results into the pair (line adjuster, column adjuster). The line adjuster is
// the identity when no lines were counted, otherwise it adds `lineAdjust`; the column adjuster
// is whatever `colAdjust` produces.
private [this] def build(lineAdjust: Int, colAdjust: StringTok.Adjust): (Int => Int, Int => Int) = {
    val adjustLine: Int => Int = lineAdjust match {
        case 0     => line => line
        case extra => _ + extra
    }
    (adjustLine, colAdjust.toAdjuster)
}
private [this] val (colAdjust, lineAdjust) = compute(0, 0, 0)(None)
private [this] val (lineAdjust, colAdjust) = compute(0, 0, new StringTok.Offset)

@tailrec private def go(ctx: Context, i: Int, j: Int): Unit = {
if (j < sz && i < ctx.inputsz && ctx.input.charAt(i) == cs(j)) go(ctx, i + 1, j + 1)
Expand Down Expand Up @@ -243,6 +236,36 @@ private [internal] object CharTok {

private [internal] object StringTok {
    def apply(s: String, expected: Option[String]): StringTok = new StringTok(s, s, expected)

    // Accumulator describing how a sequence of characters changes the column.
    private [StringTok] abstract class Adjust {
        // Registers a tab and returns the accumulator to carry on with
        private [StringTok] def tab: Adjust
        // Registers a single ordinary character
        private [StringTok] def next: Unit
        // The function mapping a starting column to the resulting column
        private [StringTok] def toAdjuster: Int => Int
    }

    // Absolute column: starts at column 1 and tracks the exact column from there, so the
    // resulting adjuster ignores the column it is given
    private [StringTok] class Set extends Adjust {
        private [this] var column = 1
        private [StringTok] def tab: Set = {
            // round up to a multiple of 4, then +1
            column = ((column + 3) & -4) | 1
            this
        }
        private [StringTok] def next: Unit = column += 1
        private [StringTok] def toAdjuster: Int => Int = _ => column
    }

    // Plain relative shift: nothing is known about alignment yet, only a count of characters
    private [StringTok] class Offset extends Adjust {
        private [this] var shift = 0
        private [StringTok] def tab: OffsetAlignOffset = new OffsetAlignOffset(shift)
        private [StringTok] def next: Unit = shift += 1
        private [StringTok] def toAdjuster: Int => Int = {
            if (shift == 0) col => col
            else col => col + shift
        }
    }

    // Shift by `firstBy`, align to the tab stop, then shift again by a count kept relative
    // to that aligned column
    private [StringTok] class OffsetAlignOffset(firstBy: Int) extends Adjust {
        private [this] var trailing = 0
        private [StringTok] def tab: OffsetAlignOffset = {
            // `trailing` is measured from an aligned column, so the next stop is the next
            // multiple of 4 strictly above it
            trailing = (trailing | 3) + 1
            this
        }
        private [StringTok] def next: Unit = trailing += 1
        private [StringTok] def toAdjuster: Int => Int = { col =>
            // round the shifted column up to a multiple of 4, then +1
            val aligned = ((col + firstBy + 3) & -4) | 1
            aligned + trailing
        }
    }
}

private [internal] object CharTokFastPerform {
Expand Down
22 changes: 21 additions & 1 deletion src/test/scala/parsley/CharTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import parsley.implicits.character.{charLift, stringLift}
import scala.language.implicitConversions

class CharTests extends ParsleyTest {
// Parses `initialCol` dots followed by `str` (each with its own `string` parser) and
// returns the position reported once `str` has been consumed.
def stringPositionCheck(initialCol: Int, str: String) = {
    val padding = "." * initialCol
    val parser = string(padding) *> string(str) *> pos
    parser.parse(padding + str)
}

"string" should "consume succeed if it is found at head" in {
"abc".parse("abc") should not be a [Failure[_]]
}
Expand All @@ -19,7 +22,24 @@ class CharTests extends ParsleyTest {
it should "not consume input if it fails mid-string when combined with attempt" in {
(attempt("abc") <|> "ab").parse("ab") should not be a [Failure[_]]
}

// Positions are (line, column), both starting at 1. `stringPositionCheck(n, s)` first consumes
// n dots, so `s` begins at column n + 1. A tab moves to the next column of the form 4k + 1.
it should "update positions correctly" in {
// plain characters: one column each
stringPositionCheck(0, "abc") shouldBe Success((1, 4))
// a newline resets the column to 1 whatever the starting column was
stringPositionCheck(1, "\na") shouldBe Success((2, 2))
// a tab taken from column 2, 3 or 4 lands on column 5...
stringPositionCheck(0, "a\t") shouldBe Success((1, 5))
stringPositionCheck(0, "ab\t") shouldBe Success((1, 5))
stringPositionCheck(0, "abc\t") shouldBe Success((1, 5))
// ...while a tab taken from column 5 itself moves on to column 9
stringPositionCheck(0, "abcd\t") shouldBe Success((1, 9))
// tab after a newline inside the string: column 2 -> 5, then 'b' -> 6
stringPositionCheck(0, "\na\tb") shouldBe (Success((2, 6)))
// leading tab with a non-trivial starting column (3 -> 5)
stringPositionCheck(2, "\t") shouldBe (Success((1, 5)))
}
// All cases below start at column 3 (two dots consumed first) and stay on line 1.
it should "respect multiple tabs" in {
// consecutive tabs: 3 -> 5 -> 9 (-> 13)
stringPositionCheck(2, "\t\t") shouldBe (Success((1, 9)))
stringPositionCheck(2, "\t\t\t") shouldBe (Success((1, 13)))
// characters between tabs: 3 -> 5, "aaa" -> 8, tab -> 9
stringPositionCheck(2, "\taaa\t") shouldBe (Success((1, 9)))
// 3 -> 5, "aa" -> 7, tab -> 9, "aaa" -> 12, tab -> 13
stringPositionCheck(2, "\taa\taaa\t") shouldBe (Success((1, 13)))
// characters before the first tab: 'a' -> 4, tab -> 5, tab -> 9
stringPositionCheck(2, "a\t\t") shouldBe (Success((1, 9)))
// "aa" reaches column 5 exactly, so the tab goes on to 9
stringPositionCheck(2, "aa\t") shouldBe (Success((1, 9)))
}
"anyChar" should "accept any character" in {
for (i <- 0 to 65535) anyChar.parse(i.toChar.toString) should not be a [Failure[_]]
}
Expand Down

0 comments on commit 2686a55

Please sign in to comment.