From bbbeeef0009071ed5c0e9b2061ab4d090f568e02 Mon Sep 17 00:00:00 2001
From: Miguel Young de la Sota <mcyoung@mit.edu>
Date: Mon, 21 Feb 2022 17:02:37 -0500
Subject: [PATCH 1/5] Extract ascii2der into a library package

---
 {cmd/ascii2der => ascii2der}/encoder.go      |  19 ++-
 {cmd/ascii2der => ascii2der}/encoder_test.go |   2 +-
 {cmd/ascii2der => ascii2der}/scanner.go      | 134 ++++++++-----------
 {cmd/ascii2der => ascii2der}/scanner_test.go |   8 +-
 {cmd/ascii2der => ascii2der}/values.go       |   2 +-
 {cmd/ascii2der => ascii2der}/values_test.go  |   2 +-
 cmd/ascii2der/main.go                        |   4 +-
 7 files changed, 86 insertions(+), 85 deletions(-)
 rename {cmd/ascii2der => ascii2der}/encoder.go (88%)
 rename {cmd/ascii2der => ascii2der}/encoder_test.go (99%)
 rename {cmd/ascii2der => ascii2der}/scanner.go (76%)
 rename {cmd/ascii2der => ascii2der}/scanner_test.go (99%)
 rename {cmd/ascii2der => ascii2der}/values.go (99%)
 rename {cmd/ascii2der => ascii2der}/values_test.go (99%)

diff --git a/cmd/ascii2der/encoder.go b/ascii2der/encoder.go
similarity index 88%
rename from cmd/ascii2der/encoder.go
rename to ascii2der/encoder.go
index 0d84559..6b9f787 100644
--- a/cmd/ascii2der/encoder.go
+++ b/ascii2der/encoder.go
@@ -12,15 +12,32 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package main
+package ascii2der
 
 import (
 	"errors"
 	"fmt"
+	"unicode/utf16"
 
 	"github.com/google/der-ascii/internal"
 )
 
+func appendUTF16(dst []byte, r rune) []byte {
+	if r <= 0xffff {
+		// Note this logic intentionally tolerates unpaired surrogates.
+		return append(dst, byte(r>>8), byte(r))
+	}
+
+	r1, r2 := utf16.EncodeRune(r)
+	dst = append(dst, byte(r1>>8), byte(r1))
+	dst = append(dst, byte(r2>>8), byte(r2))
+	return dst
+}
+
+func appendUTF32(dst []byte, r rune) []byte {
+	return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r))
+}
+
 func appendBase128(dst []byte, value uint32) []byte {
 	dst, err := appendBase128WithLength(dst, value, 0)
 	if err != nil {
diff --git a/cmd/ascii2der/encoder_test.go b/ascii2der/encoder_test.go
similarity index 99%
rename from cmd/ascii2der/encoder_test.go
rename to ascii2der/encoder_test.go
index 5faa8ba..8c964da 100644
--- a/cmd/ascii2der/encoder_test.go
+++ b/ascii2der/encoder_test.go
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package main
+package ascii2der
 
 import (
 	"bytes"
diff --git a/cmd/ascii2der/scanner.go b/ascii2der/scanner.go
similarity index 76%
rename from cmd/ascii2der/scanner.go
rename to ascii2der/scanner.go
index f4c49f2..feeacee 100644
--- a/cmd/ascii2der/scanner.go
+++ b/ascii2der/scanner.go
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package main
+package ascii2der
 
 import (
 	"encoding/hex"
@@ -21,14 +21,13 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
-	"unicode/utf16"
 	"unicode/utf8"
 
 	"github.com/google/der-ascii/internal"
 )
 
-// A position describes a location in the input stream.
-type position struct {
+// A Position describes a location in the input stream.
+type Position struct {
 	Offset int // offset, starting at 0
 	Line   int // line number, starting at 1
 	Column int // column number, starting at 1 (byte count)
@@ -46,14 +45,14 @@ const (
 	tokenEOF
 )
 
-// A parseError is an error during parsing DER ASCII.
-type parseError struct {
-	Pos position
+// A ParseError is an error during parsing DER ASCII.
+type ParseError struct {
+	Pos Position
 	Err error
 }
 
-func (t *parseError) Error() string {
-	return fmt.Sprintf("line %d: %s", t.Pos.Line, t.Err)
+func (e *ParseError) Error() string {
+	return fmt.Sprintf("line %d: %s", e.Pos.Line, e.Err)
 }
 
 // A token is a token in a DER ASCII file.
@@ -64,7 +63,7 @@ type token struct {
 	// bytes.
 	Value []byte
 	// Pos is the position of the first byte of the token.
-	Pos position
+	Pos Position
 	// Length, for a tokenLongForm token, is the number of bytes to use to
 	// encode the length, not including the initial one.
 	Length int
@@ -75,19 +74,19 @@ var (
 	regexpOID     = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`)
 )
 
-type scanner struct {
+type Scanner struct {
 	text string
-	pos  position
+	pos  Position
 }
 
-func newScanner(text string) *scanner {
-	return &scanner{text: text, pos: position{Line: 1}}
+func NewScanner(text string) *Scanner {
+	return &Scanner{text: text, pos: Position{Line: 1}}
 }
 
-func (s *scanner) parseEscapeSequence() (rune, error) {
+func (s *Scanner) parseEscapeSequence() (rune, error) {
 	s.advance() // Skip the \. The caller is assumed to have validated it.
 	if s.isEOF() {
-		return 0, &parseError{s.pos, errors.New("expected escape character")}
+		return 0, &ParseError{s.pos, errors.New("expected escape character")}
 	}
 	switch c := s.text[s.pos.Offset]; c {
 	case 'n':
@@ -99,48 +98,48 @@ func (s *scanner) parseEscapeSequence() (rune, error) {
 	case 'x':
 		s.advance()
 		if s.pos.Offset+2 > len(s.text) {
-			return 0, &parseError{s.pos, errors.New("unfinished escape sequence")}
+			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
 		}
 		b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+2])
 		if err != nil {
-			return 0, &parseError{s.pos, err}
+			return 0, &ParseError{s.pos, err}
 		}
 		s.advanceBytes(2)
 		return rune(b[0]), nil
 	case 'u':
 		s.advance()
 		if s.pos.Offset+4 > len(s.text) {
-			return 0, &parseError{s.pos, errors.New("unfinished escape sequence")}
+			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
 		}
 		b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+4])
 		if err != nil {
-			return 0, &parseError{s.pos, err}
+			return 0, &ParseError{s.pos, err}
 		}
 		s.advanceBytes(4)
 		return rune(b[0])<<8 | rune(b[1]), nil
 	case 'U':
 		s.advance()
 		if s.pos.Offset+8 > len(s.text) {
-			return 0, &parseError{s.pos, errors.New("unfinished escape sequence")}
+			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
 		}
 		b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+8])
 		if err != nil {
-			return 0, &parseError{s.pos, err}
+			return 0, &ParseError{s.pos, err}
 		}
 		s.advanceBytes(8)
 		return rune(b[0])<<24 | rune(b[1])<<16 | rune(b[2])<<8 | rune(b[3]), nil
 	default:
-		return 0, &parseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)}
+		return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)}
 	}
 }
 
-func (s *scanner) parseQuotedString() (token, error) {
+func (s *Scanner) parseQuotedString() (token, error) {
 	s.advance() // Skip the ". The caller is assumed to have validated it.
 	start := s.pos
 	var bytes []byte
 	for {
 		if s.isEOF() {
-			return token{}, &parseError{start, errors.New("unmatched \"")}
+			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
 		switch c := s.text[s.pos.Offset]; c {
 		case '"':
@@ -154,7 +153,7 @@ func (s *scanner) parseQuotedString() (token, error) {
 			}
 			if r > 0xff {
 				// TODO(davidben): Alternatively, should these encode as UTF-8?
-				return token{}, &parseError{escapeStart, errors.New("illegal escape for quoted string")}
+				return token{}, &ParseError{escapeStart, errors.New("illegal escape for quoted string")}
 			}
 			bytes = append(bytes, byte(r))
 		default:
@@ -164,26 +163,14 @@ func (s *scanner) parseQuotedString() (token, error) {
 	}
 }
 
-func appendUTF16(b []byte, r rune) []byte {
-	if r <= 0xffff {
-		// Note this logic intentionally tolerates unpaired surrogates.
-		return append(b, byte(r>>8), byte(r))
-	}
-
-	r1, r2 := utf16.EncodeRune(r)
-	b = append(b, byte(r1>>8), byte(r1))
-	b = append(b, byte(r2>>8), byte(r2))
-	return b
-}
-
-func (s *scanner) parseUTF16String() (token, error) {
+func (s *Scanner) parseUTF16String() (token, error) {
 	s.advance() // Skip the u. The caller is assumed to have validated it.
 	s.advance() // Skip the ". The caller is assumed to have validated it.
 	start := s.pos
 	var bytes []byte
 	for {
 		if s.isEOF() {
-			return token{}, &parseError{start, errors.New("unmatched \"")}
+			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
 		switch c := s.text[s.pos.Offset]; c {
 		case '"':
@@ -201,7 +188,7 @@ func (s *scanner) parseUTF16String() (token, error) {
 			// legitimate replacement charaacter in the input. The documentation
 			// says errors return (RuneError, 0) or (RuneError, 1).
 			if r == utf8.RuneError && n <= 1 {
-				return token{}, &parseError{s.pos, errors.New("invalid UTF-8")}
+				return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")}
 			}
 			s.advanceBytes(n)
 			bytes = appendUTF16(bytes, r)
@@ -209,18 +196,14 @@ func (s *scanner) parseUTF16String() (token, error) {
 	}
 }
 
-func appendUTF32(b []byte, r rune) []byte {
-	return append(b, byte(r>>24), byte(r>>16), byte(r>>8), byte(r))
-}
-
-func (s *scanner) parseUTF32String() (token, error) {
+func (s *Scanner) parseUTF32String() (token, error) {
 	s.advance() // Skip the U. The caller is assumed to have validated it.
 	s.advance() // Skip the ". The caller is assumed to have validated it.
 	start := s.pos
 	var bytes []byte
 	for {
 		if s.isEOF() {
-			return token{}, &parseError{start, errors.New("unmatched \"")}
+			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
 		switch c := s.text[s.pos.Offset]; c {
 		case '"':
@@ -238,7 +221,7 @@ func (s *scanner) parseUTF32String() (token, error) {
 			// legitimate replacement charaacter in the input. The documentation
 			// says errors return (RuneError, 0) or (RuneError, 1).
 			if r == utf8.RuneError && n <= 1 {
-				return token{}, &parseError{s.pos, errors.New("invalid UTF-8")}
+				return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")}
 			}
 			s.advanceBytes(n)
 			bytes = appendUTF32(bytes, r)
@@ -246,7 +229,7 @@ func (s *scanner) parseUTF32String() (token, error) {
 	}
 }
 
-func (s *scanner) Next() (token, error) {
+func (s *Scanner) next() (token, error) {
 again:
 	if s.isEOF() {
 		return token{Kind: tokenEOF, Pos: s.pos}, nil
@@ -290,7 +273,7 @@ again:
 			s.advance() // Skip the `.
 			bitStr, ok := s.consumeUpTo('`')
 			if !ok {
-				return token{}, &parseError{s.pos, errors.New("unmatched `")}
+				return token{}, &ParseError{s.pos, errors.New("unmatched `")}
 			}
 
 			// The leading byte is the number of "extra" bits at the end.
@@ -309,7 +292,7 @@ again:
 					bitCount++
 				case '|':
 					if sawPipe {
-						return token{}, &parseError{s.pos, errors.New("duplicate |")}
+						return token{}, &ParseError{s.pos, errors.New("duplicate |")}
 					}
 
 					// bitsRemaining is the number of bits remaining in the output that haven't
@@ -317,13 +300,13 @@ again:
 					bitsRemaining := (len(value)-1)*8 - bitCount
 					inputRemaining := len(bitStr) - i - 1
 					if inputRemaining > bitsRemaining {
-						return token{}, &parseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)}
+						return token{}, &ParseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)}
 					}
 
 					sawPipe = true
 					value[0] = byte(bitsRemaining)
 				default:
-					return token{}, &parseError{s.pos, fmt.Errorf("unexpected rune %q", r)}
+					return token{}, &ParseError{s.pos, fmt.Errorf("unexpected rune %q", r)}
 				}
 			}
 			if !sawPipe {
@@ -335,26 +318,26 @@ again:
 		s.advance()
 		hexStr, ok := s.consumeUpTo('`')
 		if !ok {
-			return token{}, &parseError{s.pos, errors.New("unmatched `")}
+			return token{}, &ParseError{s.pos, errors.New("unmatched `")}
 		}
 		bytes, err := hex.DecodeString(hexStr)
 		if err != nil {
-			return token{}, &parseError{s.pos, err}
+			return token{}, &ParseError{s.pos, err}
 		}
 		return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil
 	case '[':
 		s.advance()
 		tagStr, ok := s.consumeUpTo(']')
 		if !ok {
-			return token{}, &parseError{s.pos, errors.New("unmatched [")}
+			return token{}, &ParseError{s.pos, errors.New("unmatched [")}
 		}
 		tag, err := decodeTagString(tagStr)
 		if err != nil {
-			return token{}, &parseError{s.pos, err}
+			return token{}, &ParseError{s.pos, err}
 		}
 		value, err := appendTag(nil, tag)
 		if err != nil {
-			return token{}, &parseError{s.pos, err}
+			return token{}, &ParseError{s.pos, err}
 		}
 		return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil
 	}
@@ -381,7 +364,7 @@ loop:
 		value, err := appendTag(nil, tag)
 		if err != nil {
 			// This is impossible; built-in tags always encode.
-			return token{}, &parseError{s.pos, err}
+			return token{}, &ParseError{s.pos, err}
 		}
 		return token{Kind: tokenBytes, Value: value, Pos: start}, nil
 	}
@@ -389,7 +372,7 @@ loop:
 	if regexpInteger.MatchString(symbol) {
 		value, err := strconv.ParseInt(symbol, 10, 64)
 		if err != nil {
-			return token{}, &parseError{start, err}
+			return token{}, &ParseError{start, err}
 		}
 		return token{Kind: tokenBytes, Value: appendInteger(nil, value), Pos: s.pos}, nil
 	}
@@ -400,7 +383,7 @@ loop:
 		for _, s := range oidStr {
 			u, err := strconv.ParseUint(s, 10, 32)
 			if err != nil {
-				return token{}, &parseError{start, err}
+				return token{}, &ParseError{start, err}
 			}
 			oid = append(oid, uint32(u))
 		}
@@ -426,7 +409,7 @@ loop:
 	if isLongFormOverride(symbol) {
 		l, err := decodeLongFormOverride(symbol)
 		if err != nil {
-			return token{}, &parseError{start, err}
+			return token{}, &ParseError{start, err}
 		}
 		return token{Kind: tokenLongForm, Length: l}, nil
 	}
@@ -434,11 +417,11 @@ loop:
 	return token{}, fmt.Errorf("unrecognized symbol %q", symbol)
 }
 
-func (s *scanner) isEOF() bool {
+func (s *Scanner) isEOF() bool {
 	return s.pos.Offset >= len(s.text)
 }
 
-func (s *scanner) advance() {
+func (s *Scanner) advance() {
 	if !s.isEOF() {
 		if s.text[s.pos.Offset] == '\n' {
 			s.pos.Line++
@@ -450,13 +433,13 @@ func (s *scanner) advance() {
 	}
 }
 
-func (s *scanner) advanceBytes(n int) {
+func (s *Scanner) advanceBytes(n int) {
 	for i := 0; i < n; i++ {
 		s.advance()
 	}
 }
 
-func (s *scanner) consumeUpTo(b byte) (string, bool) {
+func (s *Scanner) consumeUpTo(b byte) (string, bool) {
 	start := s.pos.Offset
 	for !s.isEOF() {
 		if s.text[s.pos.Offset] == b {
@@ -469,22 +452,22 @@ func (s *scanner) consumeUpTo(b byte) (string, bool) {
 	return "", false
 }
 
-func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) {
+func (s *Scanner) exec(leftCurly *token) ([]byte, error) {
 	var out []byte
 	var lengthModifier *token
 	for {
-		token, err := scanner.Next()
+		token, err := s.next()
 		if err != nil {
 			return nil, err
 		}
 		if lengthModifier != nil && token.Kind != tokenLeftCurly {
-			return nil, &parseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")}
+			return nil, &ParseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")}
 		}
 		switch token.Kind {
 		case tokenBytes:
 			out = append(out, token.Value...)
 		case tokenLeftCurly:
-			child, err := asciiToDERImpl(scanner, &token)
+			child, err := s.exec(&token)
 			if err != nil {
 				return nil, err
 			}
@@ -504,7 +487,7 @@ func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) {
 			out, err = appendLength(out, len(child), lengthOverride)
 			if err != nil {
 				// appendLength may fail if the lengthModifier was incompatible.
-				return nil, &parseError{lengthModifier.Pos, err}
+				return nil, &ParseError{lengthModifier.Pos, err}
 			}
 			out = append(out, child...)
 			lengthModifier = nil
@@ -512,21 +495,20 @@ func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) {
 			if leftCurly != nil {
 				return out, nil
 			}
-			return nil, &parseError{token.Pos, errors.New("unmatched '}'")}
+			return nil, &ParseError{token.Pos, errors.New("unmatched '}'")}
 		case tokenLongForm, tokenIndefinite:
 			lengthModifier = &token
 		case tokenEOF:
 			if leftCurly == nil {
 				return out, nil
 			}
-			return nil, &parseError{leftCurly.Pos, errors.New("unmatched '{'")}
+			return nil, &ParseError{leftCurly.Pos, errors.New("unmatched '{'")}
 		default:
 			panic(token)
 		}
 	}
 }
 
-func asciiToDER(input string) ([]byte, error) {
-	scanner := newScanner(input)
-	return asciiToDERImpl(scanner, nil)
+func (s *Scanner) Exec() ([]byte, error) {
+	return s.exec(nil)
 }
diff --git a/cmd/ascii2der/scanner_test.go b/ascii2der/scanner_test.go
similarity index 99%
rename from cmd/ascii2der/scanner_test.go
rename to ascii2der/scanner_test.go
index cac14d2..ff0fe85 100644
--- a/cmd/ascii2der/scanner_test.go
+++ b/ascii2der/scanner_test.go
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package main
+package ascii2der
 
 import (
 	"bytes"
@@ -383,9 +383,9 @@ indefinite long-form:2`,
 }
 
 func scanAll(in string) (tokens []token, ok bool) {
-	scanner := newScanner(in)
+	scanner := NewScanner(in)
 	for {
-		token, err := scanner.Next()
+		token, err := scanner.next()
 		if err != nil {
 			return
 		}
@@ -446,7 +446,7 @@ var asciiToDERTests = []struct {
 
 func TestASCIIToDER(t *testing.T) {
 	for i, tt := range asciiToDERTests {
-		out, err := asciiToDER(tt.in)
+		out, err := NewScanner(tt.in).Exec()
 		ok := err == nil
 		if !tt.ok {
 			if ok {
diff --git a/cmd/ascii2der/values.go b/ascii2der/values.go
similarity index 99%
rename from cmd/ascii2der/values.go
rename to ascii2der/values.go
index 03e82dc..d8406e0 100644
--- a/cmd/ascii2der/values.go
+++ b/ascii2der/values.go
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package main
+package ascii2der
 
 import (
 	"errors"
diff --git a/cmd/ascii2der/values_test.go b/ascii2der/values_test.go
similarity index 99%
rename from cmd/ascii2der/values_test.go
rename to ascii2der/values_test.go
index 17d6a9c..ea74213 100644
--- a/cmd/ascii2der/values_test.go
+++ b/ascii2der/values_test.go
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package main
+package ascii2der
 
 import (
 	"testing"
diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go
index ca5c73d..f379968 100644
--- a/cmd/ascii2der/main.go
+++ b/cmd/ascii2der/main.go
@@ -20,6 +20,8 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
+
+	"github.com/google/der-ascii/ascii2der"
 )
 
 var inPath = flag.String("i", "", "input file to use (defaults to stdin)")
@@ -52,7 +54,7 @@ func main() {
 		os.Exit(1)
 	}
 
-	outBytes, err := asciiToDER(string(inBytes))
+	outBytes, err := ascii2der.NewScanner(string(inBytes)).Exec()
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err)
 		os.Exit(1)

From 9c05123218977c1940725c4384be07e0c2d904e6 Mon Sep 17 00:00:00 2001
From: Miguel Young de la Sota <mcyoung@mit.edu>
Date: Mon, 21 Feb 2022 17:31:28 -0500
Subject: [PATCH 2/5] Add doc comments to remaining functions in encoder.go

---
 ascii2der/encoder.go      | 37 +++++++++++++++++++++++++++++++------
 ascii2der/encoder_test.go | 10 +++++-----
 ascii2der/scanner.go      |  6 +++---
 3 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/ascii2der/encoder.go b/ascii2der/encoder.go
index 6b9f787..87993dc 100644
--- a/ascii2der/encoder.go
+++ b/ascii2der/encoder.go
@@ -22,9 +22,12 @@ import (
 	"github.com/google/der-ascii/internal"
 )
 
+// appendUTF16 marshals r using UTF-16 and appends the result to dst, returning
+// the updated slice.
+// 
+// This logic intentionally tolerates unpaired surrogates.
 func appendUTF16(dst []byte, r rune) []byte {
 	if r <= 0xffff {
-		// Note this logic intentionally tolerates unpaired surrogates.
 		return append(dst, byte(r>>8), byte(r))
 	}
 
@@ -34,10 +37,20 @@ func appendUTF16(dst []byte, r rune) []byte {
 	return dst
 }
 
+// appendUTF16 marshals r using UTF-32 and appends the result to dst, returning
+// the updated slice.
+// 
+// In other words, this function writes r as an integer in big-endian order.
 func appendUTF32(dst []byte, r rune) []byte {
 	return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r))
 }
 
+// appendBase128 marshals an integer in base 128, a varint format used by OIDs
+// and long-form tag numbers, and appends the result to dst, returning the
+// updated slice.
+// 
+// This function is the same as appendBase128WithLength with length set to zero,
+// which cannot fail.
 func appendBase128(dst []byte, value uint32) []byte {
 	dst, err := appendBase128WithLength(dst, value, 0)
 	if err != nil {
@@ -47,6 +60,11 @@ func appendBase128(dst []byte, value uint32) []byte {
 	return dst
 }
 
+// appendBase128 marshals an integer in base 128, a varint format used by OIDs
+// and long-form tag numbers, and appends the result to dst, returning the
+// updated slice.
+// 
+// If length is zero, the minimal length is chosen.
 func appendBase128WithLength(dst []byte, value uint32, length int) ([]byte, error) {
 	// Count how many bytes are needed.
 	var l int
@@ -137,18 +155,25 @@ func appendInteger(dst []byte, value int64) []byte {
 	return dst
 }
 
-func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, bool) {
+// appendObjectIdentifier marshals the given array of integers as an OID.
+func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, error) {
 	// Validate the input before anything is written.
-	if len(value) < 2 || value[0] > 2 || (value[0] < 2 && value[1] > 39) {
-		return dst, false
+	if len(value) < 2 {
+		return dst, errors.New("OIDs must have at least two arcs")
+	}
+	if value[0] > 2 {
+		return dst, fmt.Errorf("first arc of an OID must be one of 0, 1, or 2; got %d", value[0])
+	}
+	if value[0] < 2 && value[1] > 39 {
+		return dst, fmt.Errorf("second arc of an OID must be at most 39; got %d", value[1])
 	}
 	if value[0]*40+value[1] < value[1] {
-		return dst, false
+		return dst, errors.New("first two arcs overflowed")
 	}
 
 	dst = appendBase128(dst, value[0]*40+value[1])
 	for _, v := range value[2:] {
 		dst = appendBase128(dst, v)
 	}
-	return dst, true
+	return dst, nil
 }
diff --git a/ascii2der/encoder_test.go b/ascii2der/encoder_test.go
index 8c964da..f0472b8 100644
--- a/ascii2der/encoder_test.go
+++ b/ascii2der/encoder_test.go
@@ -162,21 +162,21 @@ var appendObjectIdentifierTests = []struct {
 
 func TestAppendObjectIdentifier(t *testing.T) {
 	for i, tt := range appendObjectIdentifierTests {
-		dst, ok := appendObjectIdentifier(nil, tt.value)
+		dst, err := appendObjectIdentifier(nil, tt.value)
 		if !tt.ok {
-			if ok {
+			if err == nil {
 				t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value)
 			} else if len(dst) != 0 {
 				t.Errorf("%d. appendObjectIdentifier did not preserve input.", i)
 			}
 		} else if !bytes.Equal(dst, tt.encoded) {
-			t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, wanted %v.", i, tt.value, dst, tt.encoded)
+			t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, %v, wanted %v.", i, tt.value, dst, err, tt.encoded)
 		}
 
 		dst = []byte{0}
-		dst, ok = appendObjectIdentifier(dst, tt.value)
+		dst, err = appendObjectIdentifier(dst, tt.value)
 		if !tt.ok {
-			if ok {
+			if err == nil {
 				t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value)
 			} else if !bytes.Equal(dst, []byte{0}) {
 				t.Errorf("%d. appendObjectIdentifier did not preserve input.", i)
diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go
index feeacee..a195bac 100644
--- a/ascii2der/scanner.go
+++ b/ascii2der/scanner.go
@@ -387,9 +387,9 @@ loop:
 			}
 			oid = append(oid, uint32(u))
 		}
-		der, ok := appendObjectIdentifier(nil, oid)
-		if !ok {
-			return token{}, errors.New("invalid OID")
+		der, err := appendObjectIdentifier(nil, oid)
+		if err != nil {
+			return token{}, &ParseError{start, err}
 		}
 		return token{Kind: tokenBytes, Value: der, Pos: s.pos}, nil
 	}

From 6f3533b174171da8ab082f82dfc44f85ec739e96 Mon Sep 17 00:00:00 2001
From: Miguel Young de la Sota <mcyoung@mit.edu>
Date: Mon, 21 Feb 2022 23:02:50 -0500
Subject: [PATCH 3/5] Document scanner.go, including an example

---
 ascii2der/encoder.go       |   8 +-
 ascii2der/examples_test.go |  31 ++++++
 ascii2der/scanner.go       | 219 +++++++++++++++++++++++++------------
 3 files changed, 186 insertions(+), 72 deletions(-)
 create mode 100644 ascii2der/examples_test.go

diff --git a/ascii2der/encoder.go b/ascii2der/encoder.go
index 87993dc..a0b04d5 100644
--- a/ascii2der/encoder.go
+++ b/ascii2der/encoder.go
@@ -24,7 +24,7 @@ import (
 
 // appendUTF16 marshals r using UTF-16 and appends the result to dst, returning
 // the updated slice.
-// 
+//
 // This logic intentionally tolerates unpaired surrogates.
 func appendUTF16(dst []byte, r rune) []byte {
 	if r <= 0xffff {
@@ -39,7 +39,7 @@ func appendUTF16(dst []byte, r rune) []byte {
 
-// appendUTF16 marshals r using UTF-32 and appends the result to dst, returning
+// appendUTF32 marshals r using UTF-32 and appends the result to dst, returning
 // the updated slice.
-// 
+//
 // In other words, this function writes r as an integer in big-endian order.
 func appendUTF32(dst []byte, r rune) []byte {
 	return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r))
@@ -48,7 +48,7 @@ func appendUTF32(dst []byte, r rune) []byte {
 // appendBase128 marshals an integer in base 128, a varint format used by OIDs
 // and long-form tag numbers, and appends the result to dst, returning the
 // updated slice.
-// 
+//
 // This function is the same as appendBase128WithLength with length set to zero,
 // which cannot fail.
 func appendBase128(dst []byte, value uint32) []byte {
@@ -63,7 +63,7 @@ func appendBase128(dst []byte, value uint32) []byte {
-// appendBase128 marshals an integer in base 128, a varint format used by OIDs
+// appendBase128WithLength marshals an integer in base 128, a varint format used by OIDs
 // and long-form tag numbers, and appends the result to dst, returning the
 // updated slice.
-// 
+//
 // If length is zero, the minimal length is chosen.
 func appendBase128WithLength(dst []byte, value uint32, length int) ([]byte, error) {
 	// Count how many bytes are needed.
diff --git a/ascii2der/examples_test.go b/ascii2der/examples_test.go
new file mode 100644
index 0000000..e3fbbc9
--- /dev/null
+++ b/ascii2der/examples_test.go
@@ -0,0 +1,31 @@
+// Copyright 2015 The DER ASCII Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ascii2der
+
+import (
+	"fmt"
+)
+
+func Example() {
+	scanner := NewScanner(`
+	SEQUENCE {
+		INTEGER { "totally an integer" }
+	}
+`)
+
+	der, _ := scanner.Exec()
+	fmt.Printf("%x\n", der)
+	// Output: 30140212746f74616c6c7920616e20696e7465676572
+}
diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go
index a195bac..eb13caa 100644
--- a/ascii2der/scanner.go
+++ b/ascii2der/scanner.go
@@ -12,6 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+// Package ascii2der implements the DER-ASCII language described in
+// https://github.com/google/der-ascii/blob/master/language.txt.
+//
+// The Scanner type can be used to parse DER-ASCII files and output byte blobs
+// that may or may not be valid DER.
 package ascii2der
 
 import (
@@ -27,10 +32,22 @@ import (
 )
 
 // A Position describes a location in the input stream.
+//
+// The zero-value Position represents the first byte of an anonymous input file.
 type Position struct {
-	Offset int // offset, starting at 0
-	Line   int // line number, starting at 1
-	Column int // column number, starting at 1 (byte count)
+	Offset int    // Byte offset.
+	Line   int    // Line number (zero-indexed).
+	Column int    // Column number (zero-indexed byte, not rune, count).
+	File   string // Optional file name for pretty-printing.
+}
+
+// String converts a Position to a string.
+func (p Position) String() string {
+	file := p.File
+	if file == "" {
+		file = "<input>"
+	}
+	return fmt.Sprintf("%s:%d:%d", file, p.Line+1, p.Column+1)
 }
 
 // A tokenKind is a kind of token.
@@ -45,14 +62,26 @@ const (
 	tokenEOF
 )
 
-// A ParseError is an error during parsing DER ASCII.
+// A ParseError may be produced while executing a DER ASCII file, wrapping
+// another error along with a position.
+//
+// Errors produced by functions in this package may be type-asserted to
+// *ParseError to try to obtain the position at which the error occurred.
 type ParseError struct {
 	Pos Position
 	Err error
 }
 
+// Error makes this type into an error type.
 func (e *ParseError) Error() string {
-	return fmt.Sprintf("line %d: %s", e.Pos.Line, e.Err)
+	return fmt.Sprintf("%s: %s", e.Pos, e.Err)
+}
+
+// Unwrap extracts the inner wrapped error.
+//
+// See errors.Unwrap().
+func (e *ParseError) Unwrap() error {
+	return e.Err
 }
 
 // A token is a token in a DER ASCII file.
@@ -74,21 +103,96 @@ var (
 	regexpOID     = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`)
 )
 
+// A Scanner represents parsing state for a DER ASCII file.
+//
+// A zero-value Scanner is ready to begin parsing (given that Input is set to
+// a valid value). However, it is recommended to use NewScanner to create a new
+// Scanner, since it can pre-populate fields other than Input with default
+// settings.
 type Scanner struct {
-	text string
-	pos  Position
+	// Input is the input text being processed.
+	Input string
+	// pos is the current position at which parsing should
+	// resume. The Offset field is used for indexing into Input; the remaining
+	// fields are used for error-reporting.
+	pos Position
+}
+
+// NewScanner creates a new scanner for parsing the given input.
+func NewScanner(input string) *Scanner {
+	return &Scanner{Input: input}
+}
+
+// SetFile sets the file path shown in this Scanner's error reports.
+func (s *Scanner) SetFile(path string) {
+	s.pos.File = path
+}
+
+// Exec consumes tokens until Input is exhausted, returning the resulting
+// encoded maybe-DER.
+func (s *Scanner) Exec() ([]byte, error) {
+	return s.exec(nil)
+}
+
+// isEOF returns whether the cursor is past the end of the input string.
+func (s *Scanner) isEOF() bool {
+	return s.pos.Offset >= len(s.Input)
+}
+
+// advance advances the scanner's cursor one position.
+//
+// Unlike just s.pos.Offset++, this will not proceed beyond the end of the
+// string, and will update the line and column information accordingly.
+func (s *Scanner) advance() {
+	if !s.isEOF() {
+		if s.Input[s.pos.Offset] == '\n' {
+			s.pos.Line++
+			s.pos.Column = 0
+		} else {
+			s.pos.Column++
+		}
+		s.pos.Offset++
+	}
 }
 
-func NewScanner(text string) *Scanner {
-	return &Scanner{text: text, pos: Position{Line: 1}}
+// advanceBytes calls advance() n times.
+func (s *Scanner) advanceBytes(n int) {
+	for i := 0; i < n; i++ {
+		s.advance()
+	}
 }
 
+// consumeUpTo advances the cursor just past the next occurrence of the given
+// byte, returning the source bytes from the initial cursor position up to, but
+// excluding, that byte.
+//
+// If EOF is reached before the byte is seen, the function returns false.
+func (s *Scanner) consumeUpTo(b byte) (string, bool) {
+	start := s.pos.Offset
+	for !s.isEOF() {
+		if s.Input[s.pos.Offset] == b {
+			ret := s.Input[start:s.pos.Offset]
+			s.advance()
+			return ret, true
+		}
+		s.advance()
+	}
+	return "", false
+}
+
+// parseEscapeSequence parses a DER-ASCII escape sequence, returning the rune
+// it escapes.
+//
+// Valid escapes are:
+// \n \" \\ \xNN \uNNNN \UNNNNNNNN
+//
+// This function assumes that the scanner's cursor is currently on a \ rune.
 func (s *Scanner) parseEscapeSequence() (rune, error) {
 	s.advance() // Skip the \. The caller is assumed to have validated it.
 	if s.isEOF() {
 		return 0, &ParseError{s.pos, errors.New("expected escape character")}
 	}
-	switch c := s.text[s.pos.Offset]; c {
+	switch c := s.Input[s.pos.Offset]; c {
 	case 'n':
 		s.advance()
 		return '\n', nil
@@ -97,10 +201,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) {
 		return rune(c), nil
 	case 'x':
 		s.advance()
-		if s.pos.Offset+2 > len(s.text) {
+		if s.pos.Offset+2 > len(s.Input) {
 			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
 		}
-		b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+2])
+		b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+2])
 		if err != nil {
 			return 0, &ParseError{s.pos, err}
 		}
@@ -108,10 +212,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) {
 		return rune(b[0]), nil
 	case 'u':
 		s.advance()
-		if s.pos.Offset+4 > len(s.text) {
+		if s.pos.Offset+4 > len(s.Input) {
 			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
 		}
-		b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+4])
+		b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+4])
 		if err != nil {
 			return 0, &ParseError{s.pos, err}
 		}
@@ -119,10 +223,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) {
 		return rune(b[0])<<8 | rune(b[1]), nil
 	case 'U':
 		s.advance()
-		if s.pos.Offset+8 > len(s.text) {
+		if s.pos.Offset+8 > len(s.Input) {
 			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
 		}
-		b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+8])
+		b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+8])
 		if err != nil {
 			return 0, &ParseError{s.pos, err}
 		}
@@ -133,6 +237,9 @@ func (s *Scanner) parseEscapeSequence() (rune, error) {
 	}
 }
 
+// parseQuotedString parses a UTF-8 string until the next ".
+//
+// This function assumes that the scanner's cursor is currently on a " rune.
 func (s *Scanner) parseQuotedString() (token, error) {
 	s.advance() // Skip the ". The caller is assumed to have validated it.
 	start := s.pos
@@ -141,7 +248,7 @@ func (s *Scanner) parseQuotedString() (token, error) {
 		if s.isEOF() {
 			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
-		switch c := s.text[s.pos.Offset]; c {
+		switch c := s.Input[s.pos.Offset]; c {
 		case '"':
 			s.advance()
 			return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil
@@ -163,6 +270,10 @@ func (s *Scanner) parseQuotedString() (token, error) {
 	}
 }
 
+// parseUTF16String parses a UTF-16 string until the next ".
+//
+// This function assumes that the scanner's cursor is currently on a u followed
+// by a " rune.
 func (s *Scanner) parseUTF16String() (token, error) {
 	s.advance() // Skip the u. The caller is assumed to have validated it.
 	s.advance() // Skip the ". The caller is assumed to have validated it.
@@ -172,7 +283,7 @@ func (s *Scanner) parseUTF16String() (token, error) {
 		if s.isEOF() {
 			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
-		switch c := s.text[s.pos.Offset]; c {
+		switch c := s.Input[s.pos.Offset]; c {
 		case '"':
 			s.advance()
 			return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil
@@ -183,9 +294,9 @@ func (s *Scanner) parseUTF16String() (token, error) {
 			}
 			bytes = appendUTF16(bytes, r)
 		default:
-			r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:])
+			r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:])
 			// Note DecodeRuneInString may return utf8.RuneError if there is a
-			// legitimate replacement charaacter in the input. The documentation
+			// legitimate replacement character in the input. The documentation
 			// says errors return (RuneError, 0) or (RuneError, 1).
 			if r == utf8.RuneError && n <= 1 {
 				return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")}
@@ -196,6 +307,10 @@ func (s *Scanner) parseUTF16String() (token, error) {
 	}
 }
 
+// parseUTF32String parses a UTF-32 string until the next ".
+//
+// This function assumes that the scanner's cursor is currently on a U followed
+// by a " rune.
 func (s *Scanner) parseUTF32String() (token, error) {
 	s.advance() // Skip the U. The caller is assumed to have validated it.
 	s.advance() // Skip the ". The caller is assumed to have validated it.
@@ -205,7 +320,7 @@ func (s *Scanner) parseUTF32String() (token, error) {
 		if s.isEOF() {
 			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
-		switch c := s.text[s.pos.Offset]; c {
+		switch c := s.Input[s.pos.Offset]; c {
 		case '"':
 			s.advance()
 			return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil
@@ -216,7 +331,7 @@ func (s *Scanner) parseUTF32String() (token, error) {
 			}
 			bytes = appendUTF32(bytes, r)
 		default:
-			r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:])
+			r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:])
 			// Note DecodeRuneInString may return utf8.RuneError if there is a
 			// legitimate replacement charaacter in the input. The documentation
 			// says errors return (RuneError, 0) or (RuneError, 1).
@@ -229,13 +344,14 @@ func (s *Scanner) parseUTF32String() (token, error) {
 	}
 }
 
+// next lexes the next token.
 func (s *Scanner) next() (token, error) {
 again:
 	if s.isEOF() {
 		return token{Kind: tokenEOF, Pos: s.pos}, nil
 	}
 
-	switch s.text[s.pos.Offset] {
+	switch s.Input[s.pos.Offset] {
 	case ' ', '\t', '\n', '\r':
 		// Skip whitespace.
 		s.advance()
@@ -244,7 +360,7 @@ again:
 		// Skip to the end of the comment.
 		s.advance()
 		for !s.isEOF() {
-			wasNewline := s.text[s.pos.Offset] == '\n'
+			wasNewline := s.Input[s.pos.Offset] == '\n'
 			s.advance()
 			if wasNewline {
 				break
@@ -260,15 +376,15 @@ again:
 	case '"':
 		return s.parseQuotedString()
 	case 'u':
-		if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' {
+		if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' {
 			return s.parseUTF16String()
 		}
 	case 'U':
-		if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' {
+		if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' {
 			return s.parseUTF32String()
 		}
 	case 'b':
-		if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '`' {
+		if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '`' {
 			s.advance() // Skip the b.
 			s.advance() // Skip the `.
 			bitStr, ok := s.consumeUpTo('`')
@@ -348,7 +464,7 @@ again:
 	s.advance()
 loop:
 	for !s.isEOF() {
-		switch s.text[s.pos.Offset] {
+		switch s.Input[s.pos.Offset] {
 		case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#':
 			break loop
 		default:
@@ -356,7 +472,7 @@ loop:
 		}
 	}
 
-	symbol := s.text[start.Offset:s.pos.Offset]
+	symbol := s.Input[start.Offset:s.pos.Offset]
 
 	// See if it is a tag.
 	tag, ok := internal.TagByName(symbol)
@@ -417,41 +533,12 @@ loop:
 	return token{}, fmt.Errorf("unrecognized symbol %q", symbol)
 }
 
-func (s *Scanner) isEOF() bool {
-	return s.pos.Offset >= len(s.text)
-}
-
-func (s *Scanner) advance() {
-	if !s.isEOF() {
-		if s.text[s.pos.Offset] == '\n' {
-			s.pos.Line++
-			s.pos.Column = 0
-		} else {
-			s.pos.Column++
-		}
-		s.pos.Offset++
-	}
-}
-
-func (s *Scanner) advanceBytes(n int) {
-	for i := 0; i < n; i++ {
-		s.advance()
-	}
-}
-
-func (s *Scanner) consumeUpTo(b byte) (string, bool) {
-	start := s.pos.Offset
-	for !s.isEOF() {
-		if s.text[s.pos.Offset] == b {
-			ret := s.text[start:s.pos.Offset]
-			s.advance()
-			return ret, true
-		}
-		s.advance()
-	}
-	return "", false
-}
-
+// exec is the main parser loop.
+//
+// The leftCurly argument, if not nil, represents the { that began the
+// length-prefixed block we're currently executing. Because we need to encode
+// the full extent of the contents of a {} before emitting the length prefix,
+// this function calls itself with a non-nil leftCurly to encode it.
 func (s *Scanner) exec(leftCurly *token) ([]byte, error) {
 	var out []byte
 	var lengthModifier *token
@@ -508,7 +595,3 @@ func (s *Scanner) exec(leftCurly *token) ([]byte, error) {
 		}
 	}
 }
-
-func (s *Scanner) Exec() ([]byte, error) {
-	return s.exec(nil)
-}

From d5d85a8d8ce195b3010b431b17e96bad0cd535e5 Mon Sep 17 00:00:00 2001
From: Miguel Young de la Sota <mcyoung@mit.edu>
Date: Mon, 21 Feb 2022 23:58:18 -0500
Subject: [PATCH 4/5] Simplify the base parsing helpers in scanner.go

---
 ascii2der/scanner.go | 176 +++++++++++++++++++++----------------------
 1 file changed, 88 insertions(+), 88 deletions(-)

diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go
index eb13caa..b0c0d81 100644
--- a/ascii2der/scanner.go
+++ b/ascii2der/scanner.go
@@ -134,17 +134,18 @@ func (s *Scanner) Exec() ([]byte, error) {
 	return s.exec(nil)
 }
 
-// isEOF returns whether the cursor is past the end of the input string.
-func (s *Scanner) isEOF() bool {
-	return s.pos.Offset >= len(s.Input)
+// isEOF returns whether the position n bytes past the cursor is at or beyond
+// the end of the input; isEOF(0) reports whether the cursor itself is at EOF.
+func (s *Scanner) isEOF(n int) bool {
+	return s.pos.Offset+n >= len(s.Input)
 }
 
-// advance advances the scanner's cursor one position.
+// advance advances the scanner's cursor n positions.
 //
-// Unlike just s.pos.Offset++, this will not proceed beyond the end of the
+// Unlike just s.pos.Offset += n, this will not proceed beyond the end of the
 // string, and will update the line and column information accordingly.
-func (s *Scanner) advance() {
-	if !s.isEOF() {
+func (s *Scanner) advance(n int) {
+	for i := 0; i < n && !s.isEOF(0); i++ {
 		if s.Input[s.pos.Offset] == '\n' {
 			s.pos.Line++
 			s.pos.Column = 0
@@ -155,27 +156,30 @@ func (s *Scanner) advance() {
 	}
 }
 
-// advanceBytes calls advance() n times.
-func (s *Scanner) advanceBytes(n int) {
-	for i := 0; i < n; i++ {
-		s.advance()
+// consume advances the cursor by n bytes and returns the source bytes between
+// the initial cursor position (inclusive) and the final one (exclusive).
+//
+// If EOF is reached before all n bytes are consumed, the function returns
+// false.
+func (s *Scanner) consume(n int) (string, bool) {
+	start := s.pos.Offset
+	s.advance(n)
+	if s.pos.Offset-start != n {
+		return "", false
 	}
+
+	return s.Input[start:s.pos.Offset], true
 }
 
-// consumeUpTo advances the cursor until the given byte is seen, returning all
+// consumeUntil advances the cursor until the given byte is seen, returning all
 // source bytes between the initial cursor position and excluding the given
-// byte.
+// byte. This function will advance past the searched-for byte.
 //
 // If EOF is reached before the byte is seen, the function returns false.
-func (s *Scanner) consumeUpTo(b byte) (string, bool) {
-	start := s.pos.Offset
-	for !s.isEOF() {
-		if s.Input[s.pos.Offset] == b {
-			ret := s.Input[start:s.pos.Offset]
-			s.advance()
-			return ret, true
-		}
-		s.advance()
+func (s *Scanner) consumeUntil(b byte) (string, bool) {
+	if i := strings.IndexByte(s.Input[s.pos.Offset:], b); i != -1 {
+		text, _ := s.consume(i + 1)
+		return text[:i], true
 	}
 	return "", false
 }
@@ -188,50 +192,47 @@ func (s *Scanner) consumeUpTo(b byte) (string, bool) {
 //
 // This function assumes that the scanner's cursor is currently on a \ rune.
 func (s *Scanner) parseEscapeSequence() (rune, error) {
-	s.advance() // Skip the \. The caller is assumed to have validated it.
-	if s.isEOF() {
+	s.advance(1) // Skip the \. The caller is assumed to have validated it.
+	if s.isEOF(0) {
 		return 0, &ParseError{s.pos, errors.New("expected escape character")}
 	}
+
 	switch c := s.Input[s.pos.Offset]; c {
 	case 'n':
-		s.advance()
+		s.advance(1)
 		return '\n', nil
 	case '"', '\\':
-		s.advance()
+		s.advance(1)
 		return rune(c), nil
-	case 'x':
-		s.advance()
-		if s.pos.Offset+2 > len(s.Input) {
-			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
-		}
-		b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+2])
-		if err != nil {
-			return 0, &ParseError{s.pos, err}
+	case 'x', 'u', 'U':
+		s.advance(1)
+
+		var digits int
+		switch c {
+		case 'x':
+			digits = 2
+		case 'u':
+			digits = 4
+		case 'U':
+			digits = 8
 		}
-		s.advanceBytes(2)
-		return rune(b[0]), nil
-	case 'u':
-		s.advance()
-		if s.pos.Offset+4 > len(s.Input) {
+
+		hexes, ok := s.consume(digits)
+		if !ok {
 			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
 		}
-		b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+4])
+
+		bytes, err := hex.DecodeString(hexes)
 		if err != nil {
 			return 0, &ParseError{s.pos, err}
 		}
-		s.advanceBytes(4)
-		return rune(b[0])<<8 | rune(b[1]), nil
-	case 'U':
-		s.advance()
-		if s.pos.Offset+8 > len(s.Input) {
-			return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")}
-		}
-		b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+8])
-		if err != nil {
-			return 0, &ParseError{s.pos, err}
+
+		var r rune
+		for _, b := range bytes {
+			r <<= 8
+			r |= rune(b)
 		}
-		s.advanceBytes(8)
-		return rune(b[0])<<24 | rune(b[1])<<16 | rune(b[2])<<8 | rune(b[3]), nil
+		return r, nil
 	default:
 		return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)}
 	}
@@ -241,16 +242,16 @@ func (s *Scanner) parseEscapeSequence() (rune, error) {
 //
 // This function assumes that the scanner's cursor is currently on a " rune.
 func (s *Scanner) parseQuotedString() (token, error) {
-	s.advance() // Skip the ". The caller is assumed to have validated it.
+	s.advance(1) // Skip the ". The caller is assumed to have validated it.
 	start := s.pos
 	var bytes []byte
 	for {
-		if s.isEOF() {
+		if s.isEOF(0) {
 			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
 		switch c := s.Input[s.pos.Offset]; c {
 		case '"':
-			s.advance()
+			s.advance(1)
 			return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil
 		case '\\':
 			escapeStart := s.pos
@@ -264,7 +265,7 @@ func (s *Scanner) parseQuotedString() (token, error) {
 			}
 			bytes = append(bytes, byte(r))
 		default:
-			s.advance()
+			s.advance(1)
 			bytes = append(bytes, c)
 		}
 	}
@@ -275,17 +276,17 @@ func (s *Scanner) parseQuotedString() (token, error) {
 // This function assumes that the scanner's cursor is currently on a u followed
 // by a " rune.
 func (s *Scanner) parseUTF16String() (token, error) {
-	s.advance() // Skip the u. The caller is assumed to have validated it.
-	s.advance() // Skip the ". The caller is assumed to have validated it.
+	s.advance(2) // Skip the u". The caller is assumed to have validated it.
 	start := s.pos
 	var bytes []byte
 	for {
-		if s.isEOF() {
+		if s.isEOF(0) {
 			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
-		switch c := s.Input[s.pos.Offset]; c {
+
+		switch s.Input[s.pos.Offset] {
 		case '"':
-			s.advance()
+			s.advance(1)
 			return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil
 		case '\\':
 			r, err := s.parseEscapeSequence()
@@ -301,7 +302,7 @@ func (s *Scanner) parseUTF16String() (token, error) {
 			if r == utf8.RuneError && n <= 1 {
 				return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")}
 			}
-			s.advanceBytes(n)
+			s.advance(n)
 			bytes = appendUTF16(bytes, r)
 		}
 	}
@@ -312,17 +313,17 @@ func (s *Scanner) parseUTF16String() (token, error) {
 // This function assumes that the scanner's cursor is currently on a U followed
 // by a " rune.
 func (s *Scanner) parseUTF32String() (token, error) {
-	s.advance() // Skip the U. The caller is assumed to have validated it.
-	s.advance() // Skip the ". The caller is assumed to have validated it.
+	s.advance(2) // Skip the U". The caller is assumed to have validated it.
 	start := s.pos
 	var bytes []byte
 	for {
-		if s.isEOF() {
+		if s.isEOF(0) {
 			return token{}, &ParseError{start, errors.New("unmatched \"")}
 		}
-		switch c := s.Input[s.pos.Offset]; c {
+
+		switch s.Input[s.pos.Offset] {
 		case '"':
-			s.advance()
+			s.advance(1)
 			return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil
 		case '\\':
 			r, err := s.parseEscapeSequence()
@@ -338,7 +339,7 @@ func (s *Scanner) parseUTF32String() (token, error) {
 			if r == utf8.RuneError && n <= 1 {
 				return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")}
 			}
-			s.advanceBytes(n)
+			s.advance(n)
 			bytes = appendUTF32(bytes, r)
 		}
 	}
@@ -347,47 +348,46 @@ func (s *Scanner) parseUTF32String() (token, error) {
 // next lexes the next token.
 func (s *Scanner) next() (token, error) {
 again:
-	if s.isEOF() {
+	if s.isEOF(0) {
 		return token{Kind: tokenEOF, Pos: s.pos}, nil
 	}
 
 	switch s.Input[s.pos.Offset] {
 	case ' ', '\t', '\n', '\r':
 		// Skip whitespace.
-		s.advance()
+		s.advance(1)
 		goto again
 	case '#':
 		// Skip to the end of the comment.
-		s.advance()
-		for !s.isEOF() {
+		s.advance(1)
+		for !s.isEOF(0) {
 			wasNewline := s.Input[s.pos.Offset] == '\n'
-			s.advance()
+			s.advance(1)
 			if wasNewline {
 				break
 			}
 		}
 		goto again
 	case '{':
-		s.advance()
+		s.advance(1)
 		return token{Kind: tokenLeftCurly, Pos: s.pos}, nil
 	case '}':
-		s.advance()
+		s.advance(1)
 		return token{Kind: tokenRightCurly, Pos: s.pos}, nil
 	case '"':
 		return s.parseQuotedString()
 	case 'u':
-		if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' {
+		if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' {
 			return s.parseUTF16String()
 		}
 	case 'U':
-		if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' {
+		if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' {
 			return s.parseUTF32String()
 		}
 	case 'b':
-		if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '`' {
-			s.advance() // Skip the b.
-			s.advance() // Skip the `.
-			bitStr, ok := s.consumeUpTo('`')
+		if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '`' {
+			s.advance(2) // Skip the b`.
+			bitStr, ok := s.consumeUntil('`')
 			if !ok {
 				return token{}, &ParseError{s.pos, errors.New("unmatched `")}
 			}
@@ -431,8 +431,8 @@ again:
 			return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil
 		}
 	case '`':
-		s.advance()
-		hexStr, ok := s.consumeUpTo('`')
+		s.advance(1)
+		hexStr, ok := s.consumeUntil('`')
 		if !ok {
 			return token{}, &ParseError{s.pos, errors.New("unmatched `")}
 		}
@@ -442,8 +442,8 @@ again:
 		}
 		return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil
 	case '[':
-		s.advance()
-		tagStr, ok := s.consumeUpTo(']')
+		s.advance(1)
+		tagStr, ok := s.consumeUntil(']')
 		if !ok {
 			return token{}, &ParseError{s.pos, errors.New("unmatched [")}
 		}
@@ -461,14 +461,14 @@ again:
 	// Normal token. Consume up to the next whitespace character, symbol, or
 	// EOF.
 	start := s.pos
-	s.advance()
+	s.advance(1)
 loop:
-	for !s.isEOF() {
+	for !s.isEOF(0) {
 		switch s.Input[s.pos.Offset] {
 		case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#':
 			break loop
 		default:
-			s.advance()
+			s.advance(1)
 		}
 	}
 

From 51662df30b524148a7995dc787599fa8d65e586c Mon Sep 17 00:00:00 2001
From: Miguel Young de la Sota <mcyoung@mit.edu>
Date: Tue, 22 Feb 2022 00:07:22 -0500
Subject: [PATCH 5/5] Make cmd/ascii2der use the correct file name for errors

---
 cmd/ascii2der/main.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go
index f379968..b6749b7 100644
--- a/cmd/ascii2der/main.go
+++ b/cmd/ascii2der/main.go
@@ -54,7 +54,10 @@ func main() {
 		os.Exit(1)
 	}
 
-	outBytes, err := ascii2der.NewScanner(string(inBytes)).Exec()
+	scanner := ascii2der.NewScanner(string(inBytes))
+	scanner.SetFile(*inPath)
+
+	outBytes, err := scanner.Exec()
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err)
 		os.Exit(1)