From bbbeeef0009071ed5c0e9b2061ab4d090f568e02 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 21 Feb 2022 17:02:37 -0500 Subject: [PATCH 1/5] Extract ascii2der into a library package --- {cmd/ascii2der => ascii2der}/encoder.go | 19 ++- {cmd/ascii2der => ascii2der}/encoder_test.go | 2 +- {cmd/ascii2der => ascii2der}/scanner.go | 134 ++++++++----------- {cmd/ascii2der => ascii2der}/scanner_test.go | 8 +- {cmd/ascii2der => ascii2der}/values.go | 2 +- {cmd/ascii2der => ascii2der}/values_test.go | 2 +- cmd/ascii2der/main.go | 4 +- 7 files changed, 86 insertions(+), 85 deletions(-) rename {cmd/ascii2der => ascii2der}/encoder.go (88%) rename {cmd/ascii2der => ascii2der}/encoder_test.go (99%) rename {cmd/ascii2der => ascii2der}/scanner.go (76%) rename {cmd/ascii2der => ascii2der}/scanner_test.go (99%) rename {cmd/ascii2der => ascii2der}/values.go (99%) rename {cmd/ascii2der => ascii2der}/values_test.go (99%) diff --git a/cmd/ascii2der/encoder.go b/ascii2der/encoder.go similarity index 88% rename from cmd/ascii2der/encoder.go rename to ascii2der/encoder.go index 0d84559..6b9f787 100644 --- a/cmd/ascii2der/encoder.go +++ b/ascii2der/encoder.go @@ -12,15 +12,32 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "errors" "fmt" + "unicode/utf16" "github.com/google/der-ascii/internal" ) +func appendUTF16(dst []byte, r rune) []byte { + if r <= 0xffff { + // Note this logic intentionally tolerates unpaired surrogates. 
+ return append(dst, byte(r>>8), byte(r)) + } + + r1, r2 := utf16.EncodeRune(r) + dst = append(dst, byte(r1>>8), byte(r1)) + dst = append(dst, byte(r2>>8), byte(r2)) + return dst +} + +func appendUTF32(dst []byte, r rune) []byte { + return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) +} + func appendBase128(dst []byte, value uint32) []byte { dst, err := appendBase128WithLength(dst, value, 0) if err != nil { diff --git a/cmd/ascii2der/encoder_test.go b/ascii2der/encoder_test.go similarity index 99% rename from cmd/ascii2der/encoder_test.go rename to ascii2der/encoder_test.go index 5faa8ba..8c964da 100644 --- a/cmd/ascii2der/encoder_test.go +++ b/ascii2der/encoder_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "bytes" diff --git a/cmd/ascii2der/scanner.go b/ascii2der/scanner.go similarity index 76% rename from cmd/ascii2der/scanner.go rename to ascii2der/scanner.go index f4c49f2..feeacee 100644 --- a/cmd/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "encoding/hex" @@ -21,14 +21,13 @@ import ( "regexp" "strconv" "strings" - "unicode/utf16" "unicode/utf8" "github.com/google/der-ascii/internal" ) -// A position describes a location in the input stream. -type position struct { +// A Position describes a location in the input stream. +type Position struct { Offset int // offset, starting at 0 Line int // line number, starting at 1 Column int // column number, starting at 1 (byte count) @@ -46,14 +45,14 @@ const ( tokenEOF ) -// A parseError is an error during parsing DER ASCII. -type parseError struct { - Pos position +// A ParseError is an error during parsing DER ASCII. 
+type ParseError struct { + Pos Position Err error } -func (t *parseError) Error() string { - return fmt.Sprintf("line %d: %s", t.Pos.Line, t.Err) +func (e *ParseError) Error() string { + return fmt.Sprintf("line %d: %s", e.Pos.Line, e.Err) } // A token is a token in a DER ASCII file. @@ -64,7 +63,7 @@ type token struct { // bytes. Value []byte // Pos is the position of the first byte of the token. - Pos position + Pos Position // Length, for a tokenLongForm token, is the number of bytes to use to // encode the length, not including the initial one. Length int @@ -75,19 +74,19 @@ var ( regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) ) -type scanner struct { +type Scanner struct { text string - pos position + pos Position } -func newScanner(text string) *scanner { - return &scanner{text: text, pos: position{Line: 1}} +func NewScanner(text string) *Scanner { + return &Scanner{text: text, pos: Position{Line: 1}} } -func (s *scanner) parseEscapeSequence() (rune, error) { +func (s *Scanner) parseEscapeSequence() (rune, error) { s.advance() // Skip the \. The caller is assumed to have validated it. 
if s.isEOF() { - return 0, &parseError{s.pos, errors.New("expected escape character")} + return 0, &ParseError{s.pos, errors.New("expected escape character")} } switch c := s.text[s.pos.Offset]; c { case 'n': @@ -99,48 +98,48 @@ func (s *scanner) parseEscapeSequence() (rune, error) { case 'x': s.advance() if s.pos.Offset+2 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} + return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+2]) if err != nil { - return 0, &parseError{s.pos, err} + return 0, &ParseError{s.pos, err} } s.advanceBytes(2) return rune(b[0]), nil case 'u': s.advance() if s.pos.Offset+4 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} + return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+4]) if err != nil { - return 0, &parseError{s.pos, err} + return 0, &ParseError{s.pos, err} } s.advanceBytes(4) return rune(b[0])<<8 | rune(b[1]), nil case 'U': s.advance() if s.pos.Offset+8 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} + return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+8]) if err != nil { - return 0, &parseError{s.pos, err} + return 0, &ParseError{s.pos, err} } s.advanceBytes(8) return rune(b[0])<<24 | rune(b[1])<<16 | rune(b[2])<<8 | rune(b[3]), nil default: - return 0, &parseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} + return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} } } -func (s *scanner) parseQuotedString() (token, error) { +func (s *Scanner) parseQuotedString() (token, error) { s.advance() // Skip the ". The caller is assumed to have validated it. 
start := s.pos var bytes []byte for { if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} + return token{}, &ParseError{start, errors.New("unmatched \"")} } switch c := s.text[s.pos.Offset]; c { case '"': @@ -154,7 +153,7 @@ func (s *scanner) parseQuotedString() (token, error) { } if r > 0xff { // TODO(davidben): Alternatively, should these encode as UTF-8? - return token{}, &parseError{escapeStart, errors.New("illegal escape for quoted string")} + return token{}, &ParseError{escapeStart, errors.New("illegal escape for quoted string")} } bytes = append(bytes, byte(r)) default: @@ -164,26 +163,14 @@ func (s *scanner) parseQuotedString() (token, error) { } } -func appendUTF16(b []byte, r rune) []byte { - if r <= 0xffff { - // Note this logic intentionally tolerates unpaired surrogates. - return append(b, byte(r>>8), byte(r)) - } - - r1, r2 := utf16.EncodeRune(r) - b = append(b, byte(r1>>8), byte(r1)) - b = append(b, byte(r2>>8), byte(r2)) - return b -} - -func (s *scanner) parseUTF16String() (token, error) { +func (s *Scanner) parseUTF16String() (token, error) { s.advance() // Skip the u. The caller is assumed to have validated it. s.advance() // Skip the ". The caller is assumed to have validated it. start := s.pos var bytes []byte for { if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} + return token{}, &ParseError{start, errors.New("unmatched \"")} } switch c := s.text[s.pos.Offset]; c { case '"': @@ -201,7 +188,7 @@ func (s *scanner) parseUTF16String() (token, error) { // legitimate replacement charaacter in the input. The documentation // says errors return (RuneError, 0) or (RuneError, 1). 
if r == utf8.RuneError && n <= 1 { - return token{}, &parseError{s.pos, errors.New("invalid UTF-8")} + return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} } s.advanceBytes(n) bytes = appendUTF16(bytes, r) @@ -209,18 +196,14 @@ func (s *scanner) parseUTF16String() (token, error) { } } -func appendUTF32(b []byte, r rune) []byte { - return append(b, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) -} - -func (s *scanner) parseUTF32String() (token, error) { +func (s *Scanner) parseUTF32String() (token, error) { s.advance() // Skip the U. The caller is assumed to have validated it. s.advance() // Skip the ". The caller is assumed to have validated it. start := s.pos var bytes []byte for { if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} + return token{}, &ParseError{start, errors.New("unmatched \"")} } switch c := s.text[s.pos.Offset]; c { case '"': @@ -238,7 +221,7 @@ func (s *scanner) parseUTF32String() (token, error) { // legitimate replacement charaacter in the input. The documentation // says errors return (RuneError, 0) or (RuneError, 1). if r == utf8.RuneError && n <= 1 { - return token{}, &parseError{s.pos, errors.New("invalid UTF-8")} + return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} } s.advanceBytes(n) bytes = appendUTF32(bytes, r) @@ -246,7 +229,7 @@ func (s *scanner) parseUTF32String() (token, error) { } } -func (s *scanner) Next() (token, error) { +func (s *Scanner) next() (token, error) { again: if s.isEOF() { return token{Kind: tokenEOF, Pos: s.pos}, nil @@ -290,7 +273,7 @@ again: s.advance() // Skip the `. bitStr, ok := s.consumeUpTo('`') if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched `")} + return token{}, &ParseError{s.pos, errors.New("unmatched `")} } // The leading byte is the number of "extra" bits at the end. 
@@ -309,7 +292,7 @@ again: bitCount++ case '|': if sawPipe { - return token{}, &parseError{s.pos, errors.New("duplicate |")} + return token{}, &ParseError{s.pos, errors.New("duplicate |")} } // bitsRemaining is the number of bits remaining in the output that haven't @@ -317,13 +300,13 @@ again: bitsRemaining := (len(value)-1)*8 - bitCount inputRemaining := len(bitStr) - i - 1 if inputRemaining > bitsRemaining { - return token{}, &parseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)} + return token{}, &ParseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)} } sawPipe = true value[0] = byte(bitsRemaining) default: - return token{}, &parseError{s.pos, fmt.Errorf("unexpected rune %q", r)} + return token{}, &ParseError{s.pos, fmt.Errorf("unexpected rune %q", r)} } } if !sawPipe { @@ -335,26 +318,26 @@ again: s.advance() hexStr, ok := s.consumeUpTo('`') if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched `")} + return token{}, &ParseError{s.pos, errors.New("unmatched `")} } bytes, err := hex.DecodeString(hexStr) if err != nil { - return token{}, &parseError{s.pos, err} + return token{}, &ParseError{s.pos, err} } return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil case '[': s.advance() tagStr, ok := s.consumeUpTo(']') if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched [")} + return token{}, &ParseError{s.pos, errors.New("unmatched [")} } tag, err := decodeTagString(tagStr) if err != nil { - return token{}, &parseError{s.pos, err} + return token{}, &ParseError{s.pos, err} } value, err := appendTag(nil, tag) if err != nil { - return token{}, &parseError{s.pos, err} + return token{}, &ParseError{s.pos, err} } return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil } @@ -381,7 +364,7 @@ loop: value, err := appendTag(nil, tag) if err != nil { // This is impossible; built-in tags always encode. 
- return token{}, &parseError{s.pos, err} + return token{}, &ParseError{s.pos, err} } return token{Kind: tokenBytes, Value: value, Pos: start}, nil } @@ -389,7 +372,7 @@ loop: if regexpInteger.MatchString(symbol) { value, err := strconv.ParseInt(symbol, 10, 64) if err != nil { - return token{}, &parseError{start, err} + return token{}, &ParseError{start, err} } return token{Kind: tokenBytes, Value: appendInteger(nil, value), Pos: s.pos}, nil } @@ -400,7 +383,7 @@ loop: for _, s := range oidStr { u, err := strconv.ParseUint(s, 10, 32) if err != nil { - return token{}, &parseError{start, err} + return token{}, &ParseError{start, err} } oid = append(oid, uint32(u)) } @@ -426,7 +409,7 @@ loop: if isLongFormOverride(symbol) { l, err := decodeLongFormOverride(symbol) if err != nil { - return token{}, &parseError{start, err} + return token{}, &ParseError{start, err} } return token{Kind: tokenLongForm, Length: l}, nil } @@ -434,11 +417,11 @@ loop: return token{}, fmt.Errorf("unrecognized symbol %q", symbol) } -func (s *scanner) isEOF() bool { +func (s *Scanner) isEOF() bool { return s.pos.Offset >= len(s.text) } -func (s *scanner) advance() { +func (s *Scanner) advance() { if !s.isEOF() { if s.text[s.pos.Offset] == '\n' { s.pos.Line++ @@ -450,13 +433,13 @@ func (s *scanner) advance() { } } -func (s *scanner) advanceBytes(n int) { +func (s *Scanner) advanceBytes(n int) { for i := 0; i < n; i++ { s.advance() } } -func (s *scanner) consumeUpTo(b byte) (string, bool) { +func (s *Scanner) consumeUpTo(b byte) (string, bool) { start := s.pos.Offset for !s.isEOF() { if s.text[s.pos.Offset] == b { @@ -469,22 +452,22 @@ func (s *scanner) consumeUpTo(b byte) (string, bool) { return "", false } -func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) { +func (s *Scanner) exec(leftCurly *token) ([]byte, error) { var out []byte var lengthModifier *token for { - token, err := scanner.Next() + token, err := s.next() if err != nil { return nil, err } if lengthModifier != 
nil && token.Kind != tokenLeftCurly { - return nil, &parseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} + return nil, &ParseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} } switch token.Kind { case tokenBytes: out = append(out, token.Value...) case tokenLeftCurly: - child, err := asciiToDERImpl(scanner, &token) + child, err := s.exec(&token) if err != nil { return nil, err } @@ -504,7 +487,7 @@ func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) { out, err = appendLength(out, len(child), lengthOverride) if err != nil { // appendLength may fail if the lengthModifier was incompatible. - return nil, &parseError{lengthModifier.Pos, err} + return nil, &ParseError{lengthModifier.Pos, err} } out = append(out, child...) lengthModifier = nil @@ -512,21 +495,20 @@ func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) { if leftCurly != nil { return out, nil } - return nil, &parseError{token.Pos, errors.New("unmatched '}'")} + return nil, &ParseError{token.Pos, errors.New("unmatched '}'")} case tokenLongForm, tokenIndefinite: lengthModifier = &token case tokenEOF: if leftCurly == nil { return out, nil } - return nil, &parseError{leftCurly.Pos, errors.New("unmatched '{'")} + return nil, &ParseError{leftCurly.Pos, errors.New("unmatched '{'")} default: panic(token) } } } -func asciiToDER(input string) ([]byte, error) { - scanner := newScanner(input) - return asciiToDERImpl(scanner, nil) +func (s *Scanner) Exec() ([]byte, error) { + return s.exec(nil) } diff --git a/cmd/ascii2der/scanner_test.go b/ascii2der/scanner_test.go similarity index 99% rename from cmd/ascii2der/scanner_test.go rename to ascii2der/scanner_test.go index cac14d2..ff0fe85 100644 --- a/cmd/ascii2der/scanner_test.go +++ b/ascii2der/scanner_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package main +package ascii2der import ( "bytes" @@ -383,9 +383,9 @@ indefinite long-form:2`, } func scanAll(in string) (tokens []token, ok bool) { - scanner := newScanner(in) + scanner := NewScanner(in) for { - token, err := scanner.Next() + token, err := scanner.next() if err != nil { return } @@ -446,7 +446,7 @@ var asciiToDERTests = []struct { func TestASCIIToDER(t *testing.T) { for i, tt := range asciiToDERTests { - out, err := asciiToDER(tt.in) + out, err := NewScanner(tt.in).Exec() ok := err == nil if !tt.ok { if ok { diff --git a/cmd/ascii2der/values.go b/ascii2der/values.go similarity index 99% rename from cmd/ascii2der/values.go rename to ascii2der/values.go index 03e82dc..d8406e0 100644 --- a/cmd/ascii2der/values.go +++ b/ascii2der/values.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "errors" diff --git a/cmd/ascii2der/values_test.go b/ascii2der/values_test.go similarity index 99% rename from cmd/ascii2der/values_test.go rename to ascii2der/values_test.go index 17d6a9c..ea74213 100644 --- a/cmd/ascii2der/values_test.go +++ b/ascii2der/values_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package main +package ascii2der import ( "testing" diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go index ca5c73d..f379968 100644 --- a/cmd/ascii2der/main.go +++ b/cmd/ascii2der/main.go @@ -20,6 +20,8 @@ import ( "fmt" "io/ioutil" "os" + + "github.com/google/der-ascii/ascii2der" ) var inPath = flag.String("i", "", "input file to use (defaults to stdin)") @@ -52,7 +54,7 @@ func main() { os.Exit(1) } - outBytes, err := asciiToDER(string(inBytes)) + outBytes, err := ascii2der.NewScanner(string(inBytes)).Exec() if err != nil { fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err) os.Exit(1) From 9c05123218977c1940725c4384be07e0c2d904e6 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 21 Feb 2022 17:31:28 -0500 Subject: [PATCH 2/5] Add doc comments to remaning functions in encoder.go --- ascii2der/encoder.go | 37 +++++++++++++++++++++++++++++++------ ascii2der/encoder_test.go | 10 +++++----- ascii2der/scanner.go | 6 +++--- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/ascii2der/encoder.go b/ascii2der/encoder.go index 6b9f787..87993dc 100644 --- a/ascii2der/encoder.go +++ b/ascii2der/encoder.go @@ -22,9 +22,12 @@ import ( "github.com/google/der-ascii/internal" ) +// appendUTF16 marshals r using UTF-16 and appends the result to dst, returning +// the updated slice. +// +// This logic intentionally tolerates unpaired surrogates. func appendUTF16(dst []byte, r rune) []byte { if r <= 0xffff { - // Note this logic intentionally tolerates unpaired surrogates. return append(dst, byte(r>>8), byte(r)) } @@ -34,10 +37,20 @@ func appendUTF16(dst []byte, r rune) []byte { return dst } +// appendUTF16 marshals r using UTF-32 and appends the result to dst, returning +// the updated slice. +// +// In other words, this function writes r as an integer in big-endian order. 
func appendUTF32(dst []byte, r rune) []byte { return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) } +// appendBase128 marshals an integer in base 128, a varint format used by OIDs +// and long-form tag numbers, and appends the result to dst, returning the +// updated slice. +// +// This function is the same as appendBase128WithLength with length set to zero, +// which cannot fail. func appendBase128(dst []byte, value uint32) []byte { dst, err := appendBase128WithLength(dst, value, 0) if err != nil { @@ -47,6 +60,11 @@ func appendBase128(dst []byte, value uint32) []byte { return dst } +// appendBase128 marshals an integer in base 128, a varint format used by OIDs +// and long-form tag numbers, and appends the result to dst, returning the +// updated slice. +// +// If length is zero, the minimal length is chosen. func appendBase128WithLength(dst []byte, value uint32, length int) ([]byte, error) { // Count how many bytes are needed. var l int @@ -137,18 +155,25 @@ func appendInteger(dst []byte, value int64) []byte { return dst } -func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, bool) { +// appendObjectIdentifier marshals the given array of integers as an OID. +func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, error) { // Validate the input before anything is written. 
- if len(value) < 2 || value[0] > 2 || (value[0] < 2 && value[1] > 39) { - return dst, false + if len(value) < 2 { + return dst, errors.New("OIDs must have at least two arcs") + } + if value[0] > 2 { + return dst, fmt.Errorf("first arc of an OID must be one of 0, 1, or 2; got %d", value[0]) + } + if value[0] < 2 && value[1] > 39 { + return dst, fmt.Errorf("second arc of an OID must be at most 39; got %d", value[1]) } if value[0]*40+value[1] < value[1] { - return dst, false + return dst, errors.New("first two arcs overflowed") } dst = appendBase128(dst, value[0]*40+value[1]) for _, v := range value[2:] { dst = appendBase128(dst, v) } - return dst, true + return dst, nil } diff --git a/ascii2der/encoder_test.go b/ascii2der/encoder_test.go index 8c964da..f0472b8 100644 --- a/ascii2der/encoder_test.go +++ b/ascii2der/encoder_test.go @@ -162,21 +162,21 @@ var appendObjectIdentifierTests = []struct { func TestAppendObjectIdentifier(t *testing.T) { for i, tt := range appendObjectIdentifierTests { - dst, ok := appendObjectIdentifier(nil, tt.value) + dst, err := appendObjectIdentifier(nil, tt.value) if !tt.ok { - if ok { + if err == nil { t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value) } else if len(dst) != 0 { t.Errorf("%d. appendObjectIdentifier did not preserve input.", i) } } else if !bytes.Equal(dst, tt.encoded) { - t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, wanted %v.", i, tt.value, dst, tt.encoded) + t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, %v, wanted %v.", i, tt.value, dst, err, tt.encoded) } dst = []byte{0} - dst, ok = appendObjectIdentifier(dst, tt.value) + dst, err = appendObjectIdentifier(dst, tt.value) if !tt.ok { - if ok { + if err == nil { t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value) } else if !bytes.Equal(dst, []byte{0}) { t.Errorf("%d. 
appendObjectIdentifier did not preserve input.", i) diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go index feeacee..a195bac 100644 --- a/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -387,9 +387,9 @@ loop: } oid = append(oid, uint32(u)) } - der, ok := appendObjectIdentifier(nil, oid) - if !ok { - return token{}, errors.New("invalid OID") + der, err := appendObjectIdentifier(nil, oid) + if err != nil { + return token{}, &ParseError{start, err} } return token{Kind: tokenBytes, Value: der, Pos: s.pos}, nil } From 6f3533b174171da8ab082f82dfc44f85ec739e96 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 21 Feb 2022 23:02:50 -0500 Subject: [PATCH 3/5] Document scanner.go, including an example --- ascii2der/encoder.go | 8 +- ascii2der/examples_test.go | 31 ++++++ ascii2der/scanner.go | 219 +++++++++++++++++++++++++------------ 3 files changed, 186 insertions(+), 72 deletions(-) create mode 100644 ascii2der/examples_test.go diff --git a/ascii2der/encoder.go b/ascii2der/encoder.go index 87993dc..a0b04d5 100644 --- a/ascii2der/encoder.go +++ b/ascii2der/encoder.go @@ -24,7 +24,7 @@ import ( // appendUTF16 marshals r using UTF-16 and appends the result to dst, returning // the updated slice. -// +// // This logic intentionally tolerates unpaired surrogates. func appendUTF16(dst []byte, r rune) []byte { if r <= 0xffff { @@ -39,7 +39,7 @@ func appendUTF16(dst []byte, r rune) []byte { // appendUTF16 marshals r using UTF-32 and appends the result to dst, returning // the updated slice. -// +// // In other words, this function writes r as an integer in big-endian order. func appendUTF32(dst []byte, r rune) []byte { return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) @@ -48,7 +48,7 @@ func appendUTF32(dst []byte, r rune) []byte { // appendBase128 marshals an integer in base 128, a varint format used by OIDs // and long-form tag numbers, and appends the result to dst, returning the // updated slice. 
-// +// // This function is the same as appendBase128WithLength with length set to zero, // which cannot fail. func appendBase128(dst []byte, value uint32) []byte { @@ -63,7 +63,7 @@ func appendBase128(dst []byte, value uint32) []byte { // appendBase128 marshals an integer in base 128, a varint format used by OIDs // and long-form tag numbers, and appends the result to dst, returning the // updated slice. -// +// // If length is zero, the minimal length is chosen. func appendBase128WithLength(dst []byte, value uint32, length int) ([]byte, error) { // Count how many bytes are needed. diff --git a/ascii2der/examples_test.go b/ascii2der/examples_test.go new file mode 100644 index 0000000..e3fbbc9 --- /dev/null +++ b/ascii2der/examples_test.go @@ -0,0 +1,31 @@ +// Copyright 2015 The DER ASCII Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ascii2der + +import ( + "fmt" +) + +func Example() { + scanner := NewScanner(` + SEQUENCE { + INTEGER { "totally an integer" } + } +`) + + der, _ := scanner.Exec() + fmt.Printf("%x\n", der) + // Output: 30140212746f74616c6c7920616e20696e7465676572 +} diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go index a195bac..eb13caa 100644 --- a/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -12,6 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+// Package ascii2der implements the DER-ASCII language described in +// https://github.com/google/der-ascii/blob/master/language.txt. +// +// The Scanner type can be used to parse DER-ASCII files and output byte blobs +// that may or may not be valid DER. package ascii2der import ( @@ -27,10 +32,22 @@ import ( ) // A Position describes a location in the input stream. +// +// The zero-value Position represents the first byte of an anonymous input file. type Position struct { - Offset int // offset, starting at 0 - Line int // line number, starting at 1 - Column int // column number, starting at 1 (byte count) + Offset int // Byte offset. + Line int // Line number (zero-indexed). + Column int // Column number (zero-indexed byte, not rune, count). + File string // Optional file name for pretty-printing. +} + +// String converts a Position to a string. +func (p Position) String() string { + file := p.File + if file == "" { + file = "" + } + return fmt.Sprintf("%s:%d:%d", file, p.Line+1, p.Column+1) } // A tokenKind is a kind of token. @@ -45,14 +62,26 @@ const ( tokenEOF ) -// A ParseError is an error during parsing DER ASCII. +// A ParseError may be produced while executing a DER ASCII file, wrapping +// another error along with a position. +// +// Errors produced by functions in this package may be type-asserted to +// *ParseError to try and obtain the position at which the error occurred. type ParseError struct { Pos Position Err error } +// Error makes this type into an error type. func (e *ParseError) Error() string { - return fmt.Sprintf("line %d: %s", e.Pos.Line, e.Err) + return fmt.Sprintf("%s: %s", e.Pos, e.Err) +} + +// Unwrap extracts the inner wrapped error. +// +// See errors.Unwrap(). +func (e *ParseError) Unwrap() error { + return e.Err } // A token is a token in a DER ASCII file. @@ -74,21 +103,96 @@ var ( regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) ) +// A Scanner represents parsing state for a DER ASCII file. 
+// +// A zero-value Scanner is ready to begin parsing (given that Input is set to +// a valid value). However, it is recommended to use NewScanner to create a new +// Scanner, since it can pre-populate fields other than Input with default +// settings. type Scanner struct { - text string - pos Position + // Input is the input text being processed. + Input string + // pos is the current position at which parsing should + // resume. The Offset field is used for indexing into Input; the remaining + // fields are used for error-reporting. + pos Position +} + +// NewScanner creates a new scanner for parsing the given input. +func NewScanner(input string) *Scanner { + return &Scanner{Input: input} +} + +// SetFile sets the file path shown in this Scanner's error reports. +func (s *Scanner) SetFile(path string) { + s.pos.File = path +} + +// Exec consumes tokens until Input is exhausted, returning the resulting +// encoded maybe-DER. +func (s *Scanner) Exec() ([]byte, error) { + return s.exec(nil) +} + +// isEOF returns whether the cursor is at or past the end of the input string. +func (s *Scanner) isEOF() bool { + return s.pos.Offset >= len(s.Input) +} + +// advance advances the scanner's cursor one position. +// +// Unlike just s.pos.Offset++, this will not proceed beyond the end of the +// string, and will update the line and column information accordingly. +func (s *Scanner) advance() { + if !s.isEOF() { + if s.Input[s.pos.Offset] == '\n' { + s.pos.Line++ + s.pos.Column = 0 + } else { + s.pos.Column++ + } + s.pos.Offset++ + } } -func NewScanner(text string) *Scanner { - return &Scanner{text: text, pos: Position{Line: 1}} +// advanceBytes calls advance() n times. +func (s *Scanner) advanceBytes(n int) { + for i := 0; i < n; i++ { + s.advance() + } } +// consumeUpTo advances the cursor until the given byte is seen, returning all +// source bytes between the initial cursor position and excluding the given +// byte. 
+// +// If EOF is reached before the byte is seen, the function returns false. +func (s *Scanner) consumeUpTo(b byte) (string, bool) { + start := s.pos.Offset + for !s.isEOF() { + if s.Input[s.pos.Offset] == b { + ret := s.Input[start:s.pos.Offset] + s.advance() + return ret, true + } + s.advance() + } + return "", false +} + +// parseEscapeSequence parses a DER-ASCII escape sequence, returning the rune +// it escapes. +// +// Valid escapes are: +// \n \" \\ \xNN \uNNNN \UNNNNNNNN +// +// This function assumes that the scanner's cursor is currently on a \ rune. func (s *Scanner) parseEscapeSequence() (rune, error) { s.advance() // Skip the \. The caller is assumed to have validated it. if s.isEOF() { return 0, &ParseError{s.pos, errors.New("expected escape character")} } - switch c := s.text[s.pos.Offset]; c { + switch c := s.Input[s.pos.Offset]; c { case 'n': s.advance() return '\n', nil @@ -97,10 +201,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { return rune(c), nil case 'x': s.advance() - if s.pos.Offset+2 > len(s.text) { + if s.pos.Offset+2 > len(s.Input) { return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+2]) + b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+2]) if err != nil { return 0, &ParseError{s.pos, err} } @@ -108,10 +212,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { return rune(b[0]), nil case 'u': s.advance() - if s.pos.Offset+4 > len(s.text) { + if s.pos.Offset+4 > len(s.Input) { return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+4]) + b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+4]) if err != nil { return 0, &ParseError{s.pos, err} } @@ -119,10 +223,10 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { return rune(b[0])<<8 | rune(b[1]), nil case 'U': s.advance() - if s.pos.Offset+8 > len(s.text) { + if 
s.pos.Offset+8 > len(s.Input) { return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+8]) + b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+8]) if err != nil { return 0, &ParseError{s.pos, err} } @@ -133,6 +237,9 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { } } +// parseQuotedString parses a UTF-8 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a " rune. func (s *Scanner) parseQuotedString() (token, error) { s.advance() // Skip the ". The caller is assumed to have validated it. start := s.pos @@ -141,7 +248,7 @@ func (s *Scanner) parseQuotedString() (token, error) { if s.isEOF() { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.text[s.pos.Offset]; c { + switch c := s.Input[s.pos.Offset]; c { case '"': s.advance() return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil @@ -163,6 +270,10 @@ func (s *Scanner) parseQuotedString() (token, error) { } } +// parseUTF16String parses a UTF-16 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a u followed +// by a " rune. func (s *Scanner) parseUTF16String() (token, error) { s.advance() // Skip the u. The caller is assumed to have validated it. s.advance() // Skip the ". The caller is assumed to have validated it. 
@@ -172,7 +283,7 @@ func (s *Scanner) parseUTF16String() (token, error) { if s.isEOF() { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.text[s.pos.Offset]; c { + switch c := s.Input[s.pos.Offset]; c { case '"': s.advance() return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil @@ -183,9 +294,9 @@ func (s *Scanner) parseUTF16String() (token, error) { } bytes = appendUTF16(bytes, r) default: - r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:]) + r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:]) // Note DecodeRuneInString may return utf8.RuneError if there is a - // legitimate replacement charaacter in the input. The documentation + // legitimate replacement character in the input. The documentation // says errors return (RuneError, 0) or (RuneError, 1). if r == utf8.RuneError && n <= 1 { return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} @@ -196,6 +307,10 @@ func (s *Scanner) parseUTF16String() (token, error) { } } +// parseUTF32String parses a UTF-32 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a U followed +// by a " rune. func (s *Scanner) parseUTF32String() (token, error) { s.advance() // Skip the U. The caller is assumed to have validated it. s.advance() // Skip the ". The caller is assumed to have validated it. 
@@ -205,7 +320,7 @@ func (s *Scanner) parseUTF32String() (token, error) { if s.isEOF() { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.text[s.pos.Offset]; c { + switch c := s.Input[s.pos.Offset]; c { case '"': s.advance() return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil @@ -216,7 +331,7 @@ func (s *Scanner) parseUTF32String() (token, error) { } bytes = appendUTF32(bytes, r) default: - r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:]) + r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:]) // Note DecodeRuneInString may return utf8.RuneError if there is a // legitimate replacement charaacter in the input. The documentation // says errors return (RuneError, 0) or (RuneError, 1). @@ -229,13 +344,14 @@ func (s *Scanner) parseUTF32String() (token, error) { } } +// next lexes the next token. func (s *Scanner) next() (token, error) { again: if s.isEOF() { return token{Kind: tokenEOF, Pos: s.pos}, nil } - switch s.text[s.pos.Offset] { + switch s.Input[s.pos.Offset] { case ' ', '\t', '\n', '\r': // Skip whitespace. s.advance() @@ -244,7 +360,7 @@ again: // Skip to the end of the comment. s.advance() for !s.isEOF() { - wasNewline := s.text[s.pos.Offset] == '\n' + wasNewline := s.Input[s.pos.Offset] == '\n' s.advance() if wasNewline { break @@ -260,15 +376,15 @@ again: case '"': return s.parseQuotedString() case 'u': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' { + if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' { return s.parseUTF16String() } case 'U': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' { + if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' { return s.parseUTF32String() } case 'b': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '`' { + if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '`' { s.advance() // Skip the b. s.advance() // Skip the `. 
bitStr, ok := s.consumeUpTo('`') @@ -348,7 +464,7 @@ again: s.advance() loop: for !s.isEOF() { - switch s.text[s.pos.Offset] { + switch s.Input[s.pos.Offset] { case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#': break loop default: @@ -356,7 +472,7 @@ loop: } } - symbol := s.text[start.Offset:s.pos.Offset] + symbol := s.Input[start.Offset:s.pos.Offset] // See if it is a tag. tag, ok := internal.TagByName(symbol) @@ -417,41 +533,12 @@ loop: return token{}, fmt.Errorf("unrecognized symbol %q", symbol) } -func (s *Scanner) isEOF() bool { - return s.pos.Offset >= len(s.text) -} - -func (s *Scanner) advance() { - if !s.isEOF() { - if s.text[s.pos.Offset] == '\n' { - s.pos.Line++ - s.pos.Column = 0 - } else { - s.pos.Column++ - } - s.pos.Offset++ - } -} - -func (s *Scanner) advanceBytes(n int) { - for i := 0; i < n; i++ { - s.advance() - } -} - -func (s *Scanner) consumeUpTo(b byte) (string, bool) { - start := s.pos.Offset - for !s.isEOF() { - if s.text[s.pos.Offset] == b { - ret := s.text[start:s.pos.Offset] - s.advance() - return ret, true - } - s.advance() - } - return "", false -} - +// exec is the main parser loop. +// +// The leftCurly argument, if not nil, represents the { that began the +// length-prefixed block we're currently executing. Because we need to encode +// the full extent of the contents of a {} before emitting the length prefix, +// this function calls itself with a non-nil leftCurly to encode it.
func (s *Scanner) exec(leftCurly *token) ([]byte, error) { var out []byte var lengthModifier *token @@ -508,7 +595,3 @@ func (s *Scanner) exec(leftCurly *token) ([]byte, error) { } } } - -func (s *Scanner) Exec() ([]byte, error) { - return s.exec(nil) -} From d5d85a8d8ce195b3010b431b17e96bad0cd535e5 Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Mon, 21 Feb 2022 23:58:18 -0500 Subject: [PATCH 4/5] Simplify the base parsing helpers in scanner.go --- ascii2der/scanner.go | 176 +++++++++++++++++++++---------------------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go index eb13caa..b0c0d81 100644 --- a/ascii2der/scanner.go +++ b/ascii2der/scanner.go @@ -134,17 +134,18 @@ func (s *Scanner) Exec() ([]byte, error) { return s.exec(nil) } -// isEOF returns whether the cursor is past the end of the input string. -func (s *Scanner) isEOF() bool { - return s.pos.Offset >= len(s.Input) +// isEOF returns whether the position n bytes past the cursor is at or beyond +// the end of the input. +func (s *Scanner) isEOF(n int) bool { + return s.pos.Offset+n >= len(s.Input) } -// advance advances the scanner's cursor one position. +// advance advances the scanner's cursor n positions. // -// Unlike just s.pos.Offset++, this will not proceed beyond the end of the +// Unlike just s.pos.Offset += n, this will not proceed beyond the end of the // string, and will update the line and column information accordingly. -func (s *Scanner) advance() { - if !s.isEOF() { +func (s *Scanner) advance(n int) { + for i := 0; i < n && !s.isEOF(0); i++ { if s.Input[s.pos.Offset] == '\n' { s.pos.Line++ s.pos.Column = 0 @@ -155,27 +156,30 @@ func (s *Scanner) advance() { } } -// advanceBytes calls advance() n times. -func (s *Scanner) advanceBytes(n int) { - for i := 0; i < n; i++ { - s.advance() +// consume advances exactly n times and returns all source bytes from the +// initial cursor position up to, but excluding, the final cursor position.
+// +// If EOF is reached before all n bytes are consumed, the function returns +// false. +func (s *Scanner) consume(n int) (string, bool) { + start := s.pos.Offset + s.advance(n) + if s.pos.Offset-start != n { + return "", false } + + return s.Input[start:s.pos.Offset], true } -// consumeUpTo advances the cursor until the given byte is seen, returning all +// consumeUntil advances the cursor until the given byte is seen, returning all // source bytes between the initial cursor position and excluding the given -// byte. +// byte. This function will advance past the searched-for byte. // // If EOF is reached before the byte is seen, the function returns false. -func (s *Scanner) consumeUpTo(b byte) (string, bool) { - start := s.pos.Offset - for !s.isEOF() { - if s.Input[s.pos.Offset] == b { - ret := s.Input[start:s.pos.Offset] - s.advance() - return ret, true - } - s.advance() +func (s *Scanner) consumeUntil(b byte) (string, bool) { + if i := strings.IndexByte(s.Input[s.pos.Offset:], b); i != -1 { + text, _ := s.consume(i + 1) + return text[:i], true } return "", false } @@ -188,50 +192,47 @@ func (s *Scanner) consumeUpTo(b byte) (string, bool) { // // This function assumes that the scanner's cursor is currently on a \ rune. func (s *Scanner) parseEscapeSequence() (rune, error) { - s.advance() // Skip the \. The caller is assumed to have validated it. - if s.isEOF() { + s.advance(1) // Skip the \. The caller is assumed to have validated it. 
+ if s.isEOF(0) { return 0, &ParseError{s.pos, errors.New("expected escape character")} } + switch c := s.Input[s.pos.Offset]; c { case 'n': - s.advance() + s.advance(1) return '\n', nil case '"', '\\': - s.advance() + s.advance(1) return rune(c), nil - case 'x': - s.advance() - if s.pos.Offset+2 > len(s.Input) { - return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+2]) - if err != nil { - return 0, &ParseError{s.pos, err} + case 'x', 'u', 'U': + s.advance(1) + + var digits int + switch c { + case 'x': + digits = 2 + case 'u': + digits = 4 + case 'U': + digits = 8 } - s.advanceBytes(2) - return rune(b[0]), nil - case 'u': - s.advance() - if s.pos.Offset+4 > len(s.Input) { + + hexes, ok := s.consume(digits) + if !ok { return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} } - b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+4]) + + bytes, err := hex.DecodeString(hexes) if err != nil { return 0, &ParseError{s.pos, err} } - s.advanceBytes(4) - return rune(b[0])<<8 | rune(b[1]), nil - case 'U': - s.advance() - if s.pos.Offset+8 > len(s.Input) { - return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.Input[s.pos.Offset : s.pos.Offset+8]) - if err != nil { - return 0, &ParseError{s.pos, err} + + var r rune + for _, b := range bytes { + r <<= 8 + r |= rune(b) } - s.advanceBytes(8) - return rune(b[0])<<24 | rune(b[1])<<16 | rune(b[2])<<8 | rune(b[3]), nil + return r, nil default: return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} } @@ -241,16 +242,16 @@ func (s *Scanner) parseEscapeSequence() (rune, error) { // // This function assumes that the scanner's cursor is currently on a " rune. func (s *Scanner) parseQuotedString() (token, error) { - s.advance() // Skip the ". The caller is assumed to have validated it. + s.advance(1) // Skip the ". 
The caller is assumed to have validated it. start := s.pos var bytes []byte for { - if s.isEOF() { + if s.isEOF(0) { return token{}, &ParseError{start, errors.New("unmatched \"")} } switch c := s.Input[s.pos.Offset]; c { case '"': - s.advance() + s.advance(1) return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil case '\\': escapeStart := s.pos @@ -264,7 +265,7 @@ func (s *Scanner) parseQuotedString() (token, error) { } bytes = append(bytes, byte(r)) default: - s.advance() + s.advance(1) bytes = append(bytes, c) } } @@ -275,17 +276,17 @@ func (s *Scanner) parseQuotedString() (token, error) { // This function assumes that the scanner's cursor is currently on a u followed // by a " rune. func (s *Scanner) parseUTF16String() (token, error) { - s.advance() // Skip the u. The caller is assumed to have validated it. - s.advance() // Skip the ". The caller is assumed to have validated it. + s.advance(2) // Skip the u". The caller is assumed to have validated it. start := s.pos var bytes []byte for { - if s.isEOF() { + if s.isEOF(0) { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.Input[s.pos.Offset]; c { + + switch s.Input[s.pos.Offset] { case '"': - s.advance() + s.advance(1) return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil case '\\': r, err := s.parseEscapeSequence() @@ -301,7 +302,7 @@ func (s *Scanner) parseUTF16String() (token, error) { if r == utf8.RuneError && n <= 1 { return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} } - s.advanceBytes(n) + s.advance(n) bytes = appendUTF16(bytes, r) } } @@ -312,17 +313,17 @@ func (s *Scanner) parseUTF16String() (token, error) { // This function assumes that the scanner's cursor is currently on a U followed // by a " rune. func (s *Scanner) parseUTF32String() (token, error) { - s.advance() // Skip the U. The caller is assumed to have validated it. - s.advance() // Skip the ". The caller is assumed to have validated it. + s.advance(2) // Skip the U". 
The caller is assumed to have validated it. start := s.pos var bytes []byte for { - if s.isEOF() { + if s.isEOF(0) { return token{}, &ParseError{start, errors.New("unmatched \"")} } - switch c := s.Input[s.pos.Offset]; c { + + switch s.Input[s.pos.Offset] { case '"': - s.advance() + s.advance(1) return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil case '\\': r, err := s.parseEscapeSequence() @@ -338,7 +339,7 @@ func (s *Scanner) parseUTF32String() (token, error) { if r == utf8.RuneError && n <= 1 { return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} } - s.advanceBytes(n) + s.advance(n) bytes = appendUTF32(bytes, r) } } @@ -347,47 +348,46 @@ func (s *Scanner) parseUTF32String() (token, error) { // next lexes the next token. func (s *Scanner) next() (token, error) { again: - if s.isEOF() { + if s.isEOF(0) { return token{Kind: tokenEOF, Pos: s.pos}, nil } switch s.Input[s.pos.Offset] { case ' ', '\t', '\n', '\r': // Skip whitespace. - s.advance() + s.advance(1) goto again case '#': // Skip to the end of the comment. - s.advance() - for !s.isEOF() { + s.advance(1) + for !s.isEOF(0) { wasNewline := s.Input[s.pos.Offset] == '\n' - s.advance() + s.advance(1) if wasNewline { break } } goto again case '{': - s.advance() + s.advance(1) return token{Kind: tokenLeftCurly, Pos: s.pos}, nil case '}': - s.advance() + s.advance(1) return token{Kind: tokenRightCurly, Pos: s.pos}, nil case '"': return s.parseQuotedString() case 'u': - if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' { + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' { return s.parseUTF16String() } case 'U': - if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '"' { + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' { return s.parseUTF32String() } case 'b': - if s.pos.Offset+1 < len(s.Input) && s.Input[s.pos.Offset+1] == '`' { - s.advance() // Skip the b. - s.advance() // Skip the `. 
- bitStr, ok := s.consumeUpTo('`') + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '`' { + s.advance(2) // Skip the b`. + bitStr, ok := s.consumeUntil('`') if !ok { return token{}, &ParseError{s.pos, errors.New("unmatched `")} } @@ -431,8 +431,8 @@ again: return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil } case '`': - s.advance() - hexStr, ok := s.consumeUpTo('`') + s.advance(1) + hexStr, ok := s.consumeUntil('`') if !ok { return token{}, &ParseError{s.pos, errors.New("unmatched `")} } @@ -442,8 +442,8 @@ again: } return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil case '[': - s.advance() - tagStr, ok := s.consumeUpTo(']') + s.advance(1) + tagStr, ok := s.consumeUntil(']') if !ok { return token{}, &ParseError{s.pos, errors.New("unmatched [")} } @@ -461,14 +461,14 @@ again: // Normal token. Consume up to the next whitespace character, symbol, or // EOF. start := s.pos - s.advance() + s.advance(1) loop: - for !s.isEOF() { + for !s.isEOF(0) { switch s.Input[s.pos.Offset] { case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#': break loop default: - s.advance() + s.advance(1) } } From 51662df30b524148a7995dc787599fa8d65e586c Mon Sep 17 00:00:00 2001 From: Miguel Young de la Sota Date: Tue, 22 Feb 2022 00:07:22 -0500 Subject: [PATCH 5/5] Make cmd/ascii2der use the correct file name for errors --- cmd/ascii2der/main.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go index f379968..b6749b7 100644 --- a/cmd/ascii2der/main.go +++ b/cmd/ascii2der/main.go @@ -54,7 +54,10 @@ func main() { os.Exit(1) } - outBytes, err := ascii2der.NewScanner(string(inBytes)).Exec() + scanner := ascii2der.NewScanner(string(inBytes)) + scanner.SetFile(*inPath) + + outBytes, err := scanner.Exec() if err != nil { fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err) os.Exit(1)