wip(parser): partly finished Message parser

romdo · Aug 26, 2021 · bf44c4a · bf44c4a
1 parent 5174ed3
commit bf44c4a
Show file tree

Hide file tree

Showing 7 changed files with 759 additions and 40 deletions.
diff --git a/buffer.go b/buffer.go
@@ -1,38 +1,5 @@
 package conventionalcommit
 
-import (
-	"regexp"
-)
-
-// footerToken will match against all variations of Conventional Commit footer
-// formats.
-//
-// Examples of valid footer tokens:
-//
-//  Approved-by: John Carter
-//  ReviewdBy: Noctis
-//  Fixes #49
-//  Reverts #SOL-42
-//  BREAKING CHANGE: Flux capacitor no longer exists.
-//  BREAKING-CHANGE: Time will flow backwads
-//
-// Examples of invalid footer tokens:
-//
-//  Approved-by:
-//  Approved-by:John Carter
-//  Approved by: John Carter
-//    ReviewdBy: Noctis
-//  Fixes#49
-//  Fixes #
-//  Fixes 49
-//  BREAKING CHANGE:Flux capacitor no longer exists.
-//  Breaking Change: Flux capacitor no longer exists.
-//  Breaking-Change: Time will flow backwads
-//
-var footerToken = regexp.MustCompile(
-	`^(?:([\w-]+)\s+(#.+)|([\w-]+|BREAKING[\s-]CHANGE):\s+(.+))$`,
-)
-
 // Buffer represents a commit message in a more structured form than a simple
 // string or byte slice. This makes it easier to process a message for the
 // purposes of extracting detailed information, linting, and formatting.
@@ -119,11 +86,11 @@ func NewBuffer(message []byte) *Buffer {
 		lastLen++
 	}
 
-	// If last paragraph starts with a Convention Commit footer token, it is the
-	// foot section, otherwise it is part of the body.
+	// If last paragraph starts with a Conventional Commit footer token, it is
+	// the foot section, otherwise it is part of the body.
 	if lastLen > 0 {
 		line := buf.lines[buf.lastLine-lastLen+1]
-		if footerToken.Match(line.Content) {
+		if FooterToken.Match(line.Content) {
 			buf.footLen = lastLen
 		}
 	}
@@ -176,6 +143,15 @@ func (s *Buffer) Lines() Lines {
 	return s.lines[s.firstLine : s.lastLine+1]
 }
 
+// LinesRaw returns all lines of the buffer including any blank lines at the
+// beginning and end of the buffer.
+func (s *Buffer) LinesRaw() Lines {
+	return s.lines
+}
+
+// LineCount returns the number of lines in the buffer after discarding blank
+// lines from the beginning and end of the buffer. This effectively counts all
+// lines from the first to the last line containing non-whitespace characters.
 func (s *Buffer) LineCount() int {
 	if s.headLen == 0 {
 		return 0
@@ -184,6 +160,12 @@ func (s *Buffer) LineCount() int {
 	return (s.lastLine + 1) - s.firstLine
 }
 
+// LineCountRaw returns the number of lines in the buffer including any blank
+// lines at the beginning and end of the buffer.
+func (s *Buffer) LineCountRaw() int {
+	return len(s.lines)
+}
+
 // Bytes renders the Buffer back into a byte slice, without any leading or
 // trailing whitespace lines. Leading whitespace on the first line which
 // contains non-whitespace characters is retained. It is only whole lines

diff --git a/buffer_test.go b/buffer_test.go
@@ -994,6 +994,55 @@ func BenchmarkBuffer_Lines(b *testing.B) {
 	}
 }
 
+func TestBuffer_LinesRaw(t *testing.T) {
+	for _, tt := range bufferTestCases {
+		t.Run(tt.name, func(t *testing.T) {
+			want := tt.wantBuffer.lines[0:]
+
+			got := tt.wantBuffer.LinesRaw()
+
+			assert.Equal(t, want, got)
+		})
+	}
+}
+
+func TestBuffer_LineCount(t *testing.T) {
+	for _, tt := range bufferTestCases {
+		t.Run(tt.name, func(t *testing.T) {
+			want := tt.wantLines[1]
+
+			got := tt.wantBuffer.LineCount()
+
+			assert.Equal(t, want, got)
+		})
+	}
+}
+
+func BenchmarkBuffer_LineCount(b *testing.B) {
+	for _, tt := range bufferTestCases {
+		if tt.bytes == nil {
+			continue
+		}
+		b.Run(tt.name, func(b *testing.B) {
+			for n := 0; n < b.N; n++ {
+				_ = tt.wantBuffer.LineCount()
+			}
+		})
+	}
+}
+
+func TestBuffer_LineCountRaw(t *testing.T) {
+	for _, tt := range bufferTestCases {
+		t.Run(tt.name, func(t *testing.T) {
+			want := len(tt.wantBuffer.lines)
+
+			got := tt.wantBuffer.LineCountRaw()
+
+			assert.Equal(t, want, got)
+		})
+	}
+}
+
 func TestBuffer_Bytes(t *testing.T) {
 	for _, tt := range bufferTestCases {
 		if tt.bytes == nil {

diff --git a/line.go b/line.go
@@ -58,23 +58,23 @@ type Lines []*Line
 // basis.
 func NewLines(content []byte) Lines {
 	r := Lines{}
-	cLen := len(content)
+	length := len(content)
 
-	if cLen == 0 {
+	if length == 0 {
 		return r
 	}
 
 	// List of start/end offsets for each line break.
 	var breaks [][]int
 
 	// Locate each line break within content.
-	for i := 0; i < cLen; i++ {
+	for i := 0; i < length; i++ {
 		switch content[i] {
 		case lf:
 			breaks = append(breaks, []int{i, i + 1})
 		case cr:
 			b := []int{i, i + 1}
-			if i+1 < cLen && content[i+1] == lf {
+			if i+1 < length && content[i+1] == lf {
 				b[1]++
 				i++
 			}

diff --git a/message.go b/message.go
@@ -0,0 +1,178 @@
+package conventionalcommit
+
+import (
+	"errors"
+	"fmt"
+	"regexp"
+	"strings"
+)
+
+var (
+	Err             = errors.New("conventionalcommit")
+	ErrEmptyMessage = fmt.Errorf("%w: empty message", Err)
+)
+
+// HeaderToken will match a Conventional Commit formatted subject line, to
+// extract type, scope, breaking change (bool), and description.
+//
+// It is intentionally VERY forgiving so as to be able to extract the various
+// parts even when things aren't quite right.
+var HeaderToken = regexp.MustCompile(
+	`^([^\(\)\r\n]*?)(\((.*?)\)\s*)?(!)?(\s*\:)\s(.*)$`,
+)
+
+// FooterToken will match against all variations of Conventional Commit footer
+// formats.
+//
+// Examples of valid footer tokens:
+//
+//  Approved-by: John Carter
+//  ReviewedBy: Noctis
+//  Fixes #49
+//  Reverts #SOL-42
+//  BREAKING CHANGE: Flux capacitor no longer exists.
+//  BREAKING-CHANGE: Time will flow backwards
+//
+// Examples of invalid footer tokens:
+//
+//  Approved-by:
+//  Approved-by:John Carter
+//  Approved by: John Carter
+//    ReviewedBy: Noctis
+//  Fixes#49
+//  Fixes #
+//  Fixes 49
+//  BREAKING CHANGE:Flux capacitor no longer exists.
+//  Breaking Change: Flux capacitor no longer exists.
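+//  Breaking-Change: Time will flow backwards
+//
+// NOTE(review): the expression below does match "Breaking-Change: ..." via
+// its generic [\w-]+ alternative, so that last example is accepted as an
+// ordinary footer token; it is just not treated as a breaking change.
+//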
+var FooterToken = regexp.MustCompile(
+	`^([\w-]+|BREAKING[\s-]CHANGE)(?:\s*(:)\s+|\s+(#))(.+)$`,
+)
+
+// Message represents a Conventional Commit message in a structured way.
+type Message struct {
+	// Type indicates what kind of a change the commit message describes.
+	Type string
+
+	// Scope indicates the context/component/area that the change affects.
+	Scope string
+
+	// Description is the primary description for the commit.
+	Description string
+
+	// Body is the main text body of the commit message: effectively all text
+	// between the subject line and the footers, if any are present.
+	Body string
+
+	// Footers are all footers which are not references or breaking changes.
+	Footers []*Footer
+
+	// References are all footers defined with a reference style token, for
+	// example:
+	//
+	//  Fixes #42
+	References []*Reference
+
+	// Breaking is set to true if the message subject included the "!" breaking
+	// change indicator.
+	Breaking bool
+
+	// BreakingChanges includes the descriptions from all BREAKING CHANGE
+	// footers.
+	BreakingChanges []string
+}
+
+func NewMessage(buf *Buffer) (*Message, error) {
+	msg := &Message{}
+	count := buf.LineCount()
+
+	if count == 0 {
+		return nil, ErrEmptyMessage
+	}
+
+	msg.Description = buf.Head().Join("\n")
+	if m := HeaderToken.FindStringSubmatch(msg.Description); len(m) > 0 {
+		msg.Type = strings.TrimSpace(m[1])
+		msg.Scope = strings.TrimSpace(m[3])
+		msg.Breaking = m[4] == "!"
+		msg.Description = m[6]
+	}
+
+	msg.Body = buf.Body().Join("\n")
+
+	if foot := buf.Foot(); len(foot) > 0 {
+		footers := parseFooters(foot)
+
+		for _, f := range footers {
+			name := string(f.name)
+			value := string(f.value)
+
+			switch {
+			case f.ref:
+				msg.References = append(msg.References, &Reference{
+					Name:  name,
+					Value: value,
+				})
+			case name == "BREAKING CHANGE" || name == "BREAKING-CHANGE":
+				msg.BreakingChanges = append(msg.BreakingChanges, value)
+			default:
+				msg.Footers = append(msg.Footers, &Footer{
+					Name:  name,
+					Value: value,
+				})
+			}
+		}
+	}
+
+	return msg, nil
+}
+
+func (s *Message) IsBreakingChange() bool {
+	return s.Breaking || len(s.BreakingChanges) > 0
+}
+
+func parseFooters(lines Lines) []*rawFooter {
+	var footers []*rawFooter
+	footer := &rawFooter{}
+	for _, line := range lines {
+		if m := FooterToken.FindSubmatch(line.Content); m != nil {
+			if len(footer.name) > 0 {
+				footers = append(footers, footer)
+			}
+
+			footer = &rawFooter{}
+			if len(m[3]) > 0 {
+				footer.ref = true
+				footer.value = []byte{hash}
+			}
+			footer.name = m[1]
+			footer.value = append(footer.value, m[4]...)
+		} else if len(footer.name) > 0 {
+			footer.value = append(footer.value, lf)
+			footer.value = append(footer.value, line.Content...)
+		}
+	}
+
+	if len(footer.name) > 0 {
+		footers = append(footers, footer)
+	}
+
+	return footers
+}
+
+type rawFooter struct {
+	name  []byte
+	value []byte
+	ref   bool
+}
+
+type Footer struct {
+	Name  string
+	Value string
+}
+
+type Reference struct {
+	Name  string
+	Value string
+}