From 758deef7e9cc2197ae959b2769b652640de99ca9 Mon Sep 17 00:00:00 2001 From: Jim Myhrberg Date: Sun, 15 Aug 2021 18:17:32 +0100 Subject: [PATCH 1/3] refactor(parser): rework RawMessage into Buffer The old RawMessage implementation effectively brute forced the initial processing of a commit message by breaking it down into lines, and grouping them into paragraphs. This is useful, but we actually only need the first paragraph, the last paragraph, and then everything between. So there's no need to break down the message into each paragraph. In theory, the Buffer implementation is more performant than RawMessage was, but most importantly I think it will be easier to work with it. --- buffer.go | 215 +++++++++ buffer_test.go | 1107 +++++++++++++++++++++++++++++++++++++++++++ line.go | 76 ++- line_test.go | 403 ++++++++++++++-- paragraph.go | 30 -- paragraph_test.go | 338 ------------- raw_message.go | 50 -- raw_message_test.go | 641 ------------------------- 8 files changed, 1746 insertions(+), 1114 deletions(-) create mode 100644 buffer.go create mode 100644 buffer_test.go delete mode 100644 paragraph.go delete mode 100644 paragraph_test.go delete mode 100644 raw_message.go delete mode 100644 raw_message_test.go diff --git a/buffer.go b/buffer.go new file mode 100644 index 0000000..5679b9a --- /dev/null +++ b/buffer.go @@ -0,0 +1,215 @@ +package conventionalcommit + +import ( + "regexp" +) + +// footerToken will match against all variations of Conventional Commit footer +// formats. +// +// Examples of valid footer tokens: +// +// Approved-by: John Carter +// ReviewdBy: Noctis +// Fixes #49 +// Reverts #SOL-42 +// BREAKING CHANGE: Flux capacitor no longer exists. +// BREAKING-CHANGE: Time will flow backwads +// +// Examples of invalid footer tokens: +// +// Approved-by: +// Approved-by:John Carter +// Approved by: John Carter +// ReviewdBy: Noctis +// Fixes#49 +// Fixes # +// Fixes 49 +// BREAKING CHANGE:Flux capacitor no longer exists. 
+// Breaking Change: Flux capacitor no longer exists. +// Breaking-Change: Time will flow backwads +// +var footerToken = regexp.MustCompile( + `^(?:([\w-]+)\s+(#.+)|([\w-]+|BREAKING[\s-]CHANGE):\s+(.+))$`, +) + +// Buffer represents a commit message in a more structured form than a simple +// string or byte slice. This makes it easier to process a message for the +// purposes of extracting detailed information, linting, and formatting. +// +// The commit message is conceptually broken down into two three separate +// sections: +// +// - Head section holds the commit message subject/description, along with type +// and scope for conventional commits. The head section should only ever be a +// single line according to git convention, but Buffer supports multi-line +// headers so they can be parsed and handled as needed. +// +// - Body section holds the rest of the message. Except if the last paragraph +// starts with a footer token, then the last paragraph is omitted from the +// body section. +// +// - Foot section holds conventional commit footers. It is always the last +// paragraph of a commit message, and is only considered to be the foot +// section if the first line of the paragraph beings with a footer token. +// +// Each section is returned as a Lines type, which provides per-line access to +// the text within the section. +type Buffer struct { + // firstLine is the lines offset for the first line which contains any + // non-whitespace character. + firstLine int + + // lastLine is the lines offset for the last line which contains any + // non-whitespace character. + lastLine int + + // headLen is the number of lines that the headLen section (first paragraph) + // spans. + headLen int + + // footLen is the number of lines that the footLen section (last paragraph) + // spans. 
+ footLen int + + // lines is a list of all individual lines of text in the commit message, + // which also includes the original line number, making it easy to pass a + // single Line around while still knowing where in the original commit + // message it belongs. + lines Lines +} + +// NewBuffer returns a Buffer, with the given commit message broken down into +// individual lines of text, with sequential non-empty lines grouped into +// paragraphs. +func NewBuffer(message []byte) *Buffer { + buf := &Buffer{ + lines: Lines{}, + } + + if len(message) == 0 { + return buf + } + + buf.lines = NewLines(message) + // Find fist non-whitespace line. + if i := buf.lines.FirstTextIndex(); i > -1 { + buf.firstLine = i + } + + // Find last non-whitespace line. + if i := buf.lines.LastTextIndex(); i > -1 { + buf.lastLine = i + } + + // Determine number of lines in first paragraph (head section). + for i := buf.firstLine; i <= buf.lastLine; i++ { + if buf.lines[i].Blank() { + break + } + buf.headLen++ + } + + // Determine number of lines in the last paragraph. + lastLen := 0 + for i := buf.lastLine; i > buf.firstLine+buf.headLen; i-- { + if buf.lines[i].Blank() { + break + } + lastLen++ + } + + // If last paragraph starts with a Convention Commit footer token, it is the + // foot section, otherwise it is part of the body. + if lastLen > 0 { + line := buf.lines[buf.lastLine-lastLen+1] + if footerToken.Match(line.Content) { + buf.footLen = lastLen + } + } + + return buf +} + +// Head returns the first paragraph, defined as the first group of sequential +// lines which contain any non-whitespace characters. +func (s *Buffer) Head() Lines { + return s.lines[s.firstLine : s.firstLine+s.headLen] +} + +// Body returns all lines between the first and last paragraphs. If the body is +// surrounded by multiple empty lines, they will be removed, ensuring first and +// last line of body is not a blank whitespace line. 
+func (s *Buffer) Body() Lines { + if s.firstLine == s.lastLine { + return Lines{} + } + + first := s.firstLine + s.headLen + 1 + last := s.lastLine + 1 + + if s.footLen > 0 { + last -= s.footLen + } + + return s.lines[first:last].Trim() +} + +// Head returns the last paragraph, defined as the last group of sequential +// lines which contain any non-whitespace characters. +func (s *Buffer) Foot() Lines { + if s.footLen == 0 { + return Lines{} + } + + return s.lines[s.lastLine-s.footLen+1 : s.lastLine+1] +} + +// Lines returns all lines with any blank lines from the beginning and end of +// the buffer removed. Effectively all lines from the first to the last line +// which contain any non-whitespace characters. +func (s *Buffer) Lines() Lines { + if s.lastLine+1 > len(s.lines) || (s.lastLine == 0 && s.lines[0].Blank()) { + return Lines{} + } + + return s.lines[s.firstLine : s.lastLine+1] +} + +func (s *Buffer) LineCount() int { + if s.headLen == 0 { + return 0 + } + + return (s.lastLine + 1) - s.firstLine +} + +// Bytes renders the Buffer back into a byte slice, without any leading or +// trailing whitespace lines. Leading whitespace on the first line which +// contains non-whitespace characters is retained. It is only whole lines +// consisting of only whitespace which are excluded. +func (s *Buffer) Bytes() []byte { + return s.Lines().Bytes() +} + +// String renders the Buffer back into a string, without any leading or trailing +// whitespace lines. Leading whitespace on the first line which contains +// non-whitespace characters is retained. It is only whole lines consisting of +// only whitespace which are excluded. +func (s *Buffer) String() string { + return s.Lines().String() +} + +// BytesRaw renders the Buffer back into a byte slice which is identical to the +// original input byte slice given to NewBuffer. This includes retaining the +// original line break types for each line. 
+func (s *Buffer) BytesRaw() []byte { + return s.lines.Bytes() +} + +// StringRaw renders the Buffer back into a string which is identical to the +// original input byte slice given to NewBuffer. This includes retaining the +// original line break types for each line. +func (s *Buffer) StringRaw() string { + return s.lines.String() +} diff --git a/buffer_test.go b/buffer_test.go new file mode 100644 index 0000000..4b0aa9f --- /dev/null +++ b/buffer_test.go @@ -0,0 +1,1107 @@ +package conventionalcommit + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +var bufferTestCases = []struct { + name string + bytes []byte + wantBuffer *Buffer + wantHead []int + wantBody []int + wantFoot []int + wantLines [2]int +}{ + { + name: "nil", + bytes: nil, + wantBuffer: &Buffer{ + lines: Lines{}, + }, + wantHead: []int{}, + wantBody: []int{}, + wantFoot: []int{}, + wantLines: [2]int{0, 0}, + }, + { + name: "empty", + bytes: []byte(""), + wantBuffer: &Buffer{ + lines: Lines{}, + }, + wantHead: []int{}, + wantBody: []int{}, + wantFoot: []int{}, + wantLines: [2]int{0, 0}, + }, + { + name: "single whitespace line", + bytes: []byte(" "), + wantBuffer: &Buffer{ + lines: Lines{ + {Number: 1, Content: []byte(" "), Break: []byte{}}, + }, + }, + wantHead: []int{}, + wantBody: []int{}, + wantFoot: []int{}, + wantLines: [2]int{0, 0}, + }, + { + name: "multiple whitespace lines", + bytes: []byte("\n\n \n\n\t\n"), + wantBuffer: &Buffer{ + lines: Lines{ + {Number: 1, Content: []byte(""), Break: []byte("\n")}, + {Number: 2, Content: []byte(""), Break: []byte("\n")}, + {Number: 3, Content: []byte(" "), Break: []byte("\n")}, + {Number: 4, Content: []byte(""), Break: []byte("\n")}, + {Number: 5, Content: []byte("\t"), Break: []byte("\n")}, + {Number: 6, Content: []byte(""), Break: []byte{}}, + }, + }, + wantHead: []int{}, + wantBody: []int{}, + wantFoot: []int{}, + wantLines: [2]int{0, 0}, + }, + { + name: "single line", + bytes: []byte("fix: a broken thing"), + wantBuffer: 
&Buffer{ + headLen: 1, + lines: Lines{ + { + Number: 1, + Content: []byte("fix: a broken thing"), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{}, + wantFoot: []int{}, + wantLines: [2]int{0, 1}, + }, + { + name: "single line surrounded by whitespace", + bytes: []byte("\n \n\nfix: a broken thing\n\t\n"), + wantBuffer: &Buffer{ + firstLine: 3, + lastLine: 3, + headLen: 1, + lines: Lines{ + {Number: 1, Content: []byte(""), Break: []byte("\n")}, + {Number: 2, Content: []byte(" "), Break: []byte("\n")}, + {Number: 3, Content: []byte(""), Break: []byte("\n")}, + { + Number: 4, + Content: []byte("fix: a broken thing"), + Break: []byte("\n"), + }, + {Number: 5, Content: []byte("\t"), Break: []byte("\n")}, + {Number: 6, Content: []byte(""), Break: []byte{}}, + }, + }, + wantHead: []int{3}, + wantBody: []int{}, + wantFoot: []int{}, + wantLines: [2]int{3, 1}, + }, + { + name: "subject and body", + bytes: []byte("fix: a broken thing\n\nIt is now fixed."), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 2, + headLen: 1, + footLen: 0, + lines: Lines{ + { + Number: 1, + Content: []byte("fix: a broken thing"), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("It is now fixed."), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2}, + wantFoot: []int{}, + wantLines: [2]int{0, 3}, + }, + { + name: "subject and body with word footer token", + bytes: []byte(`fix: a broken thing + +It is now fixed. 
+ +Reviewed-by: John Carter`), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 4, + headLen: 1, + footLen: 1, + lines: Lines{ + { + Number: 1, + Content: []byte("fix: a broken thing"), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("It is now fixed."), + Break: []byte("\n"), + }, + { + Number: 4, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 5, + Content: []byte("Reviewed-by: John Carter"), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2}, + wantFoot: []int{4}, + wantLines: [2]int{0, 5}, + }, + { + name: "subject and body with reference footer token", + bytes: []byte(`fix: a broken thing + +It is now fixed. + +Fixes #39`), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 4, + headLen: 1, + footLen: 1, + lines: Lines{ + { + Number: 1, + Content: []byte("fix: a broken thing"), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("It is now fixed."), + Break: []byte("\n"), + }, + { + Number: 4, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 5, + Content: []byte("Fixes #39"), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2}, + wantFoot: []int{4}, + wantLines: [2]int{0, 5}, + }, + { + name: "subject and body with BREAKING CHANGE footer", + bytes: []byte(`refactor!: re-transpile the fugiator + +This should improve performance. 
+ +BREAKING CHANGE: New argument is required, or BOOM!`), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 4, + headLen: 1, + footLen: 1, + lines: Lines{ + { + Number: 1, + Content: []byte("refactor!: re-transpile the fugiator"), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("This should improve performance."), + Break: []byte("\n"), + }, + { + Number: 4, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 5, + Content: []byte( + "BREAKING CHANGE: New argument is required, or BOOM!", + ), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2}, + wantFoot: []int{4}, + wantLines: [2]int{0, 5}, + }, + { + name: "subject and body with BREAKING-CHANGE footer", + bytes: []byte(`refactor!: re-transpile the fugiator + +This should improve performance. + +BREAKING-CHANGE: New argument is required, or BOOM!`), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 4, + headLen: 1, + footLen: 1, + lines: Lines{ + { + Number: 1, + Content: []byte("refactor!: re-transpile the fugiator"), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("This should improve performance."), + Break: []byte("\n"), + }, + { + Number: 4, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 5, + Content: []byte( + "BREAKING-CHANGE: New argument is required, or BOOM!", + ), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2}, + wantFoot: []int{4}, + wantLines: [2]int{0, 5}, + }, + { + name: "subject and body with invalid footer token", + bytes: []byte(`refactor!: re-transpile the fugiator + +This should improve performance. 
+ +Reviewed by: John Carter`), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 4, + headLen: 1, + footLen: 0, + lines: Lines{ + { + Number: 1, + Content: []byte("refactor!: re-transpile the fugiator"), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("This should improve performance."), + Break: []byte("\n"), + }, + { + Number: 4, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 5, + Content: []byte("Reviewed by: John Carter"), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2, 3, 4}, + wantFoot: []int{}, + wantLines: [2]int{0, 5}, + }, + { + name: "subject and body with valid footer token on second line", + bytes: []byte(`refactor!: re-transpile the fugiator + +This should improve performance. + +the invalid footer starts here +Reviewed-by: John Carter`), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 5, + headLen: 1, + footLen: 0, + lines: Lines{ + { + Number: 1, + Content: []byte("refactor!: re-transpile the fugiator"), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("This should improve performance."), + Break: []byte("\n"), + }, + { + Number: 4, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 5, + Content: []byte("the invalid footer starts here"), + Break: []byte("\n"), + }, + { + Number: 6, + Content: []byte("Reviewed-by: John Carter"), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2, 3, 4, 5}, + wantFoot: []int{}, + wantLines: [2]int{0, 6}, + }, + { + name: "subject and body with CRLF line breaks", + bytes: []byte("fix: a broken thing\r\n\r\nIt is now fixed."), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 2, + headLen: 1, + footLen: 0, + lines: Lines{ + { + Number: 1, + Content: []byte("fix: a broken thing"), + Break: []byte("\r\n"), + }, + { + Number: 2, + Content: []byte(""), + Break: 
[]byte("\r\n"), + }, + { + Number: 3, + Content: []byte("It is now fixed."), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2}, + wantFoot: []int{}, + wantLines: [2]int{0, 3}, + }, + { + name: "subject and body with CR line breaks", + bytes: []byte("fix: a broken thing\r\rIt is now fixed."), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 2, + headLen: 1, + footLen: 0, + lines: Lines{ + { + Number: 1, + Content: []byte("fix: a broken thing"), + Break: []byte("\r"), + }, + { + Number: 2, + Content: []byte(""), + Break: []byte("\r"), + }, + { + Number: 3, + Content: []byte("It is now fixed."), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2}, + wantFoot: []int{}, + wantLines: [2]int{0, 3}, + }, + { + name: "separated by whitespace line", + bytes: []byte("fix: a broken thing\n \nIt is now fixed."), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 2, + headLen: 1, + footLen: 0, + lines: Lines{ + { + Number: 1, + Content: []byte("fix: a broken thing"), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(" "), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("It is now fixed."), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{2}, + wantFoot: []int{}, + wantLines: [2]int{0, 3}, + }, + { + name: "multi-line head and body", + bytes: []byte(` + +foo +bar + +foz +baz + +hello +world + +`), + wantBuffer: &Buffer{ + firstLine: 2, + lastLine: 9, + headLen: 2, + footLen: 0, + lines: Lines{ + {Number: 1, Content: []byte(""), Break: []byte("\n")}, + {Number: 2, Content: []byte(""), Break: []byte("\n")}, + {Number: 3, Content: []byte("foo"), Break: []byte("\n")}, + {Number: 4, Content: []byte("bar"), Break: []byte("\n")}, + {Number: 5, Content: []byte(""), Break: []byte("\n")}, + {Number: 6, Content: []byte("foz"), Break: []byte("\n")}, + {Number: 7, Content: []byte("baz"), Break: []byte("\n")}, + {Number: 8, Content: []byte(""), Break: []byte("\n")}, + {Number: 9, Content: 
[]byte("hello"), Break: []byte("\n")}, + {Number: 10, Content: []byte("world"), Break: []byte("\n")}, + {Number: 11, Content: []byte(""), Break: []byte("\n")}, + {Number: 12, Content: []byte(""), Break: []byte{}}, + }, + }, + wantHead: []int{2, 3}, + wantBody: []int{5, 6, 7, 8, 9}, + wantFoot: []int{}, + wantLines: [2]int{2, 8}, + }, + { + name: "body surrounded by whitespace lines", + bytes: []byte(` + +foo +bar + + + +foz +baz + + + +hello +world + + +`), + wantBuffer: &Buffer{ + firstLine: 2, + lastLine: 13, + headLen: 2, + footLen: 0, + lines: Lines{ + {Number: 1, Content: []byte(""), Break: []byte("\n")}, + {Number: 2, Content: []byte(""), Break: []byte("\n")}, + {Number: 3, Content: []byte("foo"), Break: []byte("\n")}, + {Number: 4, Content: []byte("bar"), Break: []byte("\n")}, + {Number: 5, Content: []byte(""), Break: []byte("\n")}, + {Number: 6, Content: []byte(""), Break: []byte("\n")}, + {Number: 7, Content: []byte(""), Break: []byte("\n")}, + {Number: 8, Content: []byte("foz"), Break: []byte("\n")}, + {Number: 9, Content: []byte("baz"), Break: []byte("\n")}, + {Number: 10, Content: []byte(""), Break: []byte("\n")}, + {Number: 11, Content: []byte(""), Break: []byte("\n")}, + {Number: 12, Content: []byte(""), Break: []byte("\n")}, + {Number: 13, Content: []byte("hello"), Break: []byte("\n")}, + {Number: 14, Content: []byte("world"), Break: []byte("\n")}, + {Number: 15, Content: []byte(""), Break: []byte("\n")}, + {Number: 16, Content: []byte(""), Break: []byte("\n")}, + {Number: 17, Content: []byte(""), Break: []byte{}}, + }, + }, + wantHead: []int{2, 3}, + wantBody: []int{7, 8, 9, 10, 11, 12, 13}, + wantFoot: []int{}, + wantLines: [2]int{2, 12}, + }, + { + name: "whitespace-only body", + bytes: []byte(` + +foo +bar + + + + +Approved-by: John Smith + +`), + wantBuffer: &Buffer{ + firstLine: 2, + lastLine: 8, + headLen: 2, + footLen: 1, + lines: Lines{ + {Number: 1, Content: []byte(""), Break: []byte("\n")}, + {Number: 2, Content: []byte(""), Break: 
[]byte("\n")}, + {Number: 3, Content: []byte("foo"), Break: []byte("\n")}, + {Number: 4, Content: []byte("bar"), Break: []byte("\n")}, + {Number: 5, Content: []byte(""), Break: []byte("\n")}, + {Number: 6, Content: []byte(""), Break: []byte("\n")}, + {Number: 7, Content: []byte(""), Break: []byte("\n")}, + {Number: 8, Content: []byte(""), Break: []byte("\n")}, + { + Number: 9, + Content: []byte("Approved-by: John Smith"), + Break: []byte("\n"), + }, + {Number: 10, Content: []byte(""), Break: []byte("\n")}, + {Number: 11, Content: []byte(""), Break: []byte{}}, + }, + }, + wantHead: []int{2, 3}, + wantBody: []int{}, + wantFoot: []int{8}, + wantLines: [2]int{2, 7}, + }, + { + name: "subject and body surrounded by whitespace", + bytes: []byte( + "\n \nfix: a broken thing\n\nIt is now fixed.\n \n\n", + ), + wantBuffer: &Buffer{ + firstLine: 2, + lastLine: 4, + headLen: 1, + footLen: 0, + lines: Lines{ + { + Number: 1, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte(" "), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte("fix: a broken thing"), + Break: []byte("\n"), + }, + { + Number: 4, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 5, + Content: []byte("It is now fixed."), + Break: []byte("\n"), + }, + { + Number: 6, + Content: []byte(" "), + Break: []byte("\n"), + }, + { + Number: 7, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 8, + Content: []byte(""), + Break: []byte{}, + }, + }, + }, + wantHead: []int{2}, + wantBody: []int{4}, + wantFoot: []int{}, + wantLines: [2]int{2, 3}, + }, + { + name: "subject and long body", + bytes: []byte(`fix: something broken + +Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec hendrerit +tempor tellus. Donec pretium posuere tellus. Proin quam nisl, tincidunt et, +mattis eget, convallis nec, purus. Cum sociis natoque penatibus et magnis dis +parturient montes, nascetur ridiculous mus. Nulla posuere. Donec vitae dolor. 
+Nullam tristique diam non turpis. Cras placerat accumsan nulla. Nullam rutrum. +Nam vestibulum accumsan nisl. + +Nullam eu ante vel est convallis dignissim. Fusce suscipit, wisi nec facilisis +facilisis, est dui fermentum leo, quis tempor ligula erat quis odio. Nunc porta +vulputate tellus. Nunc rutrum turpis sed pede. Sed bibendum. Aliquam posuere. +Nunc aliquet, augue nec adipiscing interdum, lacus tellus malesuada massa, quis +varius mi purus non odio. + +Phasellus lacus. Nam euismod tellus id erat. Pellentesque condimentum, magna ut +suscipit hendrerit, ipsum augue ornare nulla, non luctus diam neque sit amet +urna. Curabitur vulputate vestibulum lorem. Fusce sagittis, libero non molestie +mollis, magna orci ultrices dolor, at vulputate neque nulla lacinia eros. Sed id +ligula quis est convallis tempor. Curabitur lacinia pulvinar nibh. Nam a +sapien.`), + wantBuffer: &Buffer{ + firstLine: 0, + lastLine: 20, + headLen: 1, + footLen: 0, + lines: Lines{ + { + Number: 1, + Content: []byte("fix: something broken"), + Break: []byte("\n"), + }, + {Number: 2, Content: []byte(""), Break: []byte("\n")}, + { + Number: 3, + Content: []byte( + "Lorem ipsum dolor sit amet, consectetuer " + + "adipiscing elit. Donec hendrerit"), + Break: []byte("\n"), + }, + { + Number: 4, + Content: []byte( + "tempor tellus. Donec pretium posuere tellus. " + + "Proin quam nisl, tincidunt et,"), + Break: []byte("\n"), + }, + { + Number: 5, + Content: []byte( + "mattis eget, convallis nec, purus. Cum sociis " + + "natoque penatibus et magnis dis"), + Break: []byte("\n"), + }, + { + Number: 6, + Content: []byte( + "parturient montes, nascetur ridiculous mus. " + + "Nulla posuere. Donec vitae dolor."), + Break: []byte("\n"), + }, + { + Number: 7, + Content: []byte( + "Nullam tristique diam non turpis. Cras placerat " + + "accumsan nulla. 
Nullam rutrum."), + Break: []byte("\n"), + }, + { + Number: 8, + Content: []byte( + "Nam vestibulum accumsan nisl."), + Break: []byte("\n"), + }, + {Number: 9, Content: []byte(""), Break: []byte("\n")}, + { + Number: 10, + Content: []byte( + "Nullam eu ante vel est convallis dignissim. " + + "Fusce suscipit, wisi nec facilisis", + ), + Break: []byte("\n"), + }, + { + Number: 11, + Content: []byte( + "facilisis, est dui fermentum leo, quis tempor " + + "ligula erat quis odio. Nunc porta", + ), + Break: []byte("\n"), + }, + { + Number: 12, + Content: []byte( + "vulputate tellus. Nunc rutrum turpis sed pede. " + + "Sed bibendum. Aliquam posuere.", + ), + Break: []byte("\n"), + }, + { + Number: 13, + Content: []byte( + "Nunc aliquet, augue nec adipiscing interdum, " + + "lacus tellus malesuada massa, quis", + ), + Break: []byte("\n"), + }, + { + Number: 14, + Content: []byte("varius mi purus non odio."), + Break: []byte("\n"), + }, + {Number: 15, Content: []byte(""), Break: []byte("\n")}, + { + Number: 16, + Content: []byte("Phasellus lacus. Nam euismod tellus id " + + "erat. Pellentesque condimentum, magna ut"), + Break: []byte("\n"), + }, + { + Number: 17, + Content: []byte("suscipit hendrerit, ipsum augue ornare " + + "nulla, non luctus diam neque sit amet"), + Break: []byte("\n"), + }, + { + Number: 18, + Content: []byte("urna. Curabitur vulputate vestibulum " + + "lorem. Fusce sagittis, libero non molestie"), + Break: []byte("\n"), + }, + { + Number: 19, + Content: []byte("mollis, magna orci ultrices dolor, at " + + "vulputate neque nulla lacinia eros. Sed id"), + Break: []byte("\n"), + }, + { + Number: 20, + Content: []byte("ligula quis est convallis tempor. " + + "Curabitur lacinia pulvinar nibh. 
Nam a"), + Break: []byte("\n"), + }, + { + Number: 21, + Content: []byte("sapien."), + Break: []byte{}, + }, + }, + }, + wantHead: []int{0}, + wantBody: []int{ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + }, + wantFoot: []int{}, + wantLines: [2]int{0, 21}, + }, +} + +func TestNewBuffer(t *testing.T) { + for _, tt := range bufferTestCases { + t.Run(tt.name, func(t *testing.T) { + got := NewBuffer(tt.bytes) + + assert.Equal(t, tt.wantBuffer, got) + }) + } +} + +func BenchmarkNewBuffer(b *testing.B) { + for _, tt := range bufferTestCases { + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = NewBuffer(tt.bytes) + } + }) + } +} + +func TestBuffer_Head(t *testing.T) { + for _, tt := range bufferTestCases { + t.Run(tt.name, func(t *testing.T) { + want := Lines{} + for _, i := range tt.wantHead { + want = append(want, tt.wantBuffer.lines[i]) + } + + got := tt.wantBuffer.Head() + + assert.Equal(t, want, got) + }) + } +} + +func BenchmarkBuffer_Head(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.Head() + } + }) + } +} + +func TestBuffer_Body(t *testing.T) { + for _, tt := range bufferTestCases { + t.Run(tt.name, func(t *testing.T) { + want := Lines{} + for _, i := range tt.wantBody { + want = append(want, tt.wantBuffer.lines[i]) + } + + got := tt.wantBuffer.Body() + + assert.Equal(t, want, got) + }) + } +} + +func BenchmarkBuffer_Body(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.Body() + } + }) + } +} + +func TestBuffer_Foot(t *testing.T) { + for _, tt := range bufferTestCases { + t.Run(tt.name, func(t *testing.T) { + want := Lines{} + for _, i := range tt.wantFoot { + want = append(want, tt.wantBuffer.lines[i]) + } + + got := tt.wantBuffer.Foot() + + 
assert.Equal(t, want, got) + }) + } +} + +func BenchmarkBuffer_Foot(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.Foot() + } + }) + } +} + +func TestBuffer_Lines(t *testing.T) { + for _, tt := range bufferTestCases { + t.Run(tt.name, func(t *testing.T) { + start := tt.wantLines[0] + end := tt.wantLines[0] + tt.wantLines[1] + want := tt.wantBuffer.lines[start:end] + + got := tt.wantBuffer.Lines() + + assert.Equal(t, want, got) + }) + } +} + +func BenchmarkBuffer_Lines(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.Lines() + } + }) + } +} + +func TestBuffer_Bytes(t *testing.T) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + t.Run(tt.name, func(t *testing.T) { + start := tt.wantLines[0] + end := tt.wantLines[0] + tt.wantLines[1] + want := tt.wantBuffer.lines[start:end].Bytes() + + got := tt.wantBuffer.Bytes() + + assert.Equal(t, want, got) + }) + } +} + +func BenchmarkMessage_Bytes(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.Bytes() + } + }) + } +} + +func TestBuffer_String(t *testing.T) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + t.Run(tt.name, func(t *testing.T) { + start := tt.wantLines[0] + end := tt.wantLines[0] + tt.wantLines[1] + want := tt.wantBuffer.lines[start:end].String() + + got := tt.wantBuffer.String() + + assert.Equal(t, want, got) + }) + } +} + +func BenchmarkMessage_String(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.String() + } + }) + } +} + +func 
TestBuffer_BytesRaw(t *testing.T) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + t.Run(tt.name, func(t *testing.T) { + got := tt.wantBuffer.BytesRaw() + + assert.Equal(t, tt.bytes, got) + }) + } +} + +func BenchmarkBuffer_BytesRaw(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.BytesRaw() + } + }) + } +} + +func TestBuffer_StringRaw(t *testing.T) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + t.Run(tt.name, func(t *testing.T) { + got := tt.wantBuffer.StringRaw() + + assert.Equal(t, string(tt.bytes), got) + }) + } +} + +func BenchmarkBuffer_StringRaw(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.StringRaw() + } + }) + } +} diff --git a/line.go b/line.go index f492401..4bfbd39 100644 --- a/line.go +++ b/line.go @@ -1,5 +1,10 @@ package conventionalcommit +import ( + "bytes" + "strings" +) + const ( lf = 10 // linefeed ("\n") character cr = 13 // carriage return ("\r") character @@ -20,6 +25,17 @@ type Line struct { Break []byte } +// Empty returns true if line content has a length of zero. +func (s *Line) Empty() bool { + return len(s.Content) == 0 +} + +// Blank returns true if line content has a length of zero after leading and +// trailing white space has been trimmed. +func (s *Line) Blank() bool { + return len(bytes.TrimSpace(s.Content)) == 0 +} + // Lines is a slice of *Line types with some helper methods attached. type Lines []*Line @@ -28,8 +44,9 @@ type Lines []*Line // basis. func NewLines(content []byte) Lines { r := Lines{} + cLen := len(content) - if len(content) == 0 { + if cLen == 0 { return r } @@ -37,12 +54,13 @@ func NewLines(content []byte) Lines { var breaks [][]int // Locate each line break within content. 
- for i := 0; i < len(content); i++ { - if content[i] == lf { + for i := 0; i < cLen; i++ { + switch content[i] { + case lf: breaks = append(breaks, []int{i, i + 1}) - } else if content[i] == cr { + case cr: b := []int{i, i + 1} - if i+1 < len(content) && content[i+1] == lf { + if i+1 < cLen && content[i+1] == lf { b[1]++ i++ } @@ -76,6 +94,45 @@ func NewLines(content []byte) Lines { return r } +// FirstTextIndex returns the line offset of the first line which contains any +// non-whitespace characters. +func (s Lines) FirstTextIndex() int { + for i, line := range s { + if !line.Blank() { + return i + } + } + + return -1 +} + +// LastTextIndex returns the line offset of the last line which contains any +// non-whitespace characters. +func (s Lines) LastTextIndex() int { + for i := len(s) - 1; i >= 0; i-- { + if !s[i].Blank() { + return i + } + } + + return -1 +} + +// Trim returns a new Lines instance where all leading and trailing whitespace +// lines have been removed, based on index values from FirstTextIndex() and +// LastTextIndex(). +// +// If there are no lines with non-whitespace characters, a empty Lines type is +// returned. +func (s Lines) Trim() Lines { + start := s.FirstTextIndex() + if start == -1 { + return Lines{} + } + + return s[start : s.LastTextIndex()+1] +} + // Bytes combines all Lines into a single byte slice, retaining the original // line break types for each line. 
func (s Lines) Bytes() []byte { @@ -100,3 +157,12 @@ func (s Lines) Bytes() []byte { func (s Lines) String() string { return string(s.Bytes()) } + +func (s Lines) Join(sep string) string { + r := make([]string, 0, len(s)) + for _, line := range s { + r = append(r, string(line.Content)) + } + + return strings.Join(r, sep) +} diff --git a/line_test.go b/line_test.go index 4d39057..5ea956f 100644 --- a/line_test.go +++ b/line_test.go @@ -6,6 +6,156 @@ import ( "github.com/stretchr/testify/assert" ) +func TestLine_Empty(t *testing.T) { + tests := []struct { + name string + line *Line + want bool + }{ + { + name: "nil", + line: &Line{}, + want: true, + }, + { + name: "empty", + line: &Line{ + Number: 1, + Content: []byte(""), + Break: []byte{}, + }, + want: true, + }, + { + name: "space only", + line: &Line{ + Number: 1, + Content: []byte(" "), + Break: []byte{}, + }, + want: false, + }, + { + name: "tab only", + line: &Line{ + Number: 1, + Content: []byte("\t\t"), + Break: []byte{}, + }, + want: false, + }, + { + name: "spaces and tabs", + line: &Line{ + Number: 1, + Content: []byte(" \t "), + Break: []byte{}, + }, + want: false, + }, + { + name: "text", + line: &Line{ + Number: 1, + Content: []byte("foobar"), + Break: []byte{}, + }, + want: false, + }, + { + name: "text with surrounding white space", + line: &Line{ + Number: 1, + Content: []byte(" foobar "), + Break: []byte{}, + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.line.Empty() + + assert.Equal(t, tt.want, got) + }) + } +} + +func TestLine_Blank(t *testing.T) { + tests := []struct { + name string + line *Line + want bool + }{ + { + name: "nil", + line: &Line{}, + want: true, + }, + { + name: "empty", + line: &Line{ + Number: 1, + Content: []byte(""), + Break: []byte{}, + }, + want: true, + }, + { + name: "space only", + line: &Line{ + Number: 1, + Content: []byte(" "), + Break: []byte{}, + }, + want: true, + }, + { + name: "tab only", + line: &Line{ 
+ Number: 1, + Content: []byte("\t\t"), + Break: []byte{}, + }, + want: true, + }, + { + name: "spaces and tabs", + line: &Line{ + Number: 1, + Content: []byte(" \t "), + Break: []byte{}, + }, + want: true, + }, + { + name: "text", + line: &Line{ + Number: 1, + Content: []byte("foobar"), + Break: []byte{}, + }, + want: false, + }, + { + name: "text with surrounding white space", + line: &Line{ + Number: 1, + Content: []byte(" foobar "), + Break: []byte{}, + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.line.Blank() + + assert.Equal(t, tt.want, got) + }) + } +} + func TestNewLines(t *testing.T) { tests := []struct { name string @@ -290,11 +440,67 @@ func TestNewLines(t *testing.T) { } } -var linesBytesTestCases = []struct { - name string - lines Lines - want []byte +var linesTestCases = []struct { + name string + lines Lines + bytes []byte + firstTextIndex int + lastTextIndex int }{ + { + name: "no lines", + lines: Lines{}, + bytes: []byte(""), + firstTextIndex: -1, + lastTextIndex: -1, + }, + { + name: "empty line", + lines: Lines{ + { + Number: 1, + Content: []byte(""), + }, + }, + bytes: []byte(""), + firstTextIndex: -1, + lastTextIndex: -1, + }, + { + name: "whitespace line", + lines: Lines{ + { + Number: 1, + Content: []byte(" "), + }, + }, + bytes: []byte(" "), + firstTextIndex: -1, + lastTextIndex: -1, + }, + { + name: "multiple whitespace lines", + lines: Lines{ + { + Number: 1, + Content: []byte(" "), + Break: []byte("\n"), + }, + { + Number: 2, + Content: []byte("\t"), + Break: []byte("\n"), + }, + { + Number: 3, + Content: []byte(" "), + Break: []byte{}, + }, + }, + bytes: []byte(" \n\t\n "), + firstTextIndex: -1, + lastTextIndex: -1, + }, { name: "single line", lines: Lines{ @@ -303,7 +509,9 @@ var linesBytesTestCases = []struct { Content: []byte("hello world"), }, }, - want: []byte("hello world"), + bytes: []byte("hello world"), + firstTextIndex: 0, + lastTextIndex: 0, }, { name: "single line 
with trailing LF", @@ -319,7 +527,9 @@ var linesBytesTestCases = []struct { Break: []byte{}, }, }, - want: []byte("hello world\n"), + bytes: []byte("hello world\n"), + firstTextIndex: 0, + lastTextIndex: 0, }, { name: "single line with trailing CRLF", @@ -335,7 +545,9 @@ var linesBytesTestCases = []struct { Break: []byte{}, }, }, - want: []byte("hello world\r\n"), + bytes: []byte("hello world\r\n"), + firstTextIndex: 0, + lastTextIndex: 0, }, { name: "single line with trailing CR", @@ -351,64 +563,72 @@ var linesBytesTestCases = []struct { Break: []byte{}, }, }, - want: []byte("hello world\r"), + bytes: []byte("hello world\r"), + firstTextIndex: 0, + lastTextIndex: 0, }, { name: "multi-line separated by LF", lines: Lines{ { - Number: 3, + Number: 1, + Content: []byte(""), + Break: []byte("\n"), + }, + { + Number: 2, Content: []byte("Aliquam feugiat tellus ut neque."), Break: []byte("\n"), }, { - Number: 4, + Number: 3, Content: []byte("Sed bibendum."), Break: []byte("\n"), }, { - Number: 5, + Number: 4, Content: []byte("Nullam libero mauris, consequat."), Break: []byte("\n"), }, { - Number: 6, + Number: 5, Content: []byte(""), Break: []byte("\n"), }, { - Number: 7, + Number: 6, Content: []byte("Integer placerat tristique nisl."), Break: []byte("\n"), }, { - Number: 8, + Number: 7, Content: []byte("Etiam vel neque nec dui bibendum."), Break: []byte("\n"), }, { - Number: 9, + Number: 8, Content: []byte(""), Break: []byte("\n"), }, { - Number: 10, + Number: 9, Content: []byte(""), Break: []byte("\n"), }, { - Number: 11, + Number: 10, Content: []byte("Nullam libero mauris, dictum id, arcu."), Break: []byte("\n"), }, { - Number: 12, + Number: 11, Content: []byte(""), Break: []byte{}, }, }, - want: []byte( - "Aliquam feugiat tellus ut neque.\n" + + bytes: []byte( + "\n" + + "Aliquam feugiat tellus ut neque.\n" + "Sed bibendum.\n" + "Nullam libero mauris, consequat.\n" + "\n" + @@ -418,63 +638,71 @@ var linesBytesTestCases = []struct { "\n" + "Nullam libero mauris, dictum 
id, arcu.\n", ), + firstTextIndex: 1, + lastTextIndex: 9, }, { name: "multi-line separated by CRLF", lines: Lines{ { - Number: 3, + Number: 1, + Content: []byte(""), + Break: []byte("\r\n"), + }, + { + Number: 2, Content: []byte("Aliquam feugiat tellus ut neque."), Break: []byte("\r\n"), }, { - Number: 4, + Number: 3, Content: []byte("Sed bibendum."), Break: []byte("\r\n"), }, { - Number: 5, + Number: 4, Content: []byte("Nullam libero mauris, consequat."), Break: []byte("\r\n"), }, { - Number: 6, + Number: 5, Content: []byte(""), Break: []byte("\r\n"), }, { - Number: 7, + Number: 6, Content: []byte("Integer placerat tristique nisl."), Break: []byte("\r\n"), }, { - Number: 8, + Number: 7, Content: []byte("Etiam vel neque nec dui bibendum."), Break: []byte("\r\n"), }, { - Number: 9, + Number: 8, Content: []byte(""), Break: []byte("\r\n"), }, { - Number: 10, + Number: 9, Content: []byte(""), Break: []byte("\r\n"), }, { - Number: 11, + Number: 10, Content: []byte("Nullam libero mauris, dictum id, arcu."), Break: []byte("\r\n"), }, { - Number: 12, + Number: 11, Content: []byte(""), Break: []byte{}, }, }, - want: []byte( - "Aliquam feugiat tellus ut neque.\r\n" + + bytes: []byte( + "\r\n" + + "Aliquam feugiat tellus ut neque.\r\n" + "Sed bibendum.\r\n" + "Nullam libero mauris, consequat.\r\n" + "\r\n" + @@ -484,63 +712,71 @@ var linesBytesTestCases = []struct { "\r\n" + "Nullam libero mauris, dictum id, arcu.\r\n", ), + firstTextIndex: 1, + lastTextIndex: 9, }, { name: "multi-line separated by CR", lines: Lines{ { - Number: 3, + Number: 1, + Content: []byte(""), + Break: []byte("\r"), + }, + { + Number: 2, Content: []byte("Aliquam feugiat tellus ut neque."), Break: []byte("\r"), }, { - Number: 4, + Number: 3, Content: []byte("Sed bibendum."), Break: []byte("\r"), }, { - Number: 5, + Number: 4, Content: []byte("Nullam libero mauris, consequat."), Break: []byte("\r"), }, { - Number: 6, + Number: 5, Content: []byte(""), Break: []byte("\r"), }, { - Number: 7, + Number: 6, 
Content: []byte("Integer placerat tristique nisl."), Break: []byte("\r"), }, { - Number: 8, + Number: 7, Content: []byte("Etiam vel neque nec dui bibendum."), Break: []byte("\r"), }, { - Number: 9, + Number: 8, Content: []byte(""), Break: []byte("\r"), }, { - Number: 10, + Number: 9, Content: []byte(""), Break: []byte("\r"), }, { - Number: 11, + Number: 10, Content: []byte("Nullam libero mauris, dictum id, arcu."), Break: []byte("\r"), }, { - Number: 12, + Number: 11, Content: []byte(""), Break: []byte{}, }, }, - want: []byte( - "Aliquam feugiat tellus ut neque.\r" + + bytes: []byte( + "\r" + + "Aliquam feugiat tellus ut neque.\r" + "Sed bibendum.\r" + "Nullam libero mauris, consequat.\r" + "\r" + @@ -550,21 +786,88 @@ var linesBytesTestCases = []struct { "\r" + "Nullam libero mauris, dictum id, arcu.\r", ), + firstTextIndex: 1, + lastTextIndex: 9, }, } +func TestLines_FirstTextIndex(t *testing.T) { + for _, tt := range linesTestCases { + t.Run(tt.name, func(t *testing.T) { + got := tt.lines.FirstTextIndex() + + assert.Equal(t, tt.firstTextIndex, got) + }) + } +} + +func BenchmarkLines_FirstTextIndex(b *testing.B) { + for _, tt := range linesTestCases { + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.lines.FirstTextIndex() + } + }) + } +} + +func TestLines_LastTextIndex(t *testing.T) { + for _, tt := range linesTestCases { + t.Run(tt.name, func(t *testing.T) { + got := tt.lines.LastTextIndex() + + assert.Equal(t, tt.lastTextIndex, got) + }) + } +} + +func BenchmarkLines_LastTextIndex(b *testing.B) { + for _, tt := range linesTestCases { + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.lines.LastTextIndex() + } + }) + } +} + +func TestLines_Trim(t *testing.T) { + for _, tt := range linesTestCases { + t.Run(tt.name, func(t *testing.T) { + want := Lines{} + if tt.firstTextIndex != -1 { + want = tt.lines[tt.firstTextIndex : tt.lastTextIndex+1] + } + + got := tt.lines.Trim() + + assert.Equal(t, want, got) + }) + } 
+} + +func BenchmarkLines_Trim(b *testing.B) { + for _, tt := range linesTestCases { + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.lines.Trim() + } + }) + } +} + func TestLines_Bytes(t *testing.T) { - for _, tt := range linesBytesTestCases { + for _, tt := range linesTestCases { t.Run(tt.name, func(t *testing.T) { got := tt.lines.Bytes() - assert.Equal(t, tt.want, got) + assert.Equal(t, tt.bytes, got) }) } } func BenchmarkLines_Bytes(b *testing.B) { - for _, tt := range linesBytesTestCases { + for _, tt := range linesTestCases { b.Run(tt.name, func(b *testing.B) { for n := 0; n < b.N; n++ { _ = tt.lines.Bytes() @@ -574,17 +877,17 @@ func BenchmarkLines_Bytes(b *testing.B) { } func TestLines_String(t *testing.T) { - for _, tt := range linesBytesTestCases { + for _, tt := range linesTestCases { t.Run(tt.name, func(t *testing.T) { got := tt.lines.String() - assert.Equal(t, string(tt.want), got) + assert.Equal(t, string(tt.bytes), got) }) } } func BenchmarkLines_String(b *testing.B) { - for _, tt := range linesBytesTestCases { + for _, tt := range linesTestCases { b.Run(tt.name, func(b *testing.B) { for n := 0; n < b.N; n++ { _ = tt.lines.String() diff --git a/paragraph.go b/paragraph.go deleted file mode 100644 index deaad3c..0000000 --- a/paragraph.go +++ /dev/null @@ -1,30 +0,0 @@ -package conventionalcommit - -import "bytes" - -// Paragraph represents a textual paragraph defined as; A continuous sequence of -// textual lines which are not empty or and do not consist of only whitespace. -type Paragraph struct { - // Lines is a list of lines which collectively form a paragraph. 
- Lines Lines -} - -func NewParagraphs(lines Lines) []*Paragraph { - r := []*Paragraph{} - - paragraph := &Paragraph{Lines: Lines{}} - for _, line := range lines { - if len(bytes.TrimSpace(line.Content)) > 0 { - paragraph.Lines = append(paragraph.Lines, line) - } else if len(paragraph.Lines) > 0 { - r = append(r, paragraph) - paragraph = &Paragraph{Lines: Lines{}} - } - } - - if len(paragraph.Lines) > 0 { - r = append(r, paragraph) - } - - return r -} diff --git a/paragraph_test.go b/paragraph_test.go deleted file mode 100644 index db7fc52..0000000 --- a/paragraph_test.go +++ /dev/null @@ -1,338 +0,0 @@ -package conventionalcommit - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestNewParagraphs(t *testing.T) { - tests := []struct { - name string - lines Lines - want []*Paragraph - }{ - { - name: "nil", - lines: nil, - want: []*Paragraph{}, - }, - { - name: "no lines", - lines: Lines{}, - want: []*Paragraph{}, - }, - { - name: "single empty line", - lines: Lines{ - { - Number: 1, - Content: []byte{}, - Break: []byte{}, - }, - }, - want: []*Paragraph{}, - }, - { - name: "multiple empty lines", - lines: Lines{ - { - Number: 1, - Content: []byte{}, - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte{}, - Break: []byte("\n"), - }, - { - Number: 3, - Content: []byte{}, - Break: []byte{}, - }, - }, - want: []*Paragraph{}, - }, - { - name: "single whitespace line", - lines: Lines{ - { - Number: 1, - Content: []byte("\t "), - Break: []byte{}, - }, - }, - want: []*Paragraph{}, - }, - { - name: "multiple whitespace lines", - lines: Lines{ - { - Number: 1, - Content: []byte{}, - Break: []byte("\t "), - }, - { - Number: 2, - Content: []byte{}, - Break: []byte("\t "), - }, - { - Number: 3, - Content: []byte("\t "), - Break: []byte{}, - }, - }, - want: []*Paragraph{}, - }, - { - name: "single line", - lines: Lines{ - { - Number: 1, - Content: []byte("hello world"), - Break: []byte{}, - }, - }, - want: []*Paragraph{ - { - Lines: Lines{ - { - 
Number: 1, - Content: []byte("hello world"), - Break: []byte{}, - }, - }, - }, - }, - }, - { - name: "multiple lines", - lines: Lines{ - { - Number: 1, - Content: []byte("hello world"), - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte("foo bar"), - Break: []byte{}, - }, - }, - want: []*Paragraph{ - { - Lines: Lines{ - { - Number: 1, - Content: []byte("hello world"), - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte("foo bar"), - Break: []byte{}, - }, - }, - }, - }, - }, - { - name: "multiple lines with trailing line break", - lines: Lines{ - { - Number: 1, - Content: []byte("hello world"), - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte("foo bar"), - Break: []byte("\n"), - }, - { - Number: 3, - Content: []byte(""), - Break: []byte{}, - }, - }, - want: []*Paragraph{ - { - Lines: Lines{ - { - Number: 1, - Content: []byte("hello world"), - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte("foo bar"), - Break: []byte("\n"), - }, - }, - }, - }, - }, - { - name: "multiple paragraphs with excess blank lines", - lines: Lines{ - { - Number: 1, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte("\t "), - Break: []byte("\r\n"), - }, - { - Number: 3, - Content: []byte("Aliquam feugiat tellus ut neque."), - Break: []byte("\r"), - }, - { - Number: 4, - Content: []byte("Sed bibendum."), - Break: []byte("\r"), - }, - { - Number: 5, - Content: []byte("Nullam libero mauris, consequat."), - Break: []byte("\n"), - }, - { - Number: 6, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 7, - Content: []byte("Integer placerat tristique nisl."), - Break: []byte("\n"), - }, - { - Number: 8, - Content: []byte("Etiam vel neque nec dui bibendum."), - Break: []byte("\n"), - }, - { - Number: 9, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 10, - Content: []byte(" "), - Break: []byte("\n"), - }, - { - Number: 11, - Content: []byte("\t\t"), - Break: []byte("\n"), - }, - { - 
Number: 12, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 13, - Content: []byte("Donec hendrerit tempor tellus."), - Break: []byte("\n"), - }, - { - Number: 14, - Content: []byte("In id erat non orci commodo lobortis."), - Break: []byte("\n"), - }, - { - Number: 15, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 16, - Content: []byte(" "), - Break: []byte("\n"), - }, - { - Number: 17, - Content: []byte("\t\t"), - Break: []byte("\n"), - }, - { - Number: 18, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 18, - Content: []byte(""), - Break: []byte{}, - }, - }, - want: []*Paragraph{ - { - Lines: Lines{ - { - Number: 3, - Content: []byte("Aliquam feugiat tellus ut neque."), - Break: []byte("\r"), - }, - { - Number: 4, - Content: []byte("Sed bibendum."), - Break: []byte("\r"), - }, - { - Number: 5, - Content: []byte("Nullam libero mauris, consequat."), - Break: []byte("\n"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 7, - Content: []byte("Integer placerat tristique nisl."), - Break: []byte("\n"), - }, - { - Number: 8, - Content: []byte( - "Etiam vel neque nec dui bibendum.", - ), - Break: []byte("\n"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 13, - Content: []byte("Donec hendrerit tempor tellus."), - Break: []byte("\n"), - }, - { - Number: 14, - Content: []byte( - "In id erat non orci commodo lobortis.", - ), - Break: []byte("\n"), - }, - }, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := NewParagraphs(tt.lines) - - assert.Equal(t, tt.want, got) - }) - } -} diff --git a/raw_message.go b/raw_message.go deleted file mode 100644 index 01c0f8b..0000000 --- a/raw_message.go +++ /dev/null @@ -1,50 +0,0 @@ -package conventionalcommit - -// RawMessage represents a commit message in a more structured form than a -// simple string or byte slice. This makes it easier to process a message for -// the purposes of extracting detailed information, linting, and formatting. 
-type RawMessage struct { - // Lines is a list of all individual lines of text in the commit message, - // which also includes the original line number, making it easy to pass a - // single Line around while still knowing where in the original commit - // message it belongs. - Lines Lines - - // Paragraphs is a list of textual paragraphs in the commit message. A - // paragraph is defined as any continuous sequence of lines which are not - // empty or consist of only whitespace. - Paragraphs []*Paragraph -} - -// NewRawMessage returns a RawMessage, with the given commit message broken down -// into individual lines of text, with sequential non-empty lines grouped into -// paragraphs. -func NewRawMessage(message []byte) *RawMessage { - r := &RawMessage{ - Lines: Lines{}, - Paragraphs: []*Paragraph{}, - } - - if len(message) == 0 { - return r - } - - r.Lines = NewLines(message) - r.Paragraphs = NewParagraphs(r.Lines) - - return r -} - -// Bytes renders the RawMessage back into a byte slice which is identical to the -// original input byte slice given to NewRawMessage. This includes retaining the -// original line break types for each line. -func (s *RawMessage) Bytes() []byte { - return s.Lines.Bytes() -} - -// String renders the RawMessage back into a string which is identical to the -// original input byte slice given to NewRawMessage. This includes retaining the -// original line break types for each line. 
-func (s *RawMessage) String() string { - return s.Lines.String() -} diff --git a/raw_message_test.go b/raw_message_test.go deleted file mode 100644 index f7653a6..0000000 --- a/raw_message_test.go +++ /dev/null @@ -1,641 +0,0 @@ -package conventionalcommit - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -var rawMessageTestCases = []struct { - name string - bytes []byte - rawMessage *RawMessage -}{ - { - name: "nil", - bytes: nil, - rawMessage: &RawMessage{ - Lines: Lines{}, - Paragraphs: []*Paragraph{}, - }, - }, - { - name: "empty", - bytes: []byte(""), - rawMessage: &RawMessage{ - Lines: Lines{}, - Paragraphs: []*Paragraph{}, - }, - }, - { - name: "single space", - bytes: []byte(" "), - rawMessage: &RawMessage{ - Lines: Lines{ - { - Number: 1, - Content: []byte(" "), - Break: []byte{}, - }, - }, - Paragraphs: []*Paragraph{}, - }, - }, - { - name: "subject only", - bytes: []byte("fix: a broken thing"), - rawMessage: &RawMessage{ - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte{}, - }, - }, - Paragraphs: []*Paragraph{ - { - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte{}, - }, - }, - }, - }, - }, - }, - { - name: "subject and body", - bytes: []byte("fix: a broken thing\n\nIt is now fixed."), - rawMessage: &RawMessage{ - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 3, - Content: []byte("It is now fixed."), - Break: []byte{}, - }, - }, - Paragraphs: []*Paragraph{ - { - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte("\n"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 3, - Content: []byte("It is now fixed."), - Break: []byte{}, - }, - }, - }, - }, - }, - }, - { - name: "subject and body with CRLF line breaks", - bytes: []byte("fix: a broken thing\r\n\r\nIt is now fixed."), - 
rawMessage: &RawMessage{ - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte("\r\n"), - }, - { - Number: 2, - Content: []byte(""), - Break: []byte("\r\n"), - }, - { - Number: 3, - Content: []byte("It is now fixed."), - Break: []byte{}, - }, - }, - Paragraphs: []*Paragraph{ - { - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte("\r\n"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 3, - Content: []byte("It is now fixed."), - Break: []byte{}, - }, - }, - }, - }, - }, - }, - { - name: "subject and body with CR line breaks", - bytes: []byte("fix: a broken thing\r\rIt is now fixed."), - rawMessage: &RawMessage{ - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte("\r"), - }, - { - Number: 2, - Content: []byte(""), - Break: []byte("\r"), - }, - { - Number: 3, - Content: []byte("It is now fixed."), - Break: []byte{}, - }, - }, - Paragraphs: []*Paragraph{ - { - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte("\r"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 3, - Content: []byte("It is now fixed."), - Break: []byte{}, - }, - }, - }, - }, - }, - }, - { - name: "separated by whitespace line", - bytes: []byte("fix: a broken thing\n \nIt is now fixed."), - rawMessage: &RawMessage{ - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte(" "), - Break: []byte("\n"), - }, - { - Number: 3, - Content: []byte("It is now fixed."), - Break: []byte{}, - }, - }, - Paragraphs: []*Paragraph{ - { - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: a broken thing"), - Break: []byte("\n"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 3, - Content: []byte("It is now fixed."), - Break: []byte{}, - }, - }, - }, - }, - }, - }, - { - name: "subject and long body", - bytes: []byte(`fix: something broken - -Lorem ipsum dolor sit amet, 
consectetuer adipiscing elit. Donec hendrerit -tempor tellus. Donec pretium posuere tellus. Proin quam nisl, tincidunt et, -mattis eget, convallis nec, purus. Cum sociis natoque penatibus et magnis dis -parturient montes, nascetur ridiculous mus. Nulla posuere. Donec vitae dolor. -Nullam tristique diam non turpis. Cras placerat accumsan nulla. Nullam rutrum. -Nam vestibulum accumsan nisl. - -Nullam eu ante vel est convallis dignissim. Fusce suscipit, wisi nec facilisis -facilisis, est dui fermentum leo, quis tempor ligula erat quis odio. Nunc porta -vulputate tellus. Nunc rutrum turpis sed pede. Sed bibendum. Aliquam posuere. -Nunc aliquet, augue nec adipiscing interdum, lacus tellus malesuada massa, quis -varius mi purus non odio. Pellentesque condimentum, magna ut suscipit hendrerit, -ipsum augue ornare nulla, non luctus diam neque sit amet urna. Curabitur -vulputate vestibulum lorem. Fusce sagittis, libero non molestie mollis, magna -orci ultrices dolor, at vulputate neque nulla lacinia eros. Sed id ligula quis -est convallis tempor. Curabitur lacinia pulvinar nibh. Nam a sapien. - -Phasellus lacus. Nam euismod tellus id erat.`), - rawMessage: &RawMessage{ - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: something broken"), - Break: []byte("\n"), - }, - { - Number: 2, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 3, - Content: []byte( - "Lorem ipsum dolor sit amet, consectetuer " + - "adipiscing elit. Donec hendrerit"), - Break: []byte("\n"), - }, - { - Number: 4, - Content: []byte( - "tempor tellus. Donec pretium posuere tellus. " + - "Proin quam nisl, tincidunt et,"), - Break: []byte("\n"), - }, - { - Number: 5, - Content: []byte( - "mattis eget, convallis nec, purus. Cum sociis " + - "natoque penatibus et magnis dis"), - Break: []byte("\n"), - }, - { - Number: 6, - Content: []byte( - "parturient montes, nascetur ridiculous mus. " + - "Nulla posuere. 
Donec vitae dolor."), - Break: []byte("\n"), - }, - { - Number: 7, - Content: []byte( - "Nullam tristique diam non turpis. Cras placerat " + - "accumsan nulla. Nullam rutrum."), - Break: []byte("\n"), - }, - { - Number: 8, - Content: []byte( - "Nam vestibulum accumsan nisl."), - Break: []byte("\n"), - }, - { - Number: 9, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 10, - Content: []byte( - "Nullam eu ante vel est convallis dignissim. " + - "Fusce suscipit, wisi nec facilisis", - ), - Break: []byte("\n"), - }, - { - Number: 11, - Content: []byte( - "facilisis, est dui fermentum leo, quis tempor " + - "ligula erat quis odio. Nunc porta", - ), - Break: []byte("\n"), - }, - { - Number: 12, - Content: []byte( - "vulputate tellus. Nunc rutrum turpis sed pede. " + - "Sed bibendum. Aliquam posuere.", - ), - Break: []byte("\n"), - }, - { - Number: 13, - Content: []byte( - "Nunc aliquet, augue nec adipiscing interdum, " + - "lacus tellus malesuada massa, quis", - ), - Break: []byte("\n"), - }, - { - Number: 14, - Content: []byte( - "varius mi purus non odio. Pellentesque " + - "condimentum, magna ut suscipit hendrerit,", - ), - Break: []byte("\n"), - }, - { - Number: 15, - Content: []byte( - "ipsum augue ornare nulla, non luctus diam neque " + - "sit amet urna. Curabitur", - ), - Break: []byte("\n"), - }, - { - Number: 16, - Content: []byte( - "vulputate vestibulum lorem. Fusce sagittis, " + - "libero non molestie mollis, magna", - ), - Break: []byte("\n"), - }, - { - Number: 17, - Content: []byte( - "orci ultrices dolor, at vulputate neque nulla " + - "lacinia eros. Sed id ligula quis", - ), - Break: []byte("\n"), - }, - { - Number: 18, - Content: []byte( - "est convallis tempor. Curabitur lacinia " + - "pulvinar nibh. Nam a sapien.", - ), - Break: []byte("\n"), - }, - { - Number: 19, - Content: []byte(""), - Break: []byte("\n"), - }, - { - Number: 20, - Content: []byte( - "Phasellus lacus. 
Nam euismod tellus id erat.", - ), - Break: []byte{}, - }, - }, - Paragraphs: []*Paragraph{ - { - Lines: Lines{ - { - Number: 1, - Content: []byte("fix: something broken"), - Break: []byte("\n"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 3, - Content: []byte( - "Lorem ipsum dolor sit amet, " + - "consectetuer adipiscing elit. Donec " + - "hendrerit", - ), - Break: []byte("\n"), - }, - { - Number: 4, - Content: []byte( - "tempor tellus. Donec pretium posuere " + - "tellus. Proin quam nisl, tincidunt " + - "et,", - ), - Break: []byte("\n"), - }, - { - Number: 5, - Content: []byte( - "mattis eget, convallis nec, purus. Cum " + - "sociis natoque penatibus et magnis " + - "dis", - ), - Break: []byte("\n"), - }, - { - Number: 6, - Content: []byte( - "parturient montes, nascetur ridiculous " + - "mus. Nulla posuere. Donec vitae " + - "dolor.", - ), - Break: []byte("\n"), - }, - { - Number: 7, - Content: []byte( - "Nullam tristique diam non turpis. Cras " + - "placerat accumsan nulla. Nullam " + - "rutrum.", - ), - Break: []byte("\n"), - }, - { - Number: 8, - Content: []byte( - "Nam vestibulum accumsan nisl.", - ), - Break: []byte("\n"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 10, - Content: []byte( - "Nullam eu ante vel est convallis " + - "dignissim. Fusce suscipit, wisi nec " + - "facilisis", - ), - Break: []byte("\n"), - }, - { - Number: 11, - Content: []byte( - "facilisis, est dui fermentum leo, quis " + - "tempor ligula erat quis odio. Nunc " + - "porta", - ), - Break: []byte("\n"), - }, - { - Number: 12, - Content: []byte( - "vulputate tellus. Nunc rutrum turpis " + - "sed pede. Sed bibendum. Aliquam " + - "posuere.", - ), - Break: []byte("\n"), - }, - { - Number: 13, - Content: []byte( - "Nunc aliquet, augue nec adipiscing " + - "interdum, lacus tellus malesuada " + - "massa, quis", - ), - Break: []byte("\n"), - }, - { - Number: 14, - Content: []byte( - "varius mi purus non odio. 
Pellentesque " + - "condimentum, magna ut suscipit " + - "hendrerit,", - ), - Break: []byte("\n"), - }, - { - Number: 15, - Content: []byte( - "ipsum augue ornare nulla, non luctus " + - "diam neque sit amet urna. Curabitur", - ), - Break: []byte("\n"), - }, - { - Number: 16, - Content: []byte( - "vulputate vestibulum lorem. Fusce " + - "sagittis, libero non molestie " + - "mollis, magna", - ), - Break: []byte("\n"), - }, - { - Number: 17, - Content: []byte( - "orci ultrices dolor, at vulputate neque " + - "nulla lacinia eros. Sed id ligula " + - "quis", - ), - Break: []byte("\n"), - }, - { - Number: 18, - Content: []byte( - "est convallis tempor. Curabitur lacinia " + - "pulvinar nibh. Nam a sapien.", - ), - Break: []byte("\n"), - }, - }, - }, - { - Lines: Lines{ - { - Number: 20, - Content: []byte( - "Phasellus lacus. Nam euismod tellus id " + - "erat.", - ), - Break: []byte{}, - }, - }, - }, - }, - }, - }, -} - -func TestNewRawMessage(t *testing.T) { - for _, tt := range rawMessageTestCases { - t.Run(tt.name, func(t *testing.T) { - got := NewRawMessage(tt.bytes) - - assert.Equal(t, tt.rawMessage, got) - }) - } -} - -func BenchmarkNewRawMessage(b *testing.B) { - for _, tt := range rawMessageTestCases { - b.Run(tt.name, func(b *testing.B) { - for n := 0; n < b.N; n++ { - _ = NewRawMessage(tt.bytes) - } - }) - } -} - -func TestRawMessage_Bytes(t *testing.T) { - for _, tt := range rawMessageTestCases { - if tt.bytes == nil { - continue - } - t.Run(tt.name, func(t *testing.T) { - got := tt.rawMessage.Bytes() - - assert.Equal(t, tt.bytes, got) - }) - } -} - -func BenchmarkRawMessage_Bytes(b *testing.B) { - for _, tt := range rawMessageTestCases { - if tt.bytes == nil { - continue - } - b.Run(tt.name, func(b *testing.B) { - for n := 0; n < b.N; n++ { - _ = tt.rawMessage.Bytes() - } - }) - } -} - -func TestRawMessage_String(t *testing.T) { - for _, tt := range rawMessageTestCases { - if tt.bytes == nil { - continue - } - t.Run(tt.name, func(t *testing.T) { - got := 
tt.rawMessage.String() - - assert.Equal(t, string(tt.bytes), got) - }) - } -} - -func BenchmarkRawMessage_String(b *testing.B) { - for _, tt := range rawMessageTestCases { - if tt.bytes == nil { - continue - } - b.Run(tt.name, func(b *testing.B) { - for n := 0; n < b.N; n++ { - _ = tt.rawMessage.String() - } - }) - } -} From 5174ed35ca6b6fe9aedde49ac06fe5584795d7e7 Mon Sep 17 00:00:00 2001 From: Jim Myhrberg Date: Thu, 26 Aug 2021 01:37:02 +0100 Subject: [PATCH 2/3] feat(parser): add Comment method to check if a line is a comment --- line.go | 18 ++++++++-- line_test.go | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 2 deletions(-) diff --git a/line.go b/line.go index 4bfbd39..e642da5 100644 --- a/line.go +++ b/line.go @@ -6,8 +6,10 @@ import ( ) const ( - lf = 10 // linefeed ("\n") character - cr = 13 // carriage return ("\r") character + lf = 10 // ASCII linefeed ("\n") character. + cr = 13 // ASCII carriage return ("\r") character. + hash = 35 // ASCII hash ("#") character. + ) // Line represents a single line of text defined as; A continuous sequence of @@ -36,6 +38,18 @@ func (s *Line) Blank() bool { return len(bytes.TrimSpace(s.Content)) == 0 } +// Comment returns true if line content is a commit comment, where the first +// non-whitespace character in the line is a hash (#). +func (s *Line) Comment() bool { + trimmed := bytes.TrimSpace(s.Content) + + if len(trimmed) == 0 { + return false + } + + return trimmed[0] == hash +} + // Lines is a slice of *Line types with some helper methods attached. 
type Lines []*Line diff --git a/line_test.go b/line_test.go index 5ea956f..da5344f 100644 --- a/line_test.go +++ b/line_test.go @@ -156,6 +156,99 @@ func TestLine_Blank(t *testing.T) { } } +func TestLine_Comment(t *testing.T) { + tests := []struct { + name string + line *Line + want bool + }{ + { + name: "nil", + line: &Line{}, + want: false, + }, + { + name: "empty", + line: &Line{ + Number: 1, + Content: []byte(""), + Break: []byte{}, + }, + want: false, + }, + { + name: "space only", + line: &Line{ + Number: 1, + Content: []byte(" "), + Break: []byte{}, + }, + want: false, + }, + { + name: "tab only", + line: &Line{ + Number: 1, + Content: []byte("\t\t"), + Break: []byte{}, + }, + want: false, + }, + { + name: "spaces and tabs", + line: &Line{ + Number: 1, + Content: []byte(" \t "), + Break: []byte{}, + }, + want: false, + }, + { + name: "text", + line: &Line{ + Number: 1, + Content: []byte("foobar"), + Break: []byte{}, + }, + want: false, + }, + { + name: "beings with hash", + line: &Line{ + Number: 1, + Content: []byte("# foo bar"), + Break: []byte{}, + }, + want: true, + }, + { + name: "beings with hash after whitespace", + line: &Line{ + Number: 1, + Content: []byte(" \t # foo bar"), + Break: []byte{}, + }, + want: true, + }, + { + name: "has hash in middle of text", + line: &Line{ + Number: 1, + Content: []byte(" foo # bar"), + Break: []byte{}, + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.line.Comment() + + assert.Equal(t, tt.want, got) + }) + } +} + func TestNewLines(t *testing.T) { tests := []struct { name string From bf44c4a64839b3c8597c8be429cefd8c8853e718 Mon Sep 17 00:00:00 2001 From: Jim Myhrberg Date: Sun, 15 Aug 2021 21:30:58 +0100 Subject: [PATCH 3/3] wip(parser): partly finished Message parser --- buffer.go | 54 +++---- buffer_test.go | 49 ++++++ line.go | 8 +- message.go | 178 ++++++++++++++++++++ message_test.go | 80 +++++++++ parse.go | 9 ++ parse_test.go | 421 
++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 759 insertions(+), 40 deletions(-) create mode 100644 message.go create mode 100644 message_test.go create mode 100644 parse.go create mode 100644 parse_test.go diff --git a/buffer.go b/buffer.go index 5679b9a..c60446b 100644 --- a/buffer.go +++ b/buffer.go @@ -1,38 +1,5 @@ package conventionalcommit -import ( - "regexp" -) - -// footerToken will match against all variations of Conventional Commit footer -// formats. -// -// Examples of valid footer tokens: -// -// Approved-by: John Carter -// ReviewdBy: Noctis -// Fixes #49 -// Reverts #SOL-42 -// BREAKING CHANGE: Flux capacitor no longer exists. -// BREAKING-CHANGE: Time will flow backwads -// -// Examples of invalid footer tokens: -// -// Approved-by: -// Approved-by:John Carter -// Approved by: John Carter -// ReviewdBy: Noctis -// Fixes#49 -// Fixes # -// Fixes 49 -// BREAKING CHANGE:Flux capacitor no longer exists. -// Breaking Change: Flux capacitor no longer exists. -// Breaking-Change: Time will flow backwads -// -var footerToken = regexp.MustCompile( - `^(?:([\w-]+)\s+(#.+)|([\w-]+|BREAKING[\s-]CHANGE):\s+(.+))$`, -) - // Buffer represents a commit message in a more structured form than a simple // string or byte slice. This makes it easier to process a message for the // purposes of extracting detailed information, linting, and formatting. @@ -119,11 +86,11 @@ func NewBuffer(message []byte) *Buffer { lastLen++ } - // If last paragraph starts with a Convention Commit footer token, it is the - // foot section, otherwise it is part of the body. + // If last paragraph starts with a Conventional Commit footer token, it is + // the foot section, otherwise it is part of the body. 
if lastLen > 0 { line := buf.lines[buf.lastLine-lastLen+1] - if footerToken.Match(line.Content) { + if FooterToken.Match(line.Content) { buf.footLen = lastLen } } @@ -176,6 +143,15 @@ func (s *Buffer) Lines() Lines { return s.lines[s.firstLine : s.lastLine+1] } +// LinesRaw returns all lines of the buffer including any blank lines at the +// beginning and end of the buffer. +func (s *Buffer) LinesRaw() Lines { + return s.lines +} + +// LineCount returns number of lines in the buffer after discarding blank lines +// from the beginning and end of the buffer. Effectively counting all lines from +// the first to the last line which contain any non-whitespace characters. func (s *Buffer) LineCount() int { if s.headLen == 0 { return 0 @@ -184,6 +160,12 @@ func (s *Buffer) LineCount() int { return (s.lastLine + 1) - s.firstLine } +// LineCountRaw returns the number of lines in the buffer including any blank +// lines at the beginning and end of the buffer. +func (s *Buffer) LineCountRaw() int { + return len(s.lines) +} + // Bytes renders the Buffer back into a byte slice, without any leading or // trailing whitespace lines. Leading whitespace on the first line which // contains non-whitespace characters is retained. 
It is only whole lines diff --git a/buffer_test.go b/buffer_test.go index 4b0aa9f..556f72b 100644 --- a/buffer_test.go +++ b/buffer_test.go @@ -994,6 +994,55 @@ func BenchmarkBuffer_Lines(b *testing.B) { } } +func TestBuffer_LinesRaw(t *testing.T) { + for _, tt := range bufferTestCases { + t.Run(tt.name, func(t *testing.T) { + want := tt.wantBuffer.lines[0:] + + got := tt.wantBuffer.LinesRaw() + + assert.Equal(t, want, got) + }) + } +} + +func TestBuffer_LineCount(t *testing.T) { + for _, tt := range bufferTestCases { + t.Run(tt.name, func(t *testing.T) { + want := tt.wantLines[1] + + got := tt.wantBuffer.LineCount() + + assert.Equal(t, want, got) + }) + } +} + +func BenchmarkBuffer_LineCount(b *testing.B) { + for _, tt := range bufferTestCases { + if tt.bytes == nil { + continue + } + b.Run(tt.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + _ = tt.wantBuffer.LineCount() + } + }) + } +} + +func TestBuffer_LineCountRaw(t *testing.T) { + for _, tt := range bufferTestCases { + t.Run(tt.name, func(t *testing.T) { + want := len(tt.wantBuffer.lines) + + got := tt.wantBuffer.LineCountRaw() + + assert.Equal(t, want, got) + }) + } +} + func TestBuffer_Bytes(t *testing.T) { for _, tt := range bufferTestCases { if tt.bytes == nil { diff --git a/line.go b/line.go index e642da5..cbbbf0e 100644 --- a/line.go +++ b/line.go @@ -58,9 +58,9 @@ type Lines []*Line // basis. func NewLines(content []byte) Lines { r := Lines{} - cLen := len(content) + length := len(content) - if cLen == 0 { + if length == 0 { return r } @@ -68,13 +68,13 @@ func NewLines(content []byte) Lines { var breaks [][]int // Locate each line break within content. 
- for i := 0; i < cLen; i++ { + for i := 0; i < length; i++ { switch content[i] { case lf: breaks = append(breaks, []int{i, i + 1}) case cr: b := []int{i, i + 1} - if i+1 < cLen && content[i+1] == lf { + if i+1 < length && content[i+1] == lf { b[1]++ i++ } diff --git a/message.go b/message.go new file mode 100644 index 0000000..6c48bd8 --- /dev/null +++ b/message.go @@ -0,0 +1,178 @@ +package conventionalcommit + +import ( + "errors" + "fmt" + "regexp" + "strings" +) + +var ( + Err = errors.New("conventionalcommit") + ErrEmptyMessage = fmt.Errorf("%w: empty message", Err) +) + +// HeaderToken will match a Conventional Commit formatted subject line, to +// extract type, scope, breaking change (bool), and description. +// +// It is intentionally VERY forgiving so as to be able to extract the various +// parts even when things aren't quite right. +var HeaderToken = regexp.MustCompile( + `^([^\(\)\r\n]*?)(\((.*?)\)\s*)?(!)?(\s*\:)\s(.*)$`, +) + +// FooterToken will match against all variations of Conventional Commit footer +// formats. +// +// Examples of valid footer tokens: +// +// Approved-by: John Carter +// ReviewedBy: Noctis +// Fixes #49 +// Reverts #SOL-42 +// BREAKING CHANGE: Flux capacitor no longer exists. +// BREAKING-CHANGE: Time will flow backwards +// +// Examples of invalid footer tokens: +// +// Approved-by: +// Approved-by:John Carter +// Approved by: John Carter +// ReviewedBy: Noctis +// Fixes#49 +// Fixes # +// Fixes 49 +// BREAKING CHANGE:Flux capacitor no longer exists. +// Breaking Change: Flux capacitor no longer exists. +// Breaking-Change: Time will flow backwards +// +var FooterToken = regexp.MustCompile( + `^([\w-]+|BREAKING[\s-]CHANGE)(?:\s*(:)\s+|\s+(#))(.+)$`, +) + +// Message represents a Conventional Commit message in a structured way. +type Message struct { + // Type indicates what kind of a change the commit message describes. + Type string + + // Scope indicates the context/component/area that the change affects.
+ Scope string + + // Description is the primary description for the commit. + Description string + + // Body is the main text body of the commit message. Effectively all text + // between the subject line, and any footers if present. + Body string + + // Footers are all footers which are not references or breaking changes. + Footers []*Footer + + // References are all footers defined with a reference style token, for + // example: + // + // Fixes #42 + References []*Reference + + // Breaking is set to true if the message subject included the "!" breaking + // change indicator. + Breaking bool + + // BreakingChanges includes the descriptions from all BREAKING CHANGE + // footers. + BreakingChanges []string +} + +func NewMessage(buf *Buffer) (*Message, error) { + msg := &Message{} + count := buf.LineCount() + + if count == 0 { + return nil, ErrEmptyMessage + } + + msg.Description = buf.Head().Join("\n") + if m := HeaderToken.FindStringSubmatch(msg.Description); len(m) > 0 { + msg.Type = strings.TrimSpace(m[1]) + msg.Scope = strings.TrimSpace(m[3]) + msg.Breaking = m[4] == "!" 
+ msg.Description = m[6] + } + + msg.Body = buf.Body().Join("\n") + + if foot := buf.Foot(); len(foot) > 0 { + footers := parseFooters(foot) + + for _, f := range footers { + name := string(f.name) + value := string(f.value) + + switch { + case f.ref: + msg.References = append(msg.References, &Reference{ + Name: name, + Value: value, + }) + case name == "BREAKING CHANGE" || name == "BREAKING-CHANGE": + msg.BreakingChanges = append(msg.BreakingChanges, value) + default: + msg.Footers = append(msg.Footers, &Footer{ + Name: name, + Value: value, + }) + } + } + } + + return msg, nil +} + +func (s *Message) IsBreakingChange() bool { + return s.Breaking || len(s.BreakingChanges) > 0 +} + +func parseFooters(lines Lines) []*rawFooter { + var footers []*rawFooter + footer := &rawFooter{} + for _, line := range lines { + if m := FooterToken.FindSubmatch(line.Content); m != nil { + if len(footer.name) > 0 { + footers = append(footers, footer) + } + + footer = &rawFooter{} + if len(m[3]) > 0 { + footer.ref = true + footer.value = []byte{hash} + } + footer.name = m[1] + footer.value = append(footer.value, m[4]...) + } else if len(footer.name) > 0 { + footer.value = append(footer.value, lf) + footer.value = append(footer.value, line.Content...) 
+ } + } + + if len(footer.name) > 0 { + footers = append(footers, footer) + } + + return footers +} + +type rawFooter struct { + name []byte + value []byte + ref bool +} + +type Footer struct { + Name string + Value string +} + +type Reference struct { + Name string + Value string +} diff --git a/message_test.go b/message_test.go new file mode 100644 index 0000000..64b0e39 --- /dev/null +++ b/message_test.go @@ -0,0 +1,80 @@ +package conventionalcommit + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestMessage_IsBreakingChange(t *testing.T) { + type fields struct { + Breaking bool + BreakingChanges []string + } + tests := []struct { + name string + fields fields + want bool + }{ + { + name: "false breaking flag, no change texts", + fields: fields{ + Breaking: false, + BreakingChanges: []string{}, + }, + want: false, + }, + { + name: "true breaking flag, no change texts", + fields: fields{ + Breaking: true, + BreakingChanges: []string{}, + }, + want: true, + }, + { + name: "false breaking flag, 1 change texts", + fields: fields{ + Breaking: false, + BreakingChanges: []string{"be careful"}, + }, + want: true, + }, + { + name: "true breaking flag, 1 change texts", + fields: fields{ + Breaking: true, + BreakingChanges: []string{"be careful"}, + }, + want: true, + }, + { + name: "false breaking flag, 3 change texts", + fields: fields{ + Breaking: false, + BreakingChanges: []string{"be careful", "oops", "ouch"}, + }, + want: true, + }, + { + name: "true breaking flag, 3 change texts", + fields: fields{ + Breaking: true, + BreakingChanges: []string{"be careful", "oops", "ouch"}, + }, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + msg := &Message{ + Breaking: tt.fields.Breaking, + BreakingChanges: tt.fields.BreakingChanges, + } + + got := msg.IsBreakingChange() + + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/parse.go b/parse.go new file mode 100644 index 0000000..13bda34 --- /dev/null +++ 
b/parse.go @@ -0,0 +1,9 @@ +package conventionalcommit + +// Parse parses a conventional commit message and returns it as a *Message +// struct. +func Parse(message []byte) (*Message, error) { + buffer := NewBuffer(message) + + return NewMessage(buffer) +} diff --git a/parse_test.go b/parse_test.go new file mode 100644 index 0000000..13e2414 --- /dev/null +++ b/parse_test.go @@ -0,0 +1,421 @@ +package conventionalcommit + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParse(t *testing.T) { + tests := []struct { + name string + message []byte + want *Message + wantErr string + }{ + { + name: "empty", + message: []byte{}, + wantErr: "conventionalcommit: empty message", + }, + { + name: "description only", + message: []byte("change a thing"), + want: &Message{ + Description: "change a thing", + }, + }, + { + name: "description and body", + message: []byte(`change a thing + +more stuff +and more`, + ), + want: &Message{ + Description: "change a thing", + Body: "more stuff\nand more", + }, + }, + { + name: "type and description", + message: []byte("feat: change a thing"), + want: &Message{ + Type: "feat", + Description: "change a thing", + }, + }, + { + name: "type, description and body", + message: []byte( + "feat: change a thing\n\nmore stuff\nand more", + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + Body: "more stuff\nand more", + }, + }, + { + name: "type, scope and description", + message: []byte("feat(token): change a thing"), + want: &Message{ + Type: "feat", + Scope: "token", + Description: "change a thing", + }, + }, + { + name: "type, scope, description and body", + message: []byte( + `feat(token): change a thing + +more stuff +and more`, + ), + want: &Message{ + Type: "feat", + Scope: "token", + Description: "change a thing", + Body: "more stuff\nand more", + }, + }, + { + name: "breaking change in subject line", + message: []byte( + `feat!: change a thing + +more stuff +and more`, + ), + want: 
&Message{ + Type: "feat", + Description: "change a thing", + Body: "more stuff\nand more", + Breaking: true, + }, + }, + { + name: "breaking change in subject line with scope", + message: []byte( + `feat(token)!: change a thing + +more stuff +and more`, + ), + want: &Message{ + Type: "feat", + Scope: "token", + Description: "change a thing", + Body: "more stuff\nand more", + Breaking: true, + }, + }, + + { + name: "BREAKING CHANGE footer", + message: []byte( + `feat: change a thing + +BREAKING CHANGE: will blow up +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + BreakingChanges: []string{"will blow up"}, + }, + }, + { + name: "BREAKING-CHANGE footer", + message: []byte( + `feat(token): change a thing + +BREAKING-CHANGE: maybe not +`, + ), + want: &Message{ + Type: "feat", + Scope: "token", + Description: "change a thing", + BreakingChanges: []string{"maybe not"}, + }, + }, + { + name: "reference footer", + message: []byte( + `feat: change a thing + +Fixes #349 +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + References: []*Reference{ + {Name: "Fixes", Value: "#349"}, + }, + }, + }, + { + name: "reference (alt) footer", + message: []byte( + `feat: change a thing + +Reverts #SOL-934 +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + References: []*Reference{ + {Name: "Reverts", Value: "#SOL-934"}, + }, + }, + }, + { + name: "token footer", + message: []byte( + `feat: change a thing + +Approved-by: John Carter +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + Footers: []*Footer{ + {Name: "Approved-by", Value: "John Carter"}, + }, + }, + }, + { + name: "token (alt) footer", + message: []byte( + `feat: change a thing + +ReviewedBy: Noctis +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + Footers: []*Footer{ + {Name: "ReviewedBy", Value: "Noctis"}, + }, + }, + }, + + { + name: "BREAKING CHANGE footer with body", + message: []byte( + 
`feat: change a thing + +more stuff +and more + +BREAKING CHANGE: will blow up +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + Body: "more stuff\nand more", + BreakingChanges: []string{"will blow up"}, + }, + }, + { + name: "BREAKING-CHANGE footer with body", + message: []byte( + `feat(token): change a thing + +more stuff +and more + +BREAKING-CHANGE: maybe not +`, + ), + want: &Message{ + Type: "feat", + Scope: "token", + Description: "change a thing", + Body: "more stuff\nand more", + BreakingChanges: []string{"maybe not"}, + }, + }, + { + name: "reference footer with body", + message: []byte( + `feat: change a thing + +more stuff +and more + +Fixes #349 +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + Body: "more stuff\nand more", + References: []*Reference{ + {Name: "Fixes", Value: "#349"}, + }, + }, + }, + { + name: "reference (alt) footer with body", + message: []byte( + `feat: change a thing + +more stuff +and more + +Reverts #SOL-934 +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + Body: "more stuff\nand more", + References: []*Reference{ + {Name: "Reverts", Value: "#SOL-934"}, + }, + }, + }, + { + name: "token footer with body", + message: []byte( + `feat: change a thing + +more stuff +and more + +Approved-by: John Carter +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + Body: "more stuff\nand more", + Footers: []*Footer{ + {Name: "Approved-by", Value: "John Carter"}, + }, + }, + }, + { + name: "token (alt) footer with body", + message: []byte( + `feat: change a thing + +more stuff +and more + +ReviewedBy: Noctis +`, + ), + want: &Message{ + Type: "feat", + Description: "change a thing", + Body: "more stuff\nand more", + Footers: []*Footer{ + {Name: "ReviewedBy", Value: "Noctis"}, + }, + }, + }, + { + name: "type, scope, description, body and footers", + message: []byte( + `feat(token): change a thing + +more stuff +and more + +BREAKING 
CHANGE: will blow up +BREAKING-CHANGE: maybe not +Fixes #349 +Reverts #SOL-934 +Approved-by: John Carter +ReviewedBy: Noctis +`, + ), + want: &Message{ + Type: "feat", + Scope: "token", + Description: "change a thing", + Body: "more stuff\nand more", + Footers: []*Footer{ + {Name: "Approved-by", Value: "John Carter"}, + {Name: "ReviewedBy", Value: "Noctis"}, + }, + References: []*Reference{ + {Name: "Fixes", Value: "#349"}, + {Name: "Reverts", Value: "#SOL-934"}, + }, + BreakingChanges: []string{"will blow up", "maybe not"}, + }, + }, + { + name: "multi-line footers", + message: []byte( + `feat(token): change a thing + +Some stuff + +BREAKING CHANGE: Nam euismod tellus id erat. Cum sociis natoque penatibus +et magnis dis parturient montes, nascetur ridiculous mus. +Approved-by: John Carter +and Noctis +Fixes #SOL-349 and also +#SOL-9440 +`, + ), + want: &Message{ + Type: "feat", + Scope: "token", + Description: "change a thing", + Body: "Some stuff", + Footers: []*Footer{ + {Name: "Approved-by", Value: "John Carter\nand Noctis"}, + }, + References: []*Reference{ + {Name: "Fixes", Value: "#SOL-349 and also\n#SOL-9440"}, + }, + BreakingChanges: []string{ + `Nam euismod tellus id erat. Cum sociis natoque penatibus +et magnis dis parturient montes, nascetur ridiculous mus.`, + }, + }, + }, + { + name: "indented footer", + message: []byte( + `feat(token): change a thing + +Some stuff + + Approved-by: John Carter +`, + ), + want: &Message{ + Type: "feat", + Scope: "token", + Description: "change a thing", + Body: "Some stuff\n\n Approved-by: John Carter", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := Parse(tt.message) + + if tt.wantErr != "" { + assert.EqualError(t, err, tt.wantErr) + } else { + assert.NoError(t, err) + } + + assert.Equal(t, tt.want, got) + }) + } +}