From 09b44ce3e51994f19a6595988987d03ed9e729f6 Mon Sep 17 00:00:00 2001 From: Florian Forster Date: Fri, 6 Sep 2024 14:25:37 +0200 Subject: [PATCH] feat: Add the `DiffUnified()` method for formatting a unified diff. This adds a new `DiffUnified()` method which takes a `[]Diff` slice and formats it in the "unified diff" format. The `[]Diff` slice does not necessarily need to be a linewise diff. The implementation deals gracefully with missing newlines at the end of either side, as well as empty inputs on either side. The method accepts two optional arguments: `UnifiedLabels()` for setting the labels for the two inputs and `UnifiedContextLines()` for setting the number of context lines printed. Example usage: ```go unified := dmp.DiffUnified(diffs, diffmatchpatch.UnifiedLabels("text1", "text2"), diffmatchpatch.UnifiedContextLines(3)) ``` A convenience method, `Unified()`, takes two string inputs and returns the unified diff. Fixes: #124 --- diffmatchpatch/unified.go | 454 +++++++++++++++++++++++++++++++++ diffmatchpatch/unified_test.go | 272 ++++++++++++++++++++ go.mod | 1 + go.sum | 2 + 4 files changed, 729 insertions(+) create mode 100644 diffmatchpatch/unified.go create mode 100644 diffmatchpatch/unified_test.go diff --git a/diffmatchpatch/unified.go b/diffmatchpatch/unified.go new file mode 100644 index 0000000..5056d25 --- /dev/null +++ b/diffmatchpatch/unified.go @@ -0,0 +1,454 @@ +package diffmatchpatch + +import ( + "fmt" + "strings" +) + +// Unified computes the differences between text1 and text2 and formats the differences in the "unified diff" format. +// Optionally pass UnifiedOption to set the new/old labels and context lines. 
+func (dmp *DiffMatchPatch) Unified(text1, text2 string, opts ...UnifiedOption) string {
+	options := newUnifiedOptions(opts)
+
+	// Diff whole lines instead of characters: encode the lines of both texts,
+	// diff the encoded texts, then expand the result back into full lines.
+	text1Enc, text2Enc, lines := dmp.DiffLinesToChars(text1, text2)
+
+	diffs := dmp.DiffMain(text1Enc, text2Enc, false)
+	diffs = dmp.DiffCharsToLines(diffs, lines)
+
+	return toUnified(diffs, options).String()
+}
+
+// DiffUnified formats the diffs slice in the "unified diff" format.
+// Optionally pass UnifiedOption to set the new/old labels and context lines.
+//
+// The diffs do not have to be aligned on line boundaries; they are regrouped
+// into whole lines first. The empty string is returned if diffs contains
+// neither insertions nor deletions.
+func (dmp *DiffMatchPatch) DiffUnified(diffs []Diff, opts ...UnifiedOption) string {
+	return toUnified(diffs, newUnifiedOptions(opts)).String()
+}
+
+// toUnified regroups diffs into whole lines and collects the changed lines,
+// together with up to opts.contextLines unchanged lines on either side, into
+// hunks. Changes separated by at most 2*opts.contextLines unchanged lines
+// share one hunk. The result has no hunks if diffs contains only equalities.
+func toUnified(diffs []Diff, opts unifiedOptions) unified {
+	u := unified{
+		label1: opts.text1Label,
+		label2: opts.text2Label,
+	}
+
+	if isEqual(diffs) {
+		return u
+	}
+
+	// More than maxCtx unchanged lines between two changes means that their
+	// context lines do not touch, so the second change starts a new hunk.
+	maxCtx := opts.contextLines * 2
+
+	var (
+		// h is the hunk currently being assembled, nil if there is none.
+		h *hunk
+
+		// Number of lines of text1 / text2 consumed so far, including the
+		// line of the diff currently being processed.
+		lineNo1 int
+		lineNo2 int
+		// context holds the unchanged lines seen since the last change.
+		context []Diff
+	)
+
+	// numContext returns how many of the pending context lines may be
+	// attached to one side of a hunk.
+	numContext := func() int {
+		if len(context) > opts.contextLines {
+			return opts.contextLines
+		}
+
+		return len(context)
+	}
+
+	for _, diff := range diffLinewise(diffs) {
+		switch diff.Type {
+		case DiffDelete:
+			lineNo1++
+		case DiffInsert:
+			lineNo2++
+		case DiffEqual:
+			lineNo1++
+			lineNo2++
+
+			// Unchanged lines are only buffered; whether they end up in a
+			// hunk is decided when the next change (or the end) is reached.
+			context = append(context, diff)
+			continue
+		}
+
+		// The gap since the previous change is too large to bridge: close the
+		// previous hunk with its trailing context.
+		if h != nil && len(context) > maxCtx {
+			h.diffs = append(h.diffs, context[:numContext()]...)
+
+			u.hunks = append(u.hunks, *h)
+			h = nil
+		}
+
+		// Start a new hunk, led by the last cl buffered context lines.
+		if h == nil {
+			cl := numContext()
+
+			l1 := lineNo1 - cl
+			l2 := lineNo2 - cl
+
+			// When starting a new hunk, the line number for lineNo1 XOR lineNo2
+			// has already been advanced past the changed line, but not the
+			// other. Account for that in l1 or l2.
+			switch diff.Type {
+			case DiffDelete:
+				l2++
+			case DiffInsert:
+				l1++
+			}
+
+			h = &hunk{
+				fromLine: l1,
+				toLine:   l2,
+				// Copy, so that appending to the hunk never writes into the
+				// backing array of the context buffer.
+				diffs: append([]Diff(nil), context[len(context)-cl:]...),
+			}
+
+			context = nil
+		}
+
+		// Context that is still pending here lies between two changes of the
+		// same hunk and is kept in full.
+		h.diffs = append(h.diffs, context...)
+		context = nil
+
+		h.diffs = append(h.diffs, diff)
+	}
+
+	// Close the last hunk with its trailing context.
+	if h != nil {
+		h.diffs = append(h.diffs, context[:numContext()]...)
+
+		u.hunks = append(u.hunks, *h)
+	}
+
+	return u
+}
+
+// isEqual reports whether diffs consists of equalities only. This is also
+// the case for an empty slice.
+func isEqual(diffs []Diff) bool {
+	for _, diff := range diffs {
+		if diff.Type != DiffEqual {
+			return false
+		}
+	}
+
+	return true
+}
+
+// diffLinewise splits and merges diffs so that each individual diff represents one line, including the final newline character.
+// Only a last line that is not terminated by a newline is returned without one.
+// Within a run of changes, deletions are placed before insertions.
+func diffLinewise(diffs []Diff) []Diff {
+	var (
+		ret []Diff
+		// The current, not yet complete, line of text1 and text2.
+		line1, line2 string
+	)
+
+	// emit moves complete lines from line1 / line2 to ret. While input is still
+	// being consumed a line is complete once it ends in a newline; at the end
+	// of the input (final) whatever is left over is a line as well.
+	emit := func(final bool) {
+		complete := func(line string) bool {
+			if final {
+				return line != ""
+			}
+
+			return strings.HasSuffix(line, "\n")
+		}
+
+		if complete(line1) && line1 == line2 {
+			ret = append(ret, Diff{Type: DiffEqual, Text: line1})
+			line1, line2 = "", ""
+		}
+
+		if complete(line1) {
+			ret = append(ret, Diff{Type: DiffDelete, Text: line1})
+			line1 = ""
+		}
+
+		if complete(line2) {
+			ret = append(ret, Diff{Type: DiffInsert, Text: line2})
+			line2 = ""
+		}
+	}
+
+	for _, diff := range diffCleanupNewline(diffs) {
+		for _, segment := range strings.SplitAfter(diff.Text, "\n") {
+			switch diff.Type {
+			case DiffDelete:
+				line1 += segment
+			case DiffInsert:
+				line2 += segment
+			default: // equal
+				line1 += segment
+				line2 += segment
+			}
+
+			emit(false)
+		}
+	}
+
+	// line1 and/or line2 may be non-empty if there is no newline at the end of file.
+	emit(true)
+
+	return reorderDeletionsFirst(ret)
+}
+
+// diffCleanupNewline looks for single edits surrounded on both sides by equalities which can be shifted sideways to align on newlines.
+//
+// NOTE: the equality that ends a shifted triple is not considered as the
+// start of the next triple, so of two edits separated by a single equality
+// only the first one is shifted.
+func diffCleanupNewline(diffs []Diff) []Diff {
+	ret := make([]Diff, 0, len(diffs))
+
+	for i := 0; i < len(diffs); i++ {
+		if i+2 < len(diffs) && diffs[i].Type == DiffEqual && diffs[i+1].Type != DiffEqual && diffs[i+2].Type == DiffEqual {
+			edit, next := diffs[i+1], diffs[i+2]
+
+			// With c being the common prefix (ending in a newline) of the edit
+			// and the equality following it, convert
+			//   ["=A", "±cX", "=cB"]
+			// to
+			//   ["=Ac", "±Xc", "=B"]
+			// Both forms describe the same two texts.
+			if common := prefixWithNewline(edit.Text, next.Text); common != "" {
+				ret = append(ret,
+					Diff{
+						Type: DiffEqual,
+						Text: diffs[i].Text + common,
+					},
+					Diff{
+						Type: edit.Type,
+						Text: strings.TrimPrefix(edit.Text, common) + common,
+					},
+					Diff{
+						Type: DiffEqual,
+						Text: strings.TrimPrefix(next.Text, common),
+					},
+				)
+
+				i += 2
+				continue
+			}
+		}
+
+		ret = append(ret, diffs[i])
+	}
+
+	return ret
+}
+
+// prefixWithNewline returns the longest common prefix between text1 and text2, up to and including a newline character.
+// If text1 and text2 do not have a common prefix, or the common prefix does not include a newline character, the empty string is returned.
+func prefixWithNewline(text1, text2 string) string {
+	n := len(text1)
+	if len(text2) < n {
+		n = len(text2)
+	}
+
+	// Compare bytes rather than runes: the result is used to slice the
+	// strings, and string indices are byte offsets. A length counted in runes
+	// is too short as soon as the prefix contains a multi-byte character.
+	// Cutting right after a newline byte never splits a UTF-8 sequence.
+	end := 0
+	for i := 0; i < n && text1[i] == text2[i]; i++ {
+		if text1[i] == '\n' {
+			end = i + 1
+		}
+	}
+
+	return text1[:end]
+}
+
+// reorderDeletionsFirst reorders changes so that deletions come before insertions, without crossing an equality boundary.
+func reorderDeletionsFirst(diffs []Diff) []Diff {
+	var out, dels, inss []Diff
+
+	// flush moves the edits collected since the last equality to the output,
+	// deletions ahead of insertions.
+	flush := func() {
+		out = append(out, dels...)
+		out = append(out, inss...)
+		dels, inss = nil, nil
+	}
+
+	for _, d := range diffs {
+		if d.Type == DiffDelete {
+			dels = append(dels, d)
+			continue
+		}
+
+		if d.Type == DiffInsert {
+			inss = append(inss, d)
+			continue
+		}
+
+		if d.Type == DiffEqual {
+			// An equality is a barrier: pending edits go in front of it.
+			flush()
+			out = append(out, d)
+		}
+	}
+
+	flush()
+
+	return out
+}
+
+// unified is a diff prepared for printing in the unified format: the labels
+// of the two texts and the list of hunks.
+type unified struct {
+	label1, label2 string
+	hunks          []hunk
+}
+
+// hunk is a list of nearby changes, separated by at most 2*contextLines
+// unchanged lines, together with the context lines around them.
+type hunk struct {
+	// The line in text1 where the hunk starts; the first line is line 1.
+	fromLine int
+	// The line in text2 where the hunk starts; the first line is line 1.
+	toLine int
+	// List of modifications. Each Diff represents one deleted, inserted, or equal line.
+	diffs []Diff
+}
+
+// numLines returns how many lines of text1 (n1) and of text2 (n2) the hunk
+// covers. Equal lines count for both sides.
+func (h hunk) numLines() (n1, n2 int) {
+	for _, d := range h.diffs {
+		if d.Type == DiffDelete || d.Type == DiffEqual {
+			n1++
+		}
+
+		if d.Type == DiffInsert || d.Type == DiffEqual {
+			n2++
+		}
+	}
+
+	return n1, n2
+}
+
+// String renders the hunk: the "@@ -from,count +to,count @@" header followed
+// by one prefixed line per diff.
+func (h hunk) String() string {
+	n1, n2 := h.numLines()
+
+	var b strings.Builder
+
+	// writeRange writes one side of the header. The count is omitted when
+	// it is 1.
+	writeRange := func(sign string, line, count int) {
+		if count > 1 {
+			fmt.Fprintf(&b, " %s%d,%d", sign, line, count)
+			return
+		}
+
+		if count == 0 && line == 1 {
+			// Mimic GNU diff -u behavior for a side without any lines, i.e.
+			// when adding to, or leaving behind, an empty file.
+			fmt.Fprintf(&b, " %s0,0", sign)
+			return
+		}
+
+		fmt.Fprintf(&b, " %s%d", sign, line)
+	}
+
+	b.WriteString("@@")
+	writeRange("-", h.fromLine, n1)
+	writeRange("+", h.toLine, n2)
+	b.WriteString(" @@\n")
+
+	for _, d := range h.diffs {
+		marker := byte(' ')
+
+		switch d.Type {
+		case DiffDelete:
+			marker = '-'
+		case DiffInsert:
+			marker = '+'
+		}
+
+		b.WriteByte(marker)
+		b.WriteString(d.Text)
+
+		// A line lacking its newline gets one here, followed by the marker
+		// line that records this fact.
+		if !strings.HasSuffix(d.Text, "\n") {
+			b.WriteString("\n\\ No newline at end of file\n")
+		}
+	}
+
+	return b.String()
+}
+
+// String returns the unified diff in its textual form: the "---" / "+++"
+// header lines followed by all hunks. A diff without hunks is rendered as
+// the empty string.
+func (u unified) String() string {
+	if len(u.hunks) == 0 {
+		return ""
+	}
+
+	var out strings.Builder
+
+	out.WriteString("--- " + u.label1 + "\n")
+	out.WriteString("+++ " + u.label2 + "\n")
+
+	for _, h := range u.hunks {
+		out.WriteString(h.String())
+	}
+
+	return out.String()
+}
+
+// DefaultContextLines is the number of unchanged lines of surrounding
+// context displayed by Unified.
+const DefaultContextLines = 3
+
+// UnifiedOption is an option for Unified() and DiffUnified().
+type UnifiedOption func(*unifiedOptions)
+
+// unifiedOptions holds the settings that UnifiedOption functions modify.
+type unifiedOptions struct {
+	contextLines int
+	text1Label   string
+	text2Label   string
+}
+
+// newUnifiedOptions returns the default options with opts applied in order.
+func newUnifiedOptions(opts []UnifiedOption) unifiedOptions {
+	var ret unifiedOptions
+
+	ret.contextLines = DefaultContextLines
+	ret.text1Label, ret.text2Label = "text1", "text2"
+
+	for i := range opts {
+		opts[i](&ret)
+	}
+
+	return ret
+}
+
+// UnifiedContextLines sets the number of unchanged lines of surrounding context
+// printed. Defaults to DefaultContextLines, which is also used if lines is
+// zero or negative.
+func UnifiedContextLines(lines int) UnifiedOption {
+	n := DefaultContextLines
+	if lines > 0 {
+		n = lines
+	}
+
+	return func(o *unifiedOptions) {
+		o.contextLines = n
+	}
+}
+
+// UnifiedLabels sets the labels for the old and new files. Defaults to "text1" and "text2".
+func UnifiedLabels(oldLabel, newLabel string) UnifiedOption { + return func(o *unifiedOptions) { + o.text1Label = oldLabel + o.text2Label = newLabel + } +} diff --git a/diffmatchpatch/unified_test.go b/diffmatchpatch/unified_test.go new file mode 100644 index 0000000..de6cf06 --- /dev/null +++ b/diffmatchpatch/unified_test.go @@ -0,0 +1,272 @@ +package diffmatchpatch_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/sergi/go-diff/diffmatchpatch" +) + +func TestDiffUnified(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + text1 string + text2 string + want string + }{ + { + name: "No changes", + text1: "Hello, world!\n", + text2: "Hello, world!\n", + want: "", + }, + { + name: "Insertion at beginning", + text1: "Hello, world!\n", + text2: "New line\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1 +1,2 @@\n+New line\n Hello, world!\n", + }, + { + name: "Insertion at end", + text1: "Hello, world!\n", + text2: "Hello, world!\nNew line\n", + want: "--- text1\n+++ text2\n@@ -1 +1,2 @@\n Hello, world!\n+New line\n", + }, + { + name: "Insertion middle", + text1: "Hello, world!\nHello, world!\n", + text2: "Hello, world!\nNew line\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1,3 @@\n Hello, world!\n+New line\n Hello, world!\n", + }, + { + name: "Removal at beginning", + text1: "Old line\nHello, world!\n", + text2: "Hello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1 @@\n-Old line\n Hello, world!\n", + }, + { + name: "Removal at end", + text1: "Hello, world!\nOld line\n", + text2: "Hello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,2 +1 @@\n Hello, world!\n-Old line\n", + }, + { + name: "Removal middle", + text1: "Hello, world!\nOld line\nHello, world!\n", + text2: "Hello, world!\nHello, world!\n", + want: "--- text1\n+++ text2\n@@ -1,3 +1,2 @@\n Hello, world!\n-Old line\n Hello, world!\n", + }, + { + name: "Replacement", + text1: "Prefix\nHello, world!\nSuffix\n", + text2: 
"Prefix\nHello, Golang!\nSuffix\n", + want: "--- text1\n+++ text2\n@@ -1,3 +1,3 @@\n Prefix\n-Hello, world!\n+Hello, Golang!\n Suffix\n", + }, + { + name: "Insertion", + text1: makeContext(10, 0), + text2: makeContext(5, 0) + "INSERTION\n" + makeContext(5, 5), + want: "--- text1\n+++ text2\n@@ -3,6 +3,7 @@\n context2\n context3\n context4\n+INSERTION\n context5\n context6\n context7\n", + }, + { + name: "Multiple hunks", + text1: makeContext(20, 0), + text2: makeContext(5, 0) + "INSERTION1\n" + makeContext(10, 5) + "INSERTION2\n" + makeContext(5, 15), + want: `--- text1 ++++ text2 +@@ -3,6 +3,7 @@ + context2 + context3 + context4 ++INSERTION1 + context5 + context6 + context7 +@@ -13,6 +14,7 @@ + context12 + context13 + context14 ++INSERTION2 + context15 + context16 + context17 +`, + }, + { + name: "Merge hunk with <= 5 lines of context", + text1: makeContext(15, 0), + text2: makeContext(5, 0) + "INSERTION1\n" + makeContext(5, 5) + "INSERTION2\n" + makeContext(5, 10), + want: `--- text1 ++++ text2 +@@ -3,11 +3,13 @@ + context2 + context3 + context4 ++INSERTION1 + context5 + context6 + context7 + context8 + context9 ++INSERTION2 + context10 + context11 + context12 +`, + }, + { + name: "Insert without newline", + text1: "context1", + text2: "context1\nnew line", + want: `--- text1 ++++ text2 +@@ -1 +1,2 @@ +-context1 +\ No newline at end of file ++context1 ++new line +\ No newline at end of file +`, + }, + { + name: "Removal without newline", + text1: "context1\nold line", + text2: "context1", + want: `--- text1 ++++ text2 +@@ -1,2 +1 @@ +-context1 +-old line +\ No newline at end of file ++context1 +\ No newline at end of file +`, + }, + { + name: "context without newline", + text1: "context0\nold1\ncontext1", + text2: "context0\nnew1\ncontext1", + want: `--- text1 ++++ text2 +@@ -1,3 +1,3 @@ + context0 +-old1 ++new1 + context1 +\ No newline at end of file +`, + }, + { + name: "Replace multiple subsequent lines", + text1: makeContext(5, 0) + "old1\nold2\nold3\n" + 
makeContext(5, 5), + text2: makeContext(5, 0) + "new1\nnew2\nnew3\n" + makeContext(5, 5), + want: `--- text1 ++++ text2 +@@ -3,9 +3,9 @@ + context2 + context3 + context4 +-old1 +-old2 +-old3 ++new1 ++new2 ++new3 + context5 + context6 + context7 +`, + }, + { + name: "empty text1", + text1: "", + text2: "new1\n", + want: `--- text1 ++++ text2 +@@ -0,0 +1 @@ ++new1 +`, + }, + { + name: "empty text2", + text1: "old1\n", + text2: "", + want: `--- text1 ++++ text2 +@@ -1 +0,0 @@ +-old1 +`, + }, + } + + for _, tc := range cases { + // Un-alias tc for compatibility with Go <1.22, where the parallel subtest closure below would otherwise capture the shared loop variable. + tc := tc + + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + dmp := diffmatchpatch.New() + + got := dmp.Unified(tc.text1, tc.text2, diffmatchpatch.UnifiedLabels("text1", "text2")) + + t.Logf("dmp.Unified() =\n%s", got) + + if got != tc.want { + t.Errorf("Unified() output differs (-want/+got):\n%s", cmp.Diff(tc.want, got)) + } + + // DiffLinesToChars / DiffCharsToLines are not required for correct results: a plain DiffMain() result must format to the same output. + diffs := dmp.DiffMain(tc.text1, tc.text2, false) + + got = dmp.DiffUnified(diffs, diffmatchpatch.UnifiedLabels("text1", "text2"), diffmatchpatch.UnifiedContextLines(3)) + if got != tc.want { + t.Errorf("DiffUnified() output differs (-want/+got):\n%s", cmp.Diff(tc.want, got)) + } + + }) + } +} + +func makeContext(n, start int) string { + var b strings.Builder + + for i := start; i < start+n; i++ { + fmt.Fprintf(&b, "context%d\n", i) + } + + return b.String() +} + +func ExampleDiffMatchPatch_DiffUnified() { + text1 := "Prefix\nHello, world!\nSuffix\n" + text2 := "Prefix\nHello, Golang!\nSuffix\n" + + dmp := diffmatchpatch.New() + + // Pre-process the inputs so that each codepoint in text[12]Enc represents one line. + text1Enc, text2Enc, lines := dmp.DiffLinesToChars(text1, text2) + + // Run the diff algorithm on the preprocessed inputs. + diffs := dmp.DiffMain(text1Enc, text2Enc, false) + + // Expand the diffs back into the full lines they represent. 
+ diffs = dmp.DiffCharsToLines(diffs, lines) + + // Format as unified diff. + unifiedDiff := dmp.DiffUnified(diffs, diffmatchpatch.UnifiedLabels("old.txt", "new.txt")) + + fmt.Print(unifiedDiff) + // Output: + // --- old.txt + // +++ new.txt + // @@ -1,3 +1,3 @@ + // Prefix + // -Hello, world! + // +Hello, Golang! + // Suffix +} diff --git a/go.mod b/go.mod index c7886ce..23378c1 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,7 @@ module github.com/sergi/go-diff require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/google/go-cmp v0.6.0 github.com/kr/pretty v0.1.0 // indirect github.com/stretchr/testify v1.4.0 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect diff --git a/go.sum b/go.sum index 8dd9f36..737e654 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=