From 09b44ce3e51994f19a6595988987d03ed9e729f6 Mon Sep 17 00:00:00 2001
From: Florian Forster <fforster@gitlab.com>
Date: Fri, 6 Sep 2024 14:25:37 +0200
Subject: [PATCH] feat: Add the `DiffUnified()` method for formatting a unified
 diff.

This adds a new `DiffUnified()` method which takes a `[]Diff` slice and formats
it in the "unified diff" format. The `[]Diff` slice does not necessarily need
to be a linewise diff.

The implementation deals gracefully with missing newlines at the end of either
side, as well as empty inputs on either side.

The method accepts two optional arguments: `UnifiedLabels()` for setting the
labels for the two inputs and `UnifiedContextLines()` for setting the number of
context lines printed.

Example usage:

```go
unified := dmp.DiffUnified(diffs,
	diffmatchpatch.UnifiedLabels("text1", "text2"),
	diffmatchpatch.UnifiedContextLines(3))
```

A convenience method, `Unified()`, takes two string inputs and returns the
unified diff.

Fixes: #124
---
 diffmatchpatch/unified.go      | 454 +++++++++++++++++++++++++++++++++
 diffmatchpatch/unified_test.go | 272 ++++++++++++++++++++
 go.mod                         |   1 +
 go.sum                         |   2 +
 4 files changed, 729 insertions(+)
 create mode 100644 diffmatchpatch/unified.go
 create mode 100644 diffmatchpatch/unified_test.go

diff --git a/diffmatchpatch/unified.go b/diffmatchpatch/unified.go
new file mode 100644
index 0000000..5056d25
--- /dev/null
+++ b/diffmatchpatch/unified.go
@@ -0,0 +1,454 @@
+package diffmatchpatch
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Unified computes a line-based diff between text1 and text2 and formats it in the "unified diff" format; it returns "" when the diff contains no changes.
+// Optionally pass UnifiedOption values to set the old/new labels (default "text1"/"text2") and the number of context lines (default DefaultContextLines).
+func (dmp *DiffMatchPatch) Unified(text1, text2 string, opts ...UnifiedOption) string {
+	options := newUnifiedOptions(opts)
+
+	text1Enc, text2Enc, lines := dmp.DiffLinesToChars(text1, text2) // encode each line as one character so DiffMain compares whole lines
+
+	diffs := dmp.DiffMain(text1Enc, text2Enc, false)
+	diffs = dmp.DiffCharsToLines(diffs, lines) // expand the encoded characters back into the lines they represent
+
+	unified := toUnified(diffs, options)
+
+	return unified.String()
+}
+
+// DiffUnified formats the diffs slice in the "unified diff" format. The diffs need not be linewise; they are split and merged into whole lines before formatting.
+// Optionally pass UnifiedOption values to set the old/new labels and the number of context lines. It returns "" when diffs contains only equalities.
+func (dmp *DiffMatchPatch) DiffUnified(diffs []Diff, opts ...UnifiedOption) string {
+	options := newUnifiedOptions(opts)
+
+	u := toUnified(diffs, options)
+
+	return u.String()
+}
+
+// toUnified converts diffs to linewise diffs and groups the changes into hunks,
+// each padded with up to opts.contextLines unchanged lines before and after.
+func toUnified(diffs []Diff, opts unifiedOptions) unified {
+	maxCtx := opts.contextLines * 2 // changes separated by more equal lines than this go into separate hunks
+	u := unified{
+		label1: opts.text1Label,
+		label2: opts.text2Label,
+	}
+
+	if isEqual(diffs) { // no changes: return a unified without hunks
+		return u
+	}
+
+	diffs = diffLinewise(diffs)
+
+	var (
+		h *hunk // hunk currently being built, nil if none is open
+
+		lineNo1 int    // number of text1 lines consumed so far
+		lineNo2 int    // number of text2 lines consumed so far
+		context []Diff // equal lines seen since the most recent change
+	)
+	for _, diff := range diffs {
+		switch diff.Type {
+		case DiffDelete:
+			lineNo1++
+		case DiffInsert:
+			lineNo2++
+		case DiffEqual:
+			lineNo1++
+			lineNo2++
+		}
+
+		if diff.Type == DiffEqual {
+			context = append(context, diff)
+			continue
+		}
+
+		// close previous hunk if more than maxCtx equal lines separate it from this change
+		if h != nil && len(context) > maxCtx {
+			cl := len(context)
+			if cl > opts.contextLines {
+				cl = opts.contextLines
+			}
+
+			h.diffs = append(h.diffs, context[:cl]...) // trailing context of the closed hunk
+
+			u.hunks = append(u.hunks, *h)
+			h = nil
+		}
+
+		// start new hunk, led by up to contextLines of the equal lines preceding this change
+		if h == nil {
+			cl := len(context)
+			if cl > opts.contextLines {
+				cl = opts.contextLines
+			}
+
+			l1 := lineNo1 - cl
+			l2 := lineNo2 - cl
+
+			// When starting a new hunk, the line number for lineNo1 XOR lineNo2
+			// has already been advanced, but not the other. Account for that in
+			// l1 or l2.
+			switch diff.Type {
+			case DiffDelete:
+				l2++
+			case DiffInsert:
+				l1++
+			}
+
+			h = &hunk{
+				fromLine: l1,
+				toLine:   l2,
+				diffs:    context[len(context)-cl:],
+			}
+
+			context = nil
+		}
+
+		h.diffs = append(h.diffs, context...) // hunk stays open: keep all equal lines since the previous change
+		context = nil
+
+		h.diffs = append(h.diffs, diff)
+	}
+
+	// close last hunk, followed by up to contextLines of the remaining equal lines
+	if h != nil {
+		cl := len(context)
+		if cl > opts.contextLines {
+			cl = opts.contextLines
+		}
+
+		h.diffs = append(h.diffs, context[:cl]...)
+
+		u.hunks = append(u.hunks, *h)
+		h = nil
+	}
+
+	return u
+}
+
+func isEqual(diffs []Diff) bool { // isEqual reports whether diffs contains no insertion or deletion; it is true for an empty slice.
+	for _, diff := range diffs {
+		if diff.Type != DiffEqual {
+			return false
+		}
+	}
+
+	return true
+}
+
+// diffLinewise splits and merges diffs so that each individual diff represents one line, including the final newline character (only a last line may lack it). Between equalities, deletions are ordered before insertions.
+func diffLinewise(diffs []Diff) []Diff {
+	var (
+		ret          []Diff
+		line1, line2 string // pending, not yet newline-terminated line of text1 / text2
+	)
+
+	diffs = diffCleanupNewline(diffs)
+
+	add := func(d Diff) { // add appends d.Text to the pending line(s) and emits every line that a newline has completed
+		switch d.Type {
+		case DiffDelete:
+			line1 = line1 + d.Text
+		case DiffInsert:
+			line2 = line2 + d.Text
+		default: // equal
+			line1 = line1 + d.Text
+			line2 = line2 + d.Text
+		}
+
+		if strings.HasSuffix(line1, "\n") && line1 == line2 { // both sides completed the same line: emit it once as an equality
+			ret = append(ret, Diff{
+				Type: DiffEqual,
+				Text: line1,
+			})
+
+			line1, line2 = "", ""
+		}
+
+		if strings.HasSuffix(line1, "\n") {
+			ret = append(ret, Diff{
+				Type: DiffDelete,
+				Text: line1,
+			})
+
+			line1 = ""
+		}
+
+		if strings.HasSuffix(line2, "\n") {
+			ret = append(ret, Diff{
+				Type: DiffInsert,
+				Text: line2,
+			})
+
+			line2 = ""
+		}
+	}
+
+	for _, diff := range diffs {
+		for _, segment := range strings.SplitAfter(diff.Text, "\n") { // each segment keeps its trailing "\n"
+			add(Diff{
+				Type: diff.Type,
+				Text: segment,
+			})
+		}
+	}
+
+	// line1 and/or line2 may be non-empty if there is no newline at the end of file.
+	if line1 != "" && line1 == line2 {
+		ret = append(ret, Diff{
+			Type: DiffEqual,
+			Text: line1,
+		})
+
+		line1, line2 = "", ""
+	}
+
+	if line1 != "" {
+		ret = append(ret, Diff{
+			Type: DiffDelete,
+			Text: line1,
+		})
+
+		line1 = ""
+	}
+
+	if line2 != "" {
+		ret = append(ret, Diff{
+			Type: DiffInsert,
+			Text: line2,
+		})
+
+		line2 = ""
+	}
+
+	return reorderDeletionsFirst(ret)
+}
+
+// diffCleanupNewline looks for single edits surrounded on both sides by equalities which can be shifted sideways to align on newlines: text ending in a newline that the edit shares with the start of the following equality is moved in front of the edit.
+func diffCleanupNewline(diffs []Diff) []Diff {
+	var ret []Diff
+
+	for i := 0; i < len(diffs); i++ {
+		if i < len(diffs)-2 && diffs[i].Type == DiffEqual && diffs[i+1].Type != DiffEqual && diffs[i+2].Type == DiffEqual {
+			common := prefixWithNewline(diffs[i+1].Text, diffs[i+2].Text)
+
+			// Convert ["=<equal>", "±<common\n><change>", "=<common\n><equal>"]
+			// to ["=<equal><common\n>", "±<change><common\n>", "=<equal>"]
+			if common != "" {
+				ret = append(ret,
+					Diff{
+						Type: DiffEqual,
+						Text: diffs[i].Text + common,
+					},
+					Diff{
+						Type: diffs[i+1].Type,
+						Text: strings.TrimPrefix(diffs[i+1].Text, common) + common,
+					},
+					Diff{
+						Type: DiffEqual,
+						Text: strings.TrimPrefix(diffs[i+2].Text, common),
+					},
+				)
+
+				i += 2 // the edit and the following equality have been emitted above
+				continue
+			}
+		}
+
+		ret = append(ret, diffs[i])
+	}
+
+	return ret
+}
+
+// prefixWithNewline returns the common prefix of text1 and text2, cut off after its last newline character.
+// If text1 and text2 do not have a common prefix, or the common prefix does not include a newline character, the empty string is returned.
+func prefixWithNewline(text1, text2 string) string {
+	prefix := New().DiffCommonPrefix(text1, text2) // length of the common prefix, used as a slice bound below
+
+	index := strings.LastIndex(text1[:prefix], "\n")
+	if index != -1 {
+		return text1[:index+1]
+	}
+
+	return ""
+}
+
+// reorderDeletionsFirst reorders changes so that deletions come before insertions, without crossing an equality boundary. The relative order within deletions and within insertions is kept.
+func reorderDeletionsFirst(diffs []Diff) []Diff {
+	var (
+		ret        []Diff
+		deletions  []Diff
+		insertions []Diff
+	)
+
+	for _, diff := range diffs {
+		switch diff.Type {
+		case DiffDelete:
+			deletions = append(deletions, diff)
+		case DiffInsert:
+			insertions = append(insertions, diff)
+		case DiffEqual: // flush the changes collected since the previous equality, deletions first
+			ret = append(ret, deletions...)
+			deletions = nil
+
+			ret = append(ret, insertions...)
+			insertions = nil
+
+			ret = append(ret, diff)
+		}
+	}
+
+	ret = append(ret, deletions...) // flush the changes following the last equality
+	ret = append(ret, insertions...)
+
+	return ret
+}
+
+// unified represents modifications in a form conducive to printing a unified diff.
+type unified struct {
+	label1, label2 string // labels printed on the "--- " and "+++ " header lines
+	hunks          []hunk // hunks in the order they are printed
+}
+
+// hunk is a list of nearby changes, separated by at most 2*contextLines lines.
+type hunk struct {
+	// The 1-based line in text1 (the old text) where the hunk starts.
+	fromLine int
+	// The 1-based line in text2 (the new text) where the hunk starts.
+	toLine int
+	// List of modifications. Each Diff represents one deleted, inserted, or equal line.
+	diffs []Diff
+}
+
+// numLines returns the number of lines the hunk covers in text1 (n1) and in text2 (n2).
+func (h hunk) numLines() (n1, n2 int) {
+	for _, diff := range h.diffs {
+		switch diff.Type {
+		case DiffDelete:
+			n1++
+		case DiffInsert:
+			n2++
+		case DiffEqual: // an equal line is part of both texts
+			n1++
+			n2++
+		}
+	}
+
+	return n1, n2
+}
+
+func (h hunk) String() string { // String renders the hunk as an "@@ -from[,count] +to[,count] @@" header followed by one prefixed line per diff.
+	var b strings.Builder
+
+	fmt.Fprint(&b, "@@")
+
+	numLines1, numLines2 := h.numLines()
+
+	switch {
+	case numLines1 > 1:
+		fmt.Fprintf(&b, " -%d,%d", h.fromLine, numLines1)
+	case h.fromLine == 1 && numLines1 == 0:
+		// Mimic GNU diff -u behavior when adding to empty file.
+		fmt.Fprintf(&b, " -0,0")
+	default: // only the start line is printed; the count is omitted
+		fmt.Fprintf(&b, " -%d", h.fromLine)
+	}
+
+	switch {
+	case numLines2 > 1:
+		fmt.Fprintf(&b, " +%d,%d", h.toLine, numLines2)
+	case h.toLine == 1 && numLines2 == 0:
+		// Mimic GNU diff -u behavior when the new text is empty (everything was removed).
+		fmt.Fprintf(&b, " +0,0")
+	default: // only the start line is printed; the count is omitted
+		fmt.Fprintf(&b, " +%d", h.toLine)
+	}
+
+	fmt.Fprint(&b, " @@\n")
+
+	for _, diff := range h.diffs {
+		switch diff.Type {
+		case DiffDelete:
+			fmt.Fprintf(&b, "-%s", diff.Text)
+		case DiffInsert:
+			fmt.Fprintf(&b, "+%s", diff.Text)
+		default:
+			fmt.Fprintf(&b, " %s", diff.Text)
+		}
+		if !strings.HasSuffix(diff.Text, "\n") { // terminate the line and flag the missing final newline
+			fmt.Fprintf(&b, "\n\\ No newline at end of file\n")
+		}
+	}
+
+	return b.String()
+}
+
+// String converts a unified diff to the standard textual form for that diff, or returns "" if it has no hunks.
+// The output of this function can be passed to tools like patch.
+func (u unified) String() string {
+	if len(u.hunks) == 0 {
+		return ""
+	}
+	var b strings.Builder
+	fmt.Fprintf(&b, "--- %s\n", u.label1)
+	fmt.Fprintf(&b, "+++ %s\n", u.label2)
+	for _, hunk := range u.hunks {
+		fmt.Fprint(&b, hunk) // hunk implements fmt.Stringer, so this prints hunk.String()
+	}
+
+	return b.String()
+}
+
+// DefaultContextLines is the number of unchanged lines of surrounding
+// context displayed by Unified and DiffUnified unless UnifiedContextLines is passed.
+const DefaultContextLines = 3
+
+// UnifiedOption is an option for Unified() and DiffUnified().
+type UnifiedOption func(*unifiedOptions)
+
+type unifiedOptions struct { // unifiedOptions holds the settings a UnifiedOption can modify.
+	contextLines int    // number of unchanged lines printed around changes
+	text1Label   string // label for the old text, printed after "--- "
+	text2Label   string // label for the new text, printed after "+++ "
+}
+
+func newUnifiedOptions(opts []UnifiedOption) unifiedOptions { // newUnifiedOptions returns the defaults with opts applied in order, so a later option overrides an earlier one.
+	ret := unifiedOptions{
+		contextLines: DefaultContextLines,
+		text1Label:   "text1",
+		text2Label:   "text2",
+	}
+
+	for _, o := range opts {
+		o(&ret)
+	}
+
+	return ret
+}
+
+// UnifiedContextLines sets the number of unchanged lines of surrounding context
+// printed. Defaults to DefaultContextLines, which is also used when lines <= 0.
+func UnifiedContextLines(lines int) UnifiedOption {
+	if lines <= 0 {
+		lines = DefaultContextLines
+	}
+
+	return func(o *unifiedOptions) {
+		o.contextLines = lines
+	}
+}
+
+// UnifiedLabels sets the labels for the old and new files, printed on the "--- " and "+++ " header lines. Defaults to "text1" and "text2".
+func UnifiedLabels(oldLabel, newLabel string) UnifiedOption {
+	return func(o *unifiedOptions) {
+		o.text1Label = oldLabel
+		o.text2Label = newLabel
+	}
+}
diff --git a/diffmatchpatch/unified_test.go b/diffmatchpatch/unified_test.go
new file mode 100644
index 0000000..de6cf06
--- /dev/null
+++ b/diffmatchpatch/unified_test.go
@@ -0,0 +1,272 @@
+package diffmatchpatch_test
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/sergi/go-diff/diffmatchpatch"
+)
+
+func TestDiffUnified(t *testing.T) { // TestDiffUnified checks the output of Unified() and DiffUnified() against expected unified diffs.
+	t.Parallel()
+
+	cases := []struct {
+		name  string // subtest name
+		text1 string // old text
+		text2 string // new text
+		want  string // expected unified diff; "" when the texts are equal
+	}{
+		{
+			name:  "No changes",
+			text1: "Hello, world!\n",
+			text2: "Hello, world!\n",
+			want:  "",
+		},
+		{
+			name:  "Insertion at beginning",
+			text1: "Hello, world!\n",
+			text2: "New line\nHello, world!\n",
+			want:  "--- text1\n+++ text2\n@@ -1 +1,2 @@\n+New line\n Hello, world!\n",
+		},
+		{
+			name:  "Insertion at end",
+			text1: "Hello, world!\n",
+			text2: "Hello, world!\nNew line\n",
+			want:  "--- text1\n+++ text2\n@@ -1 +1,2 @@\n Hello, world!\n+New line\n",
+		},
+		{
+			name:  "Insertion middle",
+			text1: "Hello, world!\nHello, world!\n",
+			text2: "Hello, world!\nNew line\nHello, world!\n",
+			want:  "--- text1\n+++ text2\n@@ -1,2 +1,3 @@\n Hello, world!\n+New line\n Hello, world!\n",
+		},
+		{
+			name:  "Removal at beginning",
+			text1: "Old line\nHello, world!\n",
+			text2: "Hello, world!\n",
+			want:  "--- text1\n+++ text2\n@@ -1,2 +1 @@\n-Old line\n Hello, world!\n",
+		},
+		{
+			name:  "Removal at end",
+			text1: "Hello, world!\nOld line\n",
+			text2: "Hello, world!\n",
+			want:  "--- text1\n+++ text2\n@@ -1,2 +1 @@\n Hello, world!\n-Old line\n",
+		},
+		{
+			name:  "Removal middle",
+			text1: "Hello, world!\nOld line\nHello, world!\n",
+			text2: "Hello, world!\nHello, world!\n",
+			want:  "--- text1\n+++ text2\n@@ -1,3 +1,2 @@\n Hello, world!\n-Old line\n Hello, world!\n",
+		},
+		{
+			name:  "Replacement",
+			text1: "Prefix\nHello, world!\nSuffix\n",
+			text2: "Prefix\nHello, Golang!\nSuffix\n",
+			want:  "--- text1\n+++ text2\n@@ -1,3 +1,3 @@\n Prefix\n-Hello, world!\n+Hello, Golang!\n Suffix\n",
+		},
+		{
+			name:  "Insertion",
+			text1: makeContext(10, 0),
+			text2: makeContext(5, 0) + "INSERTION\n" + makeContext(5, 5),
+			want:  "--- text1\n+++ text2\n@@ -3,6 +3,7 @@\n context2\n context3\n context4\n+INSERTION\n context5\n context6\n context7\n",
+		},
+		{
+			name:  "Multiple hunks",
+			text1: makeContext(20, 0),
+			text2: makeContext(5, 0) + "INSERTION1\n" + makeContext(10, 5) + "INSERTION2\n" + makeContext(5, 15),
+			want: `--- text1
++++ text2
+@@ -3,6 +3,7 @@
+ context2
+ context3
+ context4
++INSERTION1
+ context5
+ context6
+ context7
+@@ -13,6 +14,7 @@
+ context12
+ context13
+ context14
++INSERTION2
+ context15
+ context16
+ context17
+`,
+		},
+		{
+			name:  "Merge hunk with <= 5 lines of context",
+			text1: makeContext(15, 0),
+			text2: makeContext(5, 0) + "INSERTION1\n" + makeContext(5, 5) + "INSERTION2\n" + makeContext(5, 10),
+			want: `--- text1
++++ text2
+@@ -3,11 +3,13 @@
+ context2
+ context3
+ context4
++INSERTION1
+ context5
+ context6
+ context7
+ context8
+ context9
++INSERTION2
+ context10
+ context11
+ context12
+`,
+		},
+		{
+			name:  "Insert without newline",
+			text1: "context1",
+			text2: "context1\nnew line",
+			want: `--- text1
++++ text2
+@@ -1 +1,2 @@
+-context1
+\ No newline at end of file
++context1
++new line
+\ No newline at end of file
+`,
+		},
+		{
+			name:  "Removal without newline",
+			text1: "context1\nold line",
+			text2: "context1",
+			want: `--- text1
++++ text2
+@@ -1,2 +1 @@
+-context1
+-old line
+\ No newline at end of file
++context1
+\ No newline at end of file
+`,
+		},
+		{
+			name:  "context without newline",
+			text1: "context0\nold1\ncontext1",
+			text2: "context0\nnew1\ncontext1",
+			want: `--- text1
++++ text2
+@@ -1,3 +1,3 @@
+ context0
+-old1
++new1
+ context1
+\ No newline at end of file
+`,
+		},
+		{
+			name:  "Replace multiple subsequent lines",
+			text1: makeContext(5, 0) + "old1\nold2\nold3\n" + makeContext(5, 5),
+			text2: makeContext(5, 0) + "new1\nnew2\nnew3\n" + makeContext(5, 5),
+			want: `--- text1
++++ text2
+@@ -3,9 +3,9 @@
+ context2
+ context3
+ context4
+-old1
+-old2
+-old3
++new1
++new2
++new3
+ context5
+ context6
+ context7
+`,
+		},
+		{
+			name:  "empty text1",
+			text1: "",
+			text2: "new1\n",
+			want: `--- text1
++++ text2
+@@ -0,0 +1 @@
++new1
+`,
+		},
+		{
+			name:  "empty text2",
+			text1: "old1\n",
+			text2: "",
+			want: `--- text1
++++ text2
+@@ -1 +0,0 @@
+-old1
+`,
+		},
+	}
+
+	for _, tc := range cases {
+		// Un-alias tc for compatibility with Go <1.22.
+		tc := tc
+
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			dmp := diffmatchpatch.New()
+
+			got := dmp.Unified(tc.text1, tc.text2, diffmatchpatch.UnifiedLabels("text1", "text2"))
+
+			t.Logf("dmp.Unified() =\n%s", got)
+
+			if got != tc.want {
+				t.Errorf("Unified() output differs (-want/+got):\n%s", cmp.Diff(tc.want, got))
+			}
+
+			// DiffLinesToChars / DiffCharsToLines is not required for correct results:
+			// a character-based diff passed to DiffUnified must yield the same output.
+			diffs := dmp.DiffMain(tc.text1, tc.text2, false)
+
+			got = dmp.DiffUnified(diffs, diffmatchpatch.UnifiedLabels("text1", "text2"), diffmatchpatch.UnifiedContextLines(3))
+			if got != tc.want {
+				t.Errorf("DiffUnified() output differs (-want/+got):\n%s", cmp.Diff(tc.want, got))
+			}
+
+		})
+	}
+}
+
+func makeContext(n, start int) string { // makeContext returns the n lines "context<i>\n" for i = start .. start+n-1.
+	var b strings.Builder
+
+	for i := start; i < start+n; i++ {
+		fmt.Fprintf(&b, "context%d\n", i)
+	}
+
+	return b.String()
+}
+
+func ExampleDiffMatchPatch_DiffUnified() {
+	text1 := "Prefix\nHello, world!\nSuffix\n"
+	text2 := "Prefix\nHello, Golang!\nSuffix\n"
+
+	dmp := diffmatchpatch.New()
+
+	// Pre-process the inputs so that each codepoint in text[12]Enc represents one line.
+	text1Enc, text2Enc, lines := dmp.DiffLinesToChars(text1, text2)
+
+	// Run the diff algorithm on the preprocessed inputs.
+	diffs := dmp.DiffMain(text1Enc, text2Enc, false)
+
+	// Expand the diffs back into the full lines they represent.
+	diffs = dmp.DiffCharsToLines(diffs, lines)
+
+	// Format as unified diff.
+	unifiedDiff := dmp.DiffUnified(diffs, diffmatchpatch.UnifiedLabels("old.txt", "new.txt"))
+
+	fmt.Print(unifiedDiff)
+	// Output:
+	// --- old.txt
+	// +++ new.txt
+	// @@ -1,3 +1,3 @@
+	//  Prefix
+	// -Hello, world!
+	// +Hello, Golang!
+	//  Suffix
+}
diff --git a/go.mod b/go.mod
index c7886ce..23378c1 100644
--- a/go.mod
+++ b/go.mod
@@ -2,6 +2,7 @@ module github.com/sergi/go-diff
 
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/google/go-cmp v0.6.0
 	github.com/kr/pretty v0.1.0 // indirect
 	github.com/stretchr/testify v1.4.0
 	gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
diff --git a/go.sum b/go.sum
index 8dd9f36..737e654 100644
--- a/go.sum
+++ b/go.sum
@@ -2,6 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=