From 8049412ebdee0a41d36b2b6197f2d565e78882de Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 30 Apr 2018 23:54:13 +0200 Subject: [PATCH 01/19] linear-assignment: a function to solve least-cost assignment problems The problem solved by the code introduced in this commit goes like this: given two sets of items, and a cost matrix which says how much it "costs" to assign any given item of the first set to any given item of the second, assign all items (except when the sets have different size) in the cheapest way. We use the Jonker-Volgenant algorithm to solve the assignment problem to answer questions such as: given two different versions of a topic branch (or iterations of a patch series), what is the best pairing of commits/patches between the different versions? Signed-off-by: Johannes Schindelin --- Makefile | 1 + linear-assignment.c | 203 ++++++++++++++++++++++++++++++++++++++++++++ linear-assignment.h | 22 +++++ 3 files changed, 226 insertions(+) create mode 100644 linear-assignment.c create mode 100644 linear-assignment.h diff --git a/Makefile b/Makefile index 40a82efcea7d41..9eb18c9139c080 100644 --- a/Makefile +++ b/Makefile @@ -853,6 +853,7 @@ LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o LIB_OBJS += hashmap.o +LIB_OBJS += linear-assignment.o LIB_OBJS += help.o LIB_OBJS += hex.o LIB_OBJS += ident.o diff --git a/linear-assignment.c b/linear-assignment.c new file mode 100644 index 00000000000000..0b0344b5f54295 --- /dev/null +++ b/linear-assignment.c @@ -0,0 +1,203 @@ +/* + * Based on: Jonker, R., & Volgenant, A. (1987). A shortest augmenting path + * algorithm for dense and sparse linear assignment problems. Computing, + * 38(4), 325-340. + */ +#include "cache.h" +#include "linear-assignment.h" + +#define COST(column, row) cost[(column) + column_count * (row)] + +/* + * The parameter `cost` is the cost matrix: the cost to assign column j to row + * i is `cost[j + column_count * i]. + */ +void compute_assignment(int column_count, int row_count, int *cost, + int *column2row, int *row2column) +{ + int *v, *d; + int *free_row, free_count = 0, saved_free_count, *pred, *col; + int i, j, phase; + + memset(column2row, -1, sizeof(int) * column_count); + memset(row2column, -1, sizeof(int) * row_count); + ALLOC_ARRAY(v, column_count); + + /* column reduction */ + for (j = column_count - 1; j >= 0; j--) { + int i1 = 0; + + for (i = 1; i < row_count; i++) + if (COST(j, i1) > COST(j, i)) + i1 = i; + v[j] = COST(j, i1); + if (row2column[i1] == -1) { + /* row i1 unassigned */ + row2column[i1] = j; + column2row[j] = i1; + } else { + if (row2column[i1] >= 0) + row2column[i1] = -2 - row2column[i1]; + column2row[j] = -1; + } + } + + /* reduction transfer */ + ALLOC_ARRAY(free_row, row_count); + for (i = 0; i < row_count; i++) { + int j1 = row2column[i]; + if (j1 == -1) + free_row[free_count++] = i; + else if (j1 < -1) + row2column[i] = -2 - j1; + else { + int min = COST(!j1, i) - v[!j1]; + for (j = 1; j < column_count; j++) + if (j != j1 && min > COST(j, i) - v[j]) + min = COST(j, i) - v[j]; + v[j1] -= min; + } + } + + if (free_count == + (column_count < row_count ? row_count - column_count : 0)) { + free(v); + free(free_row); + return; + } + + /* augmenting row reduction */ + for (phase = 0; phase < 2; phase++) { + int k = 0; + + saved_free_count = free_count; + free_count = 0; + while (k < saved_free_count) { + int u1, u2; + int j1 = 0, j2, i0; + + i = free_row[k++]; + u1 = COST(j1, i) - v[j1]; + j2 = -1; + u2 = INT_MAX; + for (j = 1; j < column_count; j++) { + int c = COST(j, i) - v[j]; + if (u2 > c) { + if (u1 < c) { + u2 = c; + j2 = j; + } else { + u2 = u1; + u1 = c; + j2 = j1; + j1 = j; + } + } + } + if (j2 < 0) { + j2 = j1; + u2 = u1; + } + + i0 = column2row[j1]; + if (u1 < u2) + v[j1] -= u2 - u1; + else if (i0 >= 0) { + j1 = j2; + i0 = column2row[j1]; + } + + if (i0 >= 0) { + if (u1 < u2) + free_row[--k] = i0; + else + free_row[free_count++] = i0; + } + row2column[i] = j1; + column2row[j1] = i; + } + } + + /* augmentation */ + saved_free_count = free_count; + ALLOC_ARRAY(d, column_count); + ALLOC_ARRAY(pred, column_count); + ALLOC_ARRAY(col, column_count); + for (free_count = 0; free_count < saved_free_count; free_count++) { + int i1 = free_row[free_count], low = 0, up = 0, last, k; + int min, c, u1; + + for (j = 0; j < column_count; j++) { + d[j] = COST(j, i1) - v[j]; + pred[j] = i1; + col[j] = j; + } + + j = -1; + do { + last = low; + min = d[col[up++]]; + for (k = up; k < column_count; k++) { + j = col[k]; + c = d[j]; + if (c <= min) { + if (c < min) { + up = low; + min = c; + } + col[k] = col[up]; + col[up++] = j; + } + } + for (k = low; k < up; k++) + if (column2row[col[k]] == -1) + goto update; + + /* scan a row */ + do { + int j1 = col[low++]; + + i = column2row[j1]; + u1 = COST(j1, i) - v[j1] - min; + for (k = up; k < column_count; k++) { + j = col[k]; + c = COST(j, i) - v[j] - u1; + if (c < d[j]) { + d[j] = c; + pred[j] = i; + if (c == min) { + if (column2row[j] == -1) + goto update; + col[k] = col[up]; + col[up++] = j; + } + } + } + } while (low != up); + } while (low == up); + +update: + /* updating of the column pieces */ + for (k = 0; k < last; k++) { + int j1 = col[k]; + v[j1] += d[j1] - min; + } + + /* augmentation */ + do { + if (j < 0) + BUG("negative j: %d", j); + i = pred[j]; + column2row[j] = i; + k = j; + j = row2column[i]; + row2column[i] = k; + } while (i1 != i); + } + + free(col); + free(pred); + free(d); + free(v); + free(free_row); +} diff --git a/linear-assignment.h b/linear-assignment.h new file mode 100644 index 00000000000000..fc4c502c85a139 --- /dev/null +++ b/linear-assignment.h @@ -0,0 +1,22 @@ +#ifndef HUNGARIAN_H +#define HUNGARIAN_H + +/* + * Compute an assignment of columns -> rows (and vice versa) such that every + * column is assigned to at most one row (and vice versa) minimizing the + * overall cost. + * + * The parameter `cost` is the cost matrix: the cost to assign column j to row + * i is `cost[j + column_count * i]. + * + * The arrays column2row and row2column will be populated with the respective + * assignments (-1 for unassigned, which can happen only if column_count != + * row_count). + */ +void compute_assignment(int column_count, int row_count, int *cost, + int *column2row, int *row2column); + +/* The maximal cost in the cost matrix (to prevent integer overflows). */ +#define COST_MAX (1<<16) + +#endif From d6099802b3139ee015c5de04fbdcdc1557bcbe55 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 1 May 2018 21:42:28 +0200 Subject: [PATCH 02/19] Introduce `range-diff` to compare iterations of a topic branch This command does not do a whole lot so far, apart from showing a usage that is oddly similar to that of `git tbdiff`. And for a good reason: the next commits will turn `range-branch` into a full-blown replacement for `tbdiff`. At this point, we ignore tbdiff's color options, as they will all be implemented later using diff_options. Signed-off-by: Johannes Schindelin --- .gitignore | 1 + Makefile | 1 + builtin.h | 1 + builtin/range-diff.c | 25 +++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + 6 files changed, 30 insertions(+) create mode 100644 builtin/range-diff.c diff --git a/.gitignore b/.gitignore index dc51231eb23cc9..e70d59fda3b0f8 100644 --- a/.gitignore +++ b/.gitignore @@ -112,6 +112,7 @@ /git-pull /git-push /git-quiltimport +/git-range-diff /git-read-tree /git-rebase /git-rebase--am diff --git a/Makefile b/Makefile index 9eb18c9139c080..405cebad86b5a6 100644 --- a/Makefile +++ b/Makefile @@ -1038,6 +1038,7 @@ BUILTIN_OBJS += builtin/prune-packed.o BUILTIN_OBJS += builtin/prune.o BUILTIN_OBJS += builtin/pull.o BUILTIN_OBJS += builtin/push.o +BUILTIN_OBJS += builtin/range-diff.o BUILTIN_OBJS += builtin/read-tree.o BUILTIN_OBJS += builtin/rebase--helper.o BUILTIN_OBJS += builtin/receive-pack.o diff --git a/builtin.h b/builtin.h index 42378f3aa471eb..ae28adc9b23702 100644 --- a/builtin.h +++ b/builtin.h @@ -200,6 +200,7 @@ extern int cmd_prune(int argc, const char **argv, const char *prefix); extern int cmd_prune_packed(int argc, const char **argv, const char *prefix); extern int cmd_pull(int argc, const char **argv, const char *prefix); extern int cmd_push(int argc, const char **argv, const char *prefix); +extern int cmd_range_diff(int argc, const char **argv, const char *prefix); extern int cmd_read_tree(int argc, const char **argv, const char *prefix); extern int cmd_rebase__helper(int argc, const char **argv, const char *prefix); extern int cmd_receive_pack(int argc, const char **argv, const char *prefix); diff --git a/builtin/range-diff.c b/builtin/range-diff.c new file mode 100644 index 00000000000000..36788ea4f2b7a7 --- /dev/null +++ b/builtin/range-diff.c @@ -0,0 +1,25 @@ +#include "cache.h" +#include "builtin.h" +#include "parse-options.h" + +static const char * const builtin_range_diff_usage[] = { +N_("git range-diff [] .. .."), +N_("git range-diff [] ..."), +N_("git range-diff [] "), +NULL +}; + +int cmd_range_diff(int argc, const char **argv, const char *prefix) +{ + int creation_factor = 60; + struct option options[] = { + OPT_INTEGER(0, "creation-factor", &creation_factor, + N_("Percentage by which creation is weighted")), + OPT_END() + }; + + argc = parse_options(argc, argv, NULL, options, + builtin_range_diff_usage, 0); + + return 0; +} diff --git a/command-list.txt b/command-list.txt index a1fad28fd82da1..0a3a70c3d7f806 100644 --- a/command-list.txt +++ b/command-list.txt @@ -103,6 +103,7 @@ git-prune-packed plumbingmanipulators git-pull mainporcelain remote git-push mainporcelain remote git-quiltimport foreignscminterface +git-range-diff mainporcelain git-read-tree plumbingmanipulators git-rebase mainporcelain history git-receive-pack synchelpers diff --git a/git.c b/git.c index 4ff02d14679438..5dcb21fd4b2b6a 100644 --- a/git.c +++ b/git.c @@ -445,6 +445,7 @@ static struct cmd_struct commands[] = { { "prune-packed", cmd_prune_packed, RUN_SETUP }, { "pull", cmd_pull, RUN_SETUP | NEED_WORK_TREE }, { "push", cmd_push, RUN_SETUP }, + { "range-diff", cmd_range_diff, RUN_SETUP | USE_PAGER }, { "read-tree", cmd_read_tree, RUN_SETUP | SUPPORT_SUPER_PREFIX}, { "rebase--helper", cmd_rebase__helper, RUN_SETUP | NEED_WORK_TREE }, { "receive-pack", cmd_receive_pack }, From 86bfce1f0df9a35c3f8ee319934f2dbc9cdaf554 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 2 May 2018 02:34:01 +0200 Subject: [PATCH 03/19] range-diff: first rudimentary implementation At this stage, `git range-diff` can determine corresponding commits of two related commit ranges. This makes use of the recently introduced implementation of the Hungarian algorithm. The core of this patch is a straight port of the ideas of tbdiff, the apparently dormant project at https://github.com/trast/tbdiff. The output does not at all match `tbdiff`'s output yet, as this patch really concentrates on getting the patch matching part right. Note: due to differences in the diff algorithm (`tbdiff` uses the Python module `difflib`, Git uses its xdiff fork), the cost matrix calculated by `range-diff` is different (but very similar) to the one calculated by `tbdiff`. Therefore, it is possible that they find different matching commits in corner cases (e.g. when a patch was split into two patches of roughly equal length). Signed-off-by: Johannes Schindelin --- Makefile | 1 + builtin/range-diff.c | 47 ++++++- range-diff.c | 307 +++++++++++++++++++++++++++++++++++++++++++ range-diff.h | 7 + 4 files changed, 359 insertions(+), 3 deletions(-) create mode 100644 range-diff.c create mode 100644 range-diff.h diff --git a/Makefile b/Makefile index 405cebad86b5a6..80f76e22571762 100644 --- a/Makefile +++ b/Makefile @@ -904,6 +904,7 @@ LIB_OBJS += progress.o LIB_OBJS += prompt.o LIB_OBJS += protocol.o LIB_OBJS += quote.o +LIB_OBJS += range-diff.o LIB_OBJS += reachable.o LIB_OBJS += read-cache.o LIB_OBJS += reflog-walk.o diff --git a/builtin/range-diff.c b/builtin/range-diff.c index 36788ea4f2b7a7..c37a72100fbd19 100644 --- a/builtin/range-diff.c +++ b/builtin/range-diff.c @@ -1,6 +1,7 @@ #include "cache.h" #include "builtin.h" #include "parse-options.h" +#include "range-diff.h" static const char * const builtin_range_diff_usage[] = { N_("git range-diff [] .. .."), @@ -17,9 +18,49 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix) N_("Percentage by which creation is weighted")), OPT_END() }; + int res = 0; + struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT; - argc = parse_options(argc, argv, NULL, options, - builtin_range_diff_usage, 0); + argc = parse_options(argc, argv, NULL, options, builtin_range_diff_usage, + 0); - return 0; + if (argc == 2) { + if (!strstr(argv[0], "..")) + warning(_("no .. in range: '%s'"), argv[0]); + strbuf_addstr(&range1, argv[0]); + + if (!strstr(argv[1], "..")) + warning(_("no .. in range: '%s'"), argv[1]); + strbuf_addstr(&range2, argv[1]); + } else if (argc == 3) { + strbuf_addf(&range1, "%s..%s", argv[0], argv[1]); + strbuf_addf(&range2, "%s..%s", argv[0], argv[2]); + } else if (argc == 1) { + const char *b = strstr(argv[0], "..."), *a = argv[0]; + int a_len; + + if (!b) + die(_("single arg format requires a symmetric range")); + + a_len = (int)(b - a); + if (!a_len) { + a = "HEAD"; + a_len = strlen(a); + } + b += 3; + if (!*b) + b = "HEAD"; + strbuf_addf(&range1, "%s..%.*s", b, a_len, a); + strbuf_addf(&range2, "%.*s..%s", a_len, a, b); + } else { + error(_("need two commit ranges")); + usage_with_options(builtin_range_diff_usage, options); + } + + res = show_range_diff(range1.buf, range2.buf, creation_factor); + + strbuf_release(&range1); + strbuf_release(&range2); + + return res; } diff --git a/range-diff.c b/range-diff.c new file mode 100644 index 00000000000000..e7e5f08b96d867 --- /dev/null +++ b/range-diff.c @@ -0,0 +1,307 @@ +#include "cache.h" +#include "range-diff.h" +#include "string-list.h" +#include "run-command.h" +#include "argv-array.h" +#include "hashmap.h" +#include "xdiff-interface.h" +#include "linear-assignment.h" + +struct patch_util { + /* For the search for an exact match */ + struct hashmap_entry e; + const char *diff, *patch; + + int i; + int diffsize; + size_t diff_offset; + /* the index of the matching item in the other branch, or -1 */ + int matching; + struct object_id oid; +}; + +/* + * Reads the patches into a string list, with the `util` field being populated + * as struct object_id (will need to be free()d). + */ +static int read_patches(const char *range, struct string_list *list) +{ + struct child_process cp = CHILD_PROCESS_INIT; + FILE *in; + struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT; + struct patch_util *util = NULL; + int in_header = 1; + + argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", + "--reverse", "--date-order", "--decorate=no", + "--no-abbrev-commit", range, + NULL); + cp.out = -1; + cp.no_stdin = 1; + cp.git_cmd = 1; + + if (start_command(&cp)) + return error_errno(_("could not start `log`")); + in = fdopen(cp.out, "r"); + if (!in) { + error_errno(_("could not read `log` output")); + finish_command(&cp); + return -1; + } + + while (strbuf_getline(&line, in) != EOF) { + const char *p; + + if (skip_prefix(line.buf, "commit ", &p)) { + if (util) { + string_list_append(list, buf.buf)->util = util; + strbuf_reset(&buf); + } + util = xcalloc(sizeof(*util), 1); + if (get_oid(p, &util->oid)) { + error(_("could not parse commit '%s'"), p); + free(util); + string_list_clear(list, 1); + strbuf_release(&buf); + strbuf_release(&line); + fclose(in); + finish_command(&cp); + return -1; + } + util->matching = -1; + in_header = 1; + continue; + } + + if (starts_with(line.buf, "diff --git")) { + in_header = 0; + strbuf_addch(&buf, '\n'); + if (!util->diff_offset) + util->diff_offset = buf.len; + strbuf_addbuf(&buf, &line); + } else if (in_header) { + if (starts_with(line.buf, "Author: ")) { + strbuf_addbuf(&buf, &line); + strbuf_addstr(&buf, "\n\n"); + } else if (starts_with(line.buf, " ")) { + strbuf_addbuf(&buf, &line); + strbuf_addch(&buf, '\n'); + } + continue; + } else if (starts_with(line.buf, "@@ ")) + strbuf_addstr(&buf, "@@"); + else if (line.buf[0] && !starts_with(line.buf, "index ")) + /* + * A completely blank (not ' \n', which is context) + * line is not valid in a diff. We skip it + * silently, because this neatly handles the blank + * separator line between commits in git-log + * output. + */ + strbuf_addbuf(&buf, &line); + else + continue; + + strbuf_addch(&buf, '\n'); + util->diffsize++; + } + fclose(in); + strbuf_release(&line); + + if (util) + string_list_append(list, buf.buf)->util = util; + strbuf_release(&buf); + + if (finish_command(&cp)) + return -1; + + return 0; +} + +static int patch_util_cmp(const void *dummy, const struct patch_util *a, + const struct patch_util *b, const char *keydata) +{ + return strcmp(a->diff, keydata ? keydata : b->diff); +} + +static void find_exact_matches(struct string_list *a, struct string_list *b) +{ + struct hashmap map; + int i; + + hashmap_init(&map, (hashmap_cmp_fn)patch_util_cmp, NULL, 0); + + /* First, add the patches of a to a hash map */ + for (i = 0; i < a->nr; i++) { + struct patch_util *util = a->items[i].util; + + util->i = i; + util->patch = a->items[i].string; + util->diff = util->patch + util->diff_offset; + hashmap_entry_init(util, strhash(util->diff)); + hashmap_add(&map, util); + } + + /* Now try to find exact matches in b */ + for (i = 0; i < b->nr; i++) { + struct patch_util *util = b->items[i].util, *other; + + util->i = i; + util->patch = b->items[i].string; + util->diff = util->patch + util->diff_offset; + hashmap_entry_init(util, strhash(util->diff)); + other = hashmap_remove(&map, util, NULL); + if (other) { + if (other->matching >= 0) + BUG("already assigned!"); + + other->matching = i; + util->matching = other->i; + } + } + + hashmap_free(&map, 0); +} + +static void diffsize_consume(void *data, char *line, unsigned long len) +{ + (*(int *)data)++; +} + +static int diffsize(const char *a, const char *b) +{ + xpparam_t pp = { 0 }; + xdemitconf_t cfg = { 0 }; + mmfile_t mf1, mf2; + int count = 0; + + mf1.ptr = (char *)a; + mf1.size = strlen(a); + mf2.ptr = (char *)b; + mf2.size = strlen(b); + + cfg.ctxlen = 3; + if (!xdi_diff_outf(&mf1, &mf2, diffsize_consume, &count, &pp, &cfg)) + return count; + + error(_("failed to generate diff")); + return COST_MAX; +} + +static void get_correspondences(struct string_list *a, struct string_list *b, + int creation_factor) +{ + int n = a->nr + b->nr; + int *cost, c, *a2b, *b2a; + int i, j; + + ALLOC_ARRAY(cost, st_mult(n, n)); + ALLOC_ARRAY(a2b, n); + ALLOC_ARRAY(b2a, n); + + for (i = 0; i < a->nr; i++) { + struct patch_util *a_util = a->items[i].util; + + for (j = 0; j < b->nr; j++) { + struct patch_util *b_util = b->items[j].util; + + if (a_util->matching == j) + c = 0; + else if (a_util->matching < 0 && b_util->matching < 0) + c = diffsize(a_util->diff, b_util->diff); + else + c = COST_MAX; + cost[i + n * j] = c; + } + + c = a_util->matching < 0 ? + a_util->diffsize * creation_factor / 100 : COST_MAX; + for (j = b->nr; j < n; j++) + cost[i + n * j] = c; + } + + for (j = 0; j < b->nr; j++) { + struct patch_util *util = b->items[j].util; + + c = util->matching < 0 ? + util->diffsize * creation_factor / 100 : COST_MAX; + for (i = a->nr; i < n; i++) + cost[i + n * j] = c; + } + + for (i = a->nr; i < n; i++) + for (j = b->nr; j < n; j++) + cost[i + n * j] = 0; + + compute_assignment(n, n, cost, a2b, b2a); + + for (i = 0; i < a->nr; i++) + if (a2b[i] >= 0 && a2b[i] < b->nr) { + struct patch_util *a_util = a->items[i].util; + struct patch_util *b_util = b->items[a2b[i]].util; + + a_util->matching = a2b[i]; + b_util->matching = i; + } + + free(cost); + free(a2b); + free(b2a); +} + +static const char *short_oid(struct patch_util *util) +{ + return find_unique_abbrev(util->oid.hash, DEFAULT_ABBREV); +} + +static void output(struct string_list *a, struct string_list *b) +{ + int i; + + for (i = 0; i < b->nr; i++) { + struct patch_util *util = b->items[i].util, *prev; + + if (util->matching < 0) + printf("-: -------- > %d: %s\n", + i + 1, short_oid(util)); + else { + prev = a->items[util->matching].util; + printf("%d: %s ! %d: %s\n", + util->matching + 1, short_oid(prev), + i + 1, short_oid(util)); + } + } + + for (i = 0; i < a->nr; i++) { + struct patch_util *util = a->items[i].util; + + if (util->matching < 0) + printf("%d: %s < -: --------\n", + i + 1, short_oid(util)); + } +} + +int show_range_diff(const char *range1, const char *range2, + int creation_factor) +{ + int res = 0; + + struct string_list branch1 = STRING_LIST_INIT_DUP; + struct string_list branch2 = STRING_LIST_INIT_DUP; + + if (read_patches(range1, &branch1)) + res = error(_("could not parse log for '%s'"), range1); + if (!res && read_patches(range2, &branch2)) + res = error(_("could not parse log for '%s'"), range2); + + if (!res) { + find_exact_matches(&branch1, &branch2); + get_correspondences(&branch1, &branch2, creation_factor); + output(&branch1, &branch2); + } + + string_list_clear(&branch1, 1); + string_list_clear(&branch2, 1); + + return res; +} diff --git a/range-diff.h b/range-diff.h new file mode 100644 index 00000000000000..dd30449c47b16c --- /dev/null +++ b/range-diff.h @@ -0,0 +1,7 @@ +#ifndef BRANCH_DIFF_H +#define BRANCH_DIFF_H + +int show_range_diff(const char *range1, const char *range2, + int creation_factor); + +#endif From a559f79d8cb89810a4b657d54c3e4efd62a6e212 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 2 May 2018 12:22:29 +0200 Subject: [PATCH 04/19] range-diff: improve the order of the shown commits This patch lets `git range-diff` use the same order as tbdiff. The idea is simple: for left-to-right readers, it is natural to assume that the `git range-diff` is performed between an older vs a newer version of the branch. As such, the user is probably more interested in the question "where did this come from?" rather than "where did that one go?". To that end, we list the commits in the order of the second commit range ("the newer version"), inserting the unmatched commits of the first commit range as soon as all their predecessors have been shown. Signed-off-by: Johannes Schindelin --- range-diff.c | 59 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/range-diff.c b/range-diff.c index e7e5f08b96d867..dafa2b23bc4af3 100644 --- a/range-diff.c +++ b/range-diff.c @@ -12,7 +12,7 @@ struct patch_util { struct hashmap_entry e; const char *diff, *patch; - int i; + int i, shown; int diffsize; size_t diff_offset; /* the index of the matching item in the other branch, or -1 */ @@ -256,28 +256,49 @@ static const char *short_oid(struct patch_util *util) static void output(struct string_list *a, struct string_list *b) { - int i; - - for (i = 0; i < b->nr; i++) { - struct patch_util *util = b->items[i].util, *prev; + int i = 0, j = 0; + + /* + * We assume the user is really more interested in the second argument + * ("newer" version). To that end, we print the output in the order of + * the RHS (the `b` parameter). To put the LHS (the `a` parameter) + * commits that are no longer in the RHS into a good place, we place + * them once we have shown all of their predecessors in the LHS. + */ + + while (i < a->nr || j < b->nr) { + struct patch_util *a_util, *b_util; + a_util = i < a->nr ? a->items[i].util : NULL; + b_util = j < b->nr ? b->items[j].util : NULL; + + /* Skip all the already-shown commits from the LHS. */ + while (i < a->nr && a_util->shown) + a_util = ++i < a->nr ? a->items[i].util : NULL; + + /* Show unmatched LHS commit whose predecessors were shown. */ + if (i < a->nr && a_util->matching < 0) { + printf("%d: %s < -: --------\n", + i + 1, short_oid(a_util)); + i++; + continue; + } - if (util->matching < 0) + /* Show unmatched RHS commits. */ + while (j < b->nr && b_util->matching < 0) { printf("-: -------- > %d: %s\n", - i + 1, short_oid(util)); - else { - prev = a->items[util->matching].util; - printf("%d: %s ! %d: %s\n", - util->matching + 1, short_oid(prev), - i + 1, short_oid(util)); + j + 1, short_oid(b_util)); + b_util = ++j < b->nr ? b->items[j].util : NULL; } - } - - for (i = 0; i < a->nr; i++) { - struct patch_util *util = a->items[i].util; - if (util->matching < 0) - printf("%d: %s < -: --------\n", - i + 1, short_oid(util)); + /* Show matching LHS/RHS pair. */ + if (j < b->nr) { + a_util = a->items[b_util->matching].util; + printf("%d: %s ! %d: %s\n", + b_util->matching + 1, short_oid(a_util), + j + 1, short_oid(b_util)); + a_util->shown = 1; + j++; + } } } From b487b74fe372ede887a82cdf5b343944514533cc Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 6 May 2018 17:26:19 +0200 Subject: [PATCH 05/19] range-diff: also show the diff between patches Just like tbdiff, we now show the diff between matching patches. This is a "diff of two diffs", so it can be a bit daunting to read for the beginner. An alternative would be to display an interdiff, i.e. the hypothetical diff which is the result of first reverting the old diff and then applying the new diff. Especially when rebasing often, an interdiff is often not feasible, though: if the old diff cannot be applied in reverse (due to a moving upstream), an interdiff can simply not be inferred. This commit brings `range-diff` closer to feature parity with regard to tbdiff. To make `git range-diff` respect e.g. color.diff.* settings, we have to adjust git_branch_config() accordingly. Note: while we now parse diff options such as --color, the effect is not yet the same as in tbdiff, where also the commit pairs would be colored. This is left for a later commit. Note also: while tbdiff accepts the `--no-patches` option to suppress these diffs between patches, we prefer the `-s` option that is automatically supported via our use of diff_opt_parse(). Signed-off-by: Johannes Schindelin --- builtin/range-diff.c | 25 +++++++++++++++++++++---- range-diff.c | 34 +++++++++++++++++++++++++++++++--- range-diff.h | 4 +++- 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/builtin/range-diff.c b/builtin/range-diff.c index c37a72100fbd19..5f12bbfa963739 100644 --- a/builtin/range-diff.c +++ b/builtin/range-diff.c @@ -2,6 +2,7 @@ #include "builtin.h" #include "parse-options.h" #include "range-diff.h" +#include "config.h" static const char * const builtin_range_diff_usage[] = { N_("git range-diff [] .. .."), @@ -13,16 +14,31 @@ NULL int cmd_range_diff(int argc, const char **argv, const char *prefix) { int creation_factor = 60; + struct diff_options diffopt = { NULL }; struct option options[] = { OPT_INTEGER(0, "creation-factor", &creation_factor, N_("Percentage by which creation is weighted")), OPT_END() }; - int res = 0; + int i, j, res = 0; struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT; - argc = parse_options(argc, argv, NULL, options, builtin_range_diff_usage, - 0); + git_config(git_diff_ui_config, NULL); + + diff_setup(&diffopt); + diffopt.output_format = DIFF_FORMAT_PATCH; + + argc = parse_options(argc, argv, NULL, options, + builtin_range_diff_usage, PARSE_OPT_KEEP_UNKNOWN); + + for (i = j = 0; i < argc; i++) { + int c = diff_opt_parse(&diffopt, argv + i, argc - i, prefix); + + if (!c) + argv[j++] = argv[i]; + } + argc = j; + diff_setup_done(&diffopt); if (argc == 2) { if (!strstr(argv[0], "..")) @@ -57,7 +73,8 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix) usage_with_options(builtin_range_diff_usage, options); } - res = show_range_diff(range1.buf, range2.buf, creation_factor); + res = show_range_diff(range1.buf, range2.buf, creation_factor, + &diffopt); strbuf_release(&range1); strbuf_release(&range2); diff --git a/range-diff.c b/range-diff.c index dafa2b23bc4af3..c143b5fe7fa601 100644 --- a/range-diff.c +++ b/range-diff.c @@ -6,6 +6,7 @@ #include "hashmap.h" #include "xdiff-interface.h" #include "linear-assignment.h" +#include "diffcore.h" struct patch_util { /* For the search for an exact match */ @@ -254,7 +255,31 @@ static const char *short_oid(struct patch_util *util) return find_unique_abbrev(util->oid.hash, DEFAULT_ABBREV); } -static void output(struct string_list *a, struct string_list *b) +static struct diff_filespec *get_filespec(const char *name, const char *p) +{ + struct diff_filespec *spec = alloc_filespec(name); + + fill_filespec(spec, &null_oid, 0, 0644); + spec->data = (char *)p; + spec->size = strlen(p); + spec->should_munmap = 0; + spec->is_stdin = 1; + + return spec; +} + +static void patch_diff(const char *a, const char *b, + struct diff_options *diffopt) +{ + diff_queue(&diff_queued_diff, + get_filespec("a", a), get_filespec("b", b)); + + diffcore_std(diffopt); + diff_flush(diffopt); +} + +static void output(struct string_list *a, struct string_list *b, + struct diff_options *diffopt) { int i = 0, j = 0; @@ -296,6 +321,9 @@ static void output(struct string_list *a, struct string_list *b) printf("%d: %s ! %d: %s\n", b_util->matching + 1, short_oid(a_util), j + 1, short_oid(b_util)); + if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT)) + patch_diff(a->items[b_util->matching].string, + b->items[j].string, diffopt); a_util->shown = 1; j++; } @@ -303,7 +331,7 @@ static void output(struct string_list *a, struct string_list *b) } int show_range_diff(const char *range1, const char *range2, - int creation_factor) + int creation_factor, struct diff_options *diffopt) { int res = 0; @@ -318,7 +346,7 @@ int show_range_diff(const char *range1, const char *range2, if (!res) { find_exact_matches(&branch1, &branch2); get_correspondences(&branch1, &branch2, creation_factor); - output(&branch1, &branch2); + output(&branch1, &branch2, diffopt); } string_list_clear(&branch1, 1); diff --git a/range-diff.h b/range-diff.h index dd30449c47b16c..aea9d43f34b4ca 100644 --- a/range-diff.h +++ b/range-diff.h @@ -1,7 +1,9 @@ #ifndef BRANCH_DIFF_H #define BRANCH_DIFF_H +#include "diff.h" + int show_range_diff(const char *range1, const char *range2, - int creation_factor); + int creation_factor, struct diff_options *diffopt); #endif From aaa6eb04e34e3c3a0a063d52b560c781b85451c5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 2 May 2018 16:49:09 +0200 Subject: [PATCH 06/19] range-diff: right-trim commit messages When comparing commit messages, we need to keep in mind that they are indented by four spaces. That is, empty lines are no longer empty, but have "trailing whitespace". When displaying them in color, that results in those nagging red lines. Let's just right-trim the lines in the commit message, it's not like trailing white-space in the commit messages are important enough to care about in `git range-diff`. Signed-off-by: Johannes Schindelin --- range-diff.c | 1 + 1 file changed, 1 insertion(+) diff --git a/range-diff.c b/range-diff.c index c143b5fe7fa601..5763429ff5bd53 100644 --- a/range-diff.c +++ b/range-diff.c @@ -85,6 +85,7 @@ static int read_patches(const char *range, struct string_list *list) strbuf_addbuf(&buf, &line); strbuf_addstr(&buf, "\n\n"); } else if (starts_with(line.buf, " ")) { + strbuf_rtrim(&line); strbuf_addbuf(&buf, &line); strbuf_addch(&buf, '\n'); } From aedfa93b48e8b65fbe887adb8abc972c7aedaaee Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 2 May 2018 16:52:21 +0200 Subject: [PATCH 07/19] range-diff: indent the diffs just like tbdiff The main information in the `range-diff` view comes from the list of matching and non-matching commits, the diffs are additional information. Indenting them helps with the reading flow. Signed-off-by: Johannes Schindelin --- builtin/range-diff.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/builtin/range-diff.c b/builtin/range-diff.c index 5f12bbfa963739..660e1f96197c0a 100644 --- a/builtin/range-diff.c +++ b/builtin/range-diff.c @@ -11,6 +11,11 @@ N_("git range-diff [] "), NULL }; +static struct strbuf *output_prefix_cb(struct diff_options *opt, void *data) +{ + return data; +} + int cmd_range_diff(int argc, const char **argv, const char *prefix) { int creation_factor = 60; @@ -21,12 +26,16 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix) OPT_END() }; int i, j, res = 0; + struct strbuf four_spaces = STRBUF_INIT; struct strbuf range1 = STRBUF_INIT, range2 = STRBUF_INIT; git_config(git_diff_ui_config, NULL); diff_setup(&diffopt); diffopt.output_format = DIFF_FORMAT_PATCH; + diffopt.output_prefix = output_prefix_cb; + strbuf_addstr(&four_spaces, " "); + diffopt.output_prefix_data = &four_spaces; argc = parse_options(argc, argv, NULL, options, builtin_range_diff_usage, PARSE_OPT_KEEP_UNKNOWN); @@ -78,6 +87,7 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix) strbuf_release(&range1); strbuf_release(&range2); + strbuf_release(&four_spaces); return res; } From b2a5a04bbf383c4e4691ead671b9a1209f31aabb Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 2 May 2018 16:53:50 +0200 Subject: [PATCH 08/19] range-diff: suppress the diff headers When showing the diff between corresponding patches of the two branch versions, we have to make up a fake filename to run the diff machinery. That filename does not carry any meaningful information, hence tbdiff suppresses it. So we should, too. Signed-off-by: Johannes Schindelin --- builtin/range-diff.c | 1 + diff.c | 5 ++++- diff.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/builtin/range-diff.c b/builtin/range-diff.c index 660e1f96197c0a..7c0967220ab792 100644 --- a/builtin/range-diff.c +++ b/builtin/range-diff.c @@ -33,6 +33,7 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix) diff_setup(&diffopt); diffopt.output_format = DIFF_FORMAT_PATCH; + diffopt.flags.suppress_diff_headers = 1; diffopt.output_prefix = output_prefix_cb; strbuf_addstr(&four_spaces, " "); diffopt.output_prefix_data = &four_spaces; diff --git a/diff.c b/diff.c index c9e42a1ec1606d..616134e6c64373 100644 --- a/diff.c +++ b/diff.c @@ -3197,13 +3197,16 @@ static void builtin_diff(const char *name_a, memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); memset(&ecbdata, 0, sizeof(ecbdata)); + if (o->flags.suppress_diff_headers) + lbl[0] = NULL; ecbdata.label_path = lbl; ecbdata.color_diff = want_color(o->use_color); ecbdata.ws_rule = whitespace_rule(name_b); if (ecbdata.ws_rule & WS_BLANK_AT_EOF) check_blank_at_eof(&mf1, &mf2, &ecbdata); ecbdata.opt = o; - ecbdata.header = header.len ? &header : NULL; + if (header.len && !o->flags.suppress_diff_headers) + ecbdata.header = &header; xpp.flags = o->xdl_opts; xpp.anchors = o->anchors; xpp.anchors_nr = o->anchors_nr; diff --git a/diff.h b/diff.h index d29560f822ca0e..0dd6a71af604e0 100644 --- a/diff.h +++ b/diff.h @@ -94,6 +94,7 @@ struct diff_flags { unsigned funccontext:1; unsigned default_follow_renames:1; unsigned stat_with_summary:1; + unsigned suppress_diff_headers:1; }; static inline void diff_flags_or(struct diff_flags *a, From 909a19449f885d4c45d7e2f7edb570af9086f5db Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 2 May 2018 23:35:48 +0200 Subject: [PATCH 09/19] range-diff: adjust the output of the commit pairs This change brings `git range-diff` yet another step closer to feature parity with tbdiff: it now shows the oneline, too, and indicates with `=` when the commits have identical diffs. Signed-off-by: Johannes Schindelin --- range-diff.c | 68 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 9 deletions(-) diff --git a/range-diff.c b/range-diff.c index 5763429ff5bd53..64c0fcdbde962c 100644 --- a/range-diff.c +++ b/range-diff.c @@ -7,6 +7,8 @@ #include "xdiff-interface.h" #include "linear-assignment.h" #include "diffcore.h" +#include "commit.h" +#include "pretty.h" struct patch_util { /* For the search for an exact match */ @@ -251,9 +253,59 @@ static void get_correspondences(struct string_list *a, struct string_list *b, free(b2a); } -static const char *short_oid(struct patch_util *util) +static void output_pair_header(struct strbuf *buf, + struct patch_util *a_util, + struct patch_util *b_util) { - return find_unique_abbrev(util->oid.hash, DEFAULT_ABBREV); + static char *dashes; + struct object_id *oid = a_util ? &a_util->oid : &b_util->oid; + struct commit *commit; + + if (!dashes) { + char *p; + + dashes = xstrdup(find_unique_abbrev(oid->hash, DEFAULT_ABBREV)); + for (p = dashes; *p; p++) + *p = '-'; + } + + strbuf_reset(buf); + if (!a_util) + strbuf_addf(buf, "-: %s ", dashes); + else + strbuf_addf(buf, "%d: %s ", a_util->i + 1, + find_unique_abbrev(a_util->oid.hash, + DEFAULT_ABBREV)); + + if (!a_util) + strbuf_addch(buf, '>'); + else if (!b_util) + strbuf_addch(buf, '<'); + else if (strcmp(a_util->patch, b_util->patch)) + strbuf_addch(buf, '!'); + else + strbuf_addch(buf, '='); + + if (!b_util) + strbuf_addf(buf, " -: %s", dashes); + else + strbuf_addf(buf, " %d: %s", b_util->i + 1, + find_unique_abbrev(b_util->oid.hash, + DEFAULT_ABBREV)); + + commit = lookup_commit_reference(oid); + if (commit) { + const char *commit_buffer = get_commit_buffer(commit, NULL); + const char *subject; + + find_commit_subject(commit_buffer, &subject); + strbuf_addch(buf, ' '); + format_subject(buf, subject, " "); + unuse_commit_buffer(commit, commit_buffer); + } + strbuf_addch(buf, '\n'); + + fwrite(buf->buf, buf->len, 1, stdout); } static struct diff_filespec *get_filespec(const char *name, const char *p) @@ -282,6 +334,7 @@ static void patch_diff(const char *a, const char *b, static void output(struct string_list *a, struct string_list *b, struct diff_options *diffopt) { + struct strbuf buf = STRBUF_INIT; int i = 0, j = 0; /* @@ -303,25 +356,21 @@ static void output(struct string_list *a, struct string_list *b, /* Show unmatched LHS commit whose predecessors were shown. */ if (i < a->nr && a_util->matching < 0) { - printf("%d: %s < -: --------\n", - i + 1, short_oid(a_util)); + output_pair_header(&buf, a_util, NULL); i++; continue; } /* Show unmatched RHS commits. */ while (j < b->nr && b_util->matching < 0) { - printf("-: -------- > %d: %s\n", - j + 1, short_oid(b_util)); + output_pair_header(&buf, NULL, b_util); b_util = ++j < b->nr ? b->items[j].util : NULL; } /* Show matching LHS/RHS pair. */ if (j < b->nr) { a_util = a->items[b_util->matching].util; - printf("%d: %s ! %d: %s\n", - b_util->matching + 1, short_oid(a_util), - j + 1, short_oid(b_util)); + output_pair_header(&buf, a_util, b_util); if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT)) patch_diff(a->items[b_util->matching].string, b->items[j].string, diffopt); @@ -329,6 +378,7 @@ static void output(struct string_list *a, struct string_list *b, j++; } } + strbuf_release(&buf); } int show_range_diff(const char *range1, const char *range2, From a02bdb8e52d53931e75b4ab321e11058a9077132 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 6 May 2018 17:35:03 +0200 Subject: [PATCH 10/19] range-diff: do not show "function names" in hunk headers We are comparing complete, formatted commit messages with patches. There are no function names here, so stop looking for them. Signed-off-by: Johannes Schindelin --- range-diff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/range-diff.c b/range-diff.c index 64c0fcdbde962c..1c6d0019db39a6 100644 --- a/range-diff.c +++ b/range-diff.c @@ -9,6 +9,7 @@ #include "diffcore.h" #include "commit.h" #include "pretty.h" +#include "userdiff.h" struct patch_util { /* For the search for an exact match */ @@ -308,6 +309,10 @@ static void output_pair_header(struct strbuf *buf, fwrite(buf->buf, buf->len, 1, stdout); } +static struct userdiff_driver no_func_name = { + .funcname = { "$^", 0 } +}; + static struct diff_filespec *get_filespec(const char *name, const char *p) { struct diff_filespec *spec = alloc_filespec(name); @@ -317,6 +322,7 @@ static struct diff_filespec *get_filespec(const char *name, const char *p) spec->size = strlen(p); spec->should_munmap = 0; spec->is_stdin = 1; + spec->driver = &no_func_name; return spec; } From 4c71ec82d75f4f74289bfdb124ed148e34c0ac8c Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Wed, 2 May 2018 17:19:37 +0200 Subject: [PATCH 11/19] range-diff: add tests These are essentially lifted from https://github.com/trast/tbdiff, with light touch-ups to account for the command now being an option of `git branch`. Apart from renaming `tbdiff` to `range-diff`, only one test case needed to be adjusted: 11 - 'changed message'. The underlying reason it had to be adjusted is that diff generation is sometimes ambiguous. In this case, a comment line and an empty line are added, but it is ambiguous whether they were added after the existing empty line, or whether an empty line and the comment line are added *before* the existing empty line. And apparently xdiff picks a different option here than Python's difflib. Signed-off-by: Johannes Schindelin --- t/.gitattributes | 1 + t/t3206-range-diff.sh | 145 ++++++++++ t/t3206/history.export | 604 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 750 insertions(+) create mode 100755 t/t3206-range-diff.sh create mode 100644 t/t3206/history.export diff --git a/t/.gitattributes b/t/.gitattributes index 00ef1342f4085f..9617d71f53b0b2 100644 --- a/t/.gitattributes +++ b/t/.gitattributes @@ -19,5 +19,6 @@ t[0-9][0-9][0-9][0-9]/* -whitespace /t5515/* eol=lf /t556x_common eol=lf /t7500/* eol=lf +/t7910/* eol=lf /t8005/*.txt eol=lf /t9*/*.dump eol=lf diff --git a/t/t3206-range-diff.sh b/t/t3206-range-diff.sh new file mode 100755 index 00000000000000..2237c7f4af9464 --- /dev/null +++ b/t/t3206-range-diff.sh @@ -0,0 +1,145 @@ +#!/bin/sh + +test_description='range-diff tests' + +. ./test-lib.sh + +# Note that because of the range-diff's heuristics, test_commit does more +# harm than good. We need some real history. + +test_expect_success 'setup' ' + git fast-import < "$TEST_DIRECTORY"/t3206/history.export +' + +test_expect_success 'simple A..B A..C (unmodified)' ' + git range-diff --no-color master..topic master..unmodified \ + >actual && + cat >expected <<-EOF && + 1: 4de457d = 1: 35b9b25 s/5/A/ + 2: fccce22 = 2: de345ab s/4/A/ + 3: 147e64e = 3: 9af6654 s/11/B/ + 4: a63e992 = 4: 2901f77 s/12/B/ + EOF + test_cmp expected actual +' + +test_expect_success 'simple B...C (unmodified)' ' + git range-diff --no-color topic...unmodified >actual && + # same "expected" as above + test_cmp expected actual +' + +test_expect_success 'simple A B C (unmodified)' ' + git range-diff --no-color master topic unmodified >actual && + # same "expected" as above + test_cmp expected actual +' + +test_expect_success 'trivial reordering' ' + git range-diff --no-color master topic reordered >actual && + cat >expected <<-EOF && + 1: 4de457d = 1: aca177a s/5/A/ + 3: 147e64e = 2: 14ad629 s/11/B/ + 4: a63e992 = 3: ee58208 s/12/B/ + 2: fccce22 = 4: 307b27a s/4/A/ + EOF + test_cmp expected actual +' + +test_expect_success 'removed a commit' ' + git range-diff --no-color master topic removed >actual && + cat >expected <<-EOF && + 1: 4de457d = 1: 7657159 s/5/A/ + 2: fccce22 < -: ------- s/4/A/ + 3: 147e64e = 2: 43d84d3 s/11/B/ + 4: a63e992 = 3: a740396 s/12/B/ + EOF + test_cmp expected actual +' + +test_expect_success 'added a commit' ' + git range-diff --no-color master topic added >actual && + cat >expected <<-EOF && + 1: 4de457d = 1: 2716022 s/5/A/ + 2: fccce22 = 2: b62accd s/4/A/ + -: ------- > 3: df46cfa s/6/A/ + 3: 147e64e = 4: 3e64548 s/11/B/ + 4: a63e992 = 5: 12b4063 s/12/B/ + EOF + test_cmp expected actual +' + +test_expect_success 'new base, A B C' ' + git range-diff --no-color master topic rebased >actual && + cat >expected <<-EOF && + 1: 4de457d = 1: cc9c443 s/5/A/ + 2: fccce22 = 2: c5d9641 s/4/A/ + 3: 147e64e = 3: 28cc2b6 s/11/B/ + 4: a63e992 = 4: 5628ab7 s/12/B/ + EOF + test_cmp expected actual +' + +test_expect_success 'new base, B...C' ' + # this syntax includes the commits from master! + git range-diff --no-color topic...rebased >actual && + cat >expected <<-EOF && + -: ------- > 1: a31b12e unrelated + 1: 4de457d = 2: cc9c443 s/5/A/ + 2: fccce22 = 3: c5d9641 s/4/A/ + 3: 147e64e = 4: 28cc2b6 s/11/B/ + 4: a63e992 = 5: 5628ab7 s/12/B/ + EOF + test_cmp expected actual +' + +test_expect_success 'changed commit' ' + git range-diff --no-color topic...changed >actual && + cat >expected <<-EOF && + 1: 4de457d = 1: a4b3333 s/5/A/ + 2: fccce22 = 2: f51d370 s/4/A/ + 3: 147e64e ! 3: 0559556 s/11/B/ + @@ -10,7 +10,7 @@ + 9 + 10 + -11 + -+B + ++BB + 12 + 13 + 14 + 4: a63e992 ! 4: d966c5c s/12/B/ + @@ -8,7 +8,7 @@ + @@ + 9 + 10 + - B + + BB + -12 + +B + 13 + EOF + test_cmp expected actual +' + +test_expect_success 'changed message' ' + git range-diff --no-color topic...changed-message >actual && + sed s/Z/\ /g >expected <<-EOF && + 1: 4de457d = 1: f686024 s/5/A/ + 2: fccce22 ! 2: 4ab067d s/4/A/ + @@ -2,6 +2,8 @@ + Z + Z s/4/A/ + Z + + Also a silly comment here! + + + Zdiff --git a/file b/file + Z--- a/file + Z+++ b/file + 3: 147e64e = 3: b9cb956 s/11/B/ + 4: a63e992 = 4: 8add5f1 s/12/B/ + EOF + test_cmp expected actual +' + +test_done diff --git a/t/t3206/history.export b/t/t3206/history.export new file mode 100644 index 00000000000000..b8ffff0940d6f1 --- /dev/null +++ b/t/t3206/history.export @@ -0,0 +1,604 @@ +blob +mark :1 +data 51 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 + +reset refs/heads/removed +commit refs/heads/removed +mark :2 +author Thomas Rast 1374424921 +0200 +committer Thomas Rast 1374484724 +0200 +data 8 +initial +M 100644 :1 file + +blob +mark :3 +data 51 +1 +2 +3 +4 +A +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/topic +mark :4 +author Thomas Rast 1374485014 +0200 +committer Thomas Rast 1374485014 +0200 +data 7 +s/5/A/ +from :2 +M 100644 :3 file + +blob +mark :5 +data 51 +1 +2 +3 +A +A +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/topic +mark :6 +author Thomas Rast 1374485024 +0200 +committer Thomas Rast 1374485024 +0200 +data 7 +s/4/A/ +from :4 +M 100644 :5 file + +blob +mark :7 +data 50 +1 +2 +3 +A +A +6 +7 +8 +9 +10 +B +12 +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/topic +mark :8 +author Thomas Rast 1374485036 +0200 +committer Thomas Rast 1374485036 +0200 +data 8 +s/11/B/ +from :6 +M 100644 :7 file + +blob +mark :9 +data 49 +1 +2 +3 +A +A +6 +7 +8 +9 +10 +B +B +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/topic +mark :10 +author Thomas Rast 1374485044 +0200 +committer Thomas Rast 1374485044 +0200 +data 8 +s/12/B/ +from :8 +M 100644 :9 file + +blob +mark :11 +data 10 +unrelated + +commit refs/heads/master +mark :12 +author Thomas Rast 1374485127 +0200 +committer Thomas Rast 1374485127 +0200 +data 10 +unrelated +from :2 +M 100644 :11 otherfile + +commit refs/heads/rebased +mark :13 +author Thomas Rast 1374485014 +0200 +committer Thomas Rast 1374485137 +0200 +data 7 +s/5/A/ +from :12 +M 100644 :3 file + +commit refs/heads/rebased +mark :14 +author Thomas Rast 1374485024 +0200 +committer Thomas Rast 1374485138 +0200 +data 7 +s/4/A/ +from :13 +M 100644 :5 file + +commit refs/heads/rebased +mark :15 +author Thomas Rast 1374485036 +0200 +committer Thomas Rast 1374485138 +0200 +data 8 +s/11/B/ +from :14 +M 100644 :7 file + +commit refs/heads/rebased +mark :16 +author Thomas Rast 1374485044 +0200 +committer Thomas Rast 1374485138 +0200 +data 8 +s/12/B/ +from :15 +M 100644 :9 file + +commit refs/heads/added +mark :17 +author Thomas Rast 1374485014 +0200 +committer Thomas Rast 1374485341 +0200 +data 7 +s/5/A/ +from :2 +M 100644 :3 file + +commit refs/heads/added +mark :18 +author Thomas Rast 1374485024 +0200 +committer Thomas Rast 1374485341 +0200 +data 7 +s/4/A/ +from :17 +M 100644 :5 file + +blob +mark :19 +data 51 +1 +2 +3 +A +A +A +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/added +mark :20 +author Thomas Rast 1374485186 +0200 +committer Thomas Rast 1374485341 +0200 +data 7 +s/6/A/ +from :18 +M 100644 :19 file + +blob +mark :21 +data 50 +1 +2 +3 +A +A +A +7 +8 +9 +10 +B +12 +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/added +mark :22 +author Thomas Rast 1374485036 +0200 +committer Thomas Rast 1374485341 +0200 +data 8 +s/11/B/ +from :20 +M 100644 :21 file + +blob +mark :23 +data 49 +1 +2 +3 +A +A +A +7 +8 +9 +10 +B +B +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/added +mark :24 +author Thomas Rast 1374485044 +0200 +committer Thomas Rast 1374485341 +0200 +data 8 +s/12/B/ +from :22 +M 100644 :23 file + +commit refs/heads/reordered +mark :25 +author Thomas Rast 1374485014 +0200 +committer Thomas Rast 1374485350 +0200 +data 7 +s/5/A/ +from :2 +M 100644 :3 file + +blob +mark :26 +data 50 +1 +2 +3 +4 +A +6 +7 +8 +9 +10 +B +12 +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/reordered +mark :27 +author Thomas Rast 1374485036 +0200 +committer Thomas Rast 1374485350 +0200 +data 8 +s/11/B/ +from :25 +M 100644 :26 file + +blob +mark :28 +data 49 +1 +2 +3 +4 +A +6 +7 +8 +9 +10 +B +B +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/reordered +mark :29 +author Thomas Rast 1374485044 +0200 +committer Thomas Rast 1374485350 +0200 +data 8 +s/12/B/ +from :27 +M 100644 :28 file + +commit refs/heads/reordered +mark :30 +author Thomas Rast 1374485024 +0200 +committer Thomas Rast 1374485350 +0200 +data 7 +s/4/A/ +from :29 +M 100644 :9 file + +commit refs/heads/changed +mark :31 +author Thomas Rast 1374485014 +0200 +committer Thomas Rast 1374485507 +0200 +data 7 +s/5/A/ +from :2 +M 100644 :3 file + +commit refs/heads/changed +mark :32 +author Thomas Rast 1374485024 +0200 +committer Thomas Rast 1374485507 +0200 +data 7 +s/4/A/ +from :31 +M 100644 :5 file + +blob +mark :33 +data 51 +1 +2 +3 +A +A +6 +7 +8 +9 +10 +BB +12 +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/changed +mark :34 +author Thomas Rast 1374485036 +0200 +committer Thomas Rast 1374485507 +0200 +data 8 +s/11/B/ +from :32 +M 100644 :33 file + +blob +mark :35 +data 50 +1 +2 +3 +A +A +6 +7 +8 +9 +10 +BB +B +13 +14 +15 +16 +17 +18 +19 +20 + +commit refs/heads/changed +mark :36 +author Thomas Rast 1374485044 +0200 +committer Thomas Rast 1374485507 +0200 +data 8 +s/12/B/ +from :34 +M 100644 :35 file + +commit refs/heads/changed-message +mark :37 +author Thomas Rast 1374485014 +0200 +committer Thomas Rast 1374485530 +0200 +data 7 +s/5/A/ +from :2 +M 100644 :3 file + +commit refs/heads/changed-message +mark :38 +author Thomas Rast 1374485024 +0200 +committer Thomas Rast 1374485530 +0200 +data 35 +s/4/A/ + +Also a silly comment here! +from :37 +M 100644 :5 file + +commit refs/heads/changed-message +mark :39 +author Thomas Rast 1374485036 +0200 +committer Thomas Rast 1374485536 +0200 +data 8 +s/11/B/ +from :38 +M 100644 :7 file + +commit refs/heads/changed-message +mark :40 +author Thomas Rast 1374485044 +0200 +committer Thomas Rast 1374485536 +0200 +data 8 +s/12/B/ +from :39 +M 100644 :9 file + +commit refs/heads/unmodified +mark :41 +author Thomas Rast 1374485014 +0200 +committer Thomas Rast 1374485631 +0200 +data 7 +s/5/A/ +from :2 +M 100644 :3 file + +commit refs/heads/unmodified +mark :42 +author Thomas Rast 1374485024 +0200 +committer Thomas Rast 1374485631 +0200 +data 7 +s/4/A/ +from :41 +M 100644 :5 file + +commit refs/heads/unmodified +mark :43 +author Thomas Rast 1374485036 +0200 +committer Thomas Rast 1374485632 +0200 +data 8 +s/11/B/ +from :42 +M 100644 :7 file + +commit refs/heads/unmodified +mark :44 +author Thomas Rast 1374485044 +0200 +committer Thomas Rast 1374485632 +0200 +data 8 +s/12/B/ +from :43 +M 100644 :9 file + +commit refs/heads/removed +mark :45 +author Thomas Rast 1374485014 +0200 +committer Thomas Rast 1374486061 +0200 +data 7 +s/5/A/ +from :2 +M 100644 :3 file + +commit refs/heads/removed +mark :46 +author Thomas Rast 1374485036 +0200 +committer Thomas Rast 1374486061 +0200 +data 8 +s/11/B/ +from :45 +M 100644 :26 file + +commit refs/heads/removed +mark :47 +author Thomas Rast 1374485044 +0200 +committer Thomas Rast 1374486061 +0200 +data 8 +s/12/B/ +from :46 +M 100644 :28 file + +reset refs/heads/removed +from :47 + From ab6e271089fc400ca9c485dd169a2a28d7ab062a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 May 2018 01:32:19 +0200 Subject: [PATCH 12/19] range-diff: use color for the commit pairs Arguably the most important part of `git range-diff`'s output is the list of commits in the two branches, together with their relationships. For that reason, tbdiff introduced color-coding that is pretty intuitive, especially for unchanged patches (all dim yellow, like the first line in `git show`'s output) vs modified patches (old commit is red, new commit is green). Let's imitate that color scheme. Signed-off-by: Johannes Schindelin --- range-diff.c | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/range-diff.c b/range-diff.c index 1c6d0019db39a6..afff559f0632fb 100644 --- a/range-diff.c +++ b/range-diff.c @@ -254,13 +254,19 @@ static void get_correspondences(struct string_list *a, struct string_list *b, free(b2a); } -static void output_pair_header(struct strbuf *buf, +static void output_pair_header(struct diff_options *diffopt, struct strbuf *buf, struct patch_util *a_util, struct patch_util *b_util) { static char *dashes; struct object_id *oid = a_util ? &a_util->oid : &b_util->oid; struct commit *commit; + char status; + const char *color_reset = diff_get_color_opt(diffopt, DIFF_RESET); + const char *color_old = diff_get_color_opt(diffopt, DIFF_FILE_OLD); + const char *color_new = diff_get_color_opt(diffopt, DIFF_FILE_NEW); + const char *color_commit = diff_get_color_opt(diffopt, DIFF_COMMIT); + const char *color; if (!dashes) { char *p; @@ -270,7 +276,22 @@ static void output_pair_header(struct strbuf *buf, *p = '-'; } + if (!b_util) { + color = color_old; + status = '<'; + } else if (!a_util) { + color = color_new; + status = '>'; + } else if (strcmp(a_util->patch, b_util->patch)) { + color = color_commit; + status = '!'; + } else { + color = color_commit; + status = '='; + } + strbuf_reset(buf); + strbuf_addstr(buf, status == '!' ? color_old : color); if (!a_util) strbuf_addf(buf, "-: %s ", dashes); else @@ -278,14 +299,11 @@ static void output_pair_header(struct strbuf *buf, find_unique_abbrev(a_util->oid.hash, DEFAULT_ABBREV)); - if (!a_util) - strbuf_addch(buf, '>'); - else if (!b_util) - strbuf_addch(buf, '<'); - else if (strcmp(a_util->patch, b_util->patch)) - strbuf_addch(buf, '!'); - else - strbuf_addch(buf, '='); + if (status == '!') + strbuf_addf(buf, "%s%s", color_reset, color); + strbuf_addch(buf, status); + if (status == '!') + strbuf_addf(buf, "%s%s", color_reset, color_new); if (!b_util) strbuf_addf(buf, " -: %s", dashes); @@ -299,12 +317,15 @@ static void output_pair_header(struct strbuf *buf, const char *commit_buffer = get_commit_buffer(commit, NULL); const char *subject; + if (status == '!') + strbuf_addf(buf, "%s%s", color_reset, color); + find_commit_subject(commit_buffer, &subject); strbuf_addch(buf, ' '); format_subject(buf, subject, " "); unuse_commit_buffer(commit, commit_buffer); } - strbuf_addch(buf, '\n'); + strbuf_addf(buf, "%s\n", color_reset); fwrite(buf->buf, buf->len, 1, stdout); } @@ -362,21 +383,21 @@ static void output(struct string_list *a, struct string_list *b, /* Show unmatched LHS commit whose predecessors were shown. */ if (i < a->nr && a_util->matching < 0) { - output_pair_header(&buf, a_util, NULL); + output_pair_header(diffopt, &buf, a_util, NULL); i++; continue; } /* Show unmatched RHS commits. */ while (j < b->nr && b_util->matching < 0) { - output_pair_header(&buf, NULL, b_util); + output_pair_header(diffopt, &buf, NULL, b_util); b_util = ++j < b->nr ? b->items[j].util : NULL; } /* Show matching LHS/RHS pair. */ if (j < b->nr) { a_util = a->items[b_util->matching].util; - output_pair_header(&buf, a_util, b_util); + output_pair_header(diffopt, &buf, a_util, b_util); if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT)) patch_diff(a->items[b_util->matching].string, b->items[j].string, diffopt); From 4844e5e924e252320d3394fb915386f1f1075422 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 May 2018 02:14:31 +0200 Subject: [PATCH 13/19] color: add the meta color GIT_COLOR_REVERSE This "color" simply reverts background and foreground. It will be used in the upcoming "dual color" mode of `git range-diff`, where we will reverse colors for the -/+ markers and the fragment headers of the "outer" diff. Signed-off-by: Johannes Schindelin --- color.h | 1 + 1 file changed, 1 insertion(+) diff --git a/color.h b/color.h index 5b744e1bc68617..33e786342a7747 100644 --- a/color.h +++ b/color.h @@ -44,6 +44,7 @@ struct strbuf; #define GIT_COLOR_BG_CYAN "\033[46m" #define GIT_COLOR_FAINT "\033[2m" #define GIT_COLOR_FAINT_ITALIC "\033[2;3m" +#define GIT_COLOR_REVERSE "\033[7m" /* A special value meaning "no color selected" */ #define GIT_COLOR_NIL "NIL" From ac8326178608cc83f1ec3902220f7706ad4a88a4 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 May 2018 02:17:02 +0200 Subject: [PATCH 14/19] diff: add an internal option to dual-color diffs of diffs When diffing diffs, it can be quite daunting to figure out what the heck is going on, as there are nested +/- signs. Let's make this easier by adding a flag in diff_options that allows color-coding the outer diff sign with inverted colors, so that the preimage and postimage is colored like the diff it is. Of course, this really only makes sense when the preimage and postimage *are* diffs. So let's not expose this flag via a command-line option for now. This is a feature that was invented by git-tbdiff, and it will be used by `git range-diff` in the next commit, by offering it via a new option: `--dual-color`. Signed-off-by: Johannes Schindelin --- diff.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++----------- diff.h | 1 + 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/diff.c b/diff.c index 616134e6c64373..4c7733e89ed857 100644 --- a/diff.c +++ b/diff.c @@ -570,14 +570,18 @@ static void check_blank_at_eof(mmfile_t *mf1, mmfile_t *mf2, ecbdata->blank_at_eof_in_postimage = (at - l2) + 1; } -static void emit_line_0(struct diff_options *o, const char *set, const char *reset, +static void emit_line_0(struct diff_options *o, + const char *set, unsigned reverse, const char *reset, int first, const char *line, int len) { int has_trailing_newline, has_trailing_carriage_return; int nofirst; FILE *file = o->file; - fputs(diff_line_prefix(o), file); + if (first) + fputs(diff_line_prefix(o), file); + else if (!len) + return; if (len == 0) { has_trailing_newline = (first == '\n'); @@ -595,8 +599,10 @@ static void emit_line_0(struct diff_options *o, const char *set, const char *res } if (len || !nofirst) { + if (reverse && want_color(o->use_color)) + fputs(GIT_COLOR_REVERSE, file); fputs(set, file); - if (!nofirst) + if (first && !nofirst) fputc(first, file); fwrite(line, len, 1, file); fputs(reset, file); @@ -610,7 +616,7 @@ static void emit_line_0(struct diff_options *o, const char *set, const char *res static void emit_line(struct diff_options *o, const char *set, const char *reset, const char *line, int len) { - emit_line_0(o, set, reset, line[0], line+1, len-1); + emit_line_0(o, set, 0, reset, line[0], line+1, len-1); } enum diff_symbol { @@ -970,7 +976,8 @@ static void dim_moved_lines(struct diff_options *o) static void emit_line_ws_markup(struct diff_options *o, const char *set, const char *reset, - const char *line, int len, char sign, + const char *line, int len, + const char *set_sign, char sign, unsigned ws_rule, int blank_at_eof) { const char *ws = NULL; @@ -981,14 +988,20 @@ static void emit_line_ws_markup(struct diff_options *o, ws = NULL; } - if (!ws) - emit_line_0(o, set, reset, sign, line, len); - else if (blank_at_eof) + if (!ws && !set_sign) + emit_line_0(o, set, 0, reset, sign, line, len); + else if (!ws) { + /* Emit just the prefix, then the rest. */ + emit_line_0(o, set_sign ? set_sign : set, !!set_sign, reset, + sign, "", 0); + emit_line_0(o, set, 0, reset, 0, line, len); + } else if (blank_at_eof) /* Blank line at EOF - paint '+' as well */ - emit_line_0(o, ws, reset, sign, line, len); + emit_line_0(o, ws, 0, reset, sign, line, len); else { /* Emit just the prefix, then the rest. */ - emit_line_0(o, set, reset, sign, "", 0); + emit_line_0(o, set_sign ? set_sign : set, !!set_sign, reset, + sign, "", 0); ws_check_emit(line, len, ws_rule, o->file, set, reset, ws); } @@ -998,7 +1011,7 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, struct emitted_diff_symbol *eds) { static const char *nneof = " No newline at end of file\n"; - const char *context, *reset, *set, *meta, *fraginfo; + const char *context, *reset, *set, *set_sign, *meta, *fraginfo; struct strbuf sb = STRBUF_INIT; enum diff_symbol s = eds->s; @@ -1011,7 +1024,7 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, context = diff_get_color_opt(o, DIFF_CONTEXT); reset = diff_get_color_opt(o, DIFF_RESET); putc('\n', o->file); - emit_line_0(o, context, reset, '\\', + emit_line_0(o, context, 0, reset, '\\', nneof, strlen(nneof)); break; case DIFF_SYMBOL_SUBMODULE_HEADER: @@ -1038,7 +1051,18 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, case DIFF_SYMBOL_CONTEXT: set = diff_get_color_opt(o, DIFF_CONTEXT); reset = diff_get_color_opt(o, DIFF_RESET); - emit_line_ws_markup(o, set, reset, line, len, ' ', + set_sign = NULL; + if (o->flags.dual_color_diffed_diffs) { + char c = !len ? 0 : line[0]; + + if (c == '+') + set = diff_get_color_opt(o, DIFF_FILE_NEW); + else if (c == '@') + set = diff_get_color_opt(o, DIFF_FRAGINFO); + else if (c == '-') + set = diff_get_color_opt(o, DIFF_FILE_OLD); + } + emit_line_ws_markup(o, set, reset, line, len, set_sign, ' ', flags & (DIFF_SYMBOL_CONTENT_WS_MASK), 0); break; case DIFF_SYMBOL_PLUS: @@ -1065,7 +1089,20 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, set = diff_get_color_opt(o, DIFF_FILE_NEW); } reset = diff_get_color_opt(o, DIFF_RESET); - emit_line_ws_markup(o, set, reset, line, len, '+', + if (!o->flags.dual_color_diffed_diffs) + set_sign = NULL; + else { + char c = !len ? 0 : line[0]; + + set_sign = set; + if (c == '-') + set = diff_get_color_opt(o, DIFF_FILE_OLD); + else if (c == '@') + set = diff_get_color_opt(o, DIFF_FRAGINFO); + else if (c != '+') + set = diff_get_color_opt(o, DIFF_CONTEXT); + } + emit_line_ws_markup(o, set, reset, line, len, set_sign, '+', flags & DIFF_SYMBOL_CONTENT_WS_MASK, flags & DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF); break; @@ -1093,7 +1130,20 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, set = diff_get_color_opt(o, DIFF_FILE_OLD); } reset = diff_get_color_opt(o, DIFF_RESET); - emit_line_ws_markup(o, set, reset, line, len, '-', + if (!o->flags.dual_color_diffed_diffs) + set_sign = NULL; + else { + char c = !len ? 0 : line[0]; + + set_sign = set; + if (c == '+') + set = diff_get_color_opt(o, DIFF_FILE_NEW); + else if (c == '@') + set = diff_get_color_opt(o, DIFF_FRAGINFO); + else if (c != '-') + set = diff_get_color_opt(o, DIFF_CONTEXT); + } + emit_line_ws_markup(o, set, reset, line, len, set_sign, '-', flags & DIFF_SYMBOL_CONTENT_WS_MASK, 0); break; case DIFF_SYMBOL_WORDS_PORCELAIN: @@ -1284,6 +1334,7 @@ static void emit_hunk_header(struct emit_callback *ecbdata, const char *frag = diff_get_color(ecbdata->color_diff, DIFF_FRAGINFO); const char *func = diff_get_color(ecbdata->color_diff, DIFF_FUNCINFO); const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET); + const char *reverse = ecbdata->color_diff ? GIT_COLOR_REVERSE : ""; static const char atat[2] = { '@', '@' }; const char *cp, *ep; struct strbuf msgbuf = STRBUF_INIT; @@ -1304,6 +1355,8 @@ static void emit_hunk_header(struct emit_callback *ecbdata, ep += 2; /* skip over @@ */ /* The hunk header in fraginfo color */ + if (ecbdata->opt->flags.dual_color_diffed_diffs) + strbuf_addstr(&msgbuf, reverse); strbuf_addstr(&msgbuf, frag); strbuf_add(&msgbuf, line, ep - line); strbuf_addstr(&msgbuf, reset); diff --git a/diff.h b/diff.h index 0dd6a71af604e0..b14dd6d5feecd1 100644 --- a/diff.h +++ b/diff.h @@ -95,6 +95,7 @@ struct diff_flags { unsigned default_follow_renames:1; unsigned stat_with_summary:1; unsigned suppress_diff_headers:1; + unsigned dual_color_diffed_diffs:1; }; static inline void diff_flags_or(struct diff_flags *a, From b5de55d469a06a9d37f434fb3ff3f220ad56ceca Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 May 2018 03:01:00 +0200 Subject: [PATCH 15/19] range-diff: offer to dual-color the diffs When showing what changed between old and new commits, we show a diff of the patches. This diff is a diff between diffs, therefore there are nested +/- signs, and it can be relatively hard to understand what is going on. With the --dual-color option, the preimage and the postimage are colored like the diffs they are, and the *outer* +/- sign is inverted for clarity. Signed-off-by: Johannes Schindelin --- builtin/range-diff.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/builtin/range-diff.c b/builtin/range-diff.c index 7c0967220ab792..e8f7fe4522f0bf 100644 --- a/builtin/range-diff.c +++ b/builtin/range-diff.c @@ -20,9 +20,12 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix) { int creation_factor = 60; struct diff_options diffopt = { NULL }; + int dual_color = 0; struct option options[] = { OPT_INTEGER(0, "creation-factor", &creation_factor, N_("Percentage by which creation is weighted")), + OPT_BOOL(0, "dual-color", &dual_color, + N_("color both diff and diff-between-diffs")), OPT_END() }; int i, j, res = 0; @@ -50,6 +53,11 @@ int cmd_range_diff(int argc, const char **argv, const char *prefix) argc = j; diff_setup_done(&diffopt); + if (dual_color) { + diffopt.use_color = 1; + diffopt.flags.dual_color_diffed_diffs = 1; + } + if (argc == 2) { if (!strstr(argv[0], "..")) warning(_("no .. in range: '%s'"), argv[0]); From 926ec7e3c46c2fc408b62530351fda5473ea36c7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 May 2018 03:11:58 +0200 Subject: [PATCH 16/19] range-diff --dual-color: work around bogus white-space warning When displaying a diff of diffs, it is possible that there is an outer `+` before a context line. That happens when the context changed between old and new commit. When that context line starts with a tab (after the space that marks it as context line), our diff machinery spits out a white-space error (space before tab), but in this case, that is incorrect. Work around this by detecting that situation and simply *not* printing the space in that case. This is slightly improper a fix because it is conceivable that an output_prefix might be configured with *just* the right length to let that tab jump to a different tab stop depending whether we emit that space or not. However, the proper fix would be relatively ugly and intrusive because it would have to *weaken* the WS_SPACE_BEFORE_TAB option in ws.c. Besides, we do not expose the --dual-color option in cases other than the `git range-diff` command, which only uses a hard-coded output_prefix of four spaces (which misses the problem by one column... ;-)). Signed-off-by: Johannes Schindelin --- diff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/diff.c b/diff.c index 4c7733e89ed857..949c5b37399275 100644 --- a/diff.c +++ b/diff.c @@ -1101,6 +1101,12 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, set = diff_get_color_opt(o, DIFF_FRAGINFO); else if (c != '+') set = diff_get_color_opt(o, DIFF_CONTEXT); + /* Avoid space-before-tab warning */ + if (c == ' ' && (len < 2 || line[1] == '\t' || + line[1] == '\r' || line[1] == '\n')) { + line++; + len--; + } } emit_line_ws_markup(o, set, reset, line, len, set_sign, '+', flags & DIFF_SYMBOL_CONTENT_WS_MASK, From 8ee93d391262b7df91e143fd908e42f164f63e1b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 May 2018 15:50:53 +0200 Subject: [PATCH 17/19] range-diff: add a man page The bulk of this patch consists of a heavily butchered version of tbdiff's README written by Thomas Rast and Thomas Gummerer, lifted from https://github.com/trast/tbdiff. Signed-off-by: Johannes Schindelin --- Documentation/git-range-diff.txt | 235 +++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 Documentation/git-range-diff.txt diff --git a/Documentation/git-range-diff.txt b/Documentation/git-range-diff.txt new file mode 100644 index 00000000000000..189236cc6fc9b5 --- /dev/null +++ b/Documentation/git-range-diff.txt @@ -0,0 +1,235 @@ +git-range-diff(1) +================== + +NAME +---- +git-range-diff - Compare two commit ranges (e.g. two versions of a branch) + +SYNOPSIS +-------- +[verse] +'git range-diff' [--color=[]] [--no-color] [] + [--dual-color] [--creation-factor=] + ( | ... | ) + +DESCRIPTION +----------- + +This command shows the differences between two versions of a patch +series, or more generally, two commit ranges (ignoring merges). + +To that end, it first finds pairs of commits from both commit ranges +that correspond with each other. Two commits are said to correspond when +the diff between their patches (i.e. the author information, the commit +message and the commit diff) is reasonably small compared to the +patches' size. See ``Algorithm` below for details. + +Finally, the list of matching commits is shown in the order of the +second commit range, with unmatched commits being inserted just after +all of their ancestors have been shown. + + +OPTIONS +------- +--dual-color:: + When the commit diffs differ, recreate the original diffs' + coloring, and add outer -/+ diff markers with the *background* + being red/green to make it easier to see e.g. when there was a + change in what exact lines were added. + +--creation-factor=:: + Set the creation/deletion cost fudge factor to ``. + Defaults to 60. Try a larger value if `git range-diff` erroneously + considers a large change a total rewrite (deletion of one commit + and addition of another), and a smaller one in the reverse case. + See the ``Algorithm`` section below for an explanation why this is + needed. + + :: + Compare the commits specified by the two ranges, where + `` is considered an older version of ``. + +...:: + Equivalent to passing `..` and `..`. + + :: + Equivalent to passing `..` and `..`. + Note that `` does not need to be the exact branch point + of the branches. Example: after rebasing a branch `my-topic`, + `git range-diff my-topic@{u} my-topic@{1} my-topic` would + show the differences introduced by the rebase. + +`git range-diff` also accepts the regular diff options (see +linkgit:git-diff[1]), most notably the `--color=[]` and +`--no-color` options. These options are used when generating the "diff +between patches", i.e. to compare the author, commit message and diff of +corresponding old/new commits. There is currently no means to tweak the +diff options passed to `git log` when generating those patches. + + +CONFIGURATION +------------- +This command uses the `diff.color.*` and `pager.range-diff` settings +(the latter is on by default). +See linkgit:git-config[1]. + + +EXAMPLES +-------- + +When a rebase required merge conflicts to be resolved, compare the changes +introduced by the rebase directly afterwards using: + +------------ +$ git range-diff @{u} @{1} @ +------------ + + +A typical output of `git range-diff` would look like this: + +------------ +-: ------- > 1: 0ddba11 Prepare for the inevitable! +1: c0debee = 2: cab005e Add a helpful message at the start +2: f00dbal ! 3: decafe1 Describe a bug + @@ -1,3 +1,3 @@ + Author: A U Thor + + -TODO: Describe a bug + +Describe a bug + @@ -324,5 +324,6 + This is expected. + + -+What is unexpected is that it will also crash. + ++Unexpectedly, it also crashes. This is a bug, and the jury is + ++still out there how to fix it best. See ticket #314 for details. + + Contact +3: bedead < -: ------- TO-UNDO +------------ + +In this example, there are 3 old and 3 new commits, where the developer +removed the 3rd, added a new one before the first two, and modified the +commit message of the 2nd commit as well its diff. + +When the output goes to a terminal, it is color-coded by default, just +like regular `git diff`'s output. In addition, the first line (adding a +commit) is green, the last line (deleting a commit) is red, the second +line (with a perfect match) is yellow like the commit header of `git +show`'s output, and the third line colors the old commit red, the new +one green and the rest like `git show`'s commit header. + +The color-coded diff is actually a bit hard to read, though, as it +colors the entire lines red or green. The line that added "What is +unexpected" in the old commit, for example, is completely red, even if +the intent of the old commit was to add something. + +To help with that, use the `--dual-color` mode. In this mode, the diff +of diffs will retain the original diff colors, and prefix the lines with +-/+ markers that have their *background* red or green, to make it more +obvious that they describe how the diff itself changed. + + +Algorithm +--------- + +The general idea is this: we generate a cost matrix between the commits +in both commit ranges, then solve the least-cost assignment. + +To avoid false positives (e.g. when a patch has been removed, and an +unrelated patch has been added between two iterations of the same patch +series), the cost matrix is extended to allow for that, by adding +fixed-cost entries for wholesale deletes/adds. + +Example: Let commits `1--2` be the first iteration of a patch series and +`A--C` the second iteration. Let's assume that `A` is a cherry-pick of +`2,` and `C` is a cherry-pick of `1` but with a small modification (say, +a fixed typo). Visualize the commits as a bipartite graph: + +------------ + 1 A + + 2 B + + C +------------ + +We are looking for a "best" explanation of the new series in terms of +the old one. We can represent an "explanation" as an edge in the graph: + + +------------ + 1 A + / + 2 --------' B + + C +------------ + +This explanation comes for "free" because there was no change. Similarly +`C` could be explained using `1`, but that comes at some cost c>0 +because of the modification: + +------------ + 1 ----. A + | / + 2 ----+---' B + | + `----- C + c>0 +------------ + +In mathematical terms, what we are looking for is some sort of a minimum +cost bipartite matching; `1` is matched to `C` at some cost, etc. The +underlying graph is in fact a complete bipartite graph; the cost we +associate with every edge is the size of the diff between the two +commits' patches. To explain also new commits, we introduce dummy nodes +on both sides: + +------------ + 1 ----. A + | / + 2 ----+---' B + | + o `----- C + c>0 + o o + + o o +------------ + +The cost of an edge `o--C` is the size of `C`'s diff, modified by a +fudge factor that should be smaller than 100%. The cost of an edge +`o--o` is free. The fudge factor is necessary because even if `1` and +`C` have nothing in common, they may still share a few empty lines and +such, possibly making the assignment `1--C`, `o--o` slightly cheaper +than `1--o`, `o--C` even if `1` and `C` have nothing in common. With the +fudge factor we require a much larger common part to consider patches as +corresponding. + +The overall time needed to compute this algorithm is the time needed to +compute n+m commit diffs and then n*m diffs of patches, plus the time +needed to compute the least-cost assigment between n and m diffs. Git +uses an implementation of the Jonker-Volgenant algorithm to solve the +assignment problem, which has cubic runtime complexity. The matching +found in this case will look like this: + +------------ + 1 ----. A + | / + 2 ----+---' B + .--+-----' + o -' `----- C + c>0 + o ---------- o + + o ---------- o +------------ + + +SEE ALSO +-------- +linkgit:git-log[1] + +GIT +--- +Part of the linkgit:git[1] suite From 40ee939bc4a40dba4d0a5858d31c0727810ce58b Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 3 May 2018 16:44:25 +0200 Subject: [PATCH 18/19] completion: support `git range-diff` Tab completion of `git range-diff` is very convenient, especially given that the revision arguments to specify the commit ranges to compare are typically more complex than, say, your grandfather's `git log` arguments. Signed-off-by: Johannes Schindelin squash! WIP completion: support `git range-diff` Revert "WIP completion: support `git range-diff`" This reverts commit 2e7af652af9e53a19fd947f8ebe37a78043afa49. --- contrib/completion/git-completion.bash | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index f69cb5cdff7d26..4fed6a72aff494 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1924,6 +1924,20 @@ _git_push () __git_complete_remote_or_refspec } +_git_range_diff () +{ + case "$cur" in + --*) + __gitcomp " + --creation-factor= --dual-color + $__git_diff_common_options + " + return + ;; + esac + __git_complete_revlist +} + _git_rebase () { __git_find_repo_path From 7b16bd37d6b106100630e4ed980562c8d8587fbf Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 5 May 2018 21:52:20 +0200 Subject: [PATCH 19/19] range-diff: left-pad patch numbers As pointed out by Elijah Newren, tbdiff has this neat little alignment trick where it outputs the commit pairs with patch numbers that are padded to the maximal patch number's width: 1: cafedead = 1: acefade first patch [...] 314: beefeada < 314: facecab up to PI! Let's do the same in range-diff, too. Signed-off-by: Johannes Schindelin --- range-diff.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/range-diff.c b/range-diff.c index afff559f0632fb..d9b3eefde7c251 100644 --- a/range-diff.c +++ b/range-diff.c @@ -254,7 +254,8 @@ static void get_correspondences(struct string_list *a, struct string_list *b, free(b2a); } -static void output_pair_header(struct diff_options *diffopt, struct strbuf *buf, +static void output_pair_header(struct diff_options *diffopt, int patch_no_width, + struct strbuf *buf, struct patch_util *a_util, struct patch_util *b_util) { @@ -293,9 +294,9 @@ static void output_pair_header(struct diff_options *diffopt, struct strbuf *buf, strbuf_reset(buf); strbuf_addstr(buf, status == '!' ? color_old : color); if (!a_util) - strbuf_addf(buf, "-: %s ", dashes); + strbuf_addf(buf, "%*s: %s ", patch_no_width, "-", dashes); else - strbuf_addf(buf, "%d: %s ", a_util->i + 1, + strbuf_addf(buf, "%*d: %s ", patch_no_width, a_util->i + 1, find_unique_abbrev(a_util->oid.hash, DEFAULT_ABBREV)); @@ -306,9 +307,9 @@ static void output_pair_header(struct diff_options *diffopt, struct strbuf *buf, strbuf_addf(buf, "%s%s", color_reset, color_new); if (!b_util) - strbuf_addf(buf, " -: %s", dashes); + strbuf_addf(buf, " %*s: %s", patch_no_width, "-", dashes); else - strbuf_addf(buf, " %d: %s", b_util->i + 1, + strbuf_addf(buf, " %*d: %s", patch_no_width, b_util->i + 1, find_unique_abbrev(b_util->oid.hash, DEFAULT_ABBREV)); @@ -362,6 +363,7 @@ static void output(struct string_list *a, struct string_list *b, struct diff_options *diffopt) { struct strbuf buf = STRBUF_INIT; + int patch_no_width = decimal_width(1 + (a->nr > b->nr ? a->nr : b->nr)); int i = 0, j = 0; /* @@ -383,21 +385,24 @@ static void output(struct string_list *a, struct string_list *b, /* Show unmatched LHS commit whose predecessors were shown. */ if (i < a->nr && a_util->matching < 0) { - output_pair_header(diffopt, &buf, a_util, NULL); + output_pair_header(diffopt, patch_no_width, &buf, + a_util, NULL); i++; continue; } /* Show unmatched RHS commits. */ while (j < b->nr && b_util->matching < 0) { - output_pair_header(diffopt, &buf, NULL, b_util); + output_pair_header(diffopt, patch_no_width, &buf, + NULL, b_util); b_util = ++j < b->nr ? b->items[j].util : NULL; } /* Show matching LHS/RHS pair. */ if (j < b->nr) { a_util = a->items[b_util->matching].util; - output_pair_header(diffopt, &buf, a_util, b_util); + output_pair_header(diffopt, patch_no_width, &buf, + a_util, b_util); if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT)) patch_diff(a->items[b_util->matching].string, b->items[j].string, diffopt);