From 119bdc3ecb84717613eeef10f958f32cd7ff6b9d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Oct 2016 00:07:21 -0400 Subject: [PATCH] add diff-pairs tool This takes the output of `diff-tree -z --raw` and feeds it back to the later stages of the diff machinery to produce diffs in other formats. Because the interim format contains any whole-tree copy/rename information, you can safely feed segments of the tree diff to get progressive patch-format diffs. So something like: git diff-tree -r -z $a $b | git diff-pairs -p should give you the same output that `git diff-tree -p` would have. Likewise, feeding each pair individually works, too: git diff-tree -r -z -M $a $b | perl -0ne ' my $meta = $_; my $path = <>; # only renames have an extra path my $path2 = <> if $meta =~ /[RC]\d+/; print STDERR "feeding one diff\n"; open(my $fh, "|git diff-pairs -p"); print $fh $meta, $path, $path2; ' The renames will still be shown just as if the diff had been done in one process. Signed-off-by: Jeff King --- .gitignore | 1 + Documentation/git-diff-pairs.txt | 66 ++++++++++++ Makefile | 1 + builtin.h | 1 + builtin/diff-pairs.c | 174 +++++++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + t/t4070-diff-pairs.sh | 82 +++++++++++++++ 8 files changed, 327 insertions(+) create mode 100644 Documentation/git-diff-pairs.txt create mode 100644 builtin/diff-pairs.c create mode 100755 t/t4070-diff-pairs.sh diff --git a/.gitignore b/.gitignore index 6687bd6db4c0a6..8acb0c8153be4d 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ /git-diff /git-diff-files /git-diff-index +/git-diff-pairs /git-diff-tree /git-difftool /git-difftool--helper diff --git a/Documentation/git-diff-pairs.txt b/Documentation/git-diff-pairs.txt new file mode 100644 index 00000000000000..78fc1ecfcd3d11 --- /dev/null +++ b/Documentation/git-diff-pairs.txt @@ -0,0 +1,66 @@ +git-diff-pairs(1) +================= + +NAME +---- +git-diff-pairs - Compare blob pairs generated by `diff-tree --raw` + +SYNOPSIS 
+-------- +[verse] +'git diff-pairs' [diff-options] + +DESCRIPTION +----------- + +Given the output of `diff-tree -z` on its stdin, `diff-pairs` will +reformat that output into whatever format is requested on its command +line. For example: + +----------------------------- +git diff-tree -z -M $a $b | +git diff-pairs -p +----------------------------- + +will compute the tree diff in one step (including renames), and then +`diff-pairs` will compute and format the blob-level diffs for each pair. +This can be used to modify the raw diff in the middle (without having to +parse or re-create more complicated formats like `--patch`), or to +compute diffs progressively over the course of multiple invocations of +`diff-pairs`. + +Each blob pair is fed to the diff machinery individually and the output +flushed immediately, meaning it is safe to interactively read and write +from `diff-pairs`. + +OPTIONS +------- + +All diff options below are accepted, but note that tree-wide options +like `-M` are effectively noops, as we consider only one pair at a time. + +include::diff-options.txt[] + +include::diff-generate-patch.txt[] + +BUGS +---- + +`diff-pairs` should handle any input generated by `diff-tree --raw -z`. +It may choke or otherwise misbehave on output from `diff-files`, etc. + +Here's an incomplete list of things that `diff-pairs` could do, but +doesn't (mostly in the name of simplicity): + + - Only `-z` input is accepted, not normal `--raw` input. + + - Abbreviated sha1s are rejected in the input from `diff-tree`; if you + want to abbreviate the output, you can pass `--abbrev` to + `diff-pairs`. + + - Pathspecs are not handled by `diff-pairs`; you can limit the diff via + the initial `diff-tree` invocation. 
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Makefile b/Makefile
index d06c9a8ffa7b63..36aa812bccf7d4 100644
--- a/Makefile
+++ b/Makefile
@@ -1234,6 +1234,7 @@ BUILTIN_OBJS += builtin/describe.o
 BUILTIN_OBJS += builtin/diagnose.o
 BUILTIN_OBJS += builtin/diff-files.o
 BUILTIN_OBJS += builtin/diff-index.o
+BUILTIN_OBJS += builtin/diff-pairs.o
 BUILTIN_OBJS += builtin/diff-tree.o
 BUILTIN_OBJS += builtin/diff.o
 BUILTIN_OBJS += builtin/difftool.o
diff --git a/builtin.h b/builtin.h
index f7b166b33484d3..b2d2e9eb07a271 100644
--- a/builtin.h
+++ b/builtin.h
@@ -152,6 +152,7 @@ int cmd_diagnose(int argc, const char **argv, const char *prefix, struct reposit
 int cmd_diff_files(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_diff_index(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_diff(int argc, const char **argv, const char *prefix, struct repository *repo);
+int cmd_diff_pairs(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_diff_tree(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_difftool(int argc, const char **argv, const char *prefix, struct repository *repo);
 int cmd_env__helper(int argc, const char **argv, const char *prefix, struct repository *repo);
diff --git a/builtin/diff-pairs.c b/builtin/diff-pairs.c
new file mode 100644
index 00000000000000..4e968059e0cf1a
--- /dev/null
+++ b/builtin/diff-pairs.c
@@ -0,0 +1,207 @@
+#include "builtin.h"
+#include "commit.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "revision.h"
+#include "config.h"
+#include "hex.h"
+
+/*
+ * Usage text. The NULs in the record format are spelled "\\0" so that they
+ * print as a literal backslash-zero; a bare "\0" would embed a NUL byte in
+ * the string and cut the message short.
+ */
+static const char diff_pairs_usage[] =
+"git diff-pairs [diff-options]\n"
+"\n"
+"Reads pairs of blobs from stdin in 'diff-tree -z' syntax:\n"
+"\n"
+"  :<mode_a> <mode_b> <oid_a> <oid_b> <status>[<score>]\\0<path>\\0[<path_dst>\\0]\n"
+"\n"
+"and outputs the diff for each a/b pair to stdout.";
+
+/*
+ * Parse a file mode at "mode" with parse_mode(), storing the pointer it
+ * returns in "*endp". Dies if parse_mode() signals failure by returning
+ * NULL; otherwise returns the parsed mode.
+ */
+static unsigned parse_mode_or_die(const char *mode, const char **endp)
+{
+	uint16_t ret;
+
+	*endp = parse_mode(mode, &ret);
+	if (!*endp)
+		die("unable to parse mode: %s", mode);
+	return ret;
+}
+
+/*
+ * Parse a hex object id at "p" into "oid" using hash algorithm "algop".
+ * The id must be followed by a space, which is consumed: "*endp" is left
+ * pointing just past it. Dies on malformed input.
+ */
+static void parse_oid(const char *p, struct object_id *oid, const char **endp,
+		      const struct git_hash_algo *algop)
+{
+	if (parse_oid_hex_algop(p, oid, endp, algop) || *(*endp)++ != ' ')
+		die("unable to parse object id: %s", p);
+}
+
+/*
+ * Parse the similarity percentage that follows a rename or copy status
+ * and scale it by MAX_SCORE / 100 for storage in a filepair.
+ * The whole string must be a decimal number no larger than 100; anything
+ * else is fatal.
+ */
+static unsigned short parse_score(const char *score)
+{
+	unsigned long ret;
+	char *endp;
+
+	errno = 0;
+	ret = strtoul(score, &endp, 10);
+	/*
+	 * Range-check before scaling, so that a huge input can neither wrap
+	 * around into the valid range nor overflow during the multiplication.
+	 */
+	if (errno || endp == score || *endp || ret > 100)
+		die("unable to parse rename/copy score: %s", score);
+	ret *= MAX_SCORE / 100;
+	return ret;
+}
+
+/*
+ * The pair-creation is mostly done by diff_change and diff_addremove,
+ * which queue the filepair without returning it. So we have to resort
+ * to pulling it out of the global diff queue.
+ */
+static void set_pair_status(char status)
+{
+	/*
+	 * If we have no items in the queue, for some reason the pair wasn't
+	 * worth queueing. This generally shouldn't happen (since it means
+	 * dropping some parts of the diff), but the user can trigger it with
+	 * things like --ignore-submodules. If they do, the only sensible thing
+	 * is for us to play along and skip it.
+	 */
+	if (!diff_queued_diff.nr)
+		return;
+
+	diff_queued_diff.queue[0]->status = status;
+}
+
+/*
+ * Entry point for "git diff-pairs": read NUL-terminated raw diff records
+ * from stdin, queue each one as a filepair, and flush it through the diff
+ * machinery right away. Output uses whatever format the command line
+ * selected, falling back to raw. Dies on malformed input and returns 0
+ * once stdin is exhausted.
+ */
+int cmd_diff_pairs(int argc, const char **argv, const char *prefix,
+		   struct repository *repo)
+{
+	struct rev_info revs;
+	struct strbuf meta = STRBUF_INIT;
+	struct strbuf path = STRBUF_INIT;
+	struct strbuf path_dst = STRBUF_INIT;
+
+	if (argc > 1 && !strcmp(argv[1], "-h"))
+		usage(diff_pairs_usage);
+
+	repo_init_revisions(repo, &revs, prefix);
+	repo_config(repo, git_diff_basic_config, NULL);
+	revs.disable_stdin = 1;
+	argc = setup_revisions(argc, argv, &revs, NULL);
+
+	/* Don't allow pathspecs at all. */
+	if (argc > 1)
+		usage(diff_pairs_usage);
+
+	if (!revs.diffopt.output_format)
+		revs.diffopt.output_format = DIFF_FORMAT_RAW;
+
+	while (1) {
+		unsigned mode_a, mode_b;
+		struct object_id oid_a, oid_b;
+		char status;
+		const char *p;
+
+		if (strbuf_getline_nul(&meta, stdin) == EOF)
+			break;
+
+		/* The leading colon of a raw record is optional. */
+		p = meta.buf;
+		if (*p == ':')
+			p++;
+
+		mode_a = parse_mode_or_die(p, &p);
+		mode_b = parse_mode_or_die(p, &p);
+
+		parse_oid(p, &oid_a, &p, repo->hash_algo);
+		parse_oid(p, &oid_b, &p, repo->hash_algo);
+
+		/* For renames and copies, "p" now points at the score. */
+		status = *p++;
+
+		if (strbuf_getline_nul(&path, stdin) == EOF)
+			die("got EOF while reading path");
+
+		switch (status) {
+		case DIFF_STATUS_ADDED:
+			diff_addremove(&revs.diffopt, '+',
+				       mode_b, &oid_b,
+				       1, path.buf, 0);
+			set_pair_status(status);
+			break;
+
+		case DIFF_STATUS_DELETED:
+			diff_addremove(&revs.diffopt, '-',
+				       mode_a, &oid_a,
+				       1, path.buf, 0);
+			set_pair_status(status);
+			break;
+
+		case DIFF_STATUS_TYPE_CHANGED:
+		case DIFF_STATUS_MODIFIED:
+			diff_change(&revs.diffopt,
+				    mode_a, mode_b,
+				    &oid_a, &oid_b,
+				    1, 1, path.buf, 0, 0);
+			set_pair_status(status);
+			break;
+
+		case DIFF_STATUS_RENAMED:
+		case DIFF_STATUS_COPIED:
+			{
+				struct diff_filespec *a, *b;
+				struct diff_filepair *pair;
+
+				if (strbuf_getline_nul(&path_dst, stdin) == EOF)
+					die("got EOF while reading secondary path");
+
+				a = alloc_filespec(path.buf);
+				b = alloc_filespec(path_dst.buf);
+				fill_filespec(a, &oid_a, 1, mode_a);
+				fill_filespec(b, &oid_b, 1, mode_b);
+
+				pair = diff_queue(&diff_queued_diff, a, b);
+				pair->status = status;
+				pair->score = parse_score(p);
+				pair->renamed_pair = 1;
+			}
+			break;
+
+		default:
+			die("unknown diff status: %c", status);
+		}
+
+		diff_flush(&revs.diffopt);
+	}
+
+	strbuf_release(&meta);
+	strbuf_release(&path);
+	strbuf_release(&path_dst);
+	release_revisions(&revs);
+
+	return 0;
+}
diff --git a/command-list.txt b/command-list.txt
index e0bb87b3b5c278..bb8acd51d89892 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -95,6 +95,7 @@
git-diagnose ancillaryinterrogators
 git-diff mainporcelain info
 git-diff-files plumbinginterrogators
 git-diff-index plumbinginterrogators
+git-diff-pairs plumbinginterrogators
 git-diff-tree plumbinginterrogators
 git-difftool ancillaryinterrogators complete
 git-fast-export ancillarymanipulators
diff --git a/git.c b/git.c
index c2c1b8e22c2d91..e20b668b224f98 100644
--- a/git.c
+++ b/git.c
@@ -544,6 +544,7 @@ static struct cmd_struct commands[] = {
 	{ "diff", cmd_diff, NO_PARSEOPT },
 	{ "diff-files", cmd_diff_files, RUN_SETUP | NEED_WORK_TREE | NO_PARSEOPT },
 	{ "diff-index", cmd_diff_index, RUN_SETUP | NO_PARSEOPT },
+	{ "diff-pairs", cmd_diff_pairs, RUN_SETUP | NO_PARSEOPT },
 	{ "diff-tree", cmd_diff_tree, RUN_SETUP | NO_PARSEOPT },
 	{ "difftool", cmd_difftool, RUN_SETUP_GENTLY },
 	{ "fast-export", cmd_fast_export, RUN_SETUP },
diff --git a/t/t4070-diff-pairs.sh b/t/t4070-diff-pairs.sh
new file mode 100755
index 00000000000000..7f974dd7ee7e2f
--- /dev/null
+++ b/t/t4070-diff-pairs.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+
+test_description='basic diff-pairs tests'
+. ./test-lib.sh
+
+# This creates a diff with added, modified, deleted, renamed, copied, and
+# typechange entries. That includes one in a subdirectory for non-recursive
+# tests, and both exact and inexact similarity scores.
+test_expect_success 'create commit with various diffs' '
+	echo to-be-gone >deleted &&
+	echo original >modified &&
+	echo now-a-file >symlink &&
+	test_seq 200 >two-hundred &&
+	test_seq 201 500 >five-hundred &&
+	git add . &&
+	test_tick &&
+	git commit -m base &&
+	git tag base &&
+
+	echo now-here >added &&
+	echo new >modified &&
+	rm deleted &&
+	mkdir subdir &&
+	echo content >subdir/file &&
+	mv two-hundred renamed &&
+	test_seq 201 500 | sed s/300/modified/ >copied &&
+	rm symlink &&
+	git add -A . &&
+	test_ln_s_add dest symlink &&
+	test_tick &&
+	git commit -m new &&
+	git tag new
+'
+
+test_expect_success 'diff-pairs recreates --raw' '
+	git diff-tree -r -M -C -C base new >expect &&
+	# note that diff-pairs uses the default abbrev,
+	# so we must tweak that for identical output
+	git diff-tree -r -M -C -C -z base new |
+	git diff-pairs --no-abbrev >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'diff-pairs can create -p output' '
+	git diff-tree -p -M -C -C base new >expect &&
+	git diff-tree -r -M -C -C -z base new |
+	git diff-pairs -p >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'non-recursive --raw retains tree entry' '
+	git diff-tree base new >expect &&
+	git diff-tree -z base new |
+	git diff-pairs --no-abbrev >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'split input across multiple diff-pairs' '
+	write_script split-raw-diff "$PERL_PATH" <<-\EOF &&
+	$/ = "\0";
+	while (<>) {
+		my $meta = $_;
+		my $path = <>;
+		# renames and copies carry a second path; read it only for them
+		my $path2 = ($meta =~ /[RC]\d+/) ? <> : "";
+
+		open(my $fh, ">", sprintf "diff%03d", $.) or die "cannot open: $!";
+		print $fh $meta, $path, $path2;
+	}
+	EOF
+
+	git diff-tree -p -M -C -C base new >expect &&
+
+	git diff-tree -r -z -M -C -C base new |
+	./split-raw-diff &&
+	for i in diff*; do
+		git diff-pairs -p <"$i" || return 1
+	done >actual &&
+	test_cmp expect actual
+'
+
+test_done