From 102fdd2e07f72919060224b3eb3560524d6cf1f9 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:17 +0200 Subject: pickaxe/style: consolidate declarations and assignments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor contains() to do its assignments at the same time that it does its declarations. This code could have been refactored in ef90ab66e8e (pickaxe: use textconv for -S counting, 2012-10-28) when a function call between the declarations and assignments was removed. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index a9c6d60df2..a278b9b71d 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -70,13 +70,9 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) { - unsigned int cnt; - unsigned long sz; - const char *data; - - sz = mf->size; - data = mf->ptr; - cnt = 0; + unsigned int cnt = 0; + unsigned long sz = mf->size; + const char *data = mf->ptr; if (regexp) { regmatch_t regmatch; -- cgit 1.2.3-korg From 03c1f14acf5169d8e6c06a60c28ee5a6cfb9fd54 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:19 +0200 Subject: pickaxe: refactor function selection in diffcore-pickaxe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's hard to read this codepath at a glance and reason about exactly what combination of -G and -S will compile either regexes or kwset, and whether we'll then dispatch to "diff_grep" or "has_changes". Then in the "--find-object" case we aren't using the callback function, but were previously passing down "has_changes". Refactor this code to exhaustively check "opts", it's now more obvious what callback function (or none) we want under what mode. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index a278b9b71d..953b6ec1b4 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -228,6 +228,7 @@ void diffcore_pickaxe(struct diff_options *o) int opts = o->pickaxe_opts; regex_t regex, *regexp = NULL; kwset_t kws = NULL; + pickaxe_fn fn; if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { int cflags = REG_EXTENDED | REG_NEWLINE; @@ -235,6 +236,20 @@ void diffcore_pickaxe(struct diff_options *o) cflags |= REG_ICASE; regcomp_or_die(®ex, needle, cflags); regexp = ®ex; + + if (opts & DIFF_PICKAXE_KIND_G) + fn = diff_grep; + else if (opts & DIFF_PICKAXE_REGEX) + fn = has_changes; + else + /* + * We don't need to check the combination of + * -G and --pickaxe-regex, by the time we get + * here diff.c has already died if they're + * combined. See the usage tests in + * t4209-log-pickaxe.sh. + */ + BUG("unreachable"); } else if (opts & DIFF_PICKAXE_KIND_S) { if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE && has_non_ascii(needle)) { @@ -251,10 +266,14 @@ void diffcore_pickaxe(struct diff_options *o) kwsincr(kws, needle, strlen(needle)); kwsprep(kws); } + fn = has_changes; + } else if (opts & DIFF_PICKAXE_KIND_OBJFIND) { + fn = NULL; + } else { + BUG("unknown pickaxe_opts flag"); } - pickaxe(&diff_queued_diff, o, regexp, kws, - (opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes); + pickaxe(&diff_queued_diff, o, regexp, kws, fn); if (regexp) regfree(regexp); -- cgit 1.2.3-korg From 2e197a759221921d42c5e94ae3f4897cd456ebb4 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:20 +0200 Subject: pickaxe: assert that we must have a needle under -G or -S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assert early in diffcore_pickaxe() that we've got a needle to work with under -G and -S. This code is redundant to the check -G and -S get from parse-options.c's get_arg(), which I'm adding a test for. This check dates back to e1b161161d (diffcore-pickaxe: fix infinite loop on zero-length needle, 2007-01-25) when "git log -S" could send this code into an infinite loop. It was then later refactored in 8fa4b09fb1 (pickaxe: hoist empty needle check, 2012-10-28) into its current form, but it seemingly wasn't noticed that in the meantime a move to the parse-options.c API in dea007fb4c (diff: parse separate options like -S foo, 2010-08-05) had made it redundant. Let's retain some of the paranoia here with a BUG(), but there's no need to be checking this in the pickaxe_match() inner loop. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 6 +++--- t/t4209-log-pickaxe.sh | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 953b6ec1b4..88b6ca840f 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -132,9 +132,6 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o, oidset_contains(o->objfind, &p->two->oid)); } - if (!o->pickaxe[0]) - return 0; - if (o->flags.allow_textconv) { textconv_one = get_textconv(o->repo, p->one); textconv_two = get_textconv(o->repo, p->two); @@ -230,6 +227,9 @@ void diffcore_pickaxe(struct diff_options *o) kwset_t kws = NULL; pickaxe_fn fn; + if (opts & ~DIFF_PICKAXE_KIND_OBJFIND && + (!needle || !*needle)) + BUG("should have needle under -G or -S"); if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { int cflags = REG_EXTENDED | REG_NEWLINE; if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE) diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh index 16166ffd3e..3f9aad0fdb 100755 --- a/t/t4209-log-pickaxe.sh +++ b/t/t4209-log-pickaxe.sh @@ -56,6 +56,12 @@ test_expect_success setup ' ' test_expect_success 'usage' ' + test_expect_code 129 git log -S 2>err && + test_i18ngrep "switch.*requires a value" err && + + test_expect_code 129 git log -G 2>err && + test_i18ngrep "switch.*requires a value" err && + test_expect_code 128 git log -Gregex -Sstring 2>err && grep "mutually exclusive" err && -- cgit 1.2.3-korg From 52e011cd2b13e00fe40b1d59986948330b61e030 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:21 +0200 Subject: pickaxe -S: support content with NULs under --pickaxe-regex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bug in the matching routine powering -S --pickaxe-regex so that we won't abort early on content that has NULs in it. We've had a hard requirement on REG_STARTEND since 2f8952250a8 (regex: add regexec_buf() that can work on a non NUL-terminated string, 2016-09-21), but this sanity check dates back to d01d8c67828 (Support for pickaxe matching regular expressions, 2006-03-29). It wasn't needed anymore, and as the now-passing test shows, actively getting in our way. Since we always require REG_STARTEND support we do not need to stop at NULs. If we are dealing with a haystack with NUL in it. The needle may be behind that NUL. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 4 ++-- t/t4209-log-pickaxe.sh | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 88b6ca840f..be0dd683b6 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -78,12 +78,12 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) regmatch_t regmatch; int flags = 0; - while (sz && *data && + while (sz && !regexec_buf(regexp, data, sz, 1, ®match, flags)) { flags |= REG_NOTBOL; data += regmatch.rm_eo; sz -= regmatch.rm_eo; - if (sz && *data && regmatch.rm_so == regmatch.rm_eo) { + if (sz && regmatch.rm_so == regmatch.rm_eo) { data++; sz--; } diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh index 3f9aad0fdb..75795d0b49 100755 --- a/t/t4209-log-pickaxe.sh +++ b/t/t4209-log-pickaxe.sh @@ -215,4 +215,12 @@ test_expect_success 'log -S looks into binary files' ' test_cmp log full-log ' +test_expect_success 'log -S --pickaxe-regex looks into binary files' ' + git -C GS-bin-txt log --pickaxe-regex -Sa >log && + test_cmp log full-log && + + git -C GS-bin-txt log --pickaxe-regex -S"[a]" >log && + test_cmp log full-log +' + test_done -- cgit 1.2.3-korg From 5d35a9531cc9af717383bc11c2dfa4edc020ac56 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:22 +0200 Subject: pickaxe: rename variables in has_changes() for brevity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the {one,two}_contains variables to c{1,2}. This will make a follow-up change easier to read. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index be0dd683b6..23362a2359 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -108,9 +108,9 @@ static int has_changes(mmfile_t *one, mmfile_t *two, struct diff_options *o, regex_t *regexp, kwset_t kws) { - unsigned int one_contains = one ? contains(one, regexp, kws) : 0; - unsigned int two_contains = two ? contains(two, regexp, kws) : 0; - return one_contains != two_contains; + unsigned int c1 = one ? contains(one, regexp, kws) : 0; + unsigned int c2 = two ? contains(two, regexp, kws) : 0; + return c1 != c2; } static int pickaxe_match(struct diff_filepair *p, struct diff_options *o, -- cgit 1.2.3-korg From 5b0672a26e51387bc18adad89eaa3ebb131b2e33 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:23 +0200 Subject: pickaxe -S: slightly optimize contains() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the "log -S" switch counts occurrences of on the pre-image and post-image of a change. As soon as we know we had e.g. 1 before and 2 now we can stop, we don't need to keep counting past 2. With this change a diff between A and B may have different performance characteristics than between B and A. That's OK in this case, since we'll emit the same output, and the effect is to make one of them better. I'm picking a check of "one" first on the assumption that it's a more common case to have files grow over time than not. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 23362a2359..b7494fdf89 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -68,7 +68,8 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, return ecbdata.hit; } -static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) +static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws, + unsigned int limit) { unsigned int cnt = 0; unsigned long sz = mf->size; @@ -88,6 +89,9 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) sz--; } cnt++; + + if (limit && cnt == limit) + return cnt; } } else { /* Classic exact string match */ @@ -99,6 +103,9 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) sz -= offset + kwsm.size[0]; data += offset + kwsm.size[0]; cnt++; + + if (limit && cnt == limit) + return cnt; } } return cnt; @@ -108,8 +115,8 @@ static int has_changes(mmfile_t *one, mmfile_t *two, struct diff_options *o, regex_t *regexp, kwset_t kws) { - unsigned int c1 = one ? contains(one, regexp, kws) : 0; - unsigned int c2 = two ? contains(two, regexp, kws) : 0; + unsigned int c1 = one ? contains(one, regexp, kws, 0) : 0; + unsigned int c2 = two ? contains(two, regexp, kws, c1 + 1) : 0; return c1 != c2; } -- cgit 1.2.3-korg From a8d5eb6dc0d61625667b0d8155c425d3629baa12 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:24 +0200 Subject: xdiff-interface: prepare for allowing early return MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the function prototype of xdiff_emit_line_fn to return an "int" instead of "void". Change all of those functions to "return 0", nothing checks those return values yet, and no behavior is being changed. In subsequent commits the interface will be changed to allow early return via this new return value. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- combine-diff.c | 5 +++-- diff.c | 26 +++++++++++++++----------- diffcore-pickaxe.c | 7 ++++--- range-diff.c | 3 ++- xdiff-interface.c | 3 ++- xdiff-interface.h | 2 +- 6 files changed, 27 insertions(+), 19 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/combine-diff.c b/combine-diff.c index 06635f91bc..a12d3bc0d9 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -403,11 +403,11 @@ static void consume_hunk(void *state_, state->sline[state->nb-1].p_lno[state->n] = state->ob; } -static void consume_line(void *state_, char *line, unsigned long len) +static int consume_line(void *state_, char *line, unsigned long len) { struct combine_diff_state *state = state_; if (!state->lost_bucket) - return; /* not in any hunk yet */ + return 0; /* not in any hunk yet */ switch (line[0]) { case '-': append_lost(state->lost_bucket, state->n, line+1, len-1); @@ -417,6 +417,7 @@ static void consume_line(void *state_, char *line, unsigned long len) state->lno++; break; } + return 0; } static void combine_diff(struct repository *r, diff --git a/diff.c b/diff.c index c1f47a7f01..7a03c581c7 100644 --- a/diff.c +++ b/diff.c @@ -2336,7 +2336,7 @@ static void find_lno(const char *line, struct emit_callback *ecbdata) ecbdata->lno_in_postimage = strtol(p + 1, NULL, 10); } -static void fn_out_consume(void *priv, char *line, unsigned long len) +static int fn_out_consume(void *priv, char *line, unsigned long len) { struct emit_callback *ecbdata = priv; struct diff_options *o = ecbdata->opt; @@ -2372,7 +2372,7 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) len = sane_truncate_line(line, len); find_lno(line, ecbdata); emit_hunk_header(ecbdata, line, len); - return; + return 0; } if (ecbdata->diff_words) { @@ -2382,11 +2382,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) if (line[0] == '-') { diff_words_append(line, len, &ecbdata->diff_words->minus); - return; + return 0; } else if (line[0] == '+') { diff_words_append(line, len, &ecbdata->diff_words->plus); - return; + return 0; } else if (starts_with(line, "\\ ")) { /* * Eat the "no newline at eof" marker as if we @@ -2395,11 +2395,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) * defer processing. If this is the end of * preimage, more "+" lines may come after it. */ - return; + return 0; } diff_words_flush(ecbdata); emit_diff_symbol(o, s, line, len, 0); - return; + return 0; } switch (line[0]) { @@ -2423,6 +2423,7 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) line, len, 0); break; } + return 0; } static void pprint_rename(struct strbuf *name, const char *a, const char *b) @@ -2522,7 +2523,7 @@ static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat, return x; } -static void diffstat_consume(void *priv, char *line, unsigned long len) +static int diffstat_consume(void *priv, char *line, unsigned long len) { struct diffstat_t *diffstat = priv; struct diffstat_file *x = diffstat->files[diffstat->nr - 1]; @@ -2531,6 +2532,7 @@ static void diffstat_consume(void *priv, char *line, unsigned long len) x->added++; else if (line[0] == '-') x->deleted++; + return 0; } const char mime_boundary_leader[] = "------------"; @@ -3208,7 +3210,7 @@ static void checkdiff_consume_hunk(void *priv, data->lineno = nb - 1; } -static void checkdiff_consume(void *priv, char *line, unsigned long len) +static int checkdiff_consume(void *priv, char *line, unsigned long len) { struct checkdiff_t *data = priv; int marker_size = data->conflict_marker_size; @@ -3232,7 +3234,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) } bad = ws_check(line + 1, len - 1, data->ws_rule); if (!bad) - return; + return 0; data->status |= bad; err = whitespace_error_string(bad); fprintf(data->o->file, "%s%s:%d: %s.\n", @@ -3244,6 +3246,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) } else if (line[0] == ' ') { data->lineno++; } + return 0; } static unsigned char *deflate_it(char *data, @@ -6121,17 +6124,18 @@ void flush_one_hunk(struct object_id *result, git_hash_ctx *ctx) } } -static void patch_id_consume(void *priv, char *line, unsigned long len) +static int patch_id_consume(void *priv, char *line, unsigned long len) { struct patch_id_t *data = priv; int new_len; if (len > 12 && starts_with(line, "\\ ")) - return; + return 0; new_len = remove_space(line, len); the_hash_algo->update_fn(data->ctx, line, new_len); data->patchlen += new_len; + return 0; } static void patch_id_add_string(git_hash_ctx *ctx, const char *str) diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index b7494fdf89..27aa20be35 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -19,21 +19,22 @@ struct diffgrep_cb { int hit; }; -static void diffgrep_consume(void *priv, char *line, unsigned long len) +static int diffgrep_consume(void *priv, char *line, unsigned long len) { struct diffgrep_cb *data = priv; regmatch_t regmatch; if (line[0] != '+' && line[0] != '-') - return; + return 0; if (data->hit) /* * NEEDSWORK: we should have a way to terminate the * caller early. */ - return; + return 0; data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1, ®match, 0); + return 0; } static int diff_grep(mmfile_t *one, mmfile_t *two, diff --git a/range-diff.c b/range-diff.c index 116fb0735c..83c90f946e 100644 --- a/range-diff.c +++ b/range-diff.c @@ -274,9 +274,10 @@ static void find_exact_matches(struct string_list *a, struct string_list *b) hashmap_clear(&map); } -static void diffsize_consume(void *data, char *line, unsigned long len) +static int diffsize_consume(void *data, char *line, unsigned long len) { (*(int *)data)++; + return 0; } static void diffsize_hunk(void *data, long ob, long on, long nb, long nn, diff --git a/xdiff-interface.c b/xdiff-interface.c index 4d20069302..5d8c8c67dc 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -31,7 +31,7 @@ static int xdiff_out_hunk(void *priv_, return 0; } -static void consume_one(void *priv_, char *s, unsigned long size) +static int consume_one(void *priv_, char *s, unsigned long size) { struct xdiff_emit_state *priv = priv_; char *ep; @@ -43,6 +43,7 @@ static void consume_one(void *priv_, char *s, unsigned long size) size -= this_size; s += this_size; } + return 0; } static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf) diff --git a/xdiff-interface.h b/xdiff-interface.h index 93df26900c..0198f9632f 100644 --- a/xdiff-interface.h +++ b/xdiff-interface.h @@ -11,7 +11,7 @@ */ #define MAX_XDIFF_SIZE (1024UL * 1024 * 1023) -typedef void (*xdiff_emit_line_fn)(void *, char *, unsigned long); +typedef int (*xdiff_emit_line_fn)(void *, char *, unsigned long); typedef void (*xdiff_emit_hunk_fn)(void *data, long old_begin, long old_nr, long new_begin, long new_nr, -- cgit 1.2.3-korg From fa59e7beb2a61d9cd1312b212075edf12935fdc6 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:26 +0200 Subject: pickaxe -G: terminate early on matching lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Solve a long-standing item for "git log -Grx" of us e.g. finding "+ str" in the diff context and noting that we had a "hit", but xdiff diligently continuing to generate and spew the rest of the diff at us. This makes use of a new "early return" xdiff interface added by preceding commits. The TODO item (or, the NEEDSWORK comment) has been there since "git log -G" was implemented. See f506b8e8b5f (git log/diff: add -G that greps in the patch text, 2010-08-23). But now with the support added in the preceding changes to the xdiff-interface we can return early. Let's assert the behavior of that new early-return xdiff-interface by having a BUG() call here to die if it ever starts handing us needless work again. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 30 +++++++++++++++++++----------- xdiff-interface.h | 4 ++++ 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 27aa20be35..2147afef72 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -27,13 +27,12 @@ static int diffgrep_consume(void *priv, char *line, unsigned long len) if (line[0] != '+' && line[0] != '-') return 0; if (data->hit) - /* - * NEEDSWORK: we should have a way to terminate the - * caller early. - */ - return 0; - data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1, - ®match, 0); + BUG("Already matched in diffgrep_consume! Broken xdiff_emit_line_fn?"); + if (!regexec_buf(data->regexp, line + 1, len - 1, 1, + ®match, 0)) { + data->hit = 1; + return 1; + } return 0; } @@ -45,6 +44,7 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, struct diffgrep_cb ecbdata; xpparam_t xpp; xdemitconf_t xecfg; + int ret; if (!one) return !regexec_buf(regexp, two->ptr, two->size, @@ -63,10 +63,18 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, ecbdata.hit = 0; xecfg.ctxlen = o->context; xecfg.interhunkctxlen = o->interhunkcontext; - if (xdi_diff_outf(one, two, discard_hunk_line, diffgrep_consume, - &ecbdata, &xpp, &xecfg)) - return 0; - return ecbdata.hit; + + /* + * An xdiff error might be our "data->hit" from above. See the + * comment for xdiff_emit_line_fn in xdiff-interface.h + */ + ret = xdi_diff_outf(one, two, discard_hunk_line, diffgrep_consume, + &ecbdata, &xpp, &xecfg); + if (ecbdata.hit) + return 1; + if (ret) + return ret; + return 0; } static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws, diff --git a/xdiff-interface.h b/xdiff-interface.h index 7d1724abb6..3b6819586d 100644 --- a/xdiff-interface.h +++ b/xdiff-interface.h @@ -27,6 +27,10 @@ * doing so will currently make your early return indistinguishable * from an error internal to xdiff, xdiff itself will see that * non-zero return and translate it to -1. + * + * See "diff_grep" in diffcore-pickaxe.c for a trick to work around + * this, i.e. using the "consume_callback_data" to note the desired + * early return. */ typedef int (*xdiff_emit_line_fn)(void *, char *, unsigned long); typedef void (*xdiff_emit_hunk_fn)(void *data, -- cgit 1.2.3-korg From f97fe358576c81d995843644ab58de1ebeb7fee9 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:27 +0200 Subject: pickaxe -G: don't special-case create/delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of special-casing creations and deletions let's just generate a diff for them. This logic of not running a diff under -G if we don't have both sides dates back to the original implementation of -S in 52e9578985f ([PATCH] Introducing software archaeologist's tool "pickaxe"., 2005-05-21). In the case of -S we were not working with the xdiff interface and needed to do this, but when -G was implemented in f506b8e8b5f (git log/diff: add -G that greps in the patch text, 2010-08-23) this logic was diligently copied over. But as the performance test added earlier in this series shows, this does not make much of a difference. With: time GIT_TEST_LONG= GIT_PERF_REPEAT_COUNT=10 GIT_PERF_MAKE_OPTS='-j8 CFLAGS=-O3' ./run origin/next HEAD~ HEAD -- p4209-pickaxe.sh With the HEAD~ commit being the preceding "pickaxe -G: terminate early on matching lines" we get these results. Note that it's only the -G codepaths that are relevant to this change: Test origin/next HEAD~ HEAD ----------------------------------------------------------------------------------------------------------------------------------------- 4209.1: git log -S'int main' .. 0.35(0.32+0.03) 0.35(0.33+0.02) +0.0% 0.35(0.30+0.05) +0.0% 4209.2: git log -S'æ' .. 0.46(0.42+0.04) 0.46(0.41+0.05) +0.0% 0.46(0.42+0.04) +0.0% 4209.3: git log --pickaxe-regex -S'(int|void|null)' .. 0.65(0.62+0.02) 0.64(0.61+0.02) -1.5% 0.64(0.60+0.04) -1.5% 4209.4: git log --pickaxe-regex -S'if *\([^ ]+ & ' .. 0.52(0.45+0.06) 0.52(0.50+0.01) +0.0% 0.54(0.47+0.04) +3.8% 4209.5: git log --pickaxe-regex -S'[àáâãäåæñøùúûüýþ]' .. 0.39(0.34+0.05) 0.39(0.34+0.04) +0.0% 0.39(0.36+0.03) +0.0% 4209.6: git log -G'(int|void|null)' .. 0.60(0.55+0.04) 0.58(0.54+0.03) -3.3% 0.58(0.49+0.08) -3.3% 4209.7: git log -G'if *\([^ ]+ & ' .. 0.61(0.52+0.06) 0.59(0.53+0.05) -3.3% 0.59(0.54+0.05) -3.3% 4209.8: git log -G'[àáâãäåæñøùúûüýþ]' .. 0.61(0.51+0.07) 0.58(0.54+0.04) -4.9% 0.57(0.51+0.06) -6.6% 4209.9: git log -i -S'int main' .. 0.36(0.31+0.04) 0.36(0.34+0.02) +0.0% 0.35(0.32+0.03) -2.8% 4209.10: git log -i -S'æ' .. 0.36(0.33+0.03) 0.39(0.34+0.01) +8.3% 0.36(0.32+0.03) +0.0% 4209.11: git log -i --pickaxe-regex -S'(int|void|null)' .. 0.83(0.77+0.05) 0.82(0.77+0.05) -1.2% 0.80(0.75+0.04) -3.6% 4209.12: git log -i --pickaxe-regex -S'if *\([^ ]+ & ' .. 0.67(0.61+0.03) 0.64(0.61+0.03) -4.5% 0.63(0.61+0.02) -6.0% 4209.13: git log -i --pickaxe-regex -S'[àáâãäåæñøùúûüýþ]' .. 0.40(0.37+0.02) 0.40(0.37+0.03) +0.0% 0.40(0.36+0.04) +0.0% 4209.14: git log -i -G'(int|void|null)' .. 0.58(0.51+0.07) 0.59(0.52+0.06) +1.7% 0.58(0.52+0.05) +0.0% 4209.15: git log -i -G'if *\([^ ]+ & ' .. 0.60(0.54+0.05) 0.60(0.54+0.06) +0.0% 0.60(0.56+0.03) +0.0% 4209.16: git log -i -G'[àáâãäåæñøùúûüýþ]' .. 0.58(0.51+0.06) 0.57(0.52+0.05) -1.7% 0.60(0.48+0.09) +3.4% This small simplification really doesn't buy us much now, but I've got plans to both convert the pickaxe code to using a PCREv2 backend[1] and to implement additional pickaxe modes to do custom searches through the diff[2]. Always having the diff available under -G is going to help to simplify both of those changes. 1. https://lore.kernel.org/git/20210203032811.14979-22-avarab@gmail.com/ 2. https://lore.kernel.org/git/20190424152215.16251-3-avarab@gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 2147afef72..96183f4cfa 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -40,19 +40,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, struct diff_options *o, regex_t *regexp, kwset_t kws) { - regmatch_t regmatch; struct diffgrep_cb ecbdata; xpparam_t xpp; xdemitconf_t xecfg; int ret; - if (!one) - return !regexec_buf(regexp, two->ptr, two->size, - 1, ®match, 0); - if (!two) - return !regexec_buf(regexp, one->ptr, one->size, - 1, ®match, 0); - /* * We have both sides; need to run textual diff and see if * the pattern appears on added/deleted lines. @@ -172,9 +164,7 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o, mf1.size = fill_textconv(o->repo, textconv_one, p->one, &mf1.ptr); mf2.size = fill_textconv(o->repo, textconv_two, p->two, &mf2.ptr); - ret = fn(DIFF_FILE_VALID(p->one) ? &mf1 : NULL, - DIFF_FILE_VALID(p->two) ? &mf2 : NULL, - o, regexp, kws); + ret = fn(&mf1, &mf2, o, regexp, kws); if (textconv_one) free(mf1.ptr); -- cgit 1.2.3-korg From 5d93460024541909337d6b08a8bec10b71caaf73 Mon Sep 17 00:00:00 2001 From: Ævar Arnfjörð Bjarmason Date: Mon, 12 Apr 2021 19:15:29 +0200 Subject: xdiff-interface: replace discard_hunk_line() with a flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the dummy discard_hunk_line() function added in 3b40a090fd4 (diff: avoid generating unused hunk header lines, 2018-11-02) in favor of having a new XDL_EMIT_NO_HUNK_HDR flag, for use along with the two existing and similar XDL_EMIT_* flags. Unlike the recently amended xdiff_emit_line_fn interface which'll be called in a loop in xdl_emit_diff(), the hunk header is only emitted once. It makes more sense to pass this as a flag than provide a dummy callback because that function may be able to skip doing certain work if it knows the caller is doing nothing with the hunk header. It would be possible to do so in the case of -U0 now, but the benefit of doing so is so small that I haven't bothered. But this leaves the door open to that, and more importantly makes the API use more intuitive. The reason we're putting a flag in the gap between 1<<0 and 1<<2 is that the old 1<<1 flag was removed in 907681e940d (xdiff: drop XDL_EMIT_COMMON, 2016-02-23) without re-ordering the remaining flags. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- diff.c | 7 ++++--- diffcore-pickaxe.c | 3 ++- xdiff-interface.c | 6 ------ xdiff-interface.h | 8 -------- xdiff/xdiff.h | 1 + xdiff/xemit.c | 3 ++- 6 files changed, 9 insertions(+), 19 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diff.c b/diff.c index 7a03c581c7..fe3abac79f 100644 --- a/diff.c +++ b/diff.c @@ -3725,7 +3725,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b, xpp.anchors_nr = o->anchors_nr; xecfg.ctxlen = o->context; xecfg.interhunkctxlen = o->interhunkcontext; - if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line, + xecfg.flags = XDL_EMIT_NO_HUNK_HDR; + if (xdi_diff_outf(&mf1, &mf2, NULL, diffstat_consume, diffstat, &xpp, &xecfg)) die("unable to generate diffstat for %s", one->path); @@ -6233,8 +6234,8 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid xpp.flags = 0; xecfg.ctxlen = 3; - xecfg.flags = 0; - if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line, + xecfg.flags = XDL_EMIT_NO_HUNK_HDR; + if (xdi_diff_outf(&mf1, &mf2, NULL, patch_id_consume, &data, &xpp, &xecfg)) return error("unable to generate patch-id diff for %s", p->one->path); diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 96183f4cfa..c88e50c632 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -53,6 +53,7 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, memset(&xecfg, 0, sizeof(xecfg)); ecbdata.regexp = regexp; ecbdata.hit = 0; + xecfg.flags = XDL_EMIT_NO_HUNK_HDR; xecfg.ctxlen = o->context; xecfg.interhunkctxlen = o->interhunkcontext; @@ -60,7 +61,7 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, * An xdiff error might be our "data->hit" from above. See the * comment for xdiff_emit_line_fn in xdiff-interface.h */ - ret = xdi_diff_outf(one, two, discard_hunk_line, diffgrep_consume, + ret = xdi_diff_outf(one, two, NULL, diffgrep_consume, &ecbdata, &xpp, &xecfg); if (ecbdata.hit) return 1; diff --git a/xdiff-interface.c b/xdiff-interface.c index 50c0ef759d..95f13a93ff 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -126,12 +126,6 @@ int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t co return xdl_diff(&a, &b, xpp, xecfg, xecb); } -void discard_hunk_line(void *priv, - long ob, long on, long nb, long nn, - const char *func, long funclen) -{ -} - int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2, xdiff_emit_hunk_fn hunk_fn, xdiff_emit_line_fn line_fn, diff --git a/xdiff-interface.h b/xdiff-interface.h index 3b6819586d..4301a7eef2 100644 --- a/xdiff-interface.h +++ b/xdiff-interface.h @@ -53,14 +53,6 @@ void xdiff_clear_find_func(xdemitconf_t *xecfg); int git_xmerge_config(const char *var, const char *value, void *cb); extern int git_xmerge_style; -/* - * Can be used as a no-op hunk_fn for xdi_diff_outf(), since a NULL - * one just sends the hunk line to the line_fn callback). - */ -void discard_hunk_line(void *priv, - long ob, long on, long nb, long nn, - const char *func, long funclen); - /* * Compare the strings l1 with l2 which are of size s1 and s2 respectively. * Returns 1 if the strings are deemed equal, 0 otherwise. diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index 7a04605146..b29deca5de 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -50,6 +50,7 @@ extern "C" { /* xdemitconf_t.flags */ #define XDL_EMIT_FUNCNAMES (1 << 0) +#define XDL_EMIT_NO_HUNK_HDR (1 << 1) #define XDL_EMIT_FUNCCONTEXT (1 << 2) /* merge simplification levels */ diff --git a/xdiff/xemit.c b/xdiff/xemit.c index 9d7d6c5087..1cbf2b9829 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -278,7 +278,8 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, s1 - 1, funclineprev); funclineprev = s1 - 1; } - if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, + if (!(xecfg->flags & XDL_EMIT_NO_HUNK_HDR) && + xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, func_line.buf, func_line.len, ecb) < 0) return -1; -- cgit 1.2.3-korg