From 8d4725e48ef29bd857e21e689411878b6eb4df92 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:47 -0800 Subject: whitespace: correct bit assignment comments A comment in diff.c claimed that bits up to 12th (counting from 0th) are whitespace rules, and 13th thru 15th are for new/old/context, but it turns out it was miscounting. Correct them, and clarify where the whitespace rule bits come from in the comment. Extend bit assignment comments to cover bits used for color-moved, which weren't described. Also update the way these bit constants are defined to use (1 << N) notation, instead of octal constants, as it tends to make it easier to notice a breakage like this. Sprinkle a few blank lines between logically distinct groups of CPP macro definitions to make them easier to read. Signed-off-by: Junio C Hamano --- diff.c | 7 +++++-- diff.h | 6 +++--- ws.h | 25 ++++++++++++++----------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/diff.c b/diff.c index a74e701806..74261b332a 100644 --- a/diff.c +++ b/diff.c @@ -801,16 +801,19 @@ enum diff_symbol { DIFF_SYMBOL_CONTEXT_MARKER, DIFF_SYMBOL_SEPARATOR }; + /* * Flags for content lines: - * 0..12 are whitespace rules - * 13-15 are WSEH_NEW | WSEH_OLD | WSEH_CONTEXT + * 0..11 are whitespace rules (see ws.h) + * 12..14 are WSEH_NEW | WSEH_CONTEXT | WSEH_OLD * 16 is marking if the line is blank at EOF + * 17..19 are used for color-moved. */ #define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16) #define DIFF_SYMBOL_MOVED_LINE (1<<17) #define DIFF_SYMBOL_MOVED_LINE_ALT (1<<18) #define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING (1<<19) + #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK) /* diff --git a/diff.h b/diff.h index 2fa256c3ef..cbd355cf50 100644 --- a/diff.h +++ b/diff.h @@ -331,9 +331,9 @@ struct diff_options { int ita_invisible_in_index; /* white-space error highlighting */ -#define WSEH_NEW (1<<12) -#define WSEH_CONTEXT (1<<13) -#define WSEH_OLD (1<<14) +#define WSEH_NEW (1<<12) +#define WSEH_CONTEXT (1<<13) +#define WSEH_OLD (1<<14) unsigned ws_error_highlight; const char *prefix; int prefix_length; diff --git a/ws.h b/ws.h index 5ba676c559..23708efb73 100644 --- a/ws.h +++ b/ws.h @@ -7,19 +7,22 @@ struct strbuf; /* * whitespace rules. * used by both diff and apply - * last two digits are tab width + * last two octal-digits are tab width (we support only up to 63). */ -#define WS_BLANK_AT_EOL 0100 -#define WS_SPACE_BEFORE_TAB 0200 -#define WS_INDENT_WITH_NON_TAB 0400 -#define WS_CR_AT_EOL 01000 -#define WS_BLANK_AT_EOF 02000 -#define WS_TAB_IN_INDENT 04000 -#define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF) +#define WS_BLANK_AT_EOL (1<<6) +#define WS_SPACE_BEFORE_TAB (1<<7) +#define WS_INDENT_WITH_NON_TAB (1<<8) +#define WS_CR_AT_EOL (1<<9) +#define WS_BLANK_AT_EOF (1<<10) +#define WS_TAB_IN_INDENT (1<<11) + +#define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF) #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8) -#define WS_TAB_WIDTH_MASK 077 -/* All WS_* -- when extended, adapt diff.c emit_symbol */ -#define WS_RULE_MASK 07777 +#define WS_TAB_WIDTH_MASK ((1<<6)-1) + +/* All WS_* -- when extended, adapt constants defined after diff.c:diff_symbol */ +#define WS_RULE_MASK ((1<<12)-1) + extern unsigned whitespace_rule_cfg; unsigned whitespace_rule(struct index_state *, const char *); unsigned parse_whitespace_rule(const char *); -- cgit 1.2.3-korg From f83d1afafb8b772397aa3854184c42f7810fa0df Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:48 -0800 Subject: diff: emit_line_ws_markup() if/else style fix Apply the simple rule: if you need {} in one arm of the if/else if/else... cascade, have {} in all of them. Signed-off-by: Junio C Hamano --- diff.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/diff.c b/diff.c index 74261b332a..9a24a0791c 100644 --- a/diff.c +++ b/diff.c @@ -1327,14 +1327,14 @@ static void emit_line_ws_markup(struct diff_options *o, ws = NULL; } - if (!ws && !set_sign) + if (!ws && !set_sign) { emit_line_0(o, set, NULL, 0, reset, sign, line, len); - else if (!ws) { + } else if (!ws) { emit_line_0(o, set_sign, set, !!set_sign, reset, sign, line, len); - } else if (blank_at_eof) + } else if (blank_at_eof) { /* Blank line at EOF - paint '+' as well */ emit_line_0(o, ws, NULL, 0, reset, sign, line, len); - else { + } else { /* Emit just the prefix, then the rest. */ emit_line_0(o, set_sign ? set_sign : set, NULL, !!set_sign, reset, sign, "", 0); -- cgit 1.2.3-korg From fc7abcd9d5460b381701ef43b7f6dafa73962950 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:49 -0800 Subject: diff: correct suppress_blank_empty hack The suppress-blank-empty feature abused the CONTEXT_INCOMPLETE symbol that was meant to be used only for "\ No newline at the end of file" code path. The intent of the feature was to turn a context line we receive from xdiff machinery (which always uses ' ' for context lines, even an empty one) and spit it out as a truly empty line. Perform such a conversion very locally at where a line from xdiff that begins with ' ' is handled for output; there are many checks before the control reaches such place that checks the first letter of the diff output line to see if it is a context line, and having to check for '\n' and treat it as a special case is error prone. In order to catch similar hacks in the future, make sure the code path that is meant for "\ No newline" case checks the first byte is indeed a backslash. Signed-off-by: Junio C Hamano --- diff.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/diff.c b/diff.c index 9a24a0791c..b9ef8550cc 100644 --- a/diff.c +++ b/diff.c @@ -1321,6 +1321,11 @@ static void emit_line_ws_markup(struct diff_options *o, const char *ws = NULL; int sign = o->output_indicators[sign_index]; + if (diff_suppress_blank_empty && + sign_index == OUTPUT_INDICATOR_CONTEXT && + len == 1 && line[0] == '\n') + sign = 0; + if (o->ws_error_highlight & ws_rule) { ws = diff_get_color_opt(o, DIFF_WHITESPACE); if (!*ws) @@ -1498,15 +1503,9 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, case DIFF_SYMBOL_WORDS: context = diff_get_color_opt(o, DIFF_CONTEXT); reset = diff_get_color_opt(o, DIFF_RESET); - /* - * Skip the prefix character, if any. With - * diff_suppress_blank_empty, there may be - * none. - */ - if (line[0] != '\n') { - line++; - len--; - } + + /* Skip the prefix character */ + line++; len--; emit_line(o, context, reset, line, len); break; case DIFF_SYMBOL_FILEPAIR_PLUS: @@ -2375,12 +2374,6 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; } - if (diff_suppress_blank_empty - && len == 2 && line[0] == ' ' && line[1] == '\n') { - line[0] = '\n'; - len = 1; - } - if (line[0] == '@') { if (ecbdata->diff_words) diff_words_flush(ecbdata); @@ -2431,12 +2424,14 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) ecbdata->lno_in_preimage++; emit_context_line(ecbdata, line + 1, len - 1); break; - default: + case '\\': /* incomplete line at the end */ ecbdata->lno_in_preimage++; emit_diff_symbol(o, DIFF_SYMBOL_CONTEXT_INCOMPLETE, line, len, 0); break; + default: + BUG("fn_out_consume: unknown line '%s'", line); } return 0; } -- cgit 1.2.3-korg From ced0561828271e8fc3fa2699754c5925969111b5 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:50 -0800 Subject: diff: keep track of the type of the last line seen The "\ No newline at the end of the file" can come after any of the "-" (deleted preimage line), " " (unchanged line), or "+" (added postimage line). In later steps in this series, we will start treating a change that makes a file to end in an incomplete line as a whitespace error, and we would need to know what the previous line was when we react to "\ No newline" in the diff output. If the previous line was a context (i.e., unchanged) line, the file lacked the final newline before the change, and the change did not touch that line and left it still incomplete, so we do not want to warn in such a case. Teach fn_out_consume() function to keep track of what the previous line was, and prepare an otherwise empty switch statement to let us react differently to "\ No newline" based on that. Note that there is an existing curiosity (read: likely to be a bug) in the code that increments line number in the preimage file every time it sees a line with "\ No newline" on it, regardless of what the previous line was. I left it as-is, because it does not affect the main theme of this series, and more importantly, I do not think it matters, as these numbers are used only to compare them with blank_at_eof_in_{pre,post}image to issue a warning when we see more empty line was added at the end, but by definition, after we see "\ No newline at the end of the file" for an added line, we will not see an added line for the file. An independent audit to ensure that this curious increment can be safely removed would make a good #leftoverbits clean-up (we may even find some code that decrements this counter or over-increments the other quantity this counter is compared with that compensates the effect of this curious increment that hides a bug, in which case we may also need to remove them). Signed-off-by: Junio C Hamano --- diff.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/diff.c b/diff.c index b9ef8550cc..ff8fc91f88 100644 --- a/diff.c +++ b/diff.c @@ -601,6 +601,7 @@ struct emit_callback { int blank_at_eof_in_postimage; int lno_in_preimage; int lno_in_postimage; + int last_line_kind; const char **label_path; struct diff_words_data *diff_words; struct diff_options *opt; @@ -2426,6 +2427,15 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) break; case '\\': /* incomplete line at the end */ + switch (ecbdata->last_line_kind) { + case '+': + case '-': + case ' ': + break; + default: + BUG("fn_out_consume: '\\No newline' after unknown line (%c)", + ecbdata->last_line_kind); + } ecbdata->lno_in_preimage++; emit_diff_symbol(o, DIFF_SYMBOL_CONTEXT_INCOMPLETE, line, len, 0); @@ -2433,6 +2443,7 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) default: BUG("fn_out_consume: unknown line '%s'", line); } + ecbdata->last_line_kind = line[0]; return 0; } -- cgit 1.2.3-korg From 29228cbdc5f8a80b1f61c7cc209ba8e3714cc38e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:51 -0800 Subject: diff: refactor output of incomplete line Create a helper function that reacts to "\ No newline at the end of file" in preparation for unifying the incomplete line handling in the code path that handles xdiff output and the code path that bypasses xdiff and produces a complete-rewrite patch. Currently the output from the DIFF_SYMBOL_CONTEXT_INCOMPLETE case still (ab)uses the same code as what is used for context lines, but that would change in a later step where we introduce support to treat an incomplete line as a whitespace error. Signed-off-by: Junio C Hamano --- diff.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/diff.c b/diff.c index ff8fc91f88..7ee8620429 100644 --- a/diff.c +++ b/diff.c @@ -1379,6 +1379,10 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, emit_line(o, "", "", line, len); break; case DIFF_SYMBOL_CONTEXT_INCOMPLETE: + set = diff_get_color_opt(o, DIFF_CONTEXT); + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line(o, set, reset, line, len); + break; case DIFF_SYMBOL_CONTEXT_MARKER: context = diff_get_color_opt(o, DIFF_CONTEXT); reset = diff_get_color_opt(o, DIFF_RESET); @@ -1668,6 +1672,13 @@ static void emit_context_line(struct emit_callback *ecbdata, emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_CONTEXT, line, len, flags); } +static void emit_incomplete_line_marker(struct emit_callback *ecbdata, + const char *line, int len) +{ + emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_CONTEXT_INCOMPLETE, + line, len, 0); +} + static void emit_hunk_header(struct emit_callback *ecbdata, const char *line, int len) { @@ -2437,8 +2448,7 @@ static int fn_out_consume(void *priv, char *line, unsigned long len) ecbdata->last_line_kind); } ecbdata->lno_in_preimage++; - emit_diff_symbol(o, DIFF_SYMBOL_CONTEXT_INCOMPLETE, - line, len, 0); + emit_incomplete_line_marker(ecbdata, line, len); break; default: BUG("fn_out_consume: unknown line '%s'", line); -- cgit 1.2.3-korg From 35925f1832ac00a4926623dff061a3b52123470b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:52 -0800 Subject: diff: call emit_callback ecbdata everywhere Everybody else, except for emit_rewrite_lines(), calls the emit_callback data ecbdata. Make sure we call the same thing by the same name for consistency. Signed-off-by: Junio C Hamano --- diff.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/diff.c b/diff.c index 7ee8620429..44b86544b7 100644 --- a/diff.c +++ b/diff.c @@ -1780,7 +1780,7 @@ static void add_line_count(struct strbuf *out, int count) } } -static void emit_rewrite_lines(struct emit_callback *ecb, +static void emit_rewrite_lines(struct emit_callback *ecbdata, int prefix, const char *data, int size) { const char *endp = NULL; @@ -1791,17 +1791,17 @@ static void emit_rewrite_lines(struct emit_callback *ecb, endp = memchr(data, '\n', size); len = endp ? (endp - data + 1) : size; if (prefix != '+') { - ecb->lno_in_preimage++; - emit_del_line(ecb, data, len); + ecbdata->lno_in_preimage++; + emit_del_line(ecbdata, data, len); } else { - ecb->lno_in_postimage++; - emit_add_line(ecb, data, len); + ecbdata->lno_in_postimage++; + emit_add_line(ecbdata, data, len); } size -= len; data += len; } if (!endp) - emit_diff_symbol(ecb->opt, DIFF_SYMBOL_NO_LF_EOF, NULL, 0, 0); + emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_NO_LF_EOF, NULL, 0, 0); } static void emit_rewrite_diff(const char *name_a, -- cgit 1.2.3-korg From 8d8e3c61874bbcf50d64aff34fb6c533458adf5e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:53 -0800 Subject: diff: update the way rewrite diff handles incomplete lines The diff_symbol based output framework uses one DIFF_SYMBOL_* enum value per the kind of output lines of "git diff", which corresponds to one output line from the xdiff machinery used internally. Most notably, DIFF_SYMBOL_PLUS and DIFF_SYMBOL_MINUS that correspond to "+" and "-" lines are designed to always take a complete line, even if the output from xdiff machinery may produce "\ No newline at the end of file" immediately after them. But this is not true in the rewrite-diff codepath, which completely bypasses the xdiff machinery. Since the code path feeds the bytes directly from the payload to the output routines, the output layer has to deal with an incomplete line with DIFF_SYMBOL_PLUS and DIFF_SYMBOL_MINUS, which never would see an incomplete line in the normal code paths. This lack of final newline is compensated by an ugly hack for a fabricated DIFF_SYMBOL_NO_LF_EOF token to inject an extra newline to the output to simulate output coming from the xdiff machinery. Revamp the way the complete-rewrite code path feeds the lines to the output layer by treating the last line of the pre/post image when it is an incomplete line specially. This lets us remove the DIFF_SYMBOL_NO_LF_EOF hack and use the usual DIFF_SYMBOL_CONTEXT_INCOMPLETE code path, which will later learn how to handle whitespace errors. Signed-off-by: Junio C Hamano --- diff.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/diff.c b/diff.c index 44b86544b7..5c606409bb 100644 --- a/diff.c +++ b/diff.c @@ -797,7 +797,6 @@ enum diff_symbol { DIFF_SYMBOL_CONTEXT_INCOMPLETE, DIFF_SYMBOL_PLUS, DIFF_SYMBOL_MINUS, - DIFF_SYMBOL_NO_LF_EOF, DIFF_SYMBOL_CONTEXT_FRAGINFO, DIFF_SYMBOL_CONTEXT_MARKER, DIFF_SYMBOL_SEPARATOR @@ -1352,7 +1351,6 @@ static void emit_line_ws_markup(struct diff_options *o, static void emit_diff_symbol_from_struct(struct diff_options *o, struct emitted_diff_symbol *eds) { - static const char *nneof = " No newline at end of file\n"; const char *context, *reset, *set, *set_sign, *meta, *fraginfo; enum diff_symbol s = eds->s; @@ -1361,13 +1359,6 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, unsigned flags = eds->flags; switch (s) { - case DIFF_SYMBOL_NO_LF_EOF: - context = diff_get_color_opt(o, DIFF_CONTEXT); - reset = diff_get_color_opt(o, DIFF_RESET); - putc('\n', o->file); - emit_line_0(o, context, NULL, 0, reset, '\\', - nneof, strlen(nneof)); - break; case DIFF_SYMBOL_SUBMODULE_HEADER: case DIFF_SYMBOL_SUBMODULE_ERROR: case DIFF_SYMBOL_SUBMODULE_PIPETHROUGH: @@ -1786,22 +1777,38 @@ static void emit_rewrite_lines(struct emit_callback *ecbdata, const char *endp = NULL; while (0 < size) { - int len; + int len, plen; + char *pdata = NULL; endp = memchr(data, '\n', size); - len = endp ? (endp - data + 1) : size; + + if (endp) { + len = endp - data + 1; + plen = len; + } else { + len = size; + plen = len + 1; + pdata = xmalloc(plen + 2); + memcpy(pdata, data, len); + pdata[len] = '\n'; + pdata[len + 1] = '\0'; + } if (prefix != '+') { ecbdata->lno_in_preimage++; - emit_del_line(ecbdata, data, len); + emit_del_line(ecbdata, pdata ? pdata : data, plen); } else { ecbdata->lno_in_postimage++; - emit_add_line(ecbdata, data, len); + emit_add_line(ecbdata, pdata ? pdata : data, plen); } + free(pdata); size -= len; data += len; } - if (!endp) - emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_NO_LF_EOF, NULL, 0, 0); + if (!endp) { + static const char nneof[] = "\\ No newline at end of file\n"; + ecbdata->last_line_kind = prefix; + emit_incomplete_line_marker(ecbdata, nneof, sizeof(nneof) - 1); + } } static void emit_rewrite_diff(const char *name_a, -- cgit 1.2.3-korg From 3a4eb5ad2e9166255d5921196470710523f24ec4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:54 -0800 Subject: apply: revamp the parsing of incomplete lines A patch file represents the incomplete line at the end of the file with two lines, one that is the usual "context" with " " as the first letter, "added" with "+" as the first letter, or "removed" with "-" as the first letter that shows the content of the line, plus an extra "\ No newline at the end of file" line that comes immediately after it. Ever since the apply machinery was written, the "git apply" machinery parses "\ No newline at the end of file" line independently, without even knowing what line the incomplete-ness applies to, simply because it does not even remember what the previous line was. This poses a problem if we want to check and warn on an incomplete line. Revamp the code that parses a fragment, to actually drop the '\n' at the end of the incoming patch file that terminates a line, so that check_whitespace() calls made from the code path actually sees an incomplete as incomplete. Note that the result of this parsing is not directly used by the code path that applies the patch. apply_one_fragment() function already checks if each of the patch text it handles is followed by a line that begins with a backslash to drop the newline at the end of the current line it is looking at. In a sense, this patch harmonizes the behaviour of the parsing side to what is already done in the application side. Signed-off-by: Junio C Hamano --- apply.c | 70 +++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/apply.c b/apply.c index a2ceb3fb40..2b0f8bdab5 100644 --- a/apply.c +++ b/apply.c @@ -1670,6 +1670,35 @@ static void check_old_for_crlf(struct patch *patch, const char *line, int len) } +/* + * Just saw a single line in a fragment. If it is a part of this hunk + * that is a context " ", an added "+", or a removed "-" line, it may + * be followed by "\\ No newline..." to signal that the last "\n" on + * this line needs to be dropped. Depending on locale settings when + * the patch was produced we don't know what this line would exactly + * say. The only thing we do know is that it begins with "\ ". + * Checking for 12 is just for sanity check; "\ No newline..." would + * be at least that long in any l10n. + * + * Return 0 if the line we saw is not followed by "\ No newline...", + * or length of that line. The caller will use it to skip over the + * "\ No newline..." line. + */ +static int adjust_incomplete(const char *line, int len, + unsigned long size) +{ + int nextlen; + + if (*line != '\n' && *line != ' ' && *line != '+' && *line != '-') + return 0; + if (size - len < 12 || memcmp(line + len, "\\ ", 2)) + return 0; + nextlen = linelen(line + len, size - len); + if (nextlen < 12) + return 0; + return nextlen; +} + /* * Parse a unified diff. Note that this really needs to parse each * fragment separately, since the only way to know the difference @@ -1684,6 +1713,7 @@ static int parse_fragment(struct apply_state *state, { int added, deleted; int len = linelen(line, size), offset; + int skip_len = 0; unsigned long oldlines, newlines; unsigned long leading, trailing; @@ -1710,6 +1740,22 @@ static int parse_fragment(struct apply_state *state, len = linelen(line, size); if (!len || line[len-1] != '\n') return -1; + + /* + * For an incomplete line, skip_len counts the bytes + * on "\\ No newline..." marker line that comes next + * to the current line. + * + * Reduce "len" to drop the newline at the end of + * line[], but add one to "skip_len", which will be + * added back to "len" for the next iteration, to + * compensate. + */ + skip_len = adjust_incomplete(line, len, size); + if (skip_len) { + len--; + skip_len++; + } switch (*line) { default: return -1; @@ -1745,20 +1791,10 @@ static int parse_fragment(struct apply_state *state, newlines--; trailing = 0; break; - - /* - * We allow "\ No newline at end of file". Depending - * on locale settings when the patch was produced we - * don't know what this line looks like. The only - * thing we do know is that it begins with "\ ". - * Checking for 12 is just for sanity check -- any - * l10n of "\ No newline..." is at least that long. - */ - case '\\': - if (len < 12 || memcmp(line, "\\ ", 2)) - return -1; - break; } + + /* eat the "\\ No newline..." as well, if exists */ + len += skip_len; } if (oldlines || newlines) return -1; @@ -1768,14 +1804,6 @@ static int parse_fragment(struct apply_state *state, fragment->leading = leading; fragment->trailing = trailing; - /* - * If a fragment ends with an incomplete line, we failed to include - * it in the above loop because we hit oldlines == newlines == 0 - * before seeing it. - */ - if (12 < size && !memcmp(line, "\\ ", 2)) - offset += linelen(line, size); - patch->lines_added += added; patch->lines_deleted += deleted; -- cgit 1.2.3-korg From a675104c399d242dd3ff5a0823fcd770563cf60f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:55 -0800 Subject: whitespace: allocate a few more bits and define WS_INCOMPLETE_LINE Reserve a few more bits in the diff flags word to be used for future whitespace rules. Add WS_INCOMPLETE_LINE without implementing the behaviour (yet). Signed-off-by: Junio C Hamano --- Documentation/config/core.adoc | 2 ++ diff.c | 16 ++++++++-------- diff.h | 6 +++--- ws.c | 6 ++++++ ws.h | 3 ++- 5 files changed, 21 insertions(+), 12 deletions(-) diff --git a/Documentation/config/core.adoc b/Documentation/config/core.adoc index e2de270c86..682fb595fb 100644 --- a/Documentation/config/core.adoc +++ b/Documentation/config/core.adoc @@ -626,6 +626,8 @@ core.whitespace:: part of the line terminator, i.e. with it, `trailing-space` does not trigger if the character before such a carriage-return is not a whitespace (not enabled by default). +* `incomplete-line` treats the last line of a file that is missing the + newline at the end as an error (not enabled by default). * `tabwidth=` tells how many character positions a tab occupies; this is relevant for `indent-with-non-tab` and when Git fixes `tab-in-indent` errors. The default tab width is 8. Allowed values are 1 to 63. diff --git a/diff.c b/diff.c index 5c606409bb..1b27b15f84 100644 --- a/diff.c +++ b/diff.c @@ -804,15 +804,15 @@ enum diff_symbol { /* * Flags for content lines: - * 0..11 are whitespace rules (see ws.h) - * 12..14 are WSEH_NEW | WSEH_CONTEXT | WSEH_OLD - * 16 is marking if the line is blank at EOF - * 17..19 are used for color-moved. + * 0..15 are whitespace rules (see ws.h) + * 16..18 are WSEH_NEW | WSEH_CONTEXT | WSEH_OLD + * 19 is marking if the line is blank at EOF + * 20..22 are used for color-moved. */ -#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16) -#define DIFF_SYMBOL_MOVED_LINE (1<<17) -#define DIFF_SYMBOL_MOVED_LINE_ALT (1<<18) -#define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING (1<<19) +#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<19) +#define DIFF_SYMBOL_MOVED_LINE (1<<20) +#define DIFF_SYMBOL_MOVED_LINE_ALT (1<<21) +#define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING (1<<22) #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK) diff --git a/diff.h b/diff.h index cbd355cf50..422658407d 100644 --- a/diff.h +++ b/diff.h @@ -331,9 +331,9 @@ struct diff_options { int ita_invisible_in_index; /* white-space error highlighting */ -#define WSEH_NEW (1<<12) -#define WSEH_CONTEXT (1<<13) -#define WSEH_OLD (1<<14) +#define WSEH_NEW (1<<16) +#define WSEH_CONTEXT (1<<17) +#define WSEH_OLD (1<<18) unsigned ws_error_highlight; const char *prefix; int prefix_length; diff --git a/ws.c b/ws.c index 70acee3337..34a7b4fad2 100644 --- a/ws.c +++ b/ws.c @@ -26,6 +26,7 @@ static struct whitespace_rule { { "blank-at-eol", WS_BLANK_AT_EOL, 0 }, { "blank-at-eof", WS_BLANK_AT_EOF, 0 }, { "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 }, + { "incomplete-line", WS_INCOMPLETE_LINE, 0, 0 }, }; unsigned parse_whitespace_rule(const char *string) @@ -139,6 +140,11 @@ char *whitespace_error_string(unsigned ws) strbuf_addstr(&err, ", "); strbuf_addstr(&err, "tab in indent"); } + if (ws & WS_INCOMPLETE_LINE) { + if (err.len) + strbuf_addstr(&err, ", "); + strbuf_addstr(&err, "no newline at the end of file"); + } return strbuf_detach(&err, NULL); } diff --git a/ws.h b/ws.h index 23708efb73..06d5cb73f8 100644 --- a/ws.h +++ b/ws.h @@ -15,13 +15,14 @@ struct strbuf; #define WS_CR_AT_EOL (1<<9) #define WS_BLANK_AT_EOF (1<<10) #define WS_TAB_IN_INDENT (1<<11) +#define WS_INCOMPLETE_LINE (1<<12) #define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF) #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8) #define WS_TAB_WIDTH_MASK ((1<<6)-1) /* All WS_* -- when extended, adapt constants defined after diff.c:diff_symbol */ -#define WS_RULE_MASK ((1<<12)-1) +#define WS_RULE_MASK ((1<<16)-1) extern unsigned whitespace_rule_cfg; unsigned whitespace_rule(struct index_state *, const char *); -- cgit 1.2.3-korg From 9fb15a8e1430b77e2cc771e425ce4f0954ce4777 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:56 -0800 Subject: apply: check and fix incomplete lines The final line of a file that lacks the terminating newline at its end is called an incomplete line. In general they are frowned upon for many reasons (imagine concatenating two files with "cat A B" and what happens when A ends in an incomplete line, for example), and text-oriented tools often mishandle such a line. Implement checks in "git apply" for incomplete lines, which is off by default for backward compatibility's sake, so that "git apply --whitespace={fix,warn,error}" can notice, warn against, and fix them. As one of the new test shows, if you modify contents on an incomplete line in the original and leave the resulting line incomplete, it is still considered a whitespace error, the reasoning being that "you'd better fix it while at it if you are making a change on an incomplete line anyway", which may controversial. Signed-off-by: Junio C Hamano --- apply.c | 13 +++- t/t4124-apply-ws-rule.sh | 187 +++++++++++++++++++++++++++++++++++++++++++++++ ws.c | 14 ++++ 3 files changed, 213 insertions(+), 1 deletion(-) diff --git a/apply.c b/apply.c index 2b0f8bdab5..c9fb45247d 100644 --- a/apply.c +++ b/apply.c @@ -1640,6 +1640,14 @@ static void record_ws_error(struct apply_state *state, state->squelch_whitespace_errors < state->whitespace_error) return; + /* + * line[len] for an incomplete line points at the "\n" at the end + * of patch input line, so "%.*s" would drop the last letter on line; + * compensate for it. + */ + if (result & WS_INCOMPLETE_LINE) + len++; + err = whitespace_error_string(result); if (state->apply_verbosity > verbosity_silent) fprintf(stderr, "%s:%d: %s.\n%.*s\n", @@ -1794,7 +1802,10 @@ static int parse_fragment(struct apply_state *state, } /* eat the "\\ No newline..." as well, if exists */ - len += skip_len; + if (skip_len) { + len += skip_len; + state->linenr++; + } } if (oldlines || newlines) return -1; diff --git a/t/t4124-apply-ws-rule.sh b/t/t4124-apply-ws-rule.sh index 485c7d2d12..115a0f8579 100755 --- a/t/t4124-apply-ws-rule.sh +++ b/t/t4124-apply-ws-rule.sh @@ -556,4 +556,191 @@ test_expect_success 'whitespace check skipped for excluded paths' ' git apply --include=used --stat --whitespace=error sample-i && + (test_write_lines 1 2 3 0 5 && printf 6) >sample2-i && + cat sample-i >target && + git add target && + cat sample2-i >target && + git diff-files -p target >patch && + + cat sample-i >target && + git apply --whitespace=error target && + git apply --whitespace=error --check error && + test_cmp sample-i target && + test_must_be_empty error && + + cat sample2-i >target && + git apply --whitespace=error -R target && + git apply -R --whitespace=error --check error && + test_cmp sample2-i target && + test_must_be_empty error +' + +test_expect_success 'last line made incomplete (error)' ' + test_write_lines 1 2 3 4 5 6 >sample && + (test_write_lines 1 2 3 4 5 && printf 6) >sample-i && + cat sample >target && + git add target && + cat sample-i >target && + git diff-files -p target >patch && + + cat sample >target && + test_must_fail git apply --whitespace=error error && + test_grep "no newline" error && + + cat sample >target && + test_must_fail git apply --whitespace=error --check actual && + test_cmp sample target && + cat >expect <<-\EOF && + :10: no newline at the end of file. + 6 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample-i >target && + git apply --whitespace=error -R target && + git apply --whitespace=error --check -R error && + test_cmp sample-i target && + test_must_be_empty error && + + cat sample >target && + git apply --whitespace=fix sample-i && + test_write_lines 1 2 3 4 5 6 >sample && + cat sample-i >target && + git add target && + cat sample >target && + git diff-files -p target >patch && + + cat sample-i >target && + git apply --whitespace=error target && + git apply --whitespace=error --check error && + test_cmp sample-i target && + test_must_be_empty error && + + cat sample >target && + test_must_fail git apply --whitespace=error -R error && + test_grep "no newline" error && + + cat sample >target && + test_must_fail git apply --whitespace=error --check -R actual && + test_cmp sample target && + cat >expect <<-\EOF && + :9: no newline at the end of file. + 6 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample >target && + git apply --whitespace=fix -R sample-i && + test_write_lines 1 2 3 4 5 7 >sample3 && + cat sample-i >target && + git add target && + cat sample3 >target && + git diff-files -p target >patch && + + cat sample-i >target && + git apply --whitespace=error target && + git apply --whitespace=error --check error && + test_cmp sample-i target && + test_must_be_empty error && + + cat sample3 >target && + test_must_fail git apply --whitespace=error -R error && + test_grep "no newline" error && + + cat sample3 >target && + test_must_fail git apply --whitespace=error -R --check actual && + test_cmp sample3 target && + cat >expect <<-\EOF && + :9: no newline at the end of file. + 6 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample3 >target && + git apply --whitespace=fix -R sample-i && + (test_write_lines 1 2 3 4 5 && printf 7) >sample3-i && + test_write_lines 1 2 3 4 5 6 >sample && + test_write_lines 1 2 3 4 5 7 >sample3 && + cat sample-i >target && + git add target && + cat sample3-i >target && + git diff-files -p target >patch && + + cat sample-i >target && + test_must_fail git apply --whitespace=error error && + test_grep "no newline" error && + + cat sample-i >target && + test_must_fail git apply --whitespace=error --check actual && + test_cmp sample-i target && + cat >expect <<-\EOF && + :11: no newline at the end of file. + 7 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample3-i >target && + test_must_fail git apply --whitespace=error -R error && + test_grep "no newline" error && + + cat sample3-i >target && + test_must_fail git apply --whitespace=error --check -R actual && + test_cmp sample3-i target && + cat >expect <<-\EOF && + :9: no newline at the end of file. + 6 + error: 1 line adds whitespace errors. + EOF + test_cmp expect actual && + + cat sample-i >target && + git apply --whitespace=fix target && + git apply --whitespace=fix -R Date: Wed, 12 Nov 2025 14:02:57 -0800 Subject: diff: highlight and error out on incomplete lines Teach "git diff" to highlight "\ No newline at end of file" message as a whitespace error when incomplete-line whitespace error class is in effect. Thanks to the previous refactoring of complete rewrite code path, we can do this at a single place. Unlike whitespace errors in the payload where we need to annotate in line, possibly using colors, the line that has whitespace problems, we have a dedicated line already that can serve as the error message, so paint it as a whitespace error message. Also teach "git diff --check" to notice incomplete lines as whitespace errors and report when incomplete-line whitespace error class is in effect. Signed-off-by: Junio C Hamano --- diff.c | 29 ++++++++++++++++++-- t/t4015-diff-whitespace.sh | 67 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 6 deletions(-) diff --git a/diff.c b/diff.c index 1b27b15f84..7b7cd50dc2 100644 --- a/diff.c +++ b/diff.c @@ -1370,7 +1370,11 @@ static void emit_diff_symbol_from_struct(struct diff_options *o, emit_line(o, "", "", line, len); break; case DIFF_SYMBOL_CONTEXT_INCOMPLETE: - set = diff_get_color_opt(o, DIFF_CONTEXT); + if ((flags & WS_INCOMPLETE_LINE) && + (flags & o->ws_error_highlight)) + set = diff_get_color_opt(o, DIFF_WHITESPACE); + else + set = diff_get_color_opt(o, DIFF_CONTEXT); reset = diff_get_color_opt(o, DIFF_RESET); emit_line(o, set, reset, line, len); break; @@ -1666,8 +1670,14 @@ static void emit_context_line(struct emit_callback *ecbdata, static void emit_incomplete_line_marker(struct emit_callback *ecbdata, const char *line, int len) { + int last_line_kind = ecbdata->last_line_kind; + unsigned flags = (last_line_kind == '+' + ? WSEH_NEW + : last_line_kind == '-' + ? WSEH_OLD + : WSEH_CONTEXT) | ecbdata->ws_rule; emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_CONTEXT_INCOMPLETE, - line, len, 0); + line, len, flags); } static void emit_hunk_header(struct emit_callback *ecbdata, @@ -3254,6 +3264,7 @@ struct checkdiff_t { struct diff_options *o; unsigned ws_rule; unsigned status; + int last_line_kind; }; static int is_conflict_marker(const char *line, int marker_size, unsigned long len) @@ -3292,6 +3303,7 @@ static void checkdiff_consume_hunk(void *priv, static int checkdiff_consume(void *priv, char *line, unsigned long len) { struct checkdiff_t *data = priv; + int last_line_kind; int marker_size = data->conflict_marker_size; const char *ws = diff_get_color(data->o->use_color, DIFF_WHITESPACE); const char *reset = diff_get_color(data->o->use_color, DIFF_RESET); @@ -3302,6 +3314,8 @@ static int checkdiff_consume(void *priv, char *line, unsigned long len) assert(data->o); line_prefix = diff_line_prefix(data->o); + last_line_kind = data->last_line_kind; + data->last_line_kind = line[0]; if (line[0] == '+') { unsigned bad; data->lineno++; @@ -3324,6 +3338,17 @@ static int checkdiff_consume(void *priv, char *line, unsigned long len) data->o->file, set, reset, ws); } else if (line[0] == ' ') { data->lineno++; + } else if (line[0] == '\\') { + /* no newline at the end of the line */ + if ((data->ws_rule & WS_INCOMPLETE_LINE) && + (last_line_kind == '+')) { + unsigned bad = WS_INCOMPLETE_LINE; + data->status |= bad; + err = whitespace_error_string(bad); + fprintf(data->o->file, "%s%s:%d: %s.\n", + line_prefix, data->filename, data->lineno, err); + free(err); + } } return 0; } diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 9de7f73f42..3c8eb02e4f 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -43,6 +43,53 @@ do ' done +test_expect_success "incomplete line in both pre- and post-image context" ' + (echo foo && echo baz | tr -d "\012") >x && + git add x && + (echo bar && echo baz | tr -d "\012") >x && + git diff x && + git -c core.whitespace=incomplete diff --check x && + git diff -R x && + git -c core.whitespace=incomplete diff -R --check x +' + +test_expect_success "incomplete lines on both pre- and post-image" ' + # The interpretation taken here is "since you are touching + # the line anyway, you would better fix the incomplete line + # while you are at it." but this is debatable. + echo foo | tr -d "\012" >x && + git add x && + echo bar | tr -d "\012" >x && + git diff x && + test_must_fail git -c core.whitespace=incomplete diff --check x >error && + test_grep "no newline at the end of file" error && + git diff -R x && + test_must_fail git -c core.whitespace=incomplete diff -R --check x >error && + test_grep "no newline at the end of file" error +' + +test_expect_success "fix incomplete line in pre-image" ' + echo foo | tr -d "\012" >x && + git add x && + echo bar >x && + git diff x && + git -c core.whitespace=incomplete diff --check x && + git diff -R x && + test_must_fail git -c core.whitespace=incomplete diff -R --check x >error && + test_grep "no newline at the end of file" error +' + +test_expect_success "new incomplete line in post-image" ' + echo foo >x && + git add x && + echo bar | tr -d "\012" >x && + git diff x && + test_must_fail git -c core.whitespace=incomplete diff --check x >error && + test_grep "no newline at the end of file" error && + git diff -R x && + git -c core.whitespace=incomplete diff -R --check x +' + test_expect_success "Ray Lehtiniemi's example" ' cat <<-\EOF >x && do { @@ -1040,7 +1087,8 @@ test_expect_success 'ws-error-highlight test setup' ' { echo "0. blank-at-eol " && echo "1. still-blank-at-eol " && - echo "2. and a new line " + echo "2. and a new line " && + printf "3. and more" } >x && new_hash_x=$(git hash-object x) && after=$(git rev-parse --short "$new_hash_x") && @@ -1050,11 +1098,13 @@ test_expect_success 'ws-error-highlight test setup' ' index $before..$after 100644 --- a/x +++ b/x - @@ -1,2 +1,3 @@ + @@ -1,2 +1,4 @@ 0. blank-at-eol -1. blank-at-eol +1. still-blank-at-eol +2. and a new line + +3. and more + \ No newline at end of file EOF cat >expect.all <<-EOF && @@ -1062,11 +1112,13 @@ test_expect_success 'ws-error-highlight test setup' ' index $before..$after 100644 --- a/x +++ b/x - @@ -1,2 +1,3 @@ + @@ -1,2 +1,4 @@ 0. blank-at-eol -1. blank-at-eol +1. still-blank-at-eol +2. and a new line + +3. and more + \ No newline at end of file EOF cat >expect.none <<-EOF @@ -1074,16 +1126,19 @@ test_expect_success 'ws-error-highlight test setup' ' index $before..$after 100644 --- a/x +++ b/x - @@ -1,2 +1,3 @@ + @@ -1,2 +1,4 @@ 0. blank-at-eol -1. blank-at-eol +1. still-blank-at-eol +2. and a new line + +3. and more + \ No newline at end of file EOF ' test_expect_success 'test --ws-error-highlight option' ' + git config core.whitespace blank-at-eol,incomplete-line && git diff --color --ws-error-highlight=default,old >current.raw && test_decode_color current && @@ -1100,6 +1155,7 @@ test_expect_success 'test --ws-error-highlight option' ' ' test_expect_success 'test diff.wsErrorHighlight config' ' + git config core.whitespace blank-at-eol,incomplete-line && git -c diff.wsErrorHighlight=default,old diff --color >current.raw && test_decode_color current && @@ -1116,6 +1172,7 @@ test_expect_success 'test diff.wsErrorHighlight config' ' ' test_expect_success 'option overrides diff.wsErrorHighlight' ' + git config core.whitespace blank-at-eol,incomplete-line && git -c diff.wsErrorHighlight=none \ diff --color --ws-error-highlight=default,old >current.raw && @@ -1135,6 +1192,8 @@ test_expect_success 'option overrides diff.wsErrorHighlight' ' ' test_expect_success 'detect moved code, complete file' ' + git config core.whitespace blank-at-eol && + git reset --hard && cat <<-\EOF >test.c && #include -- cgit 1.2.3-korg From 51358a1ede7f4b6b50e4e5a86558af5204691fe0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 12 Nov 2025 14:02:58 -0800 Subject: attr: enable incomplete-line whitespace error for this project Now "git diff --check" and "git apply --whitespace=warn/fix" learned incomplete line is a whitespace error, enable them for this project to prevent patches to add new incomplete lines to our source to both code and documentation files. Signed-off-by: Junio C Hamano --- .gitattributes | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitattributes b/.gitattributes index 32583149c2..a8e2950a73 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,13 +1,13 @@ * whitespace=!indent,trail,space -*.[ch] whitespace=indent,trail,space diff=cpp -*.sh whitespace=indent,trail,space text eol=lf +*.[ch] whitespace=indent,trail,space,incomplete diff=cpp +*.sh whitespace=indent,trail,space,incomplete text eol=lf *.perl text eol=lf diff=perl *.pl text eof=lf diff=perl *.pm text eol=lf diff=perl *.py text eol=lf diff=python *.bat text eol=crlf CODE_OF_CONDUCT.md -whitespace -/Documentation/**/*.adoc text eol=lf +/Documentation/**/*.adoc text eol=lf whitespace=trail,space,incomplete /command-list.txt text eol=lf /GIT-VERSION-GEN text eol=lf /mergetools/* text eol=lf -- cgit 1.2.3-korg