aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@datawire.io> 2025-03-10 16:57:45 +0100
committer Junio C Hamano <gitster@pobox.com> 2025-03-10 13:24:56 -0700
commit dda9bff3c55e45ee8dbfb49fd972c4b35fd4ba7e (patch)
tree e79b3aa3dac8976ce194aa44fe3befdb3f0e81c9
parent 87f2a9195e7bea721d9cffe98383b4065d233f66 (diff)
download git-dda9bff3c55e45ee8dbfb49fd972c4b35fd4ba7e.tar.gz
fast-export: do not modify memory from get_commit_buffer
fast-export's helper function find_encoding() takes a `const char *`, but modifies that memory despite the `const`. Ultimately, this memory came from get_commit_buffer(), and you're not supposed to modify the memory that you get from get_commit_buffer().

So, get rid of find_encoding() in favor of commit.h:find_commit_header(), which gives back a string length, rather than mutating the memory to insert a '\0' terminator.

Because find_commit_header() detects the "\n\n" string that separates the headers and the commit message, move the call to be above the `message = strstr(..., "\n\n")` call. This helps readability, and allows for the value of `encoding` to be used for a better value of "..." so that the same memory doesn't need to be checked twice. Introduce a `commit_buffer_cursor` variable to avoid writing an awkward `encoding ? encoding + encoding_len : committer_end` expression.

Signed-off-by: Luke Shumaker <lukeshu@datawire.io>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- builtin/fast-export.c 61
1 files changed, 33 insertions, 28 deletions
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 2de2adc30e..39d43c2a29 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -510,21 +510,6 @@ static void show_filemodify(struct diff_queue_struct *q,
}
}
-static const char *find_encoding(const char *begin, const char *end)
-{
- const char *needle = "\nencoding ";
- char *bol, *eol;
-
- bol = memmem(begin, end ? end - begin : strlen(begin),
- needle, strlen(needle));
- if (!bol)
- return NULL;
- bol += strlen(needle);
- eol = strchrnul(bol, '\n');
- *eol = '\0';
- return bol;
-}
-
static char *anonymize_ref_component(void)
{
static int counter;
@@ -630,9 +615,11 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
struct string_list *paths_of_changed_objects)
{
int saved_output_format = rev->diffopt.output_format;
- const char *commit_buffer;
+ const char *commit_buffer, *commit_buffer_cursor;
const char *author, *author_end, *committer, *committer_end;
- const char *encoding, *message;
+ const char *encoding = NULL;
+ size_t encoding_len;
+ const char *message;
char *reencoded = NULL;
struct commit_list *p;
const char *refname;
@@ -641,21 +628,35 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
parse_commit_or_die(commit);
- commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
- author = strstr(commit_buffer, "\nauthor ");
+ commit_buffer_cursor = commit_buffer = repo_get_commit_buffer(the_repository, commit, NULL);
+
+ author = strstr(commit_buffer_cursor, "\nauthor ");
if (!author)
die("could not find author in commit %s",
oid_to_hex(&commit->object.oid));
author++;
- author_end = strchrnul(author, '\n');
- committer = strstr(author_end, "\ncommitter ");
+ commit_buffer_cursor = author_end = strchrnul(author, '\n');
+
+ committer = strstr(commit_buffer_cursor, "\ncommitter ");
if (!committer)
die("could not find committer in commit %s",
oid_to_hex(&commit->object.oid));
committer++;
- committer_end = strchrnul(committer, '\n');
- message = strstr(committer_end, "\n\n");
- encoding = find_encoding(committer_end, message);
+ commit_buffer_cursor = committer_end = strchrnul(committer, '\n');
+
+ /*
+ * find_commit_header() gets a `+ 1` because
+ * commit_buffer_cursor points at the trailing "\n" at the end
+ * of the previous line, but find_commit_header() wants a
+ * pointer to the beginning of the next line.
+ */
+ if (*commit_buffer_cursor == '\n') {
+ encoding = find_commit_header(commit_buffer_cursor + 1, "encoding", &encoding_len);
+ if (encoding)
+ commit_buffer_cursor = encoding + encoding_len;
+ }
+
+ message = strstr(commit_buffer_cursor, "\n\n");
if (message)
message += 2;
@@ -694,16 +695,20 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
if (anonymize) {
reencoded = anonymize_commit_message();
} else if (encoding) {
+ char *buf;
switch (reencode_mode) {
case REENCODE_YES:
- reencoded = reencode_string(message, "UTF-8", encoding);
+ buf = xstrfmt("%.*s", (int)encoding_len, encoding);
+ reencoded = reencode_string(message, "UTF-8", buf);
+ free(buf);
break;
case REENCODE_NO:
break;
case REENCODE_ABORT:
- die("Encountered commit-specific encoding %s in commit "
+ die("Encountered commit-specific encoding %.*s in commit "
"%s; use --reencode=[yes|no] to handle it",
- encoding, oid_to_hex(&commit->object.oid));
+ (int)encoding_len, encoding,
+ oid_to_hex(&commit->object.oid));
}
}
if (!commit->parents)
@@ -715,7 +720,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
(int)(author_end - author), author,
(int)(committer_end - committer), committer);
if (!reencoded && encoding)
- printf("encoding %s\n", encoding);
+ printf("encoding %.*s\n", (int)encoding_len, encoding);
printf("data %u\n%s",
(unsigned)(reencoded
? strlen(reencoded) : message