diff options
author | Junio C Hamano <gitster@pobox.com> | 2013-02-04 10:25:18 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2013-02-04 10:25:18 -0800 |
commit | d5365b43274779246665416caf1a51af5a29f776 (patch) | |
tree | 4964cb7640ffec8458d3a56815d0d4c21369f1f6 | |
parent | 27d46a70724ef5e087863c3db35eb7ca9c084c2c (diff) | |
parent | be5c9fb9049ed470e7005f159bb923a5f4de1309 (diff) | |
download | git-d5365b43274779246665416caf1a51af5a29f776.tar.gz |
Merge branch 'jk/read-commit-buffer-data-after-free'
Clarify the ownership rule for commit->buffer field, which some
callers incorrectly accessed without making sure it is populated.
* jk/read-commit-buffer-data-after-free:
logmsg_reencode: lazily load missing commit buffers
logmsg_reencode: never return NULL
commit: drop useless xstrdup of commit message
-rw-r--r-- | builtin/blame.c | 22 | ||||
-rw-r--r-- | builtin/commit.c | 14 | ||||
-rw-r--r-- | commit.h | 1 | ||||
-rw-r--r-- | pretty.c | 93 | ||||
-rwxr-xr-x | t/t4042-diff-textconv-caching.sh | 8 |
5 files changed, 85 insertions, 53 deletions
diff --git a/builtin/blame.c b/builtin/blame.c index b431ba3209..86100e9662 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1420,32 +1420,18 @@ static void get_commit_info(struct commit *commit, { int len; const char *subject, *encoding; - char *reencoded, *message; + char *message; commit_info_init(ret); - /* - * We've operated without save_commit_buffer, so - * we now need to populate them for output. - */ - if (!commit->buffer) { - enum object_type type; - unsigned long size; - commit->buffer = - read_sha1_file(commit->object.sha1, &type, &size); - if (!commit->buffer) - die("Cannot read commit %s", - sha1_to_hex(commit->object.sha1)); - } encoding = get_log_output_encoding(); - reencoded = logmsg_reencode(commit, encoding); - message = reencoded ? reencoded : commit->buffer; + message = logmsg_reencode(commit, encoding); get_ac_line(message, "\nauthor ", &ret->author, &ret->author_mail, &ret->author_time, &ret->author_tz); if (!detailed) { - free(reencoded); + logmsg_free(message, commit); return; } @@ -1459,7 +1445,7 @@ static void get_commit_info(struct commit *commit, else strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1)); - free(reencoded); + logmsg_free(message, commit); } /* diff --git a/builtin/commit.c b/builtin/commit.c index 6c95fa76d8..1a0e5f14a3 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -946,24 +946,14 @@ static void handle_untracked_files_arg(struct wt_status *s) static const char *read_commit_message(const char *name) { - const char *out_enc, *out; + const char *out_enc; struct commit *commit; commit = lookup_commit_reference_by_name(name); if (!commit) die(_("could not lookup commit %s"), name); out_enc = get_commit_output_encoding(); - out = logmsg_reencode(commit, out_enc); - - /* - * If we failed to reencode the buffer, just copy it - * byte for byte so the user can try to fix it up. - * This also handles the case where input and output - * encodings are identical. - */ - if (out == NULL) - out = xstrdup(commit->buffer); - return out; + return logmsg_reencode(commit, out_enc); } static int parse_and_validate_options(int argc, const char *argv[], @@ -101,6 +101,7 @@ extern int has_non_ascii(const char *text); struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ extern char *logmsg_reencode(const struct commit *commit, const char *output_encoding); +extern void logmsg_free(char *msg, const struct commit *commit); extern void get_commit_format(const char *arg, struct rev_info *); extern const char *format_subject(struct strbuf *sb, const char *msg, const char *line_separator); @@ -524,10 +524,11 @@ static void add_merge_info(const struct pretty_print_context *pp, strbuf_addch(sb, '\n'); } -static char *get_header(const struct commit *commit, const char *key) +static char *get_header(const struct commit *commit, const char *msg, + const char *key) { int key_len = strlen(key); - const char *line = commit->buffer; + const char *line = msg; while (line) { const char *eol = strchr(line, '\n'), *next; @@ -588,25 +589,77 @@ char *logmsg_reencode(const struct commit *commit, static const char *utf8 = "UTF-8"; const char *use_encoding; char *encoding; + char *msg = commit->buffer; char *out; + if (!msg) { + enum object_type type; + unsigned long size; + + msg = read_sha1_file(commit->object.sha1, &type, &size); + if (!msg) + die("Cannot read commit object %s", + sha1_to_hex(commit->object.sha1)); + if (type != OBJ_COMMIT) + die("Expected commit for '%s', got %s", + sha1_to_hex(commit->object.sha1), typename(type)); + } + if (!output_encoding || !*output_encoding) - return NULL; - encoding = get_header(commit, "encoding"); + return msg; + encoding = get_header(commit, msg, "encoding"); use_encoding = encoding ? encoding : utf8; - if (same_encoding(use_encoding, output_encoding)) - if (encoding) /* we'll strip encoding header later */ - out = xstrdup(commit->buffer); - else - return NULL; /* nothing to do */ - else - out = reencode_string(commit->buffer, - output_encoding, use_encoding); + if (same_encoding(use_encoding, output_encoding)) { + /* + * No encoding work to be done. If we have no encoding header + * at all, then there's nothing to do, and we can return the + * message verbatim (whether newly allocated or not). + */ + if (!encoding) + return msg; + + /* + * Otherwise, we still want to munge the encoding header in the + * result, which will be done by modifying the buffer. If we + * are using a fresh copy, we can reuse it. But if we are using + * the cached copy from commit->buffer, we need to duplicate it + * to avoid munging commit->buffer. + */ + out = msg; + if (out == commit->buffer) + out = xstrdup(out); + } + else { + /* + * There's actual encoding work to do. Do the reencoding, which + * still leaves the header to be replaced in the next step. At + * this point, we are done with msg. If we allocated a fresh + * copy, we can free it. + */ + out = reencode_string(msg, output_encoding, use_encoding); + if (out && msg != commit->buffer) + free(msg); + } + + /* + * This replacement actually consumes the buffer we hand it, so we do + * not have to worry about freeing the old "out" here. + */ if (out) out = replace_encoding_header(out, output_encoding); free(encoding); - return out; + /* + * If the re-encoding failed, out might be NULL here; in that + * case we just return the commit message verbatim. + */ + return out ? out : msg; +} + +void logmsg_free(char *msg, const struct commit *commit) +{ + if (msg != commit->buffer) + free(msg); } static int mailmap_name(const char **email, size_t *email_len, @@ -1278,14 +1331,11 @@ void format_commit_message(const struct commit *commit, context.pretty_ctx = pretty_ctx; context.wrap_start = sb->len; context.message = logmsg_reencode(commit, output_enc); - if (!context.message) - context.message = commit->buffer; strbuf_expand(sb, format, format_commit_item, &context); rewrap_message_tail(sb, &context, 0, 0, 0); - if (context.message != commit->buffer) - free(context.message); + logmsg_free(context.message, commit); free(context.signature.gpg_output); free(context.signature.signer); } @@ -1432,7 +1482,7 @@ void pretty_print_commit(const struct pretty_print_context *pp, { unsigned long beginning_of_body; int indent = 4; - const char *msg = commit->buffer; + const char *msg; char *reencoded; const char *encoding; int need_8bit_cte = pp->need_8bit_cte; @@ -1443,10 +1493,7 @@ void pretty_print_commit(const struct pretty_print_context *pp, } encoding = get_log_output_encoding(); - reencoded = logmsg_reencode(commit, encoding); - if (reencoded) { - msg = reencoded; - } + msg = reencoded = logmsg_reencode(commit, encoding); if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL) indent = 0; @@ -1503,7 +1550,7 @@ void pretty_print_commit(const struct pretty_print_context *pp, if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body) strbuf_addch(sb, '\n'); - free(reencoded); + logmsg_free(reencoded, commit); } void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit, diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh index 91f8198f05..04a44d5c61 100755 --- a/t/t4042-diff-textconv-caching.sh +++ b/t/t4042-diff-textconv-caching.sh @@ -106,4 +106,12 @@ test_expect_success 'switching diff driver produces correct results' ' test_cmp expect actual ' +# The point here is to test that we can log the notes cache and still use it to +# produce a diff later (older versions of git would segfault on this). It's +# much more likely to come up in the real world with "log --all -p", but using +# --no-walk lets us reliably reproduce the order of traversal. +test_expect_success 'log notes cache and still use cache for -p' ' + git log --no-walk -p refs/notes/textconv/magic HEAD +' + test_done |