summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2013-02-04 10:25:18 -0800
committerJunio C Hamano <gitster@pobox.com>2013-02-04 10:25:18 -0800
commitd5365b43274779246665416caf1a51af5a29f776 (patch)
tree4964cb7640ffec8458d3a56815d0d4c21369f1f6
parent27d46a70724ef5e087863c3db35eb7ca9c084c2c (diff)
parentbe5c9fb9049ed470e7005f159bb923a5f4de1309 (diff)
downloadgit-d5365b43274779246665416caf1a51af5a29f776.tar.gz
Merge branch 'jk/read-commit-buffer-data-after-free'
Clarify the ownership rule for commit->buffer field, which some callers incorrectly accessed without making sure it is populated. * jk/read-commit-buffer-data-after-free: logmsg_reencode: lazily load missing commit buffers logmsg_reencode: never return NULL commit: drop useless xstrdup of commit message
-rw-r--r--builtin/blame.c22
-rw-r--r--builtin/commit.c14
-rw-r--r--commit.h1
-rw-r--r--pretty.c93
-rwxr-xr-xt/t4042-diff-textconv-caching.sh8
5 files changed, 85 insertions, 53 deletions
diff --git a/builtin/blame.c b/builtin/blame.c
index b431ba3209..86100e9662 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1420,32 +1420,18 @@ static void get_commit_info(struct commit *commit,
{
int len;
const char *subject, *encoding;
- char *reencoded, *message;
+ char *message;
commit_info_init(ret);
- /*
- * We've operated without save_commit_buffer, so
- * we now need to populate them for output.
- */
- if (!commit->buffer) {
- enum object_type type;
- unsigned long size;
- commit->buffer =
- read_sha1_file(commit->object.sha1, &type, &size);
- if (!commit->buffer)
- die("Cannot read commit %s",
- sha1_to_hex(commit->object.sha1));
- }
encoding = get_log_output_encoding();
- reencoded = logmsg_reencode(commit, encoding);
- message = reencoded ? reencoded : commit->buffer;
+ message = logmsg_reencode(commit, encoding);
get_ac_line(message, "\nauthor ",
&ret->author, &ret->author_mail,
&ret->author_time, &ret->author_tz);
if (!detailed) {
- free(reencoded);
+ logmsg_free(message, commit);
return;
}
@@ -1459,7 +1445,7 @@ static void get_commit_info(struct commit *commit,
else
strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1));
- free(reencoded);
+ logmsg_free(message, commit);
}
/*
diff --git a/builtin/commit.c b/builtin/commit.c
index 6c95fa76d8..1a0e5f14a3 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -946,24 +946,14 @@ static void handle_untracked_files_arg(struct wt_status *s)
static const char *read_commit_message(const char *name)
{
- const char *out_enc, *out;
+ const char *out_enc;
struct commit *commit;
commit = lookup_commit_reference_by_name(name);
if (!commit)
die(_("could not lookup commit %s"), name);
out_enc = get_commit_output_encoding();
- out = logmsg_reencode(commit, out_enc);
-
- /*
- * If we failed to reencode the buffer, just copy it
- * byte for byte so the user can try to fix it up.
- * This also handles the case where input and output
- * encodings are identical.
- */
- if (out == NULL)
- out = xstrdup(commit->buffer);
- return out;
+ return logmsg_reencode(commit, out_enc);
}
static int parse_and_validate_options(int argc, const char *argv[],
diff --git a/commit.h b/commit.h
index 95b8350a5b..4138bb4c08 100644
--- a/commit.h
+++ b/commit.h
@@ -101,6 +101,7 @@ extern int has_non_ascii(const char *text);
struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */
extern char *logmsg_reencode(const struct commit *commit,
const char *output_encoding);
+extern void logmsg_free(char *msg, const struct commit *commit);
extern void get_commit_format(const char *arg, struct rev_info *);
extern const char *format_subject(struct strbuf *sb, const char *msg,
const char *line_separator);
diff --git a/pretty.c b/pretty.c
index 07fc062865..eae57ad9d7 100644
--- a/pretty.c
+++ b/pretty.c
@@ -524,10 +524,11 @@ static void add_merge_info(const struct pretty_print_context *pp,
strbuf_addch(sb, '\n');
}
-static char *get_header(const struct commit *commit, const char *key)
+static char *get_header(const struct commit *commit, const char *msg,
+ const char *key)
{
int key_len = strlen(key);
- const char *line = commit->buffer;
+ const char *line = msg;
while (line) {
const char *eol = strchr(line, '\n'), *next;
@@ -588,25 +589,77 @@ char *logmsg_reencode(const struct commit *commit,
static const char *utf8 = "UTF-8";
const char *use_encoding;
char *encoding;
+ char *msg = commit->buffer;
char *out;
+ if (!msg) {
+ enum object_type type;
+ unsigned long size;
+
+ msg = read_sha1_file(commit->object.sha1, &type, &size);
+ if (!msg)
+ die("Cannot read commit object %s",
+ sha1_to_hex(commit->object.sha1));
+ if (type != OBJ_COMMIT)
+ die("Expected commit for '%s', got %s",
+ sha1_to_hex(commit->object.sha1), typename(type));
+ }
+
if (!output_encoding || !*output_encoding)
- return NULL;
- encoding = get_header(commit, "encoding");
+ return msg;
+ encoding = get_header(commit, msg, "encoding");
use_encoding = encoding ? encoding : utf8;
- if (same_encoding(use_encoding, output_encoding))
- if (encoding) /* we'll strip encoding header later */
- out = xstrdup(commit->buffer);
- else
- return NULL; /* nothing to do */
- else
- out = reencode_string(commit->buffer,
- output_encoding, use_encoding);
+ if (same_encoding(use_encoding, output_encoding)) {
+ /*
+ * No encoding work to be done. If we have no encoding header
+ * at all, then there's nothing to do, and we can return the
+ * message verbatim (whether newly allocated or not).
+ */
+ if (!encoding)
+ return msg;
+
+ /*
+ * Otherwise, we still want to munge the encoding header in the
+ * result, which will be done by modifying the buffer. If we
+ * are using a fresh copy, we can reuse it. But if we are using
+ * the cached copy from commit->buffer, we need to duplicate it
+ * to avoid munging commit->buffer.
+ */
+ out = msg;
+ if (out == commit->buffer)
+ out = xstrdup(out);
+ }
+ else {
+ /*
+ * There's actual encoding work to do. Do the reencoding, which
+ * still leaves the header to be replaced in the next step. At
+ * this point, we are done with msg. If we allocated a fresh
+ * copy, we can free it.
+ */
+ out = reencode_string(msg, output_encoding, use_encoding);
+ if (out && msg != commit->buffer)
+ free(msg);
+ }
+
+ /*
+ * This replacement actually consumes the buffer we hand it, so we do
+ * not have to worry about freeing the old "out" here.
+ */
if (out)
out = replace_encoding_header(out, output_encoding);
free(encoding);
- return out;
+ /*
+ * If the re-encoding failed, out might be NULL here; in that
+ * case we just return the commit message verbatim.
+ */
+ return out ? out : msg;
+}
+
+void logmsg_free(char *msg, const struct commit *commit)
+{
+ if (msg != commit->buffer)
+ free(msg);
}
static int mailmap_name(const char **email, size_t *email_len,
@@ -1278,14 +1331,11 @@ void format_commit_message(const struct commit *commit,
context.pretty_ctx = pretty_ctx;
context.wrap_start = sb->len;
context.message = logmsg_reencode(commit, output_enc);
- if (!context.message)
- context.message = commit->buffer;
strbuf_expand(sb, format, format_commit_item, &context);
rewrap_message_tail(sb, &context, 0, 0, 0);
- if (context.message != commit->buffer)
- free(context.message);
+ logmsg_free(context.message, commit);
free(context.signature.gpg_output);
free(context.signature.signer);
}
@@ -1432,7 +1482,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
{
unsigned long beginning_of_body;
int indent = 4;
- const char *msg = commit->buffer;
+ const char *msg;
char *reencoded;
const char *encoding;
int need_8bit_cte = pp->need_8bit_cte;
@@ -1443,10 +1493,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
}
encoding = get_log_output_encoding();
- reencoded = logmsg_reencode(commit, encoding);
- if (reencoded) {
- msg = reencoded;
- }
+ msg = reencoded = logmsg_reencode(commit, encoding);
if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL)
indent = 0;
@@ -1503,7 +1550,7 @@ void pretty_print_commit(const struct pretty_print_context *pp,
if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
strbuf_addch(sb, '\n');
- free(reencoded);
+ logmsg_free(reencoded, commit);
}
void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit,
diff --git a/t/t4042-diff-textconv-caching.sh b/t/t4042-diff-textconv-caching.sh
index 91f8198f05..04a44d5c61 100755
--- a/t/t4042-diff-textconv-caching.sh
+++ b/t/t4042-diff-textconv-caching.sh
@@ -106,4 +106,12 @@ test_expect_success 'switching diff driver produces correct results' '
test_cmp expect actual
'
+# The point here is to test that we can log the notes cache and still use it to
+# produce a diff later (older versions of git would segfault on this). It's
+# much more likely to come up in the real world with "log --all -p", but using
+# --no-walk lets us reliably reproduce the order of traversal.
+test_expect_success 'log notes cache and still use cache for -p' '
+ git log --no-walk -p refs/notes/textconv/magic HEAD
+'
+
test_done