From 3ffefb54c0515308ceafb6ba071567d9fd379498 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:36:52 -0400 Subject: commit_tree: take a pointer/len pair rather than a const strbuf While strbufs are pretty common throughout our code, it is more flexible for functions to take a pointer/len pair than a strbuf. It's easy to turn a strbuf into such a pair (by dereferencing its members), but less easy to go the other way (you can strbuf_attach, but that has implications about memory ownership). This patch teaches commit_tree (and its associated callers and sub-functions) to take such a pair for the commit message rather than a strbuf. This makes passing the buffer around slightly more verbose, but means we can get rid of some dangerous strbuf_attach calls in the next patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/commit-tree.c | 4 ++-- builtin/commit.c | 4 ++-- builtin/merge.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'builtin') diff --git a/builtin/commit-tree.c b/builtin/commit-tree.c index 987a4c3d73..8a66c74e0f 100644 --- a/builtin/commit-tree.c +++ b/builtin/commit-tree.c @@ -123,8 +123,8 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) die_errno("git commit-tree: failed to read"); } - if (commit_tree(&buffer, tree_sha1, parents, commit_sha1, - NULL, sign_commit)) { + if (commit_tree(buffer.buf, buffer.len, tree_sha1, parents, + commit_sha1, NULL, sign_commit)) { strbuf_release(&buffer); return 1; } diff --git a/builtin/commit.c b/builtin/commit.c index 9cfef6c6cc..8b85df475b 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1659,8 +1659,8 @@ int cmd_commit(int argc, const char **argv, const char *prefix) append_merge_tag_headers(parents, &tail); } - if (commit_tree_extended(&sb, active_cache_tree->sha1, parents, sha1, - author_ident.buf, sign_commit, extra)) { + if (commit_tree_extended(sb.buf, sb.len, active_cache_tree->sha1, + parents, sha1, author_ident.buf, sign_commit, extra)) { rollback_index_files(); die(_("failed to write commit object")); } diff --git a/builtin/merge.c b/builtin/merge.c index 66d8843301..8763b2efa2 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -852,8 +852,8 @@ static int merge_trivial(struct commit *head, struct commit_list *remoteheads) parent->next->item = remoteheads->item; parent->next->next = NULL; prepare_to_commit(remoteheads); - if (commit_tree(&merge_msg, result_tree, parent, result_commit, NULL, - sign_commit)) + if (commit_tree(merge_msg.buf, merge_msg.len, result_tree, parent, + result_commit, NULL, sign_commit)) die(_("failed to write commit object")); finish(head, remoteheads, result_commit, "In-index merge"); drop_save(); @@ -877,8 +877,8 @@ static int finish_automerge(struct commit *head, commit_list_insert(head, &parents); strbuf_addch(&merge_msg, '\n'); prepare_to_commit(remoteheads); - if (commit_tree(&merge_msg, result_tree, parents, result_commit, - NULL, sign_commit)) + if (commit_tree(merge_msg.buf, merge_msg.len, result_tree, parents, + result_commit, NULL, sign_commit)) die(_("failed to write commit object")); strbuf_addf(&buf, "Merge made by the '%s' strategy.", wt_strategy); finish(head, remoteheads, result_commit, buf.buf); -- cgit v1.2.1 From 10322a0aaf84382d8901f9ab59e59c39f0c035bb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:39:11 -0400 Subject: do not create "struct commit" with xcalloc In both blame and merge-recursive, we sometimes create a "fake" commit struct for convenience (e.g., to represent the HEAD state as if we would commit it). By allocating ourselves rather than using alloc_commit_node, we do not properly set the "index" field of the commit. This can produce subtle bugs if we then use commit-slab on the resulting commit, as we will share the "0" index with another commit. We can fix this by using alloc_commit_node() to allocate. Note that we cannot free the result, as it is part of our commit allocator. However, both cases were already leaking the allocated commit anyway, so there's nothing to fix up. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/blame.c b/builtin/blame.c index 88cb799727..6e22bdfbfa 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2019,7 +2019,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, struct strbuf msg = STRBUF_INIT; time(&now); - commit = xcalloc(1, sizeof(*commit)); + commit = alloc_commit_node(); commit->object.parsed = 1; commit->date = now; commit->object.type = OBJ_COMMIT; -- cgit v1.2.1 From b000c59b0c80fc187e5e0e48dc9396cd60576c4e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:39:30 -0400 Subject: logmsg_reencode: return const buffer The return value from logmsg_reencode may be either a newly allocated buffer or a pointer to the existing commit->buffer. We would not want the caller to accidentally free() or modify the latter, so let's mark it as const. We can cast away the constness in logmsg_free, but only once we have determined that it is a free-able buffer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- builtin/reset.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/blame.c b/builtin/blame.c index 6e22bdfbfa..85a3681306 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1405,7 +1405,7 @@ static void get_commit_info(struct commit *commit, { int len; const char *subject, *encoding; - char *message; + const char *message; commit_info_init(ret); diff --git a/builtin/reset.c b/builtin/reset.c index f4e087596b..b5312c4c65 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -93,7 +93,7 @@ static int reset_index(const unsigned char *sha1, int reset_type, int quiet) static void print_new_head_line(struct commit *commit) { const char *hex, *body; - char *msg; + const char *msg; hex = find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV); printf(_("HEAD is now at %s"), hex); -- cgit v1.2.1 From 0fb370da9ca972f9571530f95c0dacb31368c280 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 12 Jun 2014 18:05:37 -0400 Subject: provide a helper to free commit buffer This converts two lines into one at each caller. But more importantly, it abstracts the concept of freeing the buffer, which will make it easier to change later. Note that we also need to provide a "detach" mechanism for a tricky case in index-pack. We are passed a buffer for the object generated by processing the incoming pack. If we are not using --strict, we just calculate the sha1 on that buffer and return, leaving the caller to free it. But if we are using --strict, we actually attach that buffer to an object, pass the object to the fsck functions, and then detach the buffer from the object again (so that the caller can free it as usual). In this case, we don't want to free the buffer ourselves, but just make sure it is no longer associated with the commit. Note that we are making the assumption here that the attach/detach process does not impact the buffer at all (e.g., it is never reallocated or modified). That holds true now, and we have no plans to change that. However, as we abstract the commit_buffer code, this dependency becomes less obvious. So when we detach, let's also make sure that we get back the same buffer that we gave to the commit_buffer code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 3 +-- builtin/index-pack.c | 3 ++- builtin/log.c | 6 ++---- builtin/rev-list.c | 3 +-- 4 files changed, 6 insertions(+), 9 deletions(-) (limited to 'builtin') diff --git a/builtin/fsck.c b/builtin/fsck.c index fc150c8821..8aadca160e 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -310,8 +310,7 @@ static int fsck_obj(struct object *obj) if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); if (!commit->parents && show_root) printf("root %s\n", sha1_to_hex(commit->object.sha1)); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index b9f6e12c0e..42551ce4ff 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -774,7 +774,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, } if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - commit->buffer = NULL; + if (detach_commit_buffer(commit) != data) + die("BUG: parse_object_buffer transmogrified our buffer"); } obj->flags |= FLAG_CHECKED; } diff --git a/builtin/log.c b/builtin/log.c index 39e8836352..226f8f2980 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -349,8 +349,7 @@ static int cmd_log_walk(struct rev_info *rev) rev->max_count++; if (!rev->reflog_info) { /* we allow cycles in reflog ancestry */ - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); } free_commit_list(commit->parents); commit->parents = NULL; @@ -1508,8 +1507,7 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix) reopen_stdout(rev.numbered_files ? NULL : commit, NULL, &rev, quiet)) die(_("Failed to create output files")); shown = log_tree_commit(&rev, commit); - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); /* We put one extra blank line between formatted * patches and this flag is used by log-tree code diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 9f92905379..e012ebe502 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -173,8 +173,7 @@ static void finish_commit(struct commit *commit, void *data) free_commit_list(commit->parents); commit->parents = NULL; } - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); } static void finish_object(struct object *obj, -- cgit v1.2.1 From 66c2827ea4deb24ff541e30a5b6239ad5e9f6801 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:40:14 -0400 Subject: provide a helper to set the commit buffer Right now this is just a one-liner, but abstracting it will make it easier to change later. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/blame.c b/builtin/blame.c index 85a3681306..38784ab9d6 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2046,7 +2046,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ident, ident, path, (!contents_from ? path : (!strcmp(contents_from, "-") ? "standard input" : contents_from))); - commit->buffer = strbuf_detach(&msg, NULL); + set_commit_buffer(commit, strbuf_detach(&msg, NULL)); if (!contents_from || strcmp("-", contents_from)) { struct stat st; -- cgit v1.2.1 From a97934d8205772ffd2a528a9e970af7dec725012 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:40:46 -0400 Subject: use get_cached_commit_buffer where appropriate Some call sites check commit->buffer to see whether we have a cached buffer, and if so, do some work with it. In the long run we may want to switch these code paths to make their decision on a different boolean flag (because checking the cache may get a little more expensive in the future). But for now, we can easily support them by converting the calls to use get_cached_commit_buffer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/rev-list.c b/builtin/rev-list.c index e012ebe502..3fcbf21c03 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -106,7 +106,7 @@ static void show_commit(struct commit *commit, void *data) else putchar('\n'); - if (revs->verbose_header && commit->buffer) { + if (revs->verbose_header && get_cached_commit_buffer(commit)) { struct strbuf buf = STRBUF_INIT; struct pretty_print_context ctx = {0}; ctx.abbrev = revs->abbrev; -- cgit v1.2.1 From b66103c3baa593a39b8b0751213b9fce60e94de4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:41:39 -0400 Subject: convert logmsg_reencode to get_commit_buffer Like the callsites in the previous commit, logmsg_reencode already falls back to read_sha1_file when necessary. However, I split its conversion out into its own commit because it's a bit more complex. We return either: 1. The original commit->buffer 2. A newly allocated buffer from read_sha1_file 3. A reencoded buffer (based on either 1 or 2 above). while trying to do as few extra reads/allocations as possible. Callers currently free the result with logmsg_free, but we can simplify this by pointing them straight to unuse_commit_buffer. This is a slight layering violation, in that we may be passing a buffer from (3). However, since the end result is to free() anything except (1), which is unlikely to change, and because this makes the interface much simpler, it's a reasonable bending of the rules. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 4 ++-- builtin/reset.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/blame.c b/builtin/blame.c index 38784ab9d6..857d98a324 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1416,7 +1416,7 @@ static void get_commit_info(struct commit *commit, &ret->author_time, &ret->author_tz); if (!detailed) { - logmsg_free(message, commit); + unuse_commit_buffer(commit, message); return; } @@ -1430,7 +1430,7 @@ static void get_commit_info(struct commit *commit, else strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1)); - logmsg_free(message, commit); + unuse_commit_buffer(commit, message); } /* diff --git a/builtin/reset.c b/builtin/reset.c index b5312c4c65..6bd6245821 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -109,7 +109,7 @@ static void print_new_head_line(struct commit *commit) } else printf("\n"); - logmsg_free(msg, commit); + unuse_commit_buffer(commit, msg); } static void update_index_from_diff(struct diff_queue_struct *q, -- cgit v1.2.1 From bc6b8fc1300ef79c4b4c3c2a79bb3c1e2e032963 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:41:51 -0400 Subject: use get_commit_buffer everywhere Each of these sites assumes that commit->buffer is valid. Since they would segfault if this was not the case, they are likely to be correct in practice. However, we can future-proof them by using get_commit_buffer. And as a side effect, we abstract away the final bare uses of commit->buffer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fast-export.c | 5 ++++- builtin/fmt-merge-msg.c | 5 ++++- builtin/log.c | 7 +++++-- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'builtin') diff --git a/builtin/fast-export.c b/builtin/fast-export.c index b8d8a3aaf9..7ee5e08442 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -279,6 +279,7 @@ static const char *find_encoding(const char *begin, const char *end) static void handle_commit(struct commit *commit, struct rev_info *rev) { int saved_output_format = rev->diffopt.output_format; + const char *commit_buffer; const char *author, *author_end, *committer, *committer_end; const char *encoding, *message; char *reencoded = NULL; @@ -288,7 +289,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) rev->diffopt.output_format = DIFF_FORMAT_CALLBACK; parse_commit_or_die(commit); - author = strstr(commit->buffer, "\nauthor "); + commit_buffer = get_commit_buffer(commit); + author = strstr(commit_buffer, "\nauthor "); if (!author) die ("Could not find author in commit %s", sha1_to_hex(commit->object.sha1)); @@ -335,6 +337,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) ? strlen(message) : 0), reencoded ? reencoded : message ? message : ""); free(reencoded); + unuse_commit_buffer(commit, commit_buffer); for (i = 0, p = commit->parents; p; p = p->next) { int mark = get_object_mark(&p->item->object); diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index 3906eda877..01f6d59eef 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -230,12 +230,14 @@ static void add_branch_desc(struct strbuf *out, const char *name) static void record_person(int which, struct string_list *people, struct commit *commit) { + const char *buffer; char *name_buf, *name, *name_end; struct string_list_item *elem; const char *field; field = (which == 'a') ? "\nauthor " : "\ncommitter "; - name = strstr(commit->buffer, field); + buffer = get_commit_buffer(commit); + name = strstr(buffer, field); if (!name) return; name += strlen(field); @@ -247,6 +249,7 @@ static void record_person(int which, struct string_list *people, if (name_end < name) return; name_buf = xmemdupz(name, name_end - name + 1); + unuse_commit_buffer(commit, buffer); elem = string_list_lookup(people, name_buf); if (!elem) { diff --git a/builtin/log.c b/builtin/log.c index 226f8f2980..2c742606bc 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -918,9 +918,12 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, log_write_email_headers(rev, head, &pp.subject, &pp.after_subject, &need_8bit_cte); - for (i = 0; !need_8bit_cte && i < nr; i++) - if (has_non_ascii(list[i]->buffer)) + for (i = 0; !need_8bit_cte && i < nr; i++) { + const char *buf = get_commit_buffer(list[i]); + if (has_non_ascii(buf)) need_8bit_cte = 1; + unuse_commit_buffer(list[i], buf); + } if (!branch_name) branch_name = find_branch_name(rev); -- cgit v1.2.1 From 8597ea3afea067b39ba7d4adae7ec6c1ee0e7c91 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:44:13 -0400 Subject: commit: record buffer length in cache Most callsites which use the commit buffer try to use the cached version attached to the commit, rather than re-reading from disk. Unfortunately, that interface provides only a pointer to the NUL-terminated buffer, with no indication of the original length. For the most part, this doesn't matter. People do not put NULs in their commit messages, and the log code is happy to treat it all as a NUL-terminated string. However, some code paths do care. For example, when checking signatures, we want to be very careful that we verify all the bytes to avoid malicious trickery. This patch just adds an optional "size" out-pointer to get_commit_buffer and friends. The existing callers all pass NULL (there did not seem to be any obvious sites where we could avoid an immediate strlen() call, though perhaps with some further refactoring we could). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/blame.c | 14 +++++++++++++- builtin/fast-export.c | 2 +- builtin/fmt-merge-msg.c | 2 +- builtin/index-pack.c | 2 +- builtin/log.c | 2 +- builtin/rev-list.c | 2 +- 6 files changed, 18 insertions(+), 6 deletions(-) (limited to 'builtin') diff --git a/builtin/blame.c b/builtin/blame.c index 857d98a324..b84e375b5c 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1998,6 +1998,18 @@ static void append_merge_parents(struct commit_list **tail) strbuf_release(&line); } +/* + * This isn't as simple as passing sb->buf and sb->len, because we + * want to transfer ownership of the buffer to the commit (so we + * must use detach). + */ +static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf *sb) +{ + size_t len; + void *buf = strbuf_detach(sb, &len); + set_commit_buffer(c, buf, len); +} + /* * Prepare a dummy commit that represents the work tree (or staged) item. * Note that annotating work tree item never works in the reverse. @@ -2046,7 +2058,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ident, ident, path, (!contents_from ? path : (!strcmp(contents_from, "-") ? "standard input" : contents_from))); - set_commit_buffer(commit, strbuf_detach(&msg, NULL)); + set_commit_buffer_from_strbuf(commit, &msg); if (!contents_from || strcmp("-", contents_from)) { struct stat st; diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 7ee5e08442..05d161f19f 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -289,7 +289,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) rev->diffopt.output_format = DIFF_FORMAT_CALLBACK; parse_commit_or_die(commit); - commit_buffer = get_commit_buffer(commit); + commit_buffer = get_commit_buffer(commit, NULL); author = strstr(commit_buffer, "\nauthor "); if (!author) die ("Could not find author in commit %s", diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index 01f6d59eef..ef8b254ef2 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -236,7 +236,7 @@ static void record_person(int which, struct string_list *people, const char *field; field = (which == 'a') ? "\nauthor " : "\ncommitter "; - buffer = get_commit_buffer(commit); + buffer = get_commit_buffer(commit, NULL); name = strstr(buffer, field); if (!name) return; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 42551ce4ff..459b9f07bb 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -774,7 +774,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, } if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - if (detach_commit_buffer(commit) != data) + if (detach_commit_buffer(commit, NULL) != data) die("BUG: parse_object_buffer transmogrified our buffer"); } obj->flags |= FLAG_CHECKED; diff --git a/builtin/log.c b/builtin/log.c index 2c742606bc..c599eacf72 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -919,7 +919,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, &need_8bit_cte); for (i = 0; !need_8bit_cte && i < nr; i++) { - const char *buf = get_commit_buffer(list[i]); + const char *buf = get_commit_buffer(list[i], NULL); if (has_non_ascii(buf)) need_8bit_cte = 1; unuse_commit_buffer(list[i], buf); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 3fcbf21c03..ff84a825ff 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -106,7 +106,7 @@ static void show_commit(struct commit *commit, void *data) else putchar('\n'); - if (revs->verbose_header && get_cached_commit_buffer(commit)) { + if (revs->verbose_header && get_cached_commit_buffer(commit, NULL)) { struct strbuf buf = STRBUF_INIT; struct pretty_print_context ctx = {0}; ctx.abbrev = revs->abbrev; -- cgit v1.2.1