diff options
-rw-r--r-- | include/git2/message.h | 41 | ||||
-rw-r--r-- | src/trailer.c | 416 | ||||
-rw-r--r-- | tests/message/trailer.c | 165 |
3 files changed, 622 insertions, 0 deletions
diff --git a/include/git2/message.h b/include/git2/message.h index d78b1dce5..329346285 100644 --- a/include/git2/message.h +++ b/include/git2/message.h @@ -38,6 +38,47 @@ GIT_BEGIN_DECL */ GIT_EXTERN(int) git_message_prettify(git_buf *out, const char *message, int strip_comments, char comment_char); +/** + * Represents a single git message trailer: one key/value pair. + * + * Both strings are owned by the git_message_trailer_array that holds this + * entry and stay valid only until that array is freed. + */ +typedef struct { + const char *key; + const char *value; +} git_message_trailer; + +/** + * Represents an array of git message trailers. + * + * Struct members under the private comment are private, subject to change + * and should not be used by callers. + */ +typedef struct { + git_message_trailer *trailers; + size_t count; + + /* private */ + char *_trailer_block; +} git_message_trailer_array; + +/** + * Parse trailers out of a message, filling the array pointed to by `arr`. + * + * Trailers are key/value pairs in the last paragraph of a message, not + * including any patches or conflicts that may be present. + * + * @param arr A pre-allocated git_message_trailer_array struct to be filled in + * with any trailers found during parsing. Release it with + * git_message_trailer_array_free when done. + * @param message The message to be parsed + * @return 0 on success, or non-zero on error. + */ +GIT_EXTERN(int) git_message_trailers(git_message_trailer_array *arr, const char *message); + +/** + * Cleans up any allocated memory in the git_message_trailer_array filled by + * a call to git_message_trailers. + * + * @param arr The trailer array whose memory should be released. + */ +GIT_EXTERN(void) git_message_trailer_array_free(git_message_trailer_array *arr); + /** @} */ GIT_END_DECL diff --git a/src/trailer.c b/src/trailer.c new file mode 100644 index 000000000..24c8847f6 --- /dev/null +++ b/src/trailer.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#include "array.h" +#include "common.h" +#include "git2/message.h" + +#include <stddef.h> +#include <string.h> +#include <ctype.h> + +#define COMMENT_LINE_CHAR '#' +#define TRAILER_SEPARATORS ":" + +static const char *const git_generated_prefixes[] = { + "Signed-off-by: ", + "(cherry picked from commit ", + NULL +}; + +static int is_blank_line(const char *str) +{ + const char *s = str; + while (*s && *s != '\n' && isspace(*s)) + s++; + return !*s || *s == '\n'; +} + +static const char *next_line(const char *str) +{ + const char *nl = strchr(str, '\n'); + + if (nl) { + return nl + 1; + } else { + // return pointer to the NUL terminator: + return str + strlen(str); + } +} + +/* + * Return the position of the start of the last line. If len is 0, return -1. + */ +static int last_line(const char *buf, size_t len) +{ + int i; + if (len == 0) + return -1; + if (len == 1) + return 0; + /* + * Skip the last character (in addition to the null terminator), + * because if the last character is a newline, it is considered as part + * of the last line anyway. + */ + i = len - 2; + + for (; i >= 0; i--) { + if (buf[i] == '\n') + return i + 1; + } + return 0; +} + +/* + * If the given line is of the form + * "<token><optional whitespace><separator>..." or "<separator>...", return the + * location of the separator. Otherwise, return -1. The optional whitespace + * is allowed there primarily to allow things like "Bug #43" where <token> is + * "Bug" and <separator> is "#". + * + * The separator-starts-line case (in which this function returns 0) is + * distinguished from the non-well-formed-line case (in which this function + * returns -1) because some callers of this function need such a distinction. 
+ */ +static int find_separator(const char *line, const char *separators) +{ + int whitespace_found = 0; + const char *c; + for (c = line; *c; c++) { + if (strchr(separators, *c)) + return c - line; + if (!whitespace_found && (isalnum(*c) || *c == '-')) + continue; + if (c != line && (*c == ' ' || *c == '\t')) { + whitespace_found = 1; + continue; + } + break; + } + return -1; +} + +/* + * Inspect the given string and determine the true "end" of the log message, in + * order to find where to put a new Signed-off-by: line. Ignored are + * trailing comment lines and blank lines. To support "git commit -s + * --amend" on an existing commit, we also ignore "Conflicts:". To + * support "git commit -v", we truncate at cut lines. + * + * Returns the number of bytes from the tail to ignore, to be fed as + * the second parameter to append_signoff(). + */ +static int ignore_non_trailer(const char *buf, size_t len) +{ + int boc = 0; + size_t bol = 0; + int in_old_conflicts_block = 0; + size_t cutoff = len; + + while (bol < cutoff) { + const char *next_line = memchr(buf + bol, '\n', len - bol); + + if (!next_line) + next_line = buf + len; + else + next_line++; + + if (buf[bol] == COMMENT_LINE_CHAR || buf[bol] == '\n') { + /* is this the first of the run of comments? */ + if (!boc) + boc = bol; + /* otherwise, it is just continuing */ + } else if (git__prefixcmp(buf + bol, "Conflicts:\n") == 0) { + in_old_conflicts_block = 1; + if (!boc) + boc = bol; + } else if (in_old_conflicts_block && buf[bol] == '\t') { + ; /* a pathname in the conflicts block */ + } else if (boc) { + /* the previous was not trailing comment */ + boc = 0; + in_old_conflicts_block = 0; + } + bol = next_line - buf; + } + return boc ? len - boc : len - cutoff; +} + +/* + * Return the position of the start of the patch or the length of str if there + * is no patch in the message. 
+ */ +static int find_patch_start(const char *str) +{ + const char *s; + + for (s = str; *s; s = next_line(s)) { + if (git__prefixcmp(s, "---") == 0) + return s - str; + } + + return s - str; +} + +/* + * Return the position of the first trailer line or len if there are no + * trailers. + */ +static int find_trailer_start(const char *buf, size_t len) +{ + const char *s; + int end_of_title, l, only_spaces = 1; + int recognized_prefix = 0, trailer_lines = 0, non_trailer_lines = 0; + /* + * Number of possible continuation lines encountered. This will be + * reset to 0 if we encounter a trailer (since those lines are to be + * considered continuations of that trailer), and added to + * non_trailer_lines if we encounter a non-trailer (since those lines + * are to be considered non-trailers). + */ + int possible_continuation_lines = 0; + + /* The first paragraph is the title and cannot be trailers */ + for (s = buf; s < buf + len; s = next_line(s)) { + if (s[0] == COMMENT_LINE_CHAR) + continue; + if (is_blank_line(s)) + break; + } + end_of_title = s - buf; + + /* + * Get the start of the trailers by looking starting from the end for a + * blank line before a set of non-blank lines that (i) are all + * trailers, or (ii) contains at least one Git-generated trailer and + * consists of at least 25% trailers. 
+ */ + for (l = last_line(buf, len); + l >= end_of_title; + l = last_line(buf, l)) { + const char *bol = buf + l; + const char *const *p; + int separator_pos; + + if (bol[0] == COMMENT_LINE_CHAR) { + non_trailer_lines += possible_continuation_lines; + possible_continuation_lines = 0; + continue; + } + if (is_blank_line(bol)) { + if (only_spaces) + continue; + non_trailer_lines += possible_continuation_lines; + if (recognized_prefix && + trailer_lines * 3 >= non_trailer_lines) + return next_line(bol) - buf; + else if (trailer_lines && !non_trailer_lines) + return next_line(bol) - buf; + return len; + } + only_spaces = 0; + + for (p = git_generated_prefixes; *p; p++) { + if (git__prefixcmp(bol, *p) == 0) { + trailer_lines++; + possible_continuation_lines = 0; + recognized_prefix = 1; + goto continue_outer_loop; + } + } + + separator_pos = find_separator(bol, TRAILER_SEPARATORS); + if (separator_pos >= 1 && !isspace(bol[0])) { + trailer_lines++; + possible_continuation_lines = 0; + if (recognized_prefix) + continue; + } else if (isspace(bol[0])) + possible_continuation_lines++; + else { + non_trailer_lines++; + non_trailer_lines += possible_continuation_lines; + possible_continuation_lines = 0; + } +continue_outer_loop: + ; + } + + return len; +} + +/* Return the position of the end of the trailers. 
*/ +static int find_trailer_end(const char *buf, size_t len) +{ + return len - ignore_non_trailer(buf, len); +} + +static char *extract_trailer_block(const char *message, size_t* len) +{ + size_t patch_start = find_patch_start(message); + size_t trailer_end = find_trailer_end(message, patch_start); + size_t trailer_start = find_trailer_start(message, trailer_end); + + size_t trailer_len = trailer_end - trailer_start; + + char *buffer = git__malloc(trailer_len + 1); + memcpy(buffer, message + trailer_start, trailer_len); + buffer[trailer_len] = 0; + + *len = trailer_len; + + return buffer; +} + +enum trailer_state { + S_START = 0, + S_KEY = 1, + S_KEY_WS = 2, + S_SEP_WS = 3, + S_VALUE = 4, + S_VALUE_NL = 5, + S_VALUE_END = 6, + S_IGNORE = 7, +}; + +#define NEXT(st) { state = (st); ptr++; continue; } +#define GOTO(st) { state = (st); continue; } + +typedef git_array_t(git_message_trailer) git_array_trailer_t; + +int git_message_trailers(git_message_trailer_array *trailer_arr, const char *message) +{ + enum trailer_state state = S_START; + int rc = 0; + char *ptr; + char *key = NULL; + char *value = NULL; + git_array_trailer_t arr = GIT_ARRAY_INIT; + + size_t trailer_len; + char *trailer = extract_trailer_block(message, &trailer_len); + + for (ptr = trailer;;) { + switch (state) { + case S_START: { + if (*ptr == 0) { + goto ret; + } + + key = ptr; + GOTO(S_KEY); + } + case S_KEY: { + if (*ptr == 0) { + goto ret; + } + + if (isalnum(*ptr) || *ptr == '-') { + // legal key character + NEXT(S_KEY); + } + + if (*ptr == ' ' || *ptr == '\t') { + // optional whitespace before separator + *ptr = 0; + NEXT(S_KEY_WS); + } + + if (strchr(TRAILER_SEPARATORS, *ptr)) { + *ptr = 0; + NEXT(S_SEP_WS); + } + + // illegal character + GOTO(S_IGNORE); + } + case S_KEY_WS: { + if (*ptr == 0) { + goto ret; + } + + if (*ptr == ' ' || *ptr == '\t') { + NEXT(S_KEY_WS); + } + + if (strchr(TRAILER_SEPARATORS, *ptr)) { + NEXT(S_SEP_WS); + } + + // illegal character + GOTO(S_IGNORE); + } + case 
S_SEP_WS: { + if (*ptr == 0) { + goto ret; + } + + if (*ptr == ' ' || *ptr == '\t') { + NEXT(S_SEP_WS); + } + + value = ptr; + NEXT(S_VALUE); + } + case S_VALUE: { + if (*ptr == 0) { + GOTO(S_VALUE_END); + } + + if (*ptr == '\n') { + NEXT(S_VALUE_NL); + } + + NEXT(S_VALUE); + } + case S_VALUE_NL: { + if (*ptr == ' ') { + // continuation; + NEXT(S_VALUE); + } + + ptr[-1] = 0; + GOTO(S_VALUE_END); + } + case S_VALUE_END: { + git_message_trailer *t = git_array_alloc(arr); + + t->key = key; + t->value = value; + + key = NULL; + value = NULL; + + GOTO(S_START); + } + case S_IGNORE: { + if (*ptr == 0) { + goto ret; + } + + if (*ptr == '\n') { + NEXT(S_START); + } + + NEXT(S_IGNORE); + } + } + } + +ret: + trailer_arr->_trailer_block = trailer; + trailer_arr->trailers = arr.ptr; + trailer_arr->count = arr.size; + + return rc; +} + +void git_message_trailer_array_free(git_message_trailer_array *arr) +{ + git__free(arr->_trailer_block); + git__free(arr->trailers); +} diff --git a/tests/message/trailer.c b/tests/message/trailer.c new file mode 100644 index 000000000..9cc83de72 --- /dev/null +++ b/tests/message/trailer.c @@ -0,0 +1,165 @@ +#include "clar_libgit2.h" +#include "message.h" + +static void assert_trailers(const char *message, git_message_trailer *trailers) +{ + git_message_trailer_array arr; + size_t i; + + int rc = git_message_trailers(&arr, message); + + cl_assert_equal_i(0, rc); + + for(i=0; i<arr.count; i++) { + cl_assert_equal_s(arr.trailers[i].key, trailers[i].key); + cl_assert_equal_s(arr.trailers[i].value, trailers[i].value); + } + + cl_assert_equal_i(0, rc); + + git_message_trailer_array_free(&arr); +} + +void test_message_trailer__simple(void) +{ + git_message_trailer trailers[] = { + {"Signed-off-by", "foo@bar.com"}, + {"Signed-off-by", "someone@else.com"}, + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "Signed-off-by: foo@bar.com\n" + "Signed-off-by: someone@else.com\n" + , trailers); +} + +void 
test_message_trailer__no_whitespace(void) +{ + git_message_trailer trailers[] = { + {"Key", "value"}, + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "Key:value\n" + , trailers); +} + +void test_message_trailer__extra_whitespace(void) +{ + git_message_trailer trailers[] = { + {"Key", "value"}, + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "Key : value\n" + , trailers); +} + +void test_message_trailer__no_newline(void) +{ + git_message_trailer trailers[] = { + {"Key", "value"}, + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "Key: value" + , trailers); +} + +void test_message_trailer__not_last_paragraph(void) +{ + git_message_trailer trailers[] = { + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "Key: value\n" + "\n" + "More stuff\n" + , trailers); +} + +void test_message_trailer__conflicts(void) +{ + git_message_trailer trailers[] = { + {"Key", "value"}, + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "Key: value\n" + "\n" + "Conflicts:\n" + "\tfoo.c\n" + , trailers); +} + +void test_message_trailer__patch(void) +{ + git_message_trailer trailers[] = { + {"Key", "value"}, + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "Key: value\n" + "\n" + "---\n" + "More: stuff\n" + , trailers); +} + +void test_message_trailer__continuation(void) +{ + git_message_trailer trailers[] = { + {"A", "b\n c"}, + {"D", "e\n f: g h"}, + {"I", "j"}, + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "A: b\n" + " c\n" + "D: e\n" + " f: g h\n" + "I: j\n" + , trailers); +} + +void test_message_trailer__invalid(void) +{ + git_message_trailer trailers[] = { + {"Signed-off-by", "some@one.com"}, + {"Another", "trailer"}, + {NULL, NULL}, + }; + + assert_trailers( + "Message\n" + "\n" + "Signed-off-by: some@one.com\n" + "Not a trailer\n" + "Another: trailer\n" + , trailers); +} |