summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Brian Lopez <seniorlopez@gmail.com> 2018-01-17 13:54:42 -0800
committer GitHub <noreply@github.com> 2018-01-17 13:54:42 -0800
commit 4893a9c01c8da084eb995178f80d0d453109056e (patch)
tree bfe94f6be3e56a211205075ecdf863a3195c3423
parent ecd55cec771d9c6f7ffffe80422a1decd4645c17 (diff)
parent d4a3a4b5383fefcb397524340af05118b4687f29 (diff)
download libgit2-4893a9c01c8da084eb995178f80d0d453109056e.tar.gz
Merge pull request #4451 from libgit2/charliesome/trailer-info
Implement message trailer parsing API
-rw-r--r-- include/git2/message.h 41
-rw-r--r-- src/trailer.c 416
-rw-r--r-- tests/message/trailer.c 165
3 files changed, 622 insertions, 0 deletions
diff --git a/include/git2/message.h b/include/git2/message.h
index d78b1dce5..329346285 100644
--- a/include/git2/message.h
+++ b/include/git2/message.h
@@ -38,6 +38,47 @@ GIT_BEGIN_DECL
*/
GIT_EXTERN(int) git_message_prettify(git_buf *out, const char *message, int strip_comments, char comment_char);
+/**
+ * Represents a single git message trailer.
+ *
+ * Both members are NUL-terminated strings.
+ */
+typedef struct {
+ const char *key; /**< trailer token, i.e. the text before the separator */
+ const char *value; /**< trailer text after the separator and any blanks following it */
+} git_message_trailer;
+
+/**
+ * Represents an array of git message trailers.
+ *
+ * Struct members under the private comment are private, subject to change
+ * and should not be used by callers.
+ */
+typedef struct {
+ git_message_trailer *trailers; /**< the parsed trailers, `count` entries long */
+ size_t count; /**< number of entries in `trailers` */
+
+ /* private */
+ char *_trailer_block; /* buffer the key/value strings of `trailers` point into */
+} git_message_trailer_array;
+
+/**
+ * Parse trailers out of a message, filling the array pointed to by +arr+.
+ *
+ * Trailers are key/value pairs in the last paragraph of a message, not
+ * including any patches or conflicts that may be present.
+ *
+ * The key and value strings stored in the array point into memory owned by
+ * the array itself; release it with git_message_trailer_array_free once the
+ * trailers are no longer needed.
+ *
+ * @param arr A pre-allocated git_message_trailer_array struct to be filled in
+ * with any trailers found during parsing.
+ * @param message The message to be parsed
+ * @return 0 on success, or non-zero on error.
+ */
+GIT_EXTERN(int) git_message_trailers(git_message_trailer_array *arr, const char *message);
+
+/**
+ * Cleans up any allocated memory in the git_message_trailer_array filled by
+ * a call to git_message_trailers.
+ *
+ * @param arr The trailer array whose memory should be released
+ */
+GIT_EXTERN(void) git_message_trailer_array_free(git_message_trailer_array *arr);
+
/** @} */
GIT_END_DECL
diff --git a/src/trailer.c b/src/trailer.c
new file mode 100644
index 000000000..24c8847f6
--- /dev/null
+++ b/src/trailer.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#include "array.h"
+#include "common.h"
+#include "git2/message.h"
+
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+
+/* A line whose first character is this one is treated as a comment line. */
+#define COMMENT_LINE_CHAR '#'
+/* Characters accepted as the separator between a trailer key and its value. */
+#define TRAILER_SEPARATORS ":"
+
+/*
+ * Line prefixes that find_trailer_start() counts as recognized trailers.
+ * The list is terminated by a NULL entry.
+ */
+static const char *const git_generated_prefixes[] = {
+ "Signed-off-by: ",
+ "(cherry picked from commit ",
+ NULL
+};
+
+/*
+ * Return non-zero if the line starting at `str` contains nothing but
+ * whitespace before its newline (or before the end of the string when the
+ * line is not newline-terminated), and zero otherwise.
+ */
+static int is_blank_line(const char *str)
+{
+	const char *s = str;
+
+	/*
+	 * The <ctype.h> classifiers only accept values representable as
+	 * unsigned char (or EOF); a plain char holding a non-ASCII byte may be
+	 * negative, so convert before classifying.
+	 */
+	while (*s && *s != '\n' && isspace((unsigned char)*s))
+		s++;
+	return !*s || *s == '\n';
+}
+
+/*
+ * Step to the line following the one that starts at `str`: the character
+ * just past the next newline, or the terminating NUL if no newline is left.
+ */
+static const char *next_line(const char *str)
+{
+	const char *newline = strchr(str, '\n');
+
+	/* no newline left: hand back the terminator itself */
+	if (newline == NULL)
+		return str + strlen(str);
+
+	return newline + 1;
+}
+
+/*
+ * Find the offset at which the final line of the first `len` bytes of `buf`
+ * begins. An empty range has no line at all and yields -1.
+ */
+static int last_line(const char *buf, size_t len)
+{
+	int pos;
+
+	if (len == 0)
+		return -1;
+	if (len == 1)
+		return 0;
+
+	/*
+	 * Begin one byte before the final one: a newline in the final position
+	 * terminates the last line rather than starting a new one.
+	 */
+	pos = len - 2;
+	while (pos >= 0) {
+		if (buf[pos] == '\n')
+			return pos + 1;
+		pos--;
+	}
+
+	/* no earlier newline: the last line is also the first */
+	return 0;
+}
+
+/*
+ * If the given line is of the form
+ * "<token><optional whitespace><separator>..." or "<separator>...", return the
+ * location of the separator. Otherwise, return -1. The optional whitespace
+ * is allowed there primarily to allow things like "Bug #43" where <token> is
+ * "Bug" and <separator> is "#".
+ *
+ * The separator-starts-line case (in which this function returns 0) is
+ * distinguished from the non-well-formed-line case (in which this function
+ * returns -1) because some callers of this function need such a distinction.
+ *
+ * `separators` is a NUL-terminated set of accepted separator characters.
+ */
+static int find_separator(const char *line, const char *separators)
+{
+	int whitespace_found = 0;
+	const char *c;
+
+	for (c = line; *c; c++) {
+		if (strchr(separators, *c))
+			return (int)(c - line);
+		/*
+		 * Token characters are only accepted before the first blank.
+		 * Convert to unsigned char first: <ctype.h> classifiers are
+		 * undefined for negative values such as non-ASCII bytes.
+		 */
+		if (!whitespace_found && (isalnum((unsigned char)*c) || *c == '-'))
+			continue;
+		/* blanks may follow the token but may not start the line */
+		if (c != line && (*c == ' ' || *c == '\t')) {
+			whitespace_found = 1;
+			continue;
+		}
+		break;
+	}
+	return -1;
+}
+
+/*
+ * Inspect the given string and determine the true "end" of the log message, in
+ * order to find where to put a new Signed-off-by: line. Ignored are
+ * trailing comment lines and blank lines. To support "git commit -s
+ * --amend" on an existing commit, we also ignore "Conflicts:". To
+ * support "git commit -v", we truncate at cut lines.
+ *
+ * Returns the number of bytes from the tail to ignore, to be fed as
+ * the second parameter to append_signoff().
+ *
+ * NOTE(review): nothing in this function lowers `cutoff` below `len`, so no
+ * cut-line truncation actually happens here and `len - cutoff` is always 0.
+ * No append_signoff() appears in the code shown here either; the visible
+ * caller is find_trailer_end().
+ */
+static int ignore_non_trailer(const char *buf, size_t len)
+{
+ /* offset where the current trailing run of ignorable lines starts; 0 means "no run" */
+ int boc = 0;
+ /* offset of the line currently being examined */
+ size_t bol = 0;
+ int in_old_conflicts_block = 0;
+ size_t cutoff = len;
+
+ while (bol < cutoff) {
+ /* local pointer to the start of the following line (shadows the next_line() helper) */
+ const char *next_line = memchr(buf + bol, '\n', len - bol);
+
+ if (!next_line)
+ next_line = buf + len;
+ else
+ next_line++;
+
+ if (buf[bol] == COMMENT_LINE_CHAR || buf[bol] == '\n') {
+ /* is this the first of the run of comments? */
+ /* NOTE(review): a run that starts at offset 0 is indistinguishable from "no run" */
+ if (!boc)
+ boc = bol;
+ /* otherwise, it is just continuing */
+ } else if (git__prefixcmp(buf + bol, "Conflicts:\n") == 0) {
+ in_old_conflicts_block = 1;
+ if (!boc)
+ boc = bol;
+ } else if (in_old_conflicts_block && buf[bol] == '\t') {
+ ; /* a pathname in the conflicts block */
+ } else if (boc) {
+ /* the previous was not trailing comment */
+ boc = 0;
+ in_old_conflicts_block = 0;
+ }
+ bol = next_line - buf;
+ }
+ /* everything from the start of the final ignorable run to the end is ignored */
+ return boc ? len - boc : len - cutoff;
+}
+
+/*
+ * Locate the first line of `str` that begins with "---" and return its
+ * offset. When no line does, the offset of the terminating NUL (that is, the
+ * length of `str`) is returned instead.
+ */
+static int find_patch_start(const char *str)
+{
+	const char *line = str;
+
+	while (*line) {
+		if (git__prefixcmp(line, "---") == 0)
+			break;
+		line = next_line(line);
+	}
+
+	return line - str;
+}
+
+/*
+ * Return the position of the first trailer line or len if there are no
+ * trailers.
+ *
+ * Only the first `len` bytes of `buf` are considered. Lines are examined
+ * from the last one backwards, never going above the end of the title
+ * paragraph.
+ */
+static int find_trailer_start(const char *buf, size_t len)
+{
+	const char *s;
+	int end_of_title, l, only_spaces = 1;
+	int recognized_prefix = 0, trailer_lines = 0, non_trailer_lines = 0;
+	/*
+	 * Number of possible continuation lines encountered. This will be
+	 * reset to 0 if we encounter a trailer (since those lines are to be
+	 * considered continuations of that trailer), and added to
+	 * non_trailer_lines if we encounter a non-trailer (since those lines
+	 * are to be considered non-trailers).
+	 */
+	int possible_continuation_lines = 0;
+
+	/* The first paragraph is the title and cannot be trailers */
+	for (s = buf; s < buf + len; s = next_line(s)) {
+		if (s[0] == COMMENT_LINE_CHAR)
+			continue;
+		if (is_blank_line(s))
+			break;
+	}
+	end_of_title = s - buf;
+
+	/*
+	 * Get the start of the trailers by looking starting from the end for a
+	 * blank line before a set of non-blank lines that (i) are all
+	 * trailers, or (ii) contains at least one Git-generated trailer and
+	 * consists of at least 25% trailers.
+	 */
+	for (l = last_line(buf, len);
+	     l >= end_of_title;
+	     l = last_line(buf, l)) {
+		const char *bol = buf + l;
+		const char *const *p;
+		int separator_pos;
+
+		if (bol[0] == COMMENT_LINE_CHAR) {
+			non_trailer_lines += possible_continuation_lines;
+			possible_continuation_lines = 0;
+			continue;
+		}
+		if (is_blank_line(bol)) {
+			/* blank lines below the last paragraph are skipped */
+			if (only_spaces)
+				continue;
+			non_trailer_lines += possible_continuation_lines;
+			if (recognized_prefix &&
+			    trailer_lines * 3 >= non_trailer_lines)
+				return next_line(bol) - buf;
+			else if (trailer_lines && !non_trailer_lines)
+				return next_line(bol) - buf;
+			return len;
+		}
+		only_spaces = 0;
+
+		for (p = git_generated_prefixes; *p; p++) {
+			if (git__prefixcmp(bol, *p) == 0) {
+				trailer_lines++;
+				possible_continuation_lines = 0;
+				recognized_prefix = 1;
+				goto continue_outer_loop;
+			}
+		}
+
+		/*
+		 * <ctype.h> classifiers are undefined for negative values, so
+		 * the first byte of the line is converted to unsigned char
+		 * before being tested.
+		 */
+		separator_pos = find_separator(bol, TRAILER_SEPARATORS);
+		if (separator_pos >= 1 && !isspace((unsigned char)bol[0])) {
+			trailer_lines++;
+			possible_continuation_lines = 0;
+			if (recognized_prefix)
+				continue;
+		} else if (isspace((unsigned char)bol[0]))
+			possible_continuation_lines++;
+		else {
+			non_trailer_lines++;
+			non_trailer_lines += possible_continuation_lines;
+			possible_continuation_lines = 0;
+		}
+continue_outer_loop:
+		;
+	}
+
+	return len;
+}
+
+/*
+ * Offset just past the trailers: `len` less the number of tail bytes that
+ * ignore_non_trailer() reports as ignorable.
+ */
+static int find_trailer_end(const char *buf, size_t len)
+{
+	int ignored_tail = ignore_non_trailer(buf, len);
+
+	return len - ignored_tail;
+}
+
+/*
+ * Copy the trailer block of `message` into a freshly allocated,
+ * NUL-terminated buffer.
+ *
+ * The block spans from the first trailer line to the end of the trailers,
+ * leaving out anything from the patch start onwards and the ignorable tail.
+ *
+ * On success the buffer is returned and its length (without the terminator)
+ * is stored in `*len`; the caller owns the buffer and releases it with
+ * git__free. If the allocation fails, NULL is returned and `*len` is set
+ * to 0.
+ */
+static char *extract_trailer_block(const char *message, size_t* len)
+{
+	size_t patch_start = find_patch_start(message);
+	size_t trailer_end = find_trailer_end(message, patch_start);
+	size_t trailer_start = find_trailer_start(message, trailer_end);
+
+	size_t trailer_len = trailer_end - trailer_start;
+
+	char *buffer = git__malloc(trailer_len + 1);
+
+	/* never hand a failed allocation to memcpy */
+	if (buffer == NULL) {
+		*len = 0;
+		return NULL;
+	}
+
+	memcpy(buffer, message + trailer_start, trailer_len);
+	buffer[trailer_len] = 0;
+
+	*len = trailer_len;
+
+	return buffer;
+}
+
+/* States of the trailer-block parser in git_message_trailers(). */
+enum trailer_state {
+ S_START = 0, /* at the first character of a line */
+ S_KEY = 1, /* reading key characters */
+ S_KEY_WS = 2, /* blanks between the key and the separator */
+ S_SEP_WS = 3, /* blanks between the separator and the value */
+ S_VALUE = 4, /* reading value characters */
+ S_VALUE_NL = 5, /* just past a newline inside a value; continuation or end? */
+ S_VALUE_END = 6, /* a key/value pair is complete and gets recorded */
+ S_IGNORE = 7, /* skipping the rest of a line that is not a trailer */
+};
+
+/*
+ * State transitions for the parser loop; both rely on the local variables
+ * `state` and `ptr` of the enclosing function. NEXT consumes the current
+ * character, GOTO re-examines it in the new state. They are plain brace
+ * blocks rather than do/while(0) wrappers because the `continue` has to
+ * apply to the parser's own loop.
+ */
+#define NEXT(st) { state = (st); ptr++; continue; }
+#define GOTO(st) { state = (st); continue; }
+
+/* Growable array of trailers used while parsing. */
+typedef git_array_t(git_message_trailer) git_array_trailer_t;
+
+/*
+ * Parse the trailers of `message` into `trailer_arr`.
+ *
+ * The trailer block is copied out of the message and split in place: keys
+ * and values are terminated by overwriting characters of that copy with NUL,
+ * and every git_message_trailer points into it. The copy is stored in
+ * `_trailer_block` so that git_message_trailer_array_free can release it.
+ *
+ * Returns 0 on success and -1 if an allocation fails. In both cases
+ * `trailer_arr` is left in a state that is safe to pass to
+ * git_message_trailer_array_free.
+ */
+int git_message_trailers(git_message_trailer_array *trailer_arr, const char *message)
+{
+	enum trailer_state state = S_START;
+	int rc = 0;
+	char *ptr;
+	char *key = NULL;
+	char *value = NULL;
+	git_array_trailer_t arr = GIT_ARRAY_INIT;
+
+	size_t trailer_len;
+	char *trailer = extract_trailer_block(message, &trailer_len);
+
+	if (trailer == NULL) {
+		/* nothing was allocated; hand back an empty array */
+		trailer_arr->_trailer_block = NULL;
+		trailer_arr->trailers = NULL;
+		trailer_arr->count = 0;
+		return -1;
+	}
+
+	for (ptr = trailer;;) {
+		switch (state) {
+			case S_START: {
+				if (*ptr == 0) {
+					goto ret;
+				}
+
+				key = ptr;
+				GOTO(S_KEY);
+			}
+			case S_KEY: {
+				if (*ptr == 0) {
+					goto ret;
+				}
+
+				/*
+				 * <ctype.h> classifiers are undefined for
+				 * negative values, hence the conversion.
+				 */
+				if (isalnum((unsigned char)*ptr) || *ptr == '-') {
+					/* legal key character */
+					NEXT(S_KEY);
+				}
+
+				if (*ptr == ' ' || *ptr == '\t') {
+					/* optional whitespace before separator */
+					*ptr = 0;
+					NEXT(S_KEY_WS);
+				}
+
+				if (strchr(TRAILER_SEPARATORS, *ptr)) {
+					*ptr = 0;
+					NEXT(S_SEP_WS);
+				}
+
+				/* illegal character */
+				GOTO(S_IGNORE);
+			}
+			case S_KEY_WS: {
+				if (*ptr == 0) {
+					goto ret;
+				}
+
+				if (*ptr == ' ' || *ptr == '\t') {
+					NEXT(S_KEY_WS);
+				}
+
+				if (strchr(TRAILER_SEPARATORS, *ptr)) {
+					NEXT(S_SEP_WS);
+				}
+
+				/* illegal character */
+				GOTO(S_IGNORE);
+			}
+			case S_SEP_WS: {
+				if (*ptr == 0) {
+					goto ret;
+				}
+
+				if (*ptr == ' ' || *ptr == '\t') {
+					NEXT(S_SEP_WS);
+				}
+
+				/*
+				 * Let S_VALUE look at this character too: if it
+				 * is a newline the value is empty, and stepping
+				 * over it here would make the value swallow the
+				 * following line.
+				 */
+				value = ptr;
+				GOTO(S_VALUE);
+			}
+			case S_VALUE: {
+				if (*ptr == 0) {
+					GOTO(S_VALUE_END);
+				}
+
+				if (*ptr == '\n') {
+					NEXT(S_VALUE_NL);
+				}
+
+				NEXT(S_VALUE);
+			}
+			case S_VALUE_NL: {
+				if (*ptr == ' ') {
+					/* continuation line: still part of the value */
+					NEXT(S_VALUE);
+				}
+
+				/* cut the value at the newline that ended it */
+				ptr[-1] = 0;
+				GOTO(S_VALUE_END);
+			}
+			case S_VALUE_END: {
+				git_message_trailer *t = git_array_alloc(arr);
+
+				if (t == NULL) {
+					/*
+					 * Report the failure but still hand over
+					 * whatever is allocated so the caller's
+					 * free releases it.
+					 */
+					rc = -1;
+					goto ret;
+				}
+
+				t->key = key;
+				t->value = value;
+
+				key = NULL;
+				value = NULL;
+
+				GOTO(S_START);
+			}
+			case S_IGNORE: {
+				if (*ptr == 0) {
+					goto ret;
+				}
+
+				if (*ptr == '\n') {
+					NEXT(S_START);
+				}
+
+				NEXT(S_IGNORE);
+			}
+		}
+	}
+
+ret:
+	trailer_arr->_trailer_block = trailer;
+	trailer_arr->trailers = arr.ptr;
+	trailer_arr->count = arr.size;
+
+	return rc;
+}
+
+/*
+ * Release the memory held by a trailer array filled in by
+ * git_message_trailers. The struct itself is not freed. Passing NULL is a
+ * no-op, and the members are reset so that freeing the same array a second
+ * time is harmless.
+ */
+void git_message_trailer_array_free(git_message_trailer_array *arr)
+{
+	if (arr == NULL)
+		return;
+
+	git__free(arr->_trailer_block);
+	git__free(arr->trailers);
+
+	/* the trailers pointed into the block just released; drop them all */
+	arr->_trailer_block = NULL;
+	arr->trailers = NULL;
+	arr->count = 0;
+}
diff --git a/tests/message/trailer.c b/tests/message/trailer.c
new file mode 100644
index 000000000..9cc83de72
--- /dev/null
+++ b/tests/message/trailer.c
@@ -0,0 +1,165 @@
+#include "clar_libgit2.h"
+#include "message.h"
+
+/*
+ * Parse `message` and check that the result matches `trailers` exactly.
+ *
+ * `trailers` is the expected list, terminated by an entry whose key is NULL.
+ * Both the number of trailers and every key/value pair are compared, so a
+ * parser that returns too few or too many trailers fails the test.
+ */
+static void assert_trailers(const char *message, git_message_trailer *trailers)
+{
+	git_message_trailer_array arr;
+	size_t i, expected_count = 0;
+	int rc;
+
+	rc = git_message_trailers(&arr, message);
+	cl_assert_equal_i(0, rc);
+
+	/* count the expected entries up to the NULL-key sentinel */
+	while (trailers[expected_count].key != NULL)
+		expected_count++;
+
+	/* checked before the loop so it never reads past the sentinel */
+	cl_assert_equal_i((int)expected_count, (int)arr.count);
+
+	for (i = 0; i < arr.count; i++) {
+		cl_assert_equal_s(arr.trailers[i].key, trailers[i].key);
+		cl_assert_equal_s(arr.trailers[i].value, trailers[i].value);
+	}
+
+	git_message_trailer_array_free(&arr);
+}
+
+/* Two well-formed trailers in the last paragraph are both reported. */
+void test_message_trailer__simple(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"Signed-off-by: foo@bar.com\n"
+		"Signed-off-by: someone@else.com\n";
+	/* expected trailers, closed by a NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{"Signed-off-by", "foo@bar.com"},
+		{"Signed-off-by", "someone@else.com"},
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}
+
+/* A value may follow the separator directly, without any blank. */
+void test_message_trailer__no_whitespace(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"Key:value\n";
+	/* expected trailers, closed by a NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{"Key", "value"},
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}
+
+/* Blanks around the separator belong to neither the key nor the value. */
+void test_message_trailer__extra_whitespace(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"Key : value\n";
+	/* expected trailers, closed by a NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{"Key", "value"},
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}
+
+/* The last trailer is found even when the message lacks a final newline. */
+void test_message_trailer__no_newline(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"Key: value";
+	/* expected trailers, closed by a NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{"Key", "value"},
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}
+
+/* Key/value lines outside the last paragraph are not trailers. */
+void test_message_trailer__not_last_paragraph(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"Key: value\n"
+		"\n"
+		"More stuff\n";
+	/* no trailers expected: only the NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}
+
+/* A trailing "Conflicts:" block does not hide the trailers before it. */
+void test_message_trailer__conflicts(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"Key: value\n"
+		"\n"
+		"Conflicts:\n"
+		"\tfoo.c\n";
+	/* expected trailers, closed by a NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{"Key", "value"},
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}
+
+/* Everything from a "---" line onwards is a patch and is not parsed. */
+void test_message_trailer__patch(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"Key: value\n"
+		"\n"
+		"---\n"
+		"More: stuff\n";
+	/* expected trailers, closed by a NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{"Key", "value"},
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}
+
+/* Lines starting with a space continue the value of the previous trailer. */
+void test_message_trailer__continuation(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"A: b\n"
+		" c\n"
+		"D: e\n"
+		" f: g h\n"
+		"I: j\n";
+	/* expected trailers, closed by a NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{"A", "b\n c"},
+		{"D", "e\n f: g h"},
+		{"I", "j"},
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}
+
+/* A malformed line inside the trailer block is skipped, not reported. */
+void test_message_trailer__invalid(void)
+{
+	const char *message =
+		"Message\n"
+		"\n"
+		"Signed-off-by: some@one.com\n"
+		"Not a trailer\n"
+		"Another: trailer\n";
+	/* expected trailers, closed by a NULL/NULL sentinel */
+	git_message_trailer expected[] = {
+		{"Signed-off-by", "some@one.com"},
+		{"Another", "trailer"},
+		{NULL, NULL},
+	};
+
+	assert_trailers(message, expected);
+}