diff options
author | Vicent Martí <tanoku@gmail.com> | 2012-02-29 01:26:03 +0100 |
---|---|---|
committer | Vicent Martí <tanoku@gmail.com> | 2012-02-29 01:35:47 +0100 |
commit | 27950fa3f40f45ede9aa2b108796fd2b73b33016 (patch) | |
tree | be213ff6d1d92552add2100cf512665cb8074612 | |
parent | 450b40cab39c786bf67e7491755e7d0b3a4dc3ba (diff) | |
download | libgit2-27950fa3f40f45ede9aa2b108796fd2b73b33016.tar.gz |
filter: Add write-to CRLF filter
-rw-r--r-- | src/blob.c | 24 | ||||
-rw-r--r-- | src/crlf.c | 193 | ||||
-rw-r--r-- | src/filter.c | 131 | ||||
-rw-r--r-- | src/filter.h | 21 |
4 files changed, 254 insertions, 115 deletions
diff --git a/src/blob.c b/src/blob.c index 57a31041e..245326157 100644 --- a/src/blob.c +++ b/src/blob.c @@ -104,29 +104,29 @@ cleanup: static int write_file_filtered( git_oid *oid, git_odb *odb, - const char *path, + const char *full_path, git_vector *filters) { int error; - git_buf file_in = GIT_BUF_INIT; - git_buf filter_result = GIT_BUF_INIT; + git_buf source = GIT_BUF_INIT; + git_buf dest = GIT_BUF_INIT; - error = git_futils_readbuffer(&file_in, path); + error = git_futils_readbuffer(&source, full_path); if (error < GIT_SUCCESS) return error; - error = git_filter__apply(&filter_result, &file_in, filters, path); + error = git_filter__apply(&dest, &source, filters); if (error < GIT_SUCCESS) { - git_buf_free(&file_in); - git_buf_free(&filter_result); + git_buf_free(&source); + git_buf_free(&dest); return error; } - error = git_odb_write(oid, odb, filter_result.ptr, filter_result.size, GIT_OBJ_BLOB); + error = git_odb_write(oid, odb, dest.ptr, dest.size, GIT_OBJ_BLOB); - git_buf_free(&file_in); - git_buf_free(&filter_result); + git_buf_free(&source); + git_buf_free(&dest); return GIT_SUCCESS; } @@ -188,7 +188,7 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat git_vector write_filters = GIT_VECTOR_INIT; if ((error = git_filter__load_for_file( - &write_filters, repo, full_path.ptr, GIT_FILTER_TO_ODB)) < GIT_SUCCESS) + &write_filters, repo, path, GIT_FILTER_TO_ODB)) < GIT_SUCCESS) goto cleanup; if (write_filters.length == 0) { @@ -197,6 +197,8 @@ int git_blob_create_fromfile(git_oid *oid, git_repository *repo, const char *pat error = write_file_filtered(oid, odb, full_path.ptr, &write_filters); } + git_filter__free(&write_filters); + /* * TODO: eventually support streaming filtered files, for files which are bigger * than a given threshold. 
This is not a priority because applying a filter in diff --git a/src/crlf.c b/src/crlf.c new file mode 100644 index 000000000..d8dd1c382 --- /dev/null +++ b/src/crlf.c @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "common.h" +#include "fileops.h" +#include "hash.h" +#include "filter.h" +#include "repository.h" + +#include "git2/attr.h" + +struct crlf_attrs { + int crlf_action; + int eol; +}; + +struct crlf_filter { + git_filter f; + struct crlf_attrs attrs; +}; + +static int check_crlf(const char *value) +{ + if (value == git_attr__true) + return GIT_CRLF_TEXT; + + if (value == git_attr__false) + return GIT_CRLF_BINARY; + + if (value == NULL) + return GIT_CRLF_GUESS; + + if (strcmp(value, "input") == 0) + return GIT_CRLF_INPUT; + + if (strcmp(value, "auto") == 0) + return GIT_CRLF_AUTO; + + return GIT_CRLF_GUESS; +} + +static int check_eol(const char *value) +{ + if (value == NULL) + return GIT_EOL_UNSET; + + if (strcmp(value, "lf") == 0) + return GIT_EOL_LF; + + if (strcmp(value, "crlf") == 0) + return GIT_EOL_CRLF; + + return GIT_EOL_UNSET; +} + +static int crlf_input_action(struct crlf_attrs *ca) +{ + if (ca->crlf_action == GIT_CRLF_BINARY) + return GIT_CRLF_BINARY; + + if (ca->eol == GIT_EOL_LF) + return GIT_CRLF_INPUT; + + if (ca->eol == GIT_EOL_CRLF) + return GIT_CRLF_CRLF; + + return ca->crlf_action; +} + +static int crlf_load_attributes(struct crlf_attrs *ca, git_repository *repo, const char *path) +{ +#define NUM_CONV_ATTRS 3 + + static const char *attr_names[NUM_CONV_ATTRS] = { + "crlf", "eol", "text", + }; + + const char *attr_vals[NUM_CONV_ATTRS]; + int error; + + error = git_attr_get_many(repo, path, NUM_CONV_ATTRS, attr_names, attr_vals); + + if (error == GIT_ENOTFOUND) { + ca->crlf_action = GIT_CRLF_GUESS; + ca->eol = GIT_EOL_UNSET; + return 0; + } + + if 
(error == GIT_SUCCESS) { + ca->crlf_action = check_crlf(attr_vals[2]); /* text */ + if (ca->crlf_action == GIT_CRLF_GUESS) + ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */ + + ca->eol = check_eol(attr_vals[1]); /* eol */ + return 0; + } + + return error; +} + +static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *source) +{ + size_t i = 0; + struct crlf_filter *filter = (struct crlf_filter *)self; + + assert(self && dest && source); + + if (filter->attrs.crlf_action == GIT_CRLF_AUTO || + filter->attrs.crlf_action == GIT_CRLF_GUESS) { + + git_text_stats stats; + git_text__stat(&stats, source); + + /* + * We're currently not going to even try to convert stuff + * that has bare CR characters. Does anybody do that crazy + * stuff? + */ + if (stats.cr != stats.crlf) + return -1; + + /* + * And add some heuristics for binary vs text, of course... + */ + if (git_text__is_binary(&stats)) + return -1; + +#if 0 + if (crlf_action == CRLF_GUESS) { + /* + * If the file in the index has any CR in it, do not convert. + * This is the new safer autocrlf handling. 
+ */ + if (has_cr_in_index(path)) + return 0; + } +#endif + + if (!stats.cr) + return -1; + } + + /* TODO: do not copy anything if there isn't a single CR */ + while (i < source->size) { + size_t org = i; + + while (i < source->size && source->ptr[i] != '\r') + i++; + + if (i > org) + git_buf_put(dest, source->ptr + org, i - org); + + i++; + + if (i >= source->size || source->ptr[i] != '\n') { + git_buf_putc(dest, '\r'); + } + } + + return 0; +} + +int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path) +{ + struct crlf_filter filter; + int error; + + filter.f.apply = &crlf_apply_to_odb; + filter.f.do_free = NULL; + + if ((error = crlf_load_attributes(&filter.attrs, repo, path)) < 0) + return error; + + filter.attrs.crlf_action = crlf_input_action(&filter.attrs); + + if (filter.attrs.crlf_action == GIT_CRLF_BINARY) + return 0; + + if (filter.attrs.crlf_action == GIT_CRLF_GUESS && repo->filter_options.auto_crlf == GIT_AUTO_CRLF_FALSE) + return 0; + + *filter_out = git__malloc(sizeof(struct crlf_filter)); + if (*filter_out == NULL) + return GIT_ENOMEM; + + memcpy(*filter_out, &filter, sizeof(struct crlf_attrs)); + return 0; +} + diff --git a/src/filter.c b/src/filter.c index 1775c09c7..ed24ce202 100644 --- a/src/filter.c +++ b/src/filter.c @@ -10,10 +10,8 @@ #include "hash.h" #include "filter.h" -#include "git2/attr.h" - /* Fresh from Core Git. I wonder what we could use this for... */ -void git_text__stat(git_text_stats *stats, git_buf *text) +void git_text__stat(git_text_stats *stats, const git_buf *text) { size_t i; @@ -84,13 +82,45 @@ int git_text__is_binary(git_text_stats *stats) return 0; } -int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode) +int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *path, int mode) { - /* We don't load any filters yet. 
HAHA */ + int error; + git_filter *crlf_filter; + + return 0; /* TODO: not quite ready yet */ + + if (mode == GIT_FILTER_TO_ODB) { + error = git_filter__crlf_to_odb(&crlf_filter, repo, path); + if (error < GIT_SUCCESS) + return error; + + if (crlf_filter != NULL) + git_vector_insert(filters, crlf_filter); + + } else { + return git__throw(GIT_ENOTIMPLEMENTED, + "Worktree filters are not implemented yet"); + } + return 0; } -int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename) +void git_filter__free(git_vector *filters) +{ + size_t i; + git_filter *filter; + + git_vector_foreach(filters, i, filter) { + if (filter->do_free != NULL) + filter->do_free(filter); + else + free(filter); + } + + git_vector_free(filters); +} + +int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters) { unsigned int src, dst, i; git_buf *dbuffer[2]; @@ -106,7 +136,7 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const return GIT_ENOMEM; for (i = 0; i < filters->length; ++i) { - git_filter_cb filter = git_vector_get(filters, i); + git_filter *filter = git_vector_get(filters, i); dst = (src + 1) % 2; git_buf_clear(dbuffer[dst]); @@ -117,7 +147,7 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const * of the double buffering (so that the text goes through * cleanly). 
*/ - if (filter(dbuffer[dst], dbuffer[src], filename) == 0) { + if (filter->apply(filter, dbuffer[dst], dbuffer[src]) == 0) { src = (src + 1) % 2; } @@ -133,88 +163,3 @@ int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const return GIT_SUCCESS; } - -static int check_crlf(const char *value) -{ - if (value == git_attr__true) - return GIT_CRLF_TEXT; - - if (value == git_attr__false) - return GIT_CRLF_BINARY; - - if (value == NULL) - return GIT_CRLF_GUESS; - - if (strcmp(value, "input") == 0) - return GIT_CRLF_INPUT; - - if (strcmp(value, "auto") == 0) - return GIT_CRLF_AUTO; - - return GIT_CRLF_GUESS; -} - -static int check_eol(const char *value) -{ - if (value == NULL) - return GIT_EOL_UNSET; - - if (strcmp(value, "lf") == 0) - return GIT_EOL_LF; - - if (strcmp(value, "crlf") == 0) - return GIT_EOL_CRLF; - - return GIT_EOL_UNSET; -} - -static int check_ident(const char *value) -{ - return (value == git_attr__true); -} - -#if 0 -static int input_crlf_action(enum crlf_action text_attr, enum eol eol_attr) -{ - if (text_attr == CRLF_BINARY) - return CRLF_BINARY; - if (eol_attr == EOL_LF) - return CRLF_INPUT; - if (eol_attr == EOL_CRLF) - return CRLF_CRLF; - return text_attr; -} -#endif - -int git_filter__load_attrs(git_conv_attrs *ca, git_repository *repo, const char *path) -{ -#define NUM_CONV_ATTRS 5 - - static const char *attr_names[NUM_CONV_ATTRS] = { - "crlf", "ident", "filter", "eol", "text", - }; - - const char *attr_vals[NUM_CONV_ATTRS]; - int error; - - error = git_attr_get_many(repo, path, NUM_CONV_ATTRS, attr_names, attr_vals); - - if (error == GIT_ENOTFOUND) { - ca->crlf_action = GIT_CRLF_GUESS; - ca->eol_attr = GIT_EOL_UNSET; - ca->ident = 0; - return 0; - } - - if (error == GIT_SUCCESS) { - ca->crlf_action = check_crlf(attr_vals[4]); /* text */ - if (ca->crlf_action == GIT_CRLF_GUESS) - ca->crlf_action = check_crlf(attr_vals[0]); /* clrf */ - - ca->ident = check_ident(attr_vals[1]); /* ident */ - ca->eol_attr = 
check_eol(attr_vals[3]); /* eol */ - return 0; - } - - return error; -} diff --git a/src/filter.h b/src/filter.h index 2ed9da00b..9055fc0dc 100644 --- a/src/filter.h +++ b/src/filter.h @@ -12,7 +12,10 @@ #include "git2/odb.h" #include "git2/repository.h" -typedef int (*git_filter_cb)(git_buf *dest, const git_buf *source, const char *filename); +typedef struct git_filter { + int (*apply)(struct git_filter *self, git_buf *dest, const git_buf *source); + void (*do_free)(struct git_filter *self); +} git_filter; typedef enum { GIT_FILTER_TO_WORKTREE, @@ -47,13 +50,6 @@ typedef enum { #endif } git_eol_t; - -typedef struct { - int crlf_action; - int eol_attr; - int ident; -} git_conv_attrs; - typedef struct { /* NUL, CR, LF and CRLF counts */ unsigned int nul, cr, lf, crlf; @@ -63,14 +59,17 @@ typedef struct { } git_text_stats; extern int git_filter__load_for_file(git_vector *filters, git_repository *repo, const char *full_path, int mode); -extern int git_filter__load_attrs(git_conv_attrs *ca, git_repository *repo, const char *path); -extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters, const char *filename); +extern void git_filter__free(git_vector *filters); +extern int git_filter__apply(git_buf *dest, git_buf *source, git_vector *filters); /* Gather stats for a piece of text */ -extern void git_text__stat(git_text_stats *stats, git_buf *text); +extern void git_text__stat(git_text_stats *stats, const git_buf *text); /* Heuristics on a set of text stats to check whether it's binary * text or not */ extern int git_text__is_binary(git_text_stats *stats); +/* Available filters */ +extern int git_filter__crlf_to_odb(git_filter **filter_out, git_repository *repo, const char *path); + #endif |