diff options
author | Junio C Hamano <gitster@pobox.com> | 2015-05-13 14:05:51 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2015-05-13 14:05:51 -0700 |
commit | 8a1d89745d1b60d0d9e8bd91e4e9564673b6c22a (patch) | |
tree | eef6bd2437ba2a1188dbe1b4023b27ef99531b65 | |
parent | ebb464f0cba9efcb5552fad02f452f09f68fc9b2 (diff) | |
parent | 27547e5fccda134560ad0441aa5bfa187387cec0 (diff) | |
download | git-8a1d89745d1b60d0d9e8bd91e4e9564673b6c22a.tar.gz |
Merge branch 'cn/bom-in-gitignore' into maint
Teach the codepaths that read .gitignore and .gitattributes files
that these files, when encoded in UTF-8, may have a UTF-8 BOM marker
at the beginning; this brings them in line with what we already do
for configuration files.
* cn/bom-in-gitignore:
attr: skip UTF8 BOM at the beginning of the input file
config: use utf8_bom[] from utf8.[ch] in git_parse_source()
utf8-bom: introduce skip_utf8_bom() helper
add_excludes_from_file: clarify the bom skipping logic
dir: allow a BOM at the beginning of exclude files
-rw-r--r-- | attr.c | 9 | ||||
-rw-r--r-- | config.c | 6 | ||||
-rw-r--r-- | dir.c | 6 | ||||
-rwxr-xr-x | t/t7061-wtstatus-ignore.sh | 9 | ||||
-rw-r--r-- | utf8.c | 11 | ||||
-rw-r--r-- | utf8.h | 3 |
6 files changed, 39 insertions, 5 deletions
@@ -12,6 +12,7 @@ #include "exec_cmd.h" #include "attr.h" #include "dir.h" +#include "utf8.h" const char git_attr__true[] = "(builtin)true"; const char git_attr__false[] = "\0(builtin)false"; @@ -379,8 +380,12 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok) return NULL; } res = xcalloc(1, sizeof(*res)); - while (fgets(buf, sizeof(buf), fp)) - handle_attr_line(res, buf, path, ++lineno, macro_ok); + while (fgets(buf, sizeof(buf), fp)) { + char *bufp = buf; + if (!lineno) + skip_utf8_bom(&bufp, strlen(bufp)); + handle_attr_line(res, bufp, path, ++lineno, macro_ok); + } fclose(fp); return res; } @@ -12,6 +12,7 @@ #include "quote.h" #include "hashmap.h" #include "string-list.h" +#include "utf8.h" struct config_source { struct config_source *prev; @@ -417,8 +418,7 @@ static int git_parse_source(config_fn_t fn, void *data) struct strbuf *var = &cf->var; /* U+FEFF Byte Order Mark in UTF8 */ - static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf"; - const unsigned char *bomptr = utf8_bom; + const char *bomptr = utf8_bom; for (;;) { int c = get_next_char(); @@ -426,7 +426,7 @@ static int git_parse_source(config_fn_t fn, void *data) /* We are at the file beginning; skip UTF8-encoded BOM * if present. Sane editors won't put this in on their * own, but e.g. Windows Notepad will do it happily. 
*/ - if ((unsigned char) c == *bomptr) { + if (c == (*bomptr & 0377)) { bomptr++; continue; } else { @@ -12,6 +12,7 @@ #include "refs.h" #include "wildmatch.h" #include "pathspec.h" +#include "utf8.h" struct path_simplify { int len; @@ -617,7 +618,12 @@ int add_excludes_from_file_to_list(const char *fname, } el->filebuf = buf; + + if (skip_utf8_bom(&buf, size)) + size -= buf - el->filebuf; + entry = buf; + for (i = 0; i < size; i++) { if (buf[i] == '\n') { if (entry != buf + i && entry[0] != '#') { diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 460789b4d8..cdc0747bf0 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -20,6 +20,15 @@ test_expect_success 'status untracked directory with --ignored' ' test_cmp expected actual ' +test_expect_success 'same with gitignore starting with BOM' ' + printf "\357\273\277ignored\n" >.gitignore && + mkdir -p untracked && + : >untracked/ignored && + : >untracked/uncommitted && + git status --porcelain --ignored >actual && + test_cmp expected actual +' + cat >expected <<\EOF ?? .gitignore ?? actual @@ -633,3 +633,14 @@ int is_hfs_dotgit(const char *path) return 1; } + +const char utf8_bom[] = "\357\273\277"; + +int skip_utf8_bom(char **text, size_t len) +{ + if (len < strlen(utf8_bom) || + memcmp(*text, utf8_bom, strlen(utf8_bom))) + return 0; + *text += strlen(utf8_bom); + return 1; +} @@ -13,6 +13,9 @@ int same_encoding(const char *, const char *); __attribute__((format (printf, 2, 3))) int utf8_fprintf(FILE *, const char *, ...); +extern const char utf8_bom[]; +extern int skip_utf8_bom(char **, size_t); + void strbuf_add_wrapped_text(struct strbuf *buf, const char *text, int indent, int indent2, int width); void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, |