summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Junio C Hamano <gitster@pobox.com> 2015-05-13 14:05:51 -0700
committer Junio C Hamano <gitster@pobox.com> 2015-05-13 14:05:51 -0700
commit 8a1d89745d1b60d0d9e8bd91e4e9564673b6c22a (patch)
tree eef6bd2437ba2a1188dbe1b4023b27ef99531b65
parent ebb464f0cba9efcb5552fad02f452f09f68fc9b2 (diff)
parent 27547e5fccda134560ad0441aa5bfa187387cec0 (diff)
download git-8a1d89745d1b60d0d9e8bd91e4e9564673b6c22a.tar.gz
Merge branch 'cn/bom-in-gitignore' into maint
Teach the codepaths that read .gitignore and .gitattributes files that these files, when encoded in UTF-8, may have a UTF-8 BOM marker at the beginning; this brings them in line with what we already do for configuration files.

* cn/bom-in-gitignore:
  attr: skip UTF8 BOM at the beginning of the input file
  config: use utf8_bom[] from utf8.[ch] in git_parse_source()
  utf8-bom: introduce skip_utf8_bom() helper
  add_excludes_from_file: clarify the bom skipping logic
  dir: allow a BOM at the beginning of exclude files
-rw-r--r-- attr.c 9
-rw-r--r-- config.c 6
-rw-r--r-- dir.c 6
-rwxr-xr-x t/t7061-wtstatus-ignore.sh 9
-rw-r--r-- utf8.c 11
-rw-r--r-- utf8.h 3
6 files changed, 39 insertions, 5 deletions
diff --git a/attr.c b/attr.c
index 1f9eebd2dd..7f445965c1 100644
--- a/attr.c
+++ b/attr.c
@@ -12,6 +12,7 @@
#include "exec_cmd.h"
#include "attr.h"
#include "dir.h"
+#include "utf8.h"
const char git_attr__true[] = "(builtin)true";
const char git_attr__false[] = "\0(builtin)false";
@@ -379,8 +380,12 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
return NULL;
}
res = xcalloc(1, sizeof(*res));
- while (fgets(buf, sizeof(buf), fp))
- handle_attr_line(res, buf, path, ++lineno, macro_ok);
+ while (fgets(buf, sizeof(buf), fp)) {
+ char *bufp = buf;
+ if (!lineno)
+ skip_utf8_bom(&bufp, strlen(bufp));
+ handle_attr_line(res, bufp, path, ++lineno, macro_ok);
+ }
fclose(fp);
return res;
}
diff --git a/config.c b/config.c
index 66c0a51bce..c4424c0138 100644
--- a/config.c
+++ b/config.c
@@ -12,6 +12,7 @@
#include "quote.h"
#include "hashmap.h"
#include "string-list.h"
+#include "utf8.h"
struct config_source {
struct config_source *prev;
@@ -417,8 +418,7 @@ static int git_parse_source(config_fn_t fn, void *data)
struct strbuf *var = &cf->var;
/* U+FEFF Byte Order Mark in UTF8 */
- static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
- const unsigned char *bomptr = utf8_bom;
+ const char *bomptr = utf8_bom;
for (;;) {
int c = get_next_char();
@@ -426,7 +426,7 @@ static int git_parse_source(config_fn_t fn, void *data)
/* We are at the file beginning; skip UTF8-encoded BOM
* if present. Sane editors won't put this in on their
* own, but e.g. Windows Notepad will do it happily. */
- if ((unsigned char) c == *bomptr) {
+ if (c == (*bomptr & 0377)) {
bomptr++;
continue;
} else {
diff --git a/dir.c b/dir.c
index 0943a81964..a3e7073400 100644
--- a/dir.c
+++ b/dir.c
@@ -12,6 +12,7 @@
#include "refs.h"
#include "wildmatch.h"
#include "pathspec.h"
+#include "utf8.h"
struct path_simplify {
int len;
@@ -617,7 +618,12 @@ int add_excludes_from_file_to_list(const char *fname,
}
el->filebuf = buf;
+
+ if (skip_utf8_bom(&buf, size))
+ size -= buf - el->filebuf;
+
entry = buf;
+
for (i = 0; i < size; i++) {
if (buf[i] == '\n') {
if (entry != buf + i && entry[0] != '#') {
diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh
index 460789b4d8..cdc0747bf0 100755
--- a/t/t7061-wtstatus-ignore.sh
+++ b/t/t7061-wtstatus-ignore.sh
@@ -20,6 +20,15 @@ test_expect_success 'status untracked directory with --ignored' '
test_cmp expected actual
'
+test_expect_success 'same with gitignore starting with BOM' '
+ printf "\357\273\277ignored\n" >.gitignore &&
+ mkdir -p untracked &&
+ : >untracked/ignored &&
+ : >untracked/uncommitted &&
+ git status --porcelain --ignored >actual &&
+ test_cmp expected actual
+'
+
cat >expected <<\EOF
?? .gitignore
?? actual
diff --git a/utf8.c b/utf8.c
index 520fbb4994..28e6d76a42 100644
--- a/utf8.c
+++ b/utf8.c
@@ -633,3 +633,14 @@ int is_hfs_dotgit(const char *path)
return 1;
}
+
+const char utf8_bom[] = "\357\273\277";
+
+int skip_utf8_bom(char **text, size_t len)
+{
+ if (len < strlen(utf8_bom) ||
+ memcmp(*text, utf8_bom, strlen(utf8_bom)))
+ return 0;
+ *text += strlen(utf8_bom);
+ return 1;
+}
diff --git a/utf8.h b/utf8.h
index e4d9183c5f..e7b2aa4168 100644
--- a/utf8.h
+++ b/utf8.h
@@ -13,6 +13,9 @@ int same_encoding(const char *, const char *);
__attribute__((format (printf, 2, 3)))
int utf8_fprintf(FILE *, const char *, ...);
+extern const char utf8_bom[];
+extern int skip_utf8_bom(char **, size_t);
+
void strbuf_add_wrapped_text(struct strbuf *buf,
const char *text, int indent, int indent2, int width);
void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,