diff options
author | Junio C Hamano <gitster@pobox.com> | 2012-07-13 15:37:51 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2012-07-13 15:37:51 -0700 |
commit | b856ad623e4f686815986c0b9341dd1bfd791e71 (patch) | |
tree | ddce49b4afdb7428c1cba49628020bd796704661 | |
parent | 6a9aa0c9b224517db0549d9252fdbc5177e6c0e2 (diff) | |
parent | 76759c7dff53e8c84e975b88cb8245587c14c7ba (diff) | |
download | git-b856ad623e4f686815986c0b9341dd1bfd791e71.tar.gz |
Merge branch 'tb/sanitize-decomposed-utf-8-pathname'
Teaches git to normalize pathnames read from readdir(3) and all
arguments from the command line into precomposed UTF-8 (assuming
that they come as decomposed UTF-8) to work around issues on Mac OS.
I think there still are other places that need conversion
(e.g. paths that are read from stdin for some commands), but this
should be a good first step in the right direction.
* tb/sanitize-decomposed-utf-8-pathname:
git on Mac OS and precomposed unicode
-rw-r--r-- | Documentation/config.txt | 9 | ||||
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | builtin/init-db.c | 1 | ||||
-rw-r--r-- | cache.h | 1 | ||||
-rw-r--r-- | compat/precompose_utf8.c | 190 | ||||
-rw-r--r-- | compat/precompose_utf8.h | 45 | ||||
-rw-r--r-- | config.c | 5 | ||||
-rw-r--r-- | environment.c | 1 | ||||
-rw-r--r-- | git-compat-util.h | 9 | ||||
-rw-r--r-- | parse-options.c | 1 | ||||
-rwxr-xr-x | t/t3910-mac-os-precompose.sh | 164 | ||||
-rw-r--r-- | utf8.c | 26 | ||||
-rw-r--r-- | utf8.h | 1 |
13 files changed, 446 insertions, 10 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index c6ff15e594..7bc0e53848 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -211,6 +211,15 @@ The default is false, except linkgit:git-clone[1] or linkgit:git-init[1] will probe and set core.ignorecase true if appropriate when the repository is created. +core.precomposeunicode:: + This option is only used by Mac OS implementation of git. + When core.precomposeunicode=true, git reverts the unicode decomposition + of filenames done by Mac OS. This is useful when sharing a repository + between Mac OS and Linux or Windows. + (Git for Windows 1.7.10 or higher is needed, or git under cygwin 1.7). + When false, file names are handled fully transparent by git, + which is backward compatible with older versions of git. + core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time @@ -607,6 +607,7 @@ LIB_H += compat/bswap.h LIB_H += compat/cygwin.h LIB_H += compat/mingw.h LIB_H += compat/obstack.h +LIB_H += compat/precompose_utf8.h LIB_H += compat/terminal.h LIB_H += compat/win32/dirent.h LIB_H += compat/win32/poll.h @@ -1001,6 +1002,8 @@ ifeq ($(uname_S),Darwin) NO_MEMMEM = YesPlease USE_ST_TIMESPEC = YesPlease HAVE_DEV_TTY = YesPlease + COMPAT_OBJS += compat/precompose_utf8.o + BASIC_CFLAGS += -DPRECOMPOSE_UNICODE endif ifeq ($(uname_S),SunOS) NEEDS_SOCKET = YesPlease diff --git a/builtin/init-db.c b/builtin/init-db.c index 0dacb8b79c..244fb7fc32 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -290,6 +290,7 @@ static int create_default_files(const char *template_path) strcpy(path + len, "CoNfIg"); if (!access(path, F_OK)) git_config_set("core.ignorecase", "true"); + probe_utf8_pathname_composition(path, len); } return reinit; @@ -563,6 +563,7 @@ extern int read_replace_refs; extern int fsync_object_files; extern int core_preload_index; extern int core_apply_sparse_checkout; +extern int precomposed_unicode; enum branch_track { BRANCH_TRACK_UNSPECIFIED = -1, diff --git a/compat/precompose_utf8.c b/compat/precompose_utf8.c new file mode 100644 index 0000000000..d40d1b3807 --- /dev/null +++ b/compat/precompose_utf8.c @@ -0,0 +1,190 @@ +/* + * Converts filenames from decomposed unicode into precomposed unicode. + * Used on MacOS X. +*/ + + +#define PRECOMPOSE_UNICODE_C + +#include "cache.h" +#include "utf8.h" +#include "precompose_utf8.h" + +typedef char *iconv_ibp; +const static char *repo_encoding = "UTF-8"; +const static char *path_encoding = "UTF-8-MAC"; + + +static size_t has_utf8(const char *s, size_t maxlen, size_t *strlen_c) +{ + const uint8_t *utf8p = (const uint8_t*) s; + size_t strlen_chars = 0; + size_t ret = 0; + + if ((!utf8p) || (!*utf8p)) { + return 0; + } + + while((*utf8p) && maxlen) { + if (*utf8p & 0x80) + ret++; + strlen_chars++; + utf8p++; + maxlen--; + } + if (strlen_c) + *strlen_c = strlen_chars; + + return ret; +} + + +void probe_utf8_pathname_composition(char *path, int len) +{ + const static char *auml_nfc = "\xc3\xa4"; + const static char *auml_nfd = "\x61\xcc\x88"; + int output_fd; + if (precomposed_unicode != -1) + return; /* We found it defined in the global config, respect it */ + path[len] = 0; + strcpy(path + len, auml_nfc); + output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600); + if (output_fd >=0) { + close(output_fd); + path[len] = 0; + strcpy(path + len, auml_nfd); + /* Indicate to the user, that we can configure it to true */ + if (0 == access(path, R_OK)) + git_config_set("core.precomposeunicode", "false"); + /* To be backward compatible, set precomposed_unicode to 0 */ + precomposed_unicode = 0; + path[len] = 0; + strcpy(path + len, auml_nfc); + unlink(path); + } +} + + +void precompose_argv(int argc, const char **argv) +{ + int i = 0; + const char *oldarg; + char *newarg; + iconv_t ic_precompose; + + if (precomposed_unicode != 1) + return; + + ic_precompose = iconv_open(repo_encoding, path_encoding); + if (ic_precompose == (iconv_t) -1) + return; + + while (i < argc) { + size_t namelen; + oldarg = argv[i]; + if (has_utf8(oldarg, (size_t)-1, &namelen)) { + newarg = reencode_string_iconv(oldarg, namelen, ic_precompose); + if (newarg) + argv[i] = newarg; + } + i++; + } + iconv_close(ic_precompose); +} + + +PREC_DIR *precompose_utf8_opendir(const char *dirname) +{ + PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR)); + prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx)); + prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name); + + prec_dir->dirp = opendir(dirname); + if (!prec_dir->dirp) { + free(prec_dir->dirent_nfc); + free(prec_dir); + return NULL; + } else { + int ret_errno = errno; + prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding); + /* if iconv_open() fails, die() in readdir() if needed */ + errno = ret_errno; + } + + return prec_dir; +} + +struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir) +{ + struct dirent *res; + res = readdir(prec_dir->dirp); + if (res) { + size_t namelenz = strlen(res->d_name) + 1; /* \0 */ + size_t new_maxlen = namelenz; + + int ret_errno = errno; + + if (new_maxlen > prec_dir->dirent_nfc->max_name_len) { + size_t new_len = sizeof(dirent_prec_psx) + new_maxlen - + sizeof(prec_dir->dirent_nfc->d_name); + + prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len); + prec_dir->dirent_nfc->max_name_len = new_maxlen; + } + + prec_dir->dirent_nfc->d_ino = res->d_ino; + prec_dir->dirent_nfc->d_type = res->d_type; + + if ((precomposed_unicode == 1) && has_utf8(res->d_name, (size_t)-1, NULL)) { + if (prec_dir->ic_precompose == (iconv_t)-1) { + die("iconv_open(%s,%s) failed, but needed:\n" + " precomposed unicode is not supported.\n" + " If you wnat to use decomposed unicode, run\n" + " \"git config core.precomposeunicode false\"\n", + repo_encoding, path_encoding); + } else { + iconv_ibp cp = (iconv_ibp)res->d_name; + size_t inleft = namelenz; + char *outpos = &prec_dir->dirent_nfc->d_name[0]; + size_t outsz = prec_dir->dirent_nfc->max_name_len; + size_t cnt; + errno = 0; + cnt = iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz); + if (errno || inleft) { + /* + * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF + * MacOS X avoids illegal byte sequemces. + * If they occur on a mounted drive (e.g. NFS) it is not worth to + * die() for that, but rather let the user see the original name + */ + namelenz = 0; /* trigger strlcpy */ + } + } + } + else + namelenz = 0; + + if (!namelenz) + strlcpy(prec_dir->dirent_nfc->d_name, res->d_name, + prec_dir->dirent_nfc->max_name_len); + + errno = ret_errno; + return prec_dir->dirent_nfc; + } + return NULL; +} + + +int precompose_utf8_closedir(PREC_DIR *prec_dir) +{ + int ret_value; + int ret_errno; + ret_value = closedir(prec_dir->dirp); + ret_errno = errno; + if (prec_dir->ic_precompose != (iconv_t)-1) + iconv_close(prec_dir->ic_precompose); + free(prec_dir->dirent_nfc); + free(prec_dir); + errno = ret_errno; + return ret_value; +} diff --git a/compat/precompose_utf8.h b/compat/precompose_utf8.h new file mode 100644 index 0000000000..3b73585fc5 --- /dev/null +++ b/compat/precompose_utf8.h @@ -0,0 +1,45 @@ +#ifndef PRECOMPOSE_UNICODE_H +#include <sys/stat.h> +#include <sys/types.h> +#include <dirent.h> +#include <iconv.h> + + +typedef struct dirent_prec_psx { + ino_t d_ino; /* Posix */ + size_t max_name_len; /* See below */ + unsigned char d_type; /* available on all systems git runs on */ + + /* + * See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html + * NAME_MAX + 1 should be enough, but some systems have + * NAME_MAX=255 and strlen(d_name) may return 508 or 510 + * Solution: allocate more when needed, see precompose_utf8_readdir() + */ + char d_name[NAME_MAX+1]; +} dirent_prec_psx; + + +typedef struct { + iconv_t ic_precompose; + DIR *dirp; + struct dirent_prec_psx *dirent_nfc; +} PREC_DIR; + +void precompose_argv(int argc, const char **argv); +void probe_utf8_pathname_composition(char *, int); + +PREC_DIR *precompose_utf8_opendir(const char *dirname); +struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *dirp); +int precompose_utf8_closedir(PREC_DIR *dirp); + +#ifndef PRECOMPOSE_UNICODE_C +#define dirent dirent_prec_psx +#define opendir(n) precompose_utf8_opendir(n) +#define readdir(d) precompose_utf8_readdir(d) +#define closedir(d) precompose_utf8_closedir(d) +#define DIR PREC_DIR +#endif /* PRECOMPOSE_UNICODE_C */ + +#define PRECOMPOSE_UNICODE_H +#endif /* PRECOMPOSE_UNICODE_H */ @@ -758,6 +758,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.precomposeunicode")) { + precomposed_unicode = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. */ return 0; } diff --git a/environment.c b/environment.c index 669e498f5a..85edd7f95a 100644 --- a/environment.c +++ b/environment.c @@ -58,6 +58,7 @@ char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; int merge_log_config = -1; +int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ struct startup_info *startup_info; unsigned long pack_size_limit_cfg; diff --git a/git-compat-util.h b/git-compat-util.h index 5bd9ad7d2a..35b095e8ae 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -153,6 +153,15 @@ #endif #endif +/* used on Mac OS X */ +#ifdef PRECOMPOSE_UNICODE +#include "compat/precompose_utf8.h" +#else +#define precompose_str(in,i_nfd2nfc) +#define precompose_argv(c,v) +#define probe_utf8_pathname_composition(a,b) +#endif + #ifndef NO_LIBGEN_H #include <libgen.h> #else diff --git a/parse-options.c b/parse-options.c index ab70c29c49..c1c66bd408 100644 --- a/parse-options.c +++ b/parse-options.c @@ -476,6 +476,7 @@ int parse_options(int argc, const char **argv, const char *prefix, usage_with_options(usagestr, options); } + precompose_argv(argc, argv); return parse_options_end(&ctx); } diff --git a/t/t3910-mac-os-precompose.sh b/t/t3910-mac-os-precompose.sh new file mode 100755 index 0000000000..88b7a20c11 --- /dev/null +++ b/t/t3910-mac-os-precompose.sh @@ -0,0 +1,164 @@ +#!/bin/sh +# +# Copyright (c) 2012 Torsten Bögershausen +# + +test_description='utf-8 decomposed (nfd) converted to precomposed (nfc)' + +. ./test-lib.sh + +Adiarnfc=`printf '\303\204'` +Adiarnfd=`printf 'A\314\210'` + +# check if the feature is compiled in +mkdir junk && +>junk/"$Adiarnfc" && +case "$(cd junk && echo *)" in + "$Adiarnfd") + test_nfd=1 + ;; + *) ;; +esac +rm -rf junk + + +if test "$test_nfd" +then + # create more utf-8 variables + Odiarnfc=`printf '\303\226'` + Odiarnfd=`printf 'O\314\210'` + AEligatu=`printf '\303\206'` + Invalidu=`printf '\303\377'` + + + #Create a string with 255 bytes (decomposed) + Alongd=$Adiarnfd$Adiarnfd$Adiarnfd$Adiarnfd$Adiarnfd$Adiarnfd$Adiarnfd #21 Byte + Alongd=$Alongd$Alongd$Alongd #63 Byte + Alongd=$Alongd$Alongd$Alongd$Alongd$Adiarnfd #255 Byte + + #Create a string with 254 bytes (precomposed) + Alongc=$AEligatu$AEligatu$AEligatu$AEligatu$AEligatu #10 Byte + Alongc=$Alongc$Alongc$Alongc$Alongc$Alongc #50 Byte + Alongc=$Alongc$Alongc$Alongc$Alongc$Alongc #250 Byte + Alongc=$Alongc$AEligatu$AEligatu #254 Byte + + test_expect_success "detect if nfd needed" ' + precomposeunicode=`git config core.precomposeunicode` && + test "$precomposeunicode" = false && + git config core.precomposeunicode true + ' + test_expect_success "setup" ' + >x && + git add x && + git commit -m "1st commit" && + git rm x && + git commit -m "rm x" + ' + test_expect_success "setup case mac" ' + git checkout -b mac_os + ' + # This will test nfd2nfc in readdir() + test_expect_success "add file Adiarnfc" ' + echo f.Adiarnfc >f.$Adiarnfc && + git add f.$Adiarnfc && + git commit -m "add f.$Adiarnfc" + ' + # This will test nfd2nfc in git stage() + test_expect_success "stage file d.Adiarnfd/f.Adiarnfd" ' + mkdir d.$Adiarnfd && + echo d.$Adiarnfd/f.$Adiarnfd >d.$Adiarnfd/f.$Adiarnfd && + git stage d.$Adiarnfd/f.$Adiarnfd && + git commit -m "add d.$Adiarnfd/f.$Adiarnfd" + ' + test_expect_success "add link Adiarnfc" ' + ln -s d.$Adiarnfd/f.$Adiarnfd l.$Adiarnfc && + git add l.$Adiarnfc && + git commit -m "add l.Adiarnfc" + ' + # This will test git log + test_expect_success "git log f.Adiar" ' + git log f.$Adiarnfc > f.Adiarnfc.log && + git log f.$Adiarnfd > f.Adiarnfd.log && + test -s f.Adiarnfc.log && + test -s f.Adiarnfd.log && + test_cmp f.Adiarnfc.log f.Adiarnfd.log && + rm f.Adiarnfc.log f.Adiarnfd.log + ' + # This will test git ls-files + test_expect_success "git lsfiles f.Adiar" ' + git ls-files f.$Adiarnfc > f.Adiarnfc.log && + git ls-files f.$Adiarnfd > f.Adiarnfd.log && + test -s f.Adiarnfc.log && + test -s f.Adiarnfd.log && + test_cmp f.Adiarnfc.log f.Adiarnfd.log && + rm f.Adiarnfc.log f.Adiarnfd.log + ' + # This will test git mv + test_expect_success "git mv" ' + git mv f.$Adiarnfd f.$Odiarnfc && + git mv d.$Adiarnfd d.$Odiarnfc && + git mv l.$Adiarnfd l.$Odiarnfc && + git commit -m "mv Adiarnfd Odiarnfc" + ' + # Files can be checked out as nfc + # And the link has been corrected from nfd to nfc + test_expect_success "git checkout nfc" ' + rm f.$Odiarnfc && + git checkout f.$Odiarnfc + ' + # Make it possible to checkout files with their NFD names + test_expect_success "git checkout file nfd" ' + rm -f f.* && + git checkout f.$Odiarnfd + ' + # Make it possible to checkout links with their NFD names + test_expect_success "git checkout link nfd" ' + rm l.* && + git checkout l.$Odiarnfd + ' + test_expect_success "setup case mac2" ' + git checkout master && + git reset --hard && + git checkout -b mac_os_2 + ' + # This will test nfd2nfc in git commit + test_expect_success "commit file d2.Adiarnfd/f.Adiarnfd" ' + mkdir d2.$Adiarnfd && + echo d2.$Adiarnfd/f.$Adiarnfd >d2.$Adiarnfd/f.$Adiarnfd && + git add d2.$Adiarnfd/f.$Adiarnfd && + git commit -m "add d2.$Adiarnfd/f.$Adiarnfd" -- d2.$Adiarnfd/f.$Adiarnfd + ' + test_expect_success "setup for long decomposed filename" ' + git checkout master && + git reset --hard && + git checkout -b mac_os_long_nfd_fn + ' + test_expect_success "Add long decomposed filename" ' + echo longd >$Alongd && + git add * && + git commit -m "Long filename" + ' + test_expect_success "setup for long precomposed filename" ' + git checkout master && + git reset --hard && + git checkout -b mac_os_long_nfc_fn + ' + test_expect_success "Add long precomposed filename" ' + echo longc >$Alongc && + git add * && + git commit -m "Long filename" + ' + # Test if the global core.precomposeunicode stops autosensing + # Must be the last test case + test_expect_success "respect git config --global core.precomposeunicode" ' + git config --global core.precomposeunicode true && + rm -rf .git && + git init && + precomposeunicode=`git config core.precomposeunicode` && + test "$precomposeunicode" = "true" + ' +else + say "Skipping nfc/nfd tests" +fi + +test_done @@ -433,19 +433,12 @@ int is_encoding_utf8(const char *name) #else typedef char * iconv_ibp; #endif -char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) +char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv) { - iconv_t conv; - size_t insz, outsz, outalloc; + size_t outsz, outalloc; char *out, *outpos; iconv_ibp cp; - if (!in_encoding) - return NULL; - conv = iconv_open(out_encoding, in_encoding); - if (conv == (iconv_t) -1) - return NULL; - insz = strlen(in); outsz = insz; outalloc = outsz + 1; /* for terminating NUL */ out = xmalloc(outalloc); @@ -459,7 +452,6 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e size_t sofar; if (errno != E2BIG) { free(out); - iconv_close(conv); return NULL; } /* insz has remaining number of bytes. @@ -478,6 +470,20 @@ char *reencode_string(const char *in, const char *out_encoding, const char *in_e break; } } + return out; +} + +char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) +{ + iconv_t conv; + char *out; + + if (!in_encoding) + return NULL; + conv = iconv_open(out_encoding, in_encoding); + if (conv == (iconv_t) -1) + return NULL; + out = reencode_string_iconv(in, strlen(in), conv); iconv_close(conv); return out; } @@ -14,6 +14,7 @@ int strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len, int indent, int indent2, int width); #ifndef NO_ICONV +char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv); char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding); #else #define reencode_string(a,b,c) NULL |