diff options
field | value | date |
---|---|---|
author | Russell Belfer <rb@github.com> | 2014-05-12 10:51:56 -0700 |
committer | Russell Belfer <rb@github.com> | 2014-05-12 10:51:56 -0700 |
commit | df3419269bb3a7dba18f1df8a31d7d79e0a20475 (patch) | |
tree | 38684dcd60eb4f2ce3b634f09915f4236ef32e08 | |
parent | af567e8853e359df3b3abb7d3f7c15c3b1b391c7 (diff) | |
parent | 8a2ef218b99c41923dc32e25dc0915f68e2e4bca (diff) | |
download | libgit2-df3419269bb3a7dba18f1df8a31d7d79e0a20475.tar.gz | |
Merge pull request #2336 from libgit2/rb/unicode-branch-names

Pass unconverted Unicode path data when iconv doesn't like it
-rw-r--r-- | src/path.c | 63 |
-rw-r--r-- | src/path.h | 2 |
-rw-r--r-- | src/repository.c | 57 |
-rw-r--r-- | tests/clar_libgit2.c | 3 |
-rw-r--r-- | tests/refs/branches/create.c | 56 |
5 files changed, 124 insertions, 57 deletions
diff --git a/src/path.c b/src/path.c index 2690cd8e8..e0b00a086 100644 --- a/src/path.c +++ b/src/path.c @@ -799,8 +799,11 @@ int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen) if (rv != (size_t)-1) break; + /* if we cannot convert the data (probably because iconv thinks + * it is not valid UTF-8 source data), then use original data + */ if (errno != E2BIG) - goto fail; + return 0; /* make space for 2x the remaining data to be converted * (with per retry overhead to avoid infinite loops) @@ -823,6 +826,64 @@ fail: return -1; } +static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX"; +static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX"; + +/* Check if the platform is decomposing unicode data for us. We will + * emulate core Git and prefer to use precomposed unicode data internally + * on these platforms, composing the decomposed unicode on the fly. + * + * This mainly happens on the Mac where HDFS stores filenames as + * decomposed unicode. Even on VFAT and SAMBA file systems, the Mac will + * return decomposed unicode from readdir() even when the actual + * filesystem is storing precomposed unicode. + */ +bool git_path_does_fs_decompose_unicode(const char *root) +{ + git_buf path = GIT_BUF_INIT; + int fd; + bool found_decomposed = false; + char tmp[6]; + + /* Create a file using a precomposed path and then try to find it + * using the decomposed name. If the lookup fails, then we will mark + * that we should precompose unicode for this repository. 
+ */ + if (git_buf_joinpath(&path, root, nfc_file) < 0 || + (fd = p_mkstemp(path.ptr)) < 0) + goto done; + p_close(fd); + + /* record trailing digits generated by mkstemp */ + memcpy(tmp, path.ptr + path.size - sizeof(tmp), sizeof(tmp)); + + /* try to look up as NFD path */ + if (git_buf_joinpath(&path, root, nfd_file) < 0) + goto done; + memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp)); + + found_decomposed = git_path_exists(path.ptr); + + /* remove temporary file (using original precomposed path) */ + if (git_buf_joinpath(&path, root, nfc_file) < 0) + goto done; + memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp)); + + (void)p_unlink(path.ptr); + +done: + git_buf_free(&path); + return found_decomposed; +} + +#else + +bool git_path_does_fs_decompose_unicode(const char *root) +{ + GIT_UNUSED(root); + return false; +} + #endif #if defined(__sun) || defined(__GNU__) diff --git a/src/path.h b/src/path.h index 2367d707b..3213c5104 100644 --- a/src/path.h +++ b/src/path.h @@ -436,4 +436,6 @@ extern int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen); #endif /* GIT_USE_ICONV */ +extern bool git_path_does_fs_decompose_unicode(const char *root); + #endif diff --git a/src/repository.c b/src/repository.c index 43a476016..7d055e28e 100644 --- a/src/repository.c +++ b/src/repository.c @@ -889,60 +889,6 @@ static bool are_symlinks_supported(const char *wd_path) return symlinks_supported; } -#ifdef GIT_USE_ICONV - -static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX"; -static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX"; - -/* Check if the platform is decomposing unicode data for us. We will - * emulate core Git and prefer to use precomposed unicode data internally - * on these platforms, composing the decomposed unicode on the fly. - * - * This mainly happens on the Mac where HDFS stores filenames as - * decomposed unicode. 
Even on VFAT and SAMBA file systems, the Mac will - * return decomposed unicode from readdir() even when the actual - * filesystem is storing precomposed unicode. - */ -static bool does_fs_decompose_unicode_paths(const char *wd_path) -{ - git_buf path = GIT_BUF_INIT; - int fd; - bool found_decomposed = false; - char tmp[6]; - - /* Create a file using a precomposed path and then try to find it - * using the decomposed name. If the lookup fails, then we will mark - * that we should precompose unicode for this repository. - */ - if (git_buf_joinpath(&path, wd_path, nfc_file) < 0 || - (fd = p_mkstemp(path.ptr)) < 0) - goto done; - p_close(fd); - - /* record trailing digits generated by mkstemp */ - memcpy(tmp, path.ptr + path.size - sizeof(tmp), sizeof(tmp)); - - /* try to look up as NFD path */ - if (git_buf_joinpath(&path, wd_path, nfd_file) < 0) - goto done; - memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp)); - - found_decomposed = git_path_exists(path.ptr); - - /* remove temporary file (using original precomposed path) */ - if (git_buf_joinpath(&path, wd_path, nfc_file) < 0) - goto done; - memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp)); - - (void)p_unlink(path.ptr); - -done: - git_buf_free(&path); - return found_decomposed; -} - -#endif - static int create_empty_file(const char *path, mode_t mode) { int fd; @@ -1033,8 +979,9 @@ static int repo_init_fs_configs( #ifdef GIT_USE_ICONV if ((error = git_config_set_bool( cfg, "core.precomposeunicode", - does_fs_decompose_unicode_paths(work_dir))) < 0) + git_path_does_fs_decompose_unicode(work_dir))) < 0) return error; + /* on non-iconv platforms, don't even set core.precomposeunicode */ #endif return 0; diff --git a/tests/clar_libgit2.c b/tests/clar_libgit2.c index f457adb33..b2730f4d1 100644 --- a/tests/clar_libgit2.c +++ b/tests/clar_libgit2.c @@ -408,7 +408,8 @@ int cl_repo_get_bool(git_repository *repo, const char *cfg) int val = 0; git_config *config; 
cl_git_pass(git_repository_config(&config, repo)); - cl_git_pass(git_config_get_bool(&val, config, cfg));; + if (git_config_get_bool(&val, config, cfg) < 0) + giterr_clear(); git_config_free(config); return val; } diff --git a/tests/refs/branches/create.c b/tests/refs/branches/create.c index 38af2f681..864640ab3 100644 --- a/tests/refs/branches/create.c +++ b/tests/refs/branches/create.c @@ -1,5 +1,6 @@ #include "clar_libgit2.h" #include "refs.h" +#include "path.h" static git_repository *repo; static git_commit *target; @@ -137,3 +138,58 @@ void test_refs_branches_create__default_reflog_message(void) git_reflog_free(log); git_signature_free(sig); } + +static void assert_branch_matches_name( + const char *expected, const char *lookup_as) +{ + git_reference *ref; + git_buf b = GIT_BUF_INIT; + + cl_git_pass(git_branch_lookup(&ref, repo, lookup_as, GIT_BRANCH_LOCAL)); + + cl_git_pass(git_buf_sets(&b, "refs/heads/")); + cl_git_pass(git_buf_puts(&b, expected)); + cl_assert_equal_s(b.ptr, git_reference_name(ref)); + + cl_git_pass( + git_oid_cmp(git_reference_target(ref), git_commit_id(target))); + + git_reference_free(ref); + git_buf_free(&b); +} + +void test_refs_branches_create__can_create_branch_with_unicode(void) +{ + const char *nfc = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D"; + const char *nfd = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D"; + const char *emoji = "\xF0\x9F\x8D\xB7"; + const char *names[] = { nfc, nfd, emoji }; + const char *alt[] = { nfd, nfc, NULL }; + const char *expected[] = { nfc, nfd, emoji }; + unsigned int i; + bool fs_decompose_unicode = + git_path_does_fs_decompose_unicode(git_repository_path(repo)); + + retrieve_known_commit(&target, repo); + + if (cl_repo_get_bool(repo, "core.precomposeunicode")) + expected[1] = nfc; + /* test decomp. 
because not all Mac filesystems decompose unicode */ + else if (fs_decompose_unicode) + expected[0] = nfd; + + for (i = 0; i < ARRAY_SIZE(names); ++i) { + cl_git_pass(git_branch_create( + &branch, repo, names[i], target, 0, NULL, NULL)); + cl_git_pass(git_oid_cmp( + git_reference_target(branch), git_commit_id(target))); + + assert_branch_matches_name(expected[i], names[i]); + if (fs_decompose_unicode && alt[i]) + assert_branch_matches_name(expected[i], alt[i]); + + cl_git_pass(git_branch_delete(branch)); + git_reference_free(branch); + branch = NULL; + } +} |