diff options
author | Junio C Hamano <gitster@pobox.com> | 2008-05-10 18:14:28 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2008-05-10 18:14:28 -0700 |
commit | 380a7426794dcad369dd48519cd01d6e0246cde5 (patch) | |
tree | a6671d54dfa9c41094f541ef5377ae53d04e842c | |
parent | 1f8115b113def8ee03701aa87b26c5e8b7c94434 (diff) | |
parent | 1102952b45dde09d73445aa2284bcb592362fa23 (diff) | |
download | git-380a7426794dcad369dd48519cd01d6e0246cde5.tar.gz |
Merge branch 'lt/case-insensitive'
* lt/case-insensitive:
Make git-add behave more sensibly in a case-insensitive environment
When adding files to the index, add support for case-independent matches
Make unpack-tree update removed files before any updated files
Make branch merging aware of underlying case-insensitive filesystems
Add 'core.ignorecase' option
Make hash_name_lookup able to do case-independent lookups
Make "index_name_exists()" return the cache_entry it found
Move name hashing functions into a file of its own
Make unpack_trees_options bit flags actual bitfields
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | builtin-read-tree.c | 2 | ||||
-rw-r--r-- | cache.h | 37 | ||||
-rw-r--r-- | config.c | 5 | ||||
-rw-r--r-- | dir.c | 2 | ||||
-rw-r--r-- | environment.c | 1 | ||||
-rw-r--r-- | name-hash.c | 119 | ||||
-rw-r--r-- | read-cache.c | 114 | ||||
-rw-r--r-- | unpack-trees.c | 43 | ||||
-rw-r--r-- | unpack-trees.h | 22 |
10 files changed, 241 insertions, 105 deletions
@@ -423,6 +423,7 @@ LIB_OBJS += log-tree.o LIB_OBJS += mailmap.o LIB_OBJS += match-trees.o LIB_OBJS += merge-file.o +LIB_OBJS += name-hash.o LIB_OBJS += object.o LIB_OBJS += pack-check.o LIB_OBJS += pack-revindex.o diff --git a/builtin-read-tree.c b/builtin-read-tree.c index e9cfd2bbc5..7ac30883bc 100644 --- a/builtin-read-tree.c +++ b/builtin-read-tree.c @@ -40,7 +40,7 @@ static int read_cache_unmerged(void) for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; if (ce_stage(ce)) { - remove_index_entry(ce); + remove_name_hash(ce); if (last && !strcmp(ce->name, last->name)) continue; cache_tree_invalidate_path(active_cache_tree, ce->name); @@ -133,6 +133,7 @@ struct cache_entry { #define CE_UPDATE (0x10000) #define CE_REMOVE (0x20000) #define CE_UPTODATE (0x40000) +#define CE_ADDED (0x80000) #define CE_HASHED (0x100000) #define CE_UNHASHED (0x200000) @@ -153,20 +154,6 @@ static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state; } -/* - * We don't actually *remove* it, we can just mark it invalid so that - * we won't find it in lookups. - * - * Not only would we have to search the lists (simple enough), but - * we'd also have to rehash other hash buckets in case this makes the - * hash bucket empty (common). So it's much better to just mark - * it. - */ -static inline void remove_index_entry(struct cache_entry *ce) -{ - ce->ce_flags |= CE_UNHASHED; -} - static inline unsigned create_ce_flags(size_t len, unsigned stage) { if (len >= CE_NAMEMASK) @@ -241,6 +228,23 @@ struct index_state { extern struct index_state the_index; +/* Name hashing */ +extern void add_name_hash(struct index_state *istate, struct cache_entry *ce); +/* + * We don't actually *remove* it, we can just mark it invalid so that + * we won't find it in lookups. 
+ * + * Not only would we have to search the lists (simple enough), but + * we'd also have to rehash other hash buckets in case this makes the + * hash bucket empty (common). So it's much better to just mark + * it. + */ +static inline void remove_name_hash(struct cache_entry *ce) +{ + ce->ce_flags |= CE_UNHASHED; +} + + #ifndef NO_THE_INDEX_COMPATIBILITY_MACROS #define active_cache (the_index.cache) #define active_nr (the_index.cache_nr) @@ -261,7 +265,7 @@ extern struct index_state the_index; #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL) #define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options)) #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options)) -#define cache_name_exists(name, namelen) index_name_exists(&the_index, (name), (namelen)) +#define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase)) #endif enum object_type { @@ -351,7 +355,7 @@ extern int write_index(const struct index_state *, int newfd); extern int discard_index(struct index_state *); extern int unmerged_index(const struct index_state *); extern int verify_path(const char *path); -extern int index_name_exists(struct index_state *istate, const char *name, int namelen); +extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase); extern int index_name_pos(const struct index_state *, const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ @@ -405,6 +409,7 @@ extern int delete_ref(const char *, const unsigned char *sha1); extern int trust_executable_bit; extern int quote_path_fully; extern int has_symlinks; +extern int ignore_case; extern int assume_unchanged; extern int prefer_symlink_refs; extern int log_all_ref_updates; @@ -350,6 +350,11 @@ int git_default_config(const char *var, const char *value) return 0; } + if 
(!strcmp(var, "core.ignorecase")) { + ignore_case = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.bare")) { is_bare_repository_cfg = git_config_bool(var, value); return 0; @@ -389,7 +389,7 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len) struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len) { - if (cache_name_exists(pathname, len)) + if (cache_name_exists(pathname, len, ignore_case)) return NULL; ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc); diff --git a/environment.c b/environment.c index fcd1ee5ef8..945574169b 100644 --- a/environment.c +++ b/environment.c @@ -14,6 +14,7 @@ char git_default_name[MAX_GITNAME]; int trust_executable_bit = 1; int quote_path_fully = 1; int has_symlinks = 1; +int ignore_case; int assume_unchanged; int prefer_symlink_refs; int is_bare_repository_cfg = -1; /* unspecified */ diff --git a/name-hash.c b/name-hash.c new file mode 100644 index 0000000000..0031d78e8c --- /dev/null +++ b/name-hash.c @@ -0,0 +1,119 @@ +/* + * name-hash.c + * + * Hashing names in the index state + * + * Copyright (C) 2008 Linus Torvalds + */ +#define NO_THE_INDEX_COMPATIBILITY_MACROS +#include "cache.h" + +/* + * This removes bit 5 if bit 6 is set. + * + * That will make US-ASCII characters hash to their upper-case + * equivalent. We could easily do this one whole word at a time, + * but that's for future worries. 
+ */ +static inline unsigned char icase_hash(unsigned char c) +{ + return c & ~((c & 0x40) >> 1); +} + +static unsigned int hash_name(const char *name, int namelen) +{ + unsigned int hash = 0x123; + + do { + unsigned char c = *name++; + c = icase_hash(c); + hash = hash*101 + c; + } while (--namelen); + return hash; +} + +static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) +{ + void **pos; + unsigned int hash; + + if (ce->ce_flags & CE_HASHED) + return; + ce->ce_flags |= CE_HASHED; + ce->next = NULL; + hash = hash_name(ce->name, ce_namelen(ce)); + pos = insert_hash(hash, ce, &istate->name_hash); + if (pos) { + ce->next = *pos; + *pos = ce; + } +} + +static void lazy_init_name_hash(struct index_state *istate) +{ + int nr; + + if (istate->name_hash_initialized) + return; + for (nr = 0; nr < istate->cache_nr; nr++) + hash_index_entry(istate, istate->cache[nr]); + istate->name_hash_initialized = 1; +} + +void add_name_hash(struct index_state *istate, struct cache_entry *ce) +{ + ce->ce_flags &= ~CE_UNHASHED; + if (istate->name_hash_initialized) + hash_index_entry(istate, ce); +} + +static int slow_same_name(const char *name1, int len1, const char *name2, int len2) +{ + if (len1 != len2) + return 0; + + while (len1) { + unsigned char c1 = *name1++; + unsigned char c2 = *name2++; + len1--; + if (c1 != c2) { + c1 = toupper(c1); + c2 = toupper(c2); + if (c1 != c2) + return 0; + } + } + return 1; +} + +static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase) +{ + int len = ce_namelen(ce); + + /* + * Always do exact compare, even if we want a case-ignoring comparison; + * we do the quick exact one first, because it will be the common case. 
+ */ + if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) + return 1; + + return icase && slow_same_name(name, namelen, ce->name, len); +} + +struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) +{ + unsigned int hash = hash_name(name, namelen); + struct cache_entry *ce; + + lazy_init_name_hash(istate); + ce = lookup_hash(hash, &istate->name_hash); + + while (ce) { + if (!(ce->ce_flags & CE_UNHASHED)) { + if (same_name(ce, name, namelen, icase)) + return ce; + } + ce = ce->next; + } + return NULL; +} diff --git a/read-cache.c b/read-cache.c index c3692f41ad..3b20a142ea 100644 --- a/read-cache.c +++ b/read-cache.c @@ -23,80 +23,21 @@ struct index_state the_index; -static unsigned int hash_name(const char *name, int namelen) -{ - unsigned int hash = 0x123; - - do { - unsigned char c = *name++; - hash = hash*101 + c; - } while (--namelen); - return hash; -} - -static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) -{ - void **pos; - unsigned int hash; - - if (ce->ce_flags & CE_HASHED) - return; - ce->ce_flags |= CE_HASHED; - ce->next = NULL; - hash = hash_name(ce->name, ce_namelen(ce)); - pos = insert_hash(hash, ce, &istate->name_hash); - if (pos) { - ce->next = *pos; - *pos = ce; - } -} - -static void lazy_init_name_hash(struct index_state *istate) -{ - int nr; - - if (istate->name_hash_initialized) - return; - for (nr = 0; nr < istate->cache_nr; nr++) - hash_index_entry(istate, istate->cache[nr]); - istate->name_hash_initialized = 1; -} - static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { - ce->ce_flags &= ~CE_UNHASHED; istate->cache[nr] = ce; - if (istate->name_hash_initialized) - hash_index_entry(istate, ce); + add_name_hash(istate, ce); } static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { struct cache_entry *old = istate->cache[nr]; - remove_index_entry(old); + 
remove_name_hash(old); set_index_entry(istate, nr, ce); istate->cache_changed = 1; } -int index_name_exists(struct index_state *istate, const char *name, int namelen) -{ - unsigned int hash = hash_name(name, namelen); - struct cache_entry *ce; - - lazy_init_name_hash(istate); - ce = lookup_hash(hash, &istate->name_hash); - - while (ce) { - if (!(ce->ce_flags & CE_UNHASHED)) { - if (!cache_name_compare(name, namelen, ce->name, ce->ce_flags)) - return 1; - } - ce = ce->next; - } - return 0; -} - /* * This only updates the "non-critical" parts of the directory * cache, ie the parts that aren't tracked by GIT, and only used @@ -438,7 +379,7 @@ int remove_index_entry_at(struct index_state *istate, int pos) { struct cache_entry *ce = istate->cache[pos]; - remove_index_entry(ce); + remove_name_hash(ce); istate->cache_changed = 1; istate->cache_nr--; if (pos >= istate->cache_nr) @@ -488,11 +429,43 @@ static int index_name_pos_also_unmerged(struct index_state *istate, return pos; } +static int different_name(struct cache_entry *ce, struct cache_entry *alias) +{ + int len = ce_namelen(ce); + return ce_namelen(alias) != len || memcmp(ce->name, alias->name, len); +} + +/* + * If we add a filename that aliases in the cache, we will use the + * name that we already have - but we don't want to update the same + * alias twice, because that implies that there were actually two + * different files with aliasing names! 
+ * + * So we use the CE_ADDED flag to verify that the alias was an old + * one before we accept it as the name to use. + */ +static struct cache_entry *create_alias_ce(struct cache_entry *ce, struct cache_entry *alias) +{ + int len; + struct cache_entry *new; + + if (alias->ce_flags & CE_ADDED) + die("Will not add file alias '%s' ('%s' already exists in index)", ce->name, alias->name); + + /* Ok, create the new entry using the name of the existing alias */ + len = ce_namelen(alias); + new = xcalloc(1, cache_entry_size(len)); + memcpy(new->name, alias->name, len); + copy_cache_entry(new, ce); + free(ce); + return new; +} + int add_file_to_index(struct index_state *istate, const char *path, int verbose) { - int size, namelen, pos; + int size, namelen; struct stat st; - struct cache_entry *ce; + struct cache_entry *ce, *alias; unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_RACY_IS_DIRTY; if (lstat(path, &st)) @@ -525,18 +498,19 @@ int add_file_to_index(struct index_state *istate, const char *path, int verbose) ce->ce_mode = ce_mode_from_stat(ent, st.st_mode); } - pos = index_name_pos(istate, ce->name, namelen); - if (0 <= pos && - !ce_stage(istate->cache[pos]) && - !ie_match_stat(istate, istate->cache[pos], &st, ce_option)) { + alias = index_name_exists(istate, ce->name, ce_namelen(ce), ignore_case); + if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, &st, ce_option)) { /* Nothing changed, really */ free(ce); - ce_mark_uptodate(istate->cache[pos]); + ce_mark_uptodate(alias); + alias->ce_flags |= CE_ADDED; return 0; } - if (index_path(ce->sha1, path, &st, 1)) die("unable to index file %s", path); + if (ignore_case && alias && different_name(ce, alias)) + ce = create_alias_ce(ce, alias); + ce->ce_flags |= CE_ADDED; if (add_index_entry(istate, ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE)) die("unable to add %s to index",path); if (verbose) diff --git a/unpack-trees.c b/unpack-trees.c index a59f47557a..feae846226 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@
-79,16 +79,21 @@ static int check_updates(struct unpack_trees_options *o) for (i = 0; i < index->cache_nr; i++) { struct cache_entry *ce = index->cache[i]; - if (ce->ce_flags & (CE_UPDATE | CE_REMOVE)) - display_progress(progress, ++cnt); if (ce->ce_flags & CE_REMOVE) { + display_progress(progress, ++cnt); if (o->update) unlink_entry(ce->name, last_symlink); remove_index_entry_at(&o->result, i); i--; continue; } + } + + for (i = 0; i < index->cache_nr; i++) { + struct cache_entry *ce = index->cache[i]; + if (ce->ce_flags & CE_UPDATE) { + display_progress(progress, ++cnt); ce->ce_flags &= ~CE_UPDATE; if (o->update) { errs |= checkout_entry(ce, &state, NULL); @@ -521,6 +526,22 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action, } /* + * This gets called when there was no index entry for the tree entry 'dst', + * but we found a file in the working tree that 'lstat()' said was fine, + * and we're on a case-insensitive filesystem. + * + * See if we can find a case-insensitive match in the index that also + * matches the stat information, and assume it's that other file! + */ +static int icase_exists(struct unpack_trees_options *o, struct cache_entry *dst, struct stat *st) +{ + struct cache_entry *src; + + src = index_name_exists(o->src_index, dst->name, ce_namelen(dst), 1); + return src && !ie_match_stat(o->src_index, src, st, CE_MATCH_IGNORE_VALID); +} + +/* * We do not want to remove or overwrite a working tree file that * is not tracked, unless it is ignored. */ @@ -538,6 +559,17 @@ static int verify_absent(struct cache_entry *ce, const char *action, if (!lstat(ce->name, &st)) { int cnt; int dtype = ce_to_dtype(ce); + struct cache_entry *result; + + /* + * It may be that the 'lstat()' succeeded even though + * target 'ce' was absent, because there is an old + * entry that is different only in case.. + * + * Ignore that lstat() if it matches. 
+ */ + if (ignore_case && icase_exists(o, ce, &st)) + return 0; if (o->dir && excluded(o->dir, ce->name, &dtype)) /* @@ -581,10 +613,9 @@ static int verify_absent(struct cache_entry *ce, const char *action, * delete this path, which is in a subdirectory that * is being replaced with a blob. */ - cnt = index_name_pos(&o->result, ce->name, strlen(ce->name)); - if (0 <= cnt) { - struct cache_entry *ce = o->result.cache[cnt]; - if (ce->ce_flags & CE_REMOVE) + result = index_name_exists(&o->result, ce->name, ce_namelen(ce), 0); + if (result) { + if (result->ce_flags & CE_REMOVE) return 0; } diff --git a/unpack-trees.h b/unpack-trees.h index 50453ed20f..d436d6ced9 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -9,16 +9,16 @@ typedef int (*merge_fn_t)(struct cache_entry **src, struct unpack_trees_options *options); struct unpack_trees_options { - int reset; - int merge; - int update; - int index_only; - int nontrivial_merge; - int trivial_merges_only; - int verbose_update; - int aggressive; - int skip_unmerged; - int gently; + unsigned int reset:1, + merge:1, + update:1, + index_only:1, + nontrivial_merge:1, + trivial_merges_only:1, + verbose_update:1, + aggressive:1, + skip_unmerged:1, + gently:1; const char *prefix; int pos; struct dir_struct *dir; @@ -31,7 +31,7 @@ struct unpack_trees_options { void *unpack_data; struct index_state *dst_index; - const struct index_state *src_index; + struct index_state *src_index; struct index_state result; }; |