summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/gitrepository-layout.txt4
-rw-r--r--Documentation/technical/index-format.txt35
-rw-r--r--Makefile1
-rw-r--r--cache.h3
-rw-r--r--read-cache.c96
-rw-r--r--split-index.c90
-rw-r--r--split-index.h25
-rw-r--r--unpack-trees.c4
8 files changed, 253 insertions, 5 deletions
diff --git a/Documentation/gitrepository-layout.txt b/Documentation/gitrepository-layout.txt
index 17d2ea6c1e..79653f3134 100644
--- a/Documentation/gitrepository-layout.txt
+++ b/Documentation/gitrepository-layout.txt
@@ -155,6 +155,10 @@ index::
The current index file for the repository. It is
usually not found in a bare repository.
+sharedindex.<SHA-1>::
+ The shared index part, to be referenced by $GIT_DIR/index and
+ other temporary index files. Only valid in split index mode.
+
info::
Additional information about the repository is recorded
in this directory.
diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index f352a9b22e..fe6f31667d 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -129,6 +129,9 @@ Git index format
(Version 4) In version 4, the padding after the pathname does not
exist.
+ Interpretation of index entries in split index mode is completely
+ different. See below for details.
+
== Extensions
=== Cached tree
@@ -198,3 +201,35 @@ Git index format
- At most three 160-bit object names of the entry in stages from 1 to 3
(nothing is written for a missing stage).
+=== Split index
+
+ In split index mode, the majority of index entries could be stored
+ in a separate file. This extension records the changes to be made on
+ top of that to produce the final index.
+
+ The signature for this extension is { 'l', 'i', 'n', 'k' }.
+
+ The extension consists of:
+
+ - 160-bit SHA-1 of the shared index file. The shared index file path
+ is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the
+ index does not require a shared index file.
+
+ - An ewah-encoded delete bitmap, in which each bit represents an entry
+ in the shared index. If a bit is set, its corresponding entry in
+ the shared index will be removed from the final index. Note that
+ because a delete operation changes index entry positions, while
+ the replace phase needs the original positions, it's best to just
+ mark entries for removal, then do a mass deletion after replacement.
+
+ - An ewah-encoded replace bitmap, each bit represents an entry in
+ the shared index. If a bit is set, its corresponding entry in the
+ shared index will be replaced with an entry in this index
+ file. All replaced entries are stored in sorted order in this
+ index. The first "1" bit in the replace bitmap corresponds to the
+ first index entry, the second "1" bit to the second entry and so
+ on. Replaced entries may have empty path names to save space.
+
+ The remaining index entries after replaced ones will be added to the
+ final index. These added entries are also sorted by entry name then
+ stage.
diff --git a/Makefile b/Makefile
index 81e8214236..94ad3cec7f 100644
--- a/Makefile
+++ b/Makefile
@@ -887,6 +887,7 @@ LIB_OBJS += sha1_name.o
LIB_OBJS += shallow.o
LIB_OBJS += sideband.o
LIB_OBJS += sigchain.o
+LIB_OBJS += split-index.o
LIB_OBJS += strbuf.o
LIB_OBJS += streaming.o
LIB_OBJS += string-list.o
diff --git a/cache.h b/cache.h
index 41cdcd0633..47fe3746be 100644
--- a/cache.h
+++ b/cache.h
@@ -135,6 +135,7 @@ struct cache_entry {
unsigned int ce_mode;
unsigned int ce_flags;
unsigned int ce_namelen;
+ unsigned int index; /* for link extension */
unsigned char sha1[20];
char name[FLEX_ARRAY]; /* more */
};
@@ -275,12 +276,14 @@ static inline unsigned int canon_mode(unsigned int mode)
#define RESOLVE_UNDO_CHANGED (1 << 4)
#define CACHE_TREE_CHANGED (1 << 5)
+struct split_index;
struct index_state {
struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
struct cache_tree *cache_tree;
+ struct split_index *split_index;
struct cache_time timestamp;
unsigned name_hash_initialized : 1,
initialized : 1;
diff --git a/read-cache.c b/read-cache.c
index b4653c083a..90a3f09db9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -14,6 +14,7 @@
#include "resolve-undo.h"
#include "strbuf.h"
#include "varint.h"
+#include "split-index.h"
static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
unsigned int options);
@@ -34,6 +35,10 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) )
#define CACHE_EXT_TREE 0x54524545 /* "TREE" */
#define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
+#define CACHE_EXT_LINK 0x6c696e6b /* "link" */
+
+/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
+#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED)
struct index_state the_index;
static const char *alternate_index_output;
@@ -63,6 +68,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
copy_cache_entry(new, old);
new->ce_flags &= ~CE_HASHED;
new->ce_namelen = namelen;
+ new->index = 0;
memcpy(new->name, new_name, namelen + 1);
cache_tree_invalidate_path(istate, old->name);
@@ -1335,6 +1341,10 @@ static int read_index_extension(struct index_state *istate,
case CACHE_EXT_RESOLVE_UNDO:
istate->resolve_undo = resolve_undo_read(data, sz);
break;
+ case CACHE_EXT_LINK:
+ if (read_link_extension(istate, data, sz))
+ return -1;
+ break;
default:
if (*ext < 'A' || 'Z' < *ext)
return error("index uses %.4s extension, which we do not understand",
@@ -1369,6 +1379,7 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
ce->ce_stat_data.sd_size = get_be32(&ondisk->size);
ce->ce_flags = flags & ~CE_NAMEMASK;
ce->ce_namelen = len;
+ ce->index = 0;
hashcpy(ce->sha1, ondisk->sha1);
memcpy(ce->name, name, len);
ce->name[len] = '\0';
@@ -1443,7 +1454,8 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
}
/* remember to discard_cache() before reading a different cache! */
-int read_index_from(struct index_state *istate, const char *path)
+static int do_read_index(struct index_state *istate, const char *path,
+ int must_exist)
{
int fd, i;
struct stat st;
@@ -1460,9 +1472,9 @@ int read_index_from(struct index_state *istate, const char *path)
istate->timestamp.nsec = 0;
fd = open(path, O_RDONLY);
if (fd < 0) {
- if (errno == ENOENT)
+ if (!must_exist && errno == ENOENT)
return 0;
- die_errno("index file open failed");
+ die_errno("%s: index file open failed", path);
}
if (fstat(fd, &st))
@@ -1535,6 +1547,42 @@ unmap:
die("index file corrupt");
}
+int read_index_from(struct index_state *istate, const char *path)
+{
+ struct split_index *split_index;
+ int ret;
+
+ /* istate->initialized covers both .git/index and .git/sharedindex.xxx */
+ if (istate->initialized)
+ return istate->cache_nr;
+
+ ret = do_read_index(istate, path, 0);
+ split_index = istate->split_index;
+ if (!split_index)
+ return ret;
+
+ if (is_null_sha1(split_index->base_sha1))
+ return ret;
+ if (istate->cache_nr)
+ die("index in split-index mode must contain no entries");
+
+ if (split_index->base)
+ discard_index(split_index->base);
+ else
+ split_index->base = xcalloc(1, sizeof(*split_index->base));
+ ret = do_read_index(split_index->base,
+ git_path("sharedindex.%s",
+ sha1_to_hex(split_index->base_sha1)), 1);
+ if (hashcmp(split_index->base_sha1, split_index->base->sha1))
+ die("broken index, expect %s in %s, got %s",
+ sha1_to_hex(split_index->base_sha1),
+ git_path("sharedindex.%s",
+ sha1_to_hex(split_index->base_sha1)),
+ sha1_to_hex(split_index->base->sha1));
+ merge_base_index(istate);
+ return ret;
+}
+
int is_index_unborn(struct index_state *istate)
{
return (!istate->cache_nr && !istate->timestamp.sec);
@@ -1544,8 +1592,15 @@ int discard_index(struct index_state *istate)
{
int i;
- for (i = 0; i < istate->cache_nr; i++)
+ for (i = 0; i < istate->cache_nr; i++) {
+ if (istate->cache[i]->index &&
+ istate->split_index &&
+ istate->split_index->base &&
+ istate->cache[i]->index <= istate->split_index->base->cache_nr &&
+ istate->cache[i] == istate->split_index->base->cache[istate->cache[i]->index - 1])
+ continue;
free(istate->cache[i]);
+ }
resolve_undo_clear_index(istate);
istate->cache_nr = 0;
istate->cache_changed = 0;
@@ -1557,6 +1612,7 @@ int discard_index(struct index_state *istate)
free(istate->cache);
istate->cache = NULL;
istate->cache_alloc = 0;
+ discard_split_index(istate);
return 0;
}
@@ -1852,6 +1908,17 @@ static int do_write_index(struct index_state *istate, int newfd)
strbuf_release(&previous_name_buf);
/* Write extension data here */
+ if (istate->split_index) {
+ struct strbuf sb = STRBUF_INIT;
+
+ err = write_link_extension(&sb, istate) < 0 ||
+ write_index_ext_header(&c, newfd, CACHE_EXT_LINK,
+ sb.len) < 0 ||
+ ce_write(&c, newfd, sb.buf, sb.len) < 0;
+ strbuf_release(&sb);
+ if (err)
+ return -1;
+ }
if (istate->cache_tree) {
struct strbuf sb = STRBUF_INIT;
@@ -1916,10 +1983,29 @@ static int do_write_locked_index(struct index_state *istate, struct lock_file *l
return ret;
}
+static int write_split_index(struct index_state *istate,
+ struct lock_file *lock,
+ unsigned flags)
+{
+ int ret;
+ prepare_to_write_split_index(istate);
+ ret = do_write_locked_index(istate, lock, flags);
+ finish_writing_split_index(istate);
+ return ret;
+}
+
int write_locked_index(struct index_state *istate, struct lock_file *lock,
unsigned flags)
{
- return do_write_locked_index(istate, lock, flags);
+ struct split_index *si = istate->split_index;
+
+ if (!si || (istate->cache_changed & ~EXTMASK)) {
+ if (si)
+ hashclr(si->base_sha1);
+ return do_write_locked_index(istate, lock, flags);
+ }
+
+ return write_split_index(istate, lock, flags);
}
/*
diff --git a/split-index.c b/split-index.c
new file mode 100644
index 0000000000..63b52bb086
--- /dev/null
+++ b/split-index.c
@@ -0,0 +1,90 @@
+#include "cache.h"
+#include "split-index.h"
+
+struct split_index *init_split_index(struct index_state *istate)
+{
+ if (!istate->split_index) {
+ istate->split_index = xcalloc(1, sizeof(*istate->split_index));
+ istate->split_index->refcount = 1;
+ }
+ return istate->split_index;
+}
+
+int read_link_extension(struct index_state *istate,
+ const void *data_, unsigned long sz)
+{
+ const unsigned char *data = data_;
+ struct split_index *si;
+ if (sz < 20)
+ return error("corrupt link extension (too short)");
+ si = init_split_index(istate);
+ hashcpy(si->base_sha1, data);
+ data += 20;
+ sz -= 20;
+ if (sz)
+ return error("garbage at the end of link extension");
+ return 0;
+}
+
+int write_link_extension(struct strbuf *sb,
+ struct index_state *istate)
+{
+ struct split_index *si = istate->split_index;
+ strbuf_add(sb, si->base_sha1, 20);
+ return 0;
+}
+
+static void mark_base_index_entries(struct index_state *base)
+{
+ int i;
+ /*
+ * To keep track of the shared entries between
+ * istate->base->cache[] and istate->cache[], base entry
+ * position is stored in each base entry. All positions start
+ * from 1 instead of 0, which is reserved to say "this is a new
+ * entry".
+ */
+ for (i = 0; i < base->cache_nr; i++)
+ base->cache[i]->index = i + 1;
+}
+
+void merge_base_index(struct index_state *istate)
+{
+ struct split_index *si = istate->split_index;
+
+ mark_base_index_entries(si->base);
+ istate->cache_nr = si->base->cache_nr;
+ ALLOC_GROW(istate->cache, istate->cache_nr, istate->cache_alloc);
+ memcpy(istate->cache, si->base->cache,
+ sizeof(*istate->cache) * istate->cache_nr);
+}
+
+void prepare_to_write_split_index(struct index_state *istate)
+{
+ struct split_index *si = init_split_index(istate);
+ /* take cache[] out temporarily */
+ si->saved_cache_nr = istate->cache_nr;
+ istate->cache_nr = 0;
+}
+
+void finish_writing_split_index(struct index_state *istate)
+{
+ struct split_index *si = init_split_index(istate);
+ istate->cache_nr = si->saved_cache_nr;
+}
+
+void discard_split_index(struct index_state *istate)
+{
+ struct split_index *si = istate->split_index;
+ if (!si)
+ return;
+ istate->split_index = NULL;
+ si->refcount--;
+ if (si->refcount)
+ return;
+ if (si->base) {
+ discard_index(si->base);
+ free(si->base);
+ }
+ free(si);
+}
diff --git a/split-index.h b/split-index.h
new file mode 100644
index 0000000000..8d7404117e
--- /dev/null
+++ b/split-index.h
@@ -0,0 +1,25 @@
+#ifndef SPLIT_INDEX_H
+#define SPLIT_INDEX_H
+
+struct index_state;
+struct strbuf;
+
+struct split_index {
+ unsigned char base_sha1[20];
+ struct index_state *base;
+ unsigned int saved_cache_nr;
+ int refcount;
+};
+
+struct split_index *init_split_index(struct index_state *istate);
+int read_link_extension(struct index_state *istate,
+ const void *data, unsigned long sz);
+int write_link_extension(struct strbuf *sb,
+ struct index_state *istate);
+void move_cache_to_base_index(struct index_state *istate);
+void merge_base_index(struct index_state *istate);
+void prepare_to_write_split_index(struct index_state *istate);
+void finish_writing_split_index(struct index_state *istate);
+void discard_split_index(struct index_state *istate);
+
+#endif
diff --git a/unpack-trees.c b/unpack-trees.c
index f594932080..a941f7c33e 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -8,6 +8,7 @@
#include "progress.h"
#include "refs.h"
#include "attr.h"
+#include "split-index.h"
/*
* Error messages expected by scripts out of plumbing commands such as
@@ -1046,6 +1047,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
o->result.timestamp.sec = o->src_index->timestamp.sec;
o->result.timestamp.nsec = o->src_index->timestamp.nsec;
o->result.version = o->src_index->version;
+ o->result.split_index = o->src_index->split_index;
+ if (o->result.split_index)
+ o->result.split_index->refcount++;
hashcpy(o->result.sha1, o->src_index->sha1);
o->merge_size = len;
mark_all_ce_unused(o->src_index);