summaryrefslogtreecommitdiff
path: root/archive-tar.c
diff options
context:
space:
mode:
author: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> 2012-05-03 08:51:04 +0700
committer: Junio C Hamano <gitster@pobox.com> 2012-05-03 10:22:56 -0700
commit: 5544049def9a80bc5ea09a5649e13c1b56160518 (patch)
tree: b3fe36f87fed8040d1df754ab367d585d9c014e8 /archive-tar.c
parent: 9cb513b7988c2fe443c47186e42dd827b76ddb14 (diff)
download: git-5544049def9a80bc5ea09a5649e13c1b56160518.tar.gz
archive-tar: stream large blobs to tar file
t5000 verifies the output, while t1050 makes sure the command always respects core.bigfilethreshold. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'archive-tar.c')
-rw-r--r-- archive-tar.c 56
1 files changed, 51 insertions, 5 deletions
diff --git a/archive-tar.c b/archive-tar.c
index 3be0cdf350..93387ea336 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -4,6 +4,7 @@
#include "cache.h"
#include "tar.h"
#include "archive.h"
+#include "streaming.h"
#include "run-command.h"
#define RECORDSIZE (512)
@@ -30,10 +31,9 @@ static void write_if_needed(void)
* queues up writes, so that all our write(2) calls write exactly one
* full block; pads writes to RECORDSIZE
*/
-static void write_blocked(const void *data, unsigned long size)
+static void do_write_blocked(const void *data, unsigned long size)
{
const char *buf = data;
- unsigned long tail;
if (offset) {
unsigned long chunk = BLOCKSIZE - offset;
@@ -54,6 +54,11 @@ static void write_blocked(const void *data, unsigned long size)
memcpy(block + offset, buf, size);
offset += size;
}
+}
+
+static void finish_record(void)
+{
+ unsigned long tail;
tail = offset % RECORDSIZE;
if (tail) {
memset(block + offset, 0, RECORDSIZE - tail);
@@ -62,6 +67,12 @@ static void write_blocked(const void *data, unsigned long size)
write_if_needed();
}
+static void write_blocked(const void *data, unsigned long size) /* buffer data, then pad the record */
+{
+ do_write_blocked(data, size); /* copy size bytes of data into the block buffer */
+ finish_record(); /* zero-fill up to the next RECORDSIZE boundary if offset is unaligned */
+}
+
/*
* The end of tar archives is marked by 2*512 nul bytes and after that
* follows the rest of the block (if any).
@@ -78,6 +89,33 @@ static void write_trailer(void)
}
/*
+ * streams the object sha1 through do_write_blocked() in reads of at most BLOCKSIZE bytes;
+ * pads the record and returns 0 when reading ends with 0, else the error()/read_istream() value
+ */
+static int stream_blocked(const unsigned char *sha1)
+{
+ struct git_istream *st;
+ enum object_type type;
+ unsigned long sz;
+ char buf[BLOCKSIZE];
+ ssize_t readlen;
+
+ st = open_istream(sha1, &type, &sz, NULL);
+ if (!st)
+ return error("cannot stream blob %s", sha1_to_hex(sha1));
+ for (;;) {
+ readlen = read_istream(st, buf, sizeof(buf));
+ if (readlen <= 0) /* presumably 0 is end of stream and <0 a read error; confirm against streaming.h */
+ break;
+ do_write_blocked(buf, readlen); /* no padding here: chunks are appended back to back */
+ }
+ close_istream(st); /* reached on both the readlen == 0 and readlen < 0 exits */
+ if (!readlen) /* pad only when the loop ended with readlen == 0 */
+ finish_record();
+ return readlen;
+}
+
+/*
* pax extended header records have the format "%u %s=%s\n". %u contains
* the size of the whole string (including the %u), the first %s is the
* keyword, the second one is the value. This function constructs such a
@@ -203,7 +241,11 @@ static int write_tar_entry(struct archiver_args *args,
} else
memcpy(header.name, path, pathlen);
- if (S_ISLNK(mode) || S_ISREG(mode)) {
+ if (S_ISREG(mode) && !args->convert &&
+ sha1_object_info(sha1, &size) == OBJ_BLOB &&
+ size > big_file_threshold)
+ buffer = NULL;
+ else if (S_ISLNK(mode) || S_ISREG(mode)) {
enum object_type type;
buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size);
if (!buffer)
@@ -235,8 +277,12 @@ static int write_tar_entry(struct archiver_args *args,
}
strbuf_release(&ext_header);
write_blocked(&header, sizeof(header));
- if (S_ISREG(mode) && buffer && size > 0)
- write_blocked(buffer, size);
+ if (S_ISREG(mode) && size > 0) {
+ if (buffer)
+ write_blocked(buffer, size);
+ else
+ err = stream_blocked(sha1);
+ }
free(buffer);
return err;
}