From 9cb513b7988c2fe443c47186e42dd827b76ddb14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Thu, 3 May 2012 08:51:03 +0700 Subject: archive: delegate blob reading to backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit archive-tar.c and archive-zip.c now perform conversion check, with help of sha1_file_to_archive() from archive.c This gives backends more freedom in dealing with (streaming) large blobs. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index 02d1f3787a..716cc42710 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -121,8 +121,9 @@ static void *zlib_deflate(void *data, unsigned long size, } static int write_zip_entry(struct archiver_args *args, - const unsigned char *sha1, const char *path, size_t pathlen, - unsigned int mode, void *buffer, unsigned long size) + const unsigned char *sha1, + const char *path, size_t pathlen, + unsigned int mode) { struct zip_local_header header; struct zip_dir_header dirent; @@ -134,6 +135,8 @@ static int write_zip_entry(struct archiver_args *args, int method; unsigned char *out; void *deflated = NULL; + void *buffer; + unsigned long size; crc = crc32(0, NULL, 0); @@ -148,7 +151,14 @@ static int write_zip_entry(struct archiver_args *args, out = NULL; uncompressed_size = 0; compressed_size = 0; + buffer = NULL; + size = 0; } else if (S_ISREG(mode) || S_ISLNK(mode)) { + enum object_type type; + buffer = sha1_file_to_archive(args, path, sha1, mode, &type, &size); + if (!buffer) + return error("cannot read %s", sha1_to_hex(sha1)); + method = 0; attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : (mode & 0111) ? 
((mode) << 16) : 0; @@ -229,6 +239,7 @@ static int write_zip_entry(struct archiver_args *args, } free(deflated); + free(buffer); return 0; } -- cgit v1.2.1 From 60df6bd19ad40e3eae2926f3785a63e670c150ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 08:51:05 +0700 Subject: archive-zip: remove uncompressed_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only need size and compressed_size. Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index 716cc42710..400ba38c7d 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -129,7 +129,6 @@ static int write_zip_entry(struct archiver_args *args, struct zip_dir_header dirent; unsigned long attr2; unsigned long compressed_size; - unsigned long uncompressed_size; unsigned long crc; unsigned long direntsize; int method; @@ -149,7 +148,7 @@ static int write_zip_entry(struct archiver_args *args, method = 0; attr2 = 16; out = NULL; - uncompressed_size = 0; + size = 0; compressed_size = 0; buffer = NULL; size = 0; @@ -166,7 +165,6 @@ static int write_zip_entry(struct archiver_args *args, method = 8; crc = crc32(crc, buffer, size); out = buffer; - uncompressed_size = size; compressed_size = size; } else { return error("unsupported file mode: 0%o (SHA1: %s)", mode, @@ -204,7 +202,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.mdate, zip_date); copy_le32(dirent.crc32, crc); copy_le32(dirent.compressed_size, compressed_size); - copy_le32(dirent.size, uncompressed_size); + copy_le32(dirent.size, size); copy_le16(dirent.filename_length, pathlen); copy_le16(dirent.extra_length, 0); copy_le16(dirent.comment_length, 0); @@ -226,7 +224,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(header.mdate, zip_date); 
copy_le32(header.crc32, crc); copy_le32(header.compressed_size, compressed_size); - copy_le32(header.size, uncompressed_size); + copy_le32(header.size, size); copy_le16(header.filename_length, pathlen); copy_le16(header.extra_length, 0); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); -- cgit v1.2.1 From ebf5374afa87afa334b040faec35144c2a3d03d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 08:51:06 +0700 Subject: archive-zip: factor out helpers for writing sizes and CRC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're going to reuse them soon for streaming. Also, update the ZIP directory only at the very end, which will also make streaming easier. Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index 400ba38c7d..678569ab2c 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -120,6 +120,26 @@ static void *zlib_deflate(void *data, unsigned long size, return buffer; } +static void set_zip_dir_data_desc(struct zip_dir_header *header, + unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + copy_le32(header->crc32, crc); + copy_le32(header->compressed_size, compressed_size); + copy_le32(header->size, size); +} + +static void set_zip_header_data_desc(struct zip_local_header *header, + unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + copy_le32(header->crc32, crc); + copy_le32(header->compressed_size, compressed_size); + copy_le32(header->size, size); +} + static int write_zip_entry(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, @@ -200,9 +220,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.compression_method, method); 
copy_le16(dirent.mtime, zip_time); copy_le16(dirent.mdate, zip_date); - copy_le32(dirent.crc32, crc); - copy_le32(dirent.compressed_size, compressed_size); - copy_le32(dirent.size, size); + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); copy_le16(dirent.filename_length, pathlen); copy_le16(dirent.extra_length, 0); copy_le16(dirent.comment_length, 0); @@ -210,11 +228,6 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.attr1, 0); copy_le32(dirent.attr2, attr2); copy_le32(dirent.offset, zip_offset); - memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); - zip_dir_offset += ZIP_DIR_HEADER_SIZE; - memcpy(zip_dir + zip_dir_offset, path, pathlen); - zip_dir_offset += pathlen; - zip_dir_entries++; copy_le32(header.magic, 0x04034b50); copy_le16(header.version, 10); @@ -222,9 +235,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(header.compression_method, method); copy_le16(header.mtime, zip_time); copy_le16(header.mdate, zip_date); - copy_le32(header.crc32, crc); - copy_le32(header.compressed_size, compressed_size); - copy_le32(header.size, size); + set_zip_header_data_desc(&header, size, compressed_size, crc); copy_le16(header.filename_length, pathlen); copy_le16(header.extra_length, 0); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); @@ -239,6 +250,12 @@ static int write_zip_entry(struct archiver_args *args, free(deflated); free(buffer); + memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); + zip_dir_offset += ZIP_DIR_HEADER_SIZE; + memcpy(zip_dir + zip_dir_offset, path, pathlen); + zip_dir_offset += pathlen; + zip_dir_entries++; + return 0; } -- cgit v1.2.1 From 2158f883d99a92f801534c91294305ccbe171f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 08:51:07 +0700 Subject: archive-zip: streaming for stored files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write a data descriptor containing the CRC of the 
entry and its sizes after streaming it out. For simplicity, do that only if we're storing files (option -0) for now. t5000 verifies output. t1050 makes sure the command always respects core.bigfilethreshold Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 12 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index 678569ab2c..1c6c39d42c 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -3,6 +3,7 @@ */ #include "cache.h" #include "archive.h" +#include "streaming.h" static int zip_date; static int zip_time; @@ -15,6 +16,7 @@ static unsigned int zip_dir_offset; static unsigned int zip_dir_entries; #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) +#define ZIP_STREAM (8) struct zip_local_header { unsigned char magic[4]; @@ -31,6 +33,14 @@ struct zip_local_header { unsigned char _end[1]; }; +struct zip_data_desc { + unsigned char magic[4]; + unsigned char crc32[4]; + unsigned char compressed_size[4]; + unsigned char size[4]; + unsigned char _end[1]; +}; + struct zip_dir_header { unsigned char magic[4]; unsigned char creator_version[2]; @@ -70,6 +80,7 @@ struct zip_dir_trailer { * we're interested in. 
*/ #define ZIP_LOCAL_HEADER_SIZE offsetof(struct zip_local_header, _end) +#define ZIP_DATA_DESC_SIZE offsetof(struct zip_data_desc, _end) #define ZIP_DIR_HEADER_SIZE offsetof(struct zip_dir_header, _end) #define ZIP_DIR_TRAILER_SIZE offsetof(struct zip_dir_trailer, _end) @@ -120,6 +131,19 @@ static void *zlib_deflate(void *data, unsigned long size, return buffer; } +static void write_zip_data_desc(unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + struct zip_data_desc trailer; + + copy_le32(trailer.magic, 0x08074b50); + copy_le32(trailer.crc32, crc); + copy_le32(trailer.compressed_size, compressed_size); + copy_le32(trailer.size, size); + write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE); +} + static void set_zip_dir_data_desc(struct zip_dir_header *header, unsigned long size, unsigned long compressed_size, @@ -140,6 +164,8 @@ static void set_zip_header_data_desc(struct zip_local_header *header, copy_le32(header->size, size); } +#define STREAM_BUFFER_SIZE (1024 * 16) + static int write_zip_entry(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, @@ -155,6 +181,8 @@ static int write_zip_entry(struct archiver_args *args, unsigned char *out; void *deflated = NULL; void *buffer; + struct git_istream *stream = NULL; + unsigned long flags = 0; unsigned long size; crc = crc32(0, NULL, 0); @@ -173,25 +201,38 @@ static int write_zip_entry(struct archiver_args *args, buffer = NULL; size = 0; } else if (S_ISREG(mode) || S_ISLNK(mode)) { - enum object_type type; - buffer = sha1_file_to_archive(args, path, sha1, mode, &type, &size); - if (!buffer) - return error("cannot read %s", sha1_to_hex(sha1)); + enum object_type type = sha1_object_info(sha1, &size); method = 0; attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : (mode & 0111) ? 
((mode) << 16) : 0; - if (S_ISREG(mode) && args->compression_level != 0) + if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; - crc = crc32(crc, buffer, size); - out = buffer; compressed_size = size; + + if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && + size > big_file_threshold && method == 0) { + stream = open_istream(sha1, &type, &size, NULL); + if (!stream) + return error("cannot stream blob %s", + sha1_to_hex(sha1)); + flags |= ZIP_STREAM; + out = buffer = NULL; + } else { + buffer = sha1_file_to_archive(args, path, sha1, mode, + &type, &size); + if (!buffer) + return error("cannot read %s", + sha1_to_hex(sha1)); + crc = crc32(crc, buffer, size); + out = buffer; + } } else { return error("unsupported file mode: 0%o (SHA1: %s)", mode, sha1_to_hex(sha1)); } - if (method == 8) { + if (buffer && method == 8) { deflated = zlib_deflate(buffer, size, args->compression_level, &compressed_size); if (deflated && compressed_size - 6 < size) { @@ -216,7 +257,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.creator_version, S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 
0x0317 : 0); copy_le16(dirent.version, 10); - copy_le16(dirent.flags, 0); + copy_le16(dirent.flags, flags); copy_le16(dirent.compression_method, method); copy_le16(dirent.mtime, zip_time); copy_le16(dirent.mdate, zip_date); @@ -231,18 +272,43 @@ static int write_zip_entry(struct archiver_args *args, copy_le32(header.magic, 0x04034b50); copy_le16(header.version, 10); - copy_le16(header.flags, 0); + copy_le16(header.flags, flags); copy_le16(header.compression_method, method); copy_le16(header.mtime, zip_time); copy_le16(header.mdate, zip_date); - set_zip_header_data_desc(&header, size, compressed_size, crc); + if (flags & ZIP_STREAM) + set_zip_header_data_desc(&header, 0, 0, 0); + else + set_zip_header_data_desc(&header, size, compressed_size, crc); copy_le16(header.filename_length, pathlen); copy_le16(header.extra_length, 0); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); zip_offset += ZIP_LOCAL_HEADER_SIZE; write_or_die(1, path, pathlen); zip_offset += pathlen; - if (compressed_size > 0) { + if (stream && method == 0) { + unsigned char buf[STREAM_BUFFER_SIZE]; + ssize_t readlen; + + for (;;) { + readlen = read_istream(stream, buf, sizeof(buf)); + if (readlen <= 0) + break; + crc = crc32(crc, buf, readlen); + write_or_die(1, buf, readlen); + } + close_istream(stream); + if (readlen) + return readlen; + + compressed_size = size; + zip_offset += compressed_size; + + write_zip_data_desc(size, compressed_size, crc); + zip_offset += ZIP_DATA_DESC_SIZE; + + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); + } else if (compressed_size > 0) { write_or_die(1, out, compressed_size); zip_offset += compressed_size; } -- cgit v1.2.1 From c743c21591f9433fe784ac38902872701ce2e850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 08:51:08 +0700 Subject: archive-zip: streaming for deflated files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After an entry has been streamed out, its CRC and 
sizes are written as part of a data descriptor. For simplicity, we make the buffer for the compressed chunks twice as big as for the uncompressed ones, to be sure the result fits in even if deflate makes them bigger. t5000 verifies output. t1050 makes sure the command always respects core.bigfilethreshold Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index 1c6c39d42c..f5af81f904 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -211,7 +211,7 @@ static int write_zip_entry(struct archiver_args *args, compressed_size = size; if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && - size > big_file_threshold && method == 0) { + size > big_file_threshold) { stream = open_istream(sha1, &type, &size, NULL); if (!stream) return error("cannot stream blob %s", @@ -307,6 +307,68 @@ static int write_zip_entry(struct archiver_args *args, write_zip_data_desc(size, compressed_size, crc); zip_offset += ZIP_DATA_DESC_SIZE; + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); + } else if (stream && method == 8) { + unsigned char buf[STREAM_BUFFER_SIZE]; + ssize_t readlen; + git_zstream zstream; + int result; + size_t out_len; + unsigned char compressed[STREAM_BUFFER_SIZE * 2]; + + memset(&zstream, 0, sizeof(zstream)); + git_deflate_init(&zstream, args->compression_level); + + compressed_size = 0; + zstream.next_out = compressed; + zstream.avail_out = sizeof(compressed); + + for (;;) { + readlen = read_istream(stream, buf, sizeof(buf)); + if (readlen <= 0) + break; + crc = crc32(crc, buf, readlen); + + zstream.next_in = buf; + zstream.avail_in = readlen; + result = git_deflate(&zstream, 0); + if (result != Z_OK) + die("deflate error (%d)", result); + out = compressed; + if (!compressed_size) + out += 2; + out_len = 
zstream.next_out - out; + + if (out_len > 0) { + write_or_die(1, out, out_len); + compressed_size += out_len; + zstream.next_out = compressed; + zstream.avail_out = sizeof(compressed); + } + + } + close_istream(stream); + if (readlen) + return readlen; + + zstream.next_in = buf; + zstream.avail_in = 0; + result = git_deflate(&zstream, Z_FINISH); + if (result != Z_STREAM_END) + die("deflate error (%d)", result); + + git_deflate_end(&zstream); + out = compressed; + if (!compressed_size) + out += 2; + out_len = zstream.next_out - out - 4; + write_or_die(1, out, out_len); + compressed_size += out_len; + zip_offset += compressed_size; + + write_zip_data_desc(size, compressed_size, crc); + zip_offset += ZIP_DATA_DESC_SIZE; + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); } else if (compressed_size > 0) { write_or_die(1, out, compressed_size); -- cgit v1.2.1 From 2162bd8cc461d6c3a12ab81c5db5a44bf5ecabc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Tue, 4 Sep 2012 22:23:38 +0200 Subject: archive-zip: support UTF-8 paths Set general purpose flag 11 if we encounter a path that contains non-ASCII characters. We assume that all paths are given as UTF-8; no conversion is done. The flag seems to be ignored by unzip unless we also mark the archive entry as coming from a Unix system. This is done by setting the field creator_version ("version made by" in the standard[1]) to 0x03NN. The NN part represents the version of the standard supported by us, and this patch sets it to 3f (for version 6.3) for Unix paths. We keep creator_version set to 0 (FAT filesystem, standard version 0) in the non-special cases, as before. But when we declare a file to have a Unix path, then we have to set the file mode as well, or unzip will extract the files with the permission set 0000, i.e. inaccessible by all. 
[1] http://www.pkware.com/documents/casestudies/APPNOTE.TXT Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index f5af81f904..928da1d791 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -4,6 +4,8 @@ #include "cache.h" #include "archive.h" #include "streaming.h" +#include "commit.h" +#include "utf8.h" static int zip_date; static int zip_time; @@ -16,7 +18,8 @@ static unsigned int zip_dir_offset; static unsigned int zip_dir_entries; #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) -#define ZIP_STREAM (8) +#define ZIP_STREAM (1 << 3) +#define ZIP_UTF8 (1 << 11) struct zip_local_header { unsigned char magic[4]; @@ -173,7 +176,8 @@ static int write_zip_entry(struct archiver_args *args, { struct zip_local_header header; struct zip_dir_header dirent; - unsigned long attr2; + unsigned int creator_version = 0; + unsigned long attr2 = 0; unsigned long compressed_size; unsigned long crc; unsigned long direntsize; @@ -187,6 +191,13 @@ static int write_zip_entry(struct archiver_args *args, crc = crc32(0, NULL, 0); + if (has_non_ascii(path)) { + if (is_utf8(path)) + flags |= ZIP_UTF8; + else + warning("Path is not valid UTF-8: %s", path); + } + if (pathlen > 0xffff) { return error("path too long (%d chars, SHA1: %s): %s", (int)pathlen, sha1_to_hex(sha1), path); @@ -204,10 +215,15 @@ static int write_zip_entry(struct archiver_args *args, enum object_type type = sha1_object_info(sha1, &size); method = 0; - attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : - (mode & 0111) ? 
((mode) << 16) : 0; if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; + if (S_ISLNK(mode) || (mode & 0111) || (flags & ZIP_UTF8)) { + creator_version = 0x033f; + attr2 = mode; + if (S_ISLNK(mode)) + attr2 |= 0777; + attr2 <<= 16; + } compressed_size = size; if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && @@ -254,8 +270,7 @@ static int write_zip_entry(struct archiver_args *args, } copy_le32(dirent.magic, 0x02014b50); - copy_le16(dirent.creator_version, - S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0); + copy_le16(dirent.creator_version, creator_version); copy_le16(dirent.version, 10); copy_le16(dirent.flags, flags); copy_le16(dirent.compression_method, method); -- cgit v1.2.1 From bb52d22ebbc6d8792d3a016e6e89fd6e39c7a39f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 18 Sep 2012 13:32:39 -0700 Subject: Revert "archive-zip: support UTF-8 paths" This reverts commit 2162bd8cc461d6c3a12ab81c5db5a44bf5ecabc3; a two-patch series to replace it will follow. 
--- archive-zip.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index 928da1d791..f5af81f904 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -4,8 +4,6 @@ #include "cache.h" #include "archive.h" #include "streaming.h" -#include "commit.h" -#include "utf8.h" static int zip_date; static int zip_time; @@ -18,8 +16,7 @@ static unsigned int zip_dir_offset; static unsigned int zip_dir_entries; #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) -#define ZIP_STREAM (1 << 3) -#define ZIP_UTF8 (1 << 11) +#define ZIP_STREAM (8) struct zip_local_header { unsigned char magic[4]; @@ -176,8 +173,7 @@ static int write_zip_entry(struct archiver_args *args, { struct zip_local_header header; struct zip_dir_header dirent; - unsigned int creator_version = 0; - unsigned long attr2 = 0; + unsigned long attr2; unsigned long compressed_size; unsigned long crc; unsigned long direntsize; @@ -191,13 +187,6 @@ static int write_zip_entry(struct archiver_args *args, crc = crc32(0, NULL, 0); - if (has_non_ascii(path)) { - if (is_utf8(path)) - flags |= ZIP_UTF8; - else - warning("Path is not valid UTF-8: %s", path); - } - if (pathlen > 0xffff) { return error("path too long (%d chars, SHA1: %s): %s", (int)pathlen, sha1_to_hex(sha1), path); @@ -215,15 +204,10 @@ static int write_zip_entry(struct archiver_args *args, enum object_type type = sha1_object_info(sha1, &size); method = 0; + attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : + (mode & 0111) ? 
((mode) << 16) : 0; if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; - if (S_ISLNK(mode) || (mode & 0111) || (flags & ZIP_UTF8)) { - creator_version = 0x033f; - attr2 = mode; - if (S_ISLNK(mode)) - attr2 |= 0777; - attr2 <<= 16; - } compressed_size = size; if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && @@ -270,7 +254,8 @@ static int write_zip_entry(struct archiver_args *args, } copy_le32(dirent.magic, 0x02014b50); - copy_le16(dirent.creator_version, creator_version); + copy_le16(dirent.creator_version, + S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0); copy_le16(dirent.version, 10); copy_le16(dirent.flags, flags); copy_le16(dirent.compression_method, method); -- cgit v1.2.1 From 88182bab001a9b9e9b0acd4b888693fa7c28ff4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Tue, 18 Sep 2012 21:46:56 +0200 Subject: archive-zip: support UTF-8 paths Set general purpose flag 11 if we encounter a path that contains non-ASCII characters. We assume that all paths are given as UTF-8; no conversion is done. 
Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index f5af81f904..0f763e8022 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -4,6 +4,7 @@ #include "cache.h" #include "archive.h" #include "streaming.h" +#include "utf8.h" static int zip_date; static int zip_time; @@ -16,7 +17,8 @@ static unsigned int zip_dir_offset; static unsigned int zip_dir_entries; #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) -#define ZIP_STREAM (8) +#define ZIP_STREAM (1 << 3) +#define ZIP_UTF8 (1 << 11) struct zip_local_header { unsigned char magic[4]; @@ -164,6 +166,17 @@ static void set_zip_header_data_desc(struct zip_local_header *header, copy_le32(header->size, size); } +static int has_only_ascii(const char *s) +{ + for (;;) { + int c = *s++; + if (c == '\0') + return 1; + if (!isascii(c)) + return 0; + } +} + #define STREAM_BUFFER_SIZE (1024 * 16) static int write_zip_entry(struct archiver_args *args, @@ -187,6 +200,13 @@ static int write_zip_entry(struct archiver_args *args, crc = crc32(0, NULL, 0); + if (!has_only_ascii(path)) { + if (is_utf8(path)) + flags |= ZIP_UTF8; + else + warning("Path is not valid UTF-8: %s", path); + } + if (pathlen > 0xffff) { return error("path too long (%d chars, SHA1: %s): %s", (int)pathlen, sha1_to_hex(sha1), path); -- cgit v1.2.1 From 227bf59806dc408d60f8f80a14d8dfe71b92c6d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Sep 2012 17:56:23 +0200 Subject: archive-zip: write extended timestamp File modification times in ZIP files are encoded in DOS format: local time with a granularity of two seconds. Add an extra field to all archive entries to also record the mtime in Unix' fashion, as UTC with a granularity of one second. 
This has the desirable side-effect of convincing Info-ZIP unzip 6.00 to respect general purpose flag 11, which is used to indicate that a file name is encoded in UTF-8. Any extra field would do, actually, but the extended timestamp is a reasonably small one (22 bytes per entry). Archives created by Info-ZIP zip 3.0 contain it, too (but with ctime and atime as well). Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index 0f763e8022..55f66b4060 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -76,6 +76,14 @@ struct zip_dir_trailer { unsigned char _end[1]; }; +struct zip_extra_mtime { + unsigned char magic[2]; + unsigned char extra_size[2]; + unsigned char flags[1]; + unsigned char mtime[4]; + unsigned char _end[1]; +}; + /* * On ARM, padding is added at the end of the struct, so a simple * sizeof(struct ...) reports two bytes more than the payload size @@ -85,6 +93,9 @@ struct zip_dir_trailer { #define ZIP_DATA_DESC_SIZE offsetof(struct zip_data_desc, _end) #define ZIP_DIR_HEADER_SIZE offsetof(struct zip_dir_header, _end) #define ZIP_DIR_TRAILER_SIZE offsetof(struct zip_dir_trailer, _end) +#define ZIP_EXTRA_MTIME_SIZE offsetof(struct zip_extra_mtime, _end) +#define ZIP_EXTRA_MTIME_PAYLOAD_SIZE \ + (ZIP_EXTRA_MTIME_SIZE - offsetof(struct zip_extra_mtime, flags)) static void copy_le16(unsigned char *dest, unsigned int n) { @@ -186,6 +197,7 @@ static int write_zip_entry(struct archiver_args *args, { struct zip_local_header header; struct zip_dir_header dirent; + struct zip_extra_mtime extra; unsigned long attr2; unsigned long compressed_size; unsigned long crc; @@ -266,8 +278,13 @@ static int write_zip_entry(struct archiver_args *args, } } + copy_le16(extra.magic, 0x5455); + copy_le16(extra.extra_size, ZIP_EXTRA_MTIME_PAYLOAD_SIZE); + extra.flags[0] = 1; /* just mtime */ + 
copy_le32(extra.mtime, args->time); + /* make sure we have enough free space in the dictionary */ - direntsize = ZIP_DIR_HEADER_SIZE + pathlen; + direntsize = ZIP_DIR_HEADER_SIZE + pathlen + ZIP_EXTRA_MTIME_SIZE; while (zip_dir_size < zip_dir_offset + direntsize) { zip_dir_size += ZIP_DIRECTORY_MIN_SIZE; zip_dir = xrealloc(zip_dir, zip_dir_size); @@ -283,7 +300,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.mdate, zip_date); set_zip_dir_data_desc(&dirent, size, compressed_size, crc); copy_le16(dirent.filename_length, pathlen); - copy_le16(dirent.extra_length, 0); + copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE); copy_le16(dirent.comment_length, 0); copy_le16(dirent.disk, 0); copy_le16(dirent.attr1, 0); @@ -301,11 +318,13 @@ static int write_zip_entry(struct archiver_args *args, else set_zip_header_data_desc(&header, size, compressed_size, crc); copy_le16(header.filename_length, pathlen); - copy_le16(header.extra_length, 0); + copy_le16(header.extra_length, ZIP_EXTRA_MTIME_SIZE); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); zip_offset += ZIP_LOCAL_HEADER_SIZE; write_or_die(1, path, pathlen); zip_offset += pathlen; + write_or_die(1, &extra, ZIP_EXTRA_MTIME_SIZE); + zip_offset += ZIP_EXTRA_MTIME_SIZE; if (stream && method == 0) { unsigned char buf[STREAM_BUFFER_SIZE]; ssize_t readlen; @@ -402,6 +421,8 @@ static int write_zip_entry(struct archiver_args *args, zip_dir_offset += ZIP_DIR_HEADER_SIZE; memcpy(zip_dir + zip_dir_offset, path, pathlen); zip_dir_offset += pathlen; + memcpy(zip_dir + zip_dir_offset, &extra, ZIP_EXTRA_MTIME_SIZE); + zip_dir_offset += ZIP_EXTRA_MTIME_SIZE; zip_dir_entries++; return 0; -- cgit v1.2.1 From 5ea2c847c5938e9868f15a273869e54c6ed4c79c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 6 Jan 2013 16:20:57 +0100 Subject: archive-zip: write uncompressed size into header even with streaming We record the uncompressed and compressed sizes and the CRC of streamed files as zero in 
the local header of the file. The actual values are recorded in an extra data descriptor after the file content, and in the usual ZIP directory entry at the end of the archive. While we know the compressed size and the CRC only after we processed the contents, we actually know the uncompressed size right from the start. And for files that we store uncompressed we also already know their final size. Do it like InfoZIP's zip and record the known values, even though they can be reconstructed using the ZIP directory and the data descriptors alone. InfoZIP's unzip worked fine before, but NetBSD's version actually depends on these fields. The uncompressed size is already set by sha1_object_info(). We just need to initialize the compressed size to zero or the uncompressed size depending on the compression method (0 means storing). The CRC was properly initialized already. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index f5af81f904..44b1dedd9c 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -208,7 +208,7 @@ static int write_zip_entry(struct archiver_args *args, (mode & 0111) ? ((mode) << 16) : 0; if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; - compressed_size = size; + compressed_size = (method == 0) ? 
size : 0; if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && size > big_file_threshold) { @@ -276,10 +276,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(header.compression_method, method); copy_le16(header.mtime, zip_time); copy_le16(header.mdate, zip_date); - if (flags & ZIP_STREAM) - set_zip_header_data_desc(&header, 0, 0, 0); - else - set_zip_header_data_desc(&header, size, compressed_size, crc); + set_zip_header_data_desc(&header, size, compressed_size, crc); copy_le16(header.filename_length, pathlen); copy_le16(header.extra_length, 0); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); -- cgit v1.2.1 From d3c1472fe35e2575508d80415565822ee2211df5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Wed, 27 Feb 2013 11:20:21 +0100 Subject: archive-zip: fix compressed size for stored export-subst files Currently ZIP archive entries of files with export-subst attribute are broken if they are stored uncompressed. We get the size of a file from sha1_object_info(), but this number is likely wrong for files whose contents are changed due to export-subst placeholder expansion. We use sha1_file_to_archive() to get the expanded file contents and size in that case. We proceed to use that size for the uncompressed size field (good), but the compressed size field is set based on the size from sha1_object_info() (bad). This matters only for uncompressed files because for deflated files we use the correct value after compression is done. And for files without export-subst expansion the sizes from sha1_object_info() and sha1_file_to_archive() are the same, so they are unaffected as well. This patch fixes the issue by setting the compressed size based on the uncompressed size only after we actually know the latter. Also make use of the test file substfile1 to check for the breakage; it was only stored verbatim so far. 
For that purpose, set the attribute export-subst and replace its contents with the expected expansion after committing. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index d3aef532b7..a8d119305f 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -240,7 +240,6 @@ static int write_zip_entry(struct archiver_args *args, (mode & 0111) ? ((mode) << 16) : 0; if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; - compressed_size = (method == 0) ? size : 0; if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && size > big_file_threshold) { @@ -259,6 +258,7 @@ static int write_zip_entry(struct archiver_args *args, crc = crc32(crc, buffer, size); out = buffer; } + compressed_size = (method == 0) ? size : 0; } else { return error("unsupported file mode: 0%o (SHA1: %s)", mode, sha1_to_hex(sha1)); -- cgit v1.2.1 From c3c2e1a09b84dc932d90fa09d1632e5efa34c940 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 15 Mar 2013 23:21:51 +0100 Subject: archive-zip: use deflateInit2() to ask for raw compressed data We use the function git_deflate_init() -- which wraps the zlib function deflateInit() -- to initialize compression of ZIP file entries. This results in compressed data prefixed with a two-bytes long header and followed by a four-bytes trailer. ZIP file entries consist of ZIP headers and raw compressed data instead, so we remove the zlib wrapper before writing the result. We can ask zlib for the raw compressed data without the unwanted parts in the first place by using deflateInit2() and specifying a negative number of bits to size the window. For that purpose, factor out the function do_git_deflate_init() and add git_deflate_init_raw(), which wraps it. Then use the latter in archive-zip.c and get rid of the code that stripped the zlib header and trailer. 
Also rename the helper function zlib_deflate() to zlib_deflate_raw() to reflect the change. Thus we avoid generating data that we throw away anyway, the code becomes shorter and some magic constants are removed. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- archive-zip.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index a8d119305f..b2c4fe0e9f 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -111,8 +111,9 @@ static void copy_le32(unsigned char *dest, unsigned int n) dest[3] = 0xff & (n >> 030); } -static void *zlib_deflate(void *data, unsigned long size, - int compression_level, unsigned long *compressed_size) +static void *zlib_deflate_raw(void *data, unsigned long size, + int compression_level, + unsigned long *compressed_size) { git_zstream stream; unsigned long maxsize; @@ -120,7 +121,7 @@ static void *zlib_deflate(void *data, unsigned long size, int result; memset(&stream, 0, sizeof(stream)); - git_deflate_init(&stream, compression_level); + git_deflate_init_raw(&stream, compression_level); maxsize = git_deflate_bound(&stream, size); buffer = xmalloc(maxsize); @@ -265,14 +266,11 @@ static int write_zip_entry(struct archiver_args *args, } if (buffer && method == 8) { - deflated = zlib_deflate(buffer, size, args->compression_level, - &compressed_size); - if (deflated && compressed_size - 6 < size) { - /* ZLIB --> raw compressed data (see RFC 1950) */ - /* CMF and FLG ... */ - out = (unsigned char *)deflated + 2; - compressed_size -= 6; /* ... 
and ADLER32 */ - } else { + out = deflated = zlib_deflate_raw(buffer, size, + args->compression_level, + &compressed_size); + if (!out || compressed_size >= size) { + out = buffer; method = 0; compressed_size = size; } @@ -353,7 +351,7 @@ static int write_zip_entry(struct archiver_args *args, unsigned char compressed[STREAM_BUFFER_SIZE * 2]; memset(&zstream, 0, sizeof(zstream)); - git_deflate_init(&zstream, args->compression_level); + git_deflate_init_raw(&zstream, args->compression_level); compressed_size = 0; zstream.next_out = compressed; @@ -370,13 +368,10 @@ static int write_zip_entry(struct archiver_args *args, result = git_deflate(&zstream, 0); if (result != Z_OK) die("deflate error (%d)", result); - out = compressed; - if (!compressed_size) - out += 2; - out_len = zstream.next_out - out; + out_len = zstream.next_out - compressed; if (out_len > 0) { - write_or_die(1, out, out_len); + write_or_die(1, compressed, out_len); compressed_size += out_len; zstream.next_out = compressed; zstream.avail_out = sizeof(compressed); @@ -394,11 +389,8 @@ static int write_zip_entry(struct archiver_args *args, die("deflate error (%d)", result); git_deflate_end(&zstream); - out = compressed; - if (!compressed_size) - out += 2; - out_len = zstream.next_out - out - 4; - write_or_die(1, out, out_len); + out_len = zstream.next_out - compressed; + write_or_die(1, compressed, out_len); compressed_size += out_len; zip_offset += compressed_size; -- cgit v1.2.1 From be706c6f4f99ec04c4104bda8820513276af8129 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Sun, 26 May 2013 21:11:35 +0200 Subject: archive-zip:write_zip_entry: Remove second reset of size variable to zero. It is set to zero just 3 lines before. Reported by cppcheck. 
Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- archive-zip.c | 1 - 1 file changed, 1 deletion(-) (limited to 'archive-zip.c') diff --git a/archive-zip.c b/archive-zip.c index a8d119305f..04c2f8eed7 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -231,7 +231,6 @@ static int write_zip_entry(struct archiver_args *args, size = 0; compressed_size = 0; buffer = NULL; - size = 0; } else if (S_ISREG(mode) || S_ISLNK(mode)) { enum object_type type = sha1_object_info(sha1, &size); -- cgit v1.2.1