summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tim Kientzle <kientzle@acm.org> 2016-01-03 18:11:46 -0800
committer: Tim Kientzle <kientzle@acm.org> 2016-01-03 18:11:46 -0800
commit: 4cd17347b4c35a0c06c2b13ee30e018bbcef6677 (patch)
tree: ec64c93872eca2189a376b97ae704406a4ee15a9
parent: 3ea734488052804ff5fd47f6691073ca215e1110 (diff)
download: libarchive-utf8.tar.gz
First implementation of UTF-8 path support for Zip reader. (branch: utf8)
This uses the existing "update_pathname_utf8" logic to accept the UTF8 pathname. The tests verify that this supports UTF-8 paths stored using GP#11 or 0x7075 extension. More testing is certainly needed... Note: A lot of the diff here is reshuffling of an internal API so it can accept non-null-terminated strings.
-rw-r--r-- Makefile.am 1
-rw-r--r-- libarchive/archive_entry.c 45
-rw-r--r-- libarchive/archive_entry.h 2
-rw-r--r-- libarchive/archive_read_support_format_zip.c 55
-rw-r--r-- libarchive/archive_string.c 8
-rw-r--r-- libarchive/archive_string.h 2
-rw-r--r-- libarchive/test/test_read_format_zip_utf8_paths.c 263
-rw-r--r-- libarchive/test/test_read_format_zip_utf8_paths.zip.uu 62
8 files changed, 284 insertions, 154 deletions
diff --git a/Makefile.am b/Makefile.am
index 0c04b54b..72dd6a01 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -790,7 +790,6 @@ libarchive_test_EXTRA_DIST=\
libarchive/test/test_read_format_zip_sfx.uu \
libarchive/test/test_read_format_zip_symlink.zip.uu \
libarchive/test/test_read_format_zip_traditional_encryption_data.zip.uu \
- libarchive/test/test_read_format_zip_utf8_paths.zip.uu \
libarchive/test/test_read_format_zip_ux.zip.uu \
libarchive/test/test_read_format_zip_winzip_aes128.zip.uu \
libarchive/test/test_read_format_zip_winzip_aes256.zip.uu \
diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c
index 4ac19660..cfff7725 100644
--- a/libarchive/archive_entry.c
+++ b/libarchive/archive_entry.c
@@ -852,8 +852,8 @@ archive_entry_copy_gname_w(struct archive_entry *entry, const wchar_t *name)
int
archive_entry_update_gname_utf8(struct archive_entry *entry, const char *name)
{
- if (archive_mstring_update_utf8(entry->archive,
- &entry->ae_gname, name) == 0)
+ if (archive_mstring_update_utf8_len(entry->archive,
+ &entry->ae_gname, name, name == NULL ? 0 : strlen(name)) == 0)
return (1);
if (errno == ENOMEM)
__archive_errx(1, "No memory");
@@ -930,8 +930,8 @@ archive_entry_update_hardlink_utf8(struct archive_entry *entry, const char *targ
entry->ae_set |= AE_SET_HARDLINK;
else
entry->ae_set &= ~AE_SET_HARDLINK;
- if (archive_mstring_update_utf8(entry->archive,
- &entry->ae_hardlink, target) == 0)
+ if (archive_mstring_update_utf8_len(entry->archive,
+ &entry->ae_hardlink, target, target == NULL ? 0 : strlen(target)) == 0)
return (1);
if (errno == ENOMEM)
__archive_errx(1, "No memory");
@@ -1075,11 +1075,11 @@ archive_entry_update_link_utf8(struct archive_entry *entry, const char *target)
{
int r;
if (entry->ae_set & AE_SET_SYMLINK)
- r = archive_mstring_update_utf8(entry->archive,
- &entry->ae_symlink, target);
+ r = archive_mstring_update_utf8_len(entry->archive,
+ &entry->ae_symlink, target, target == NULL ? 0 : strlen(target));
else
- r = archive_mstring_update_utf8(entry->archive,
- &entry->ae_hardlink, target);
+ r = archive_mstring_update_utf8_len(entry->archive,
+ &entry->ae_hardlink, target, target == NULL ? 0 : strlen(target));
if (r == 0)
return (1);
if (errno == ENOMEM)
@@ -1152,6 +1152,12 @@ archive_entry_copy_pathname(struct archive_entry *entry, const char *name)
}
void
+archive_entry_copy_pathname_len(struct archive_entry *entry, const char *name, size_t length)
+{
+ archive_mstring_copy_mbs_len(&entry->ae_pathname, name, length);
+}
+
+void
archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name)
{
archive_mstring_copy_wcs(&entry->ae_pathname, name);
@@ -1160,8 +1166,19 @@ archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name)
int
archive_entry_update_pathname_utf8(struct archive_entry *entry, const char *name)
{
- if (archive_mstring_update_utf8(entry->archive,
- &entry->ae_pathname, name) == 0)
+ if (archive_mstring_update_utf8_len(entry->archive,
+ &entry->ae_pathname, name, name == NULL ? 0 : strlen(name)) == 0)
+ return (1);
+ if (errno == ENOMEM)
+ __archive_errx(1, "No memory");
+ return (0);
+}
+
+int
+archive_entry_update_pathname_utf8_len(struct archive_entry *entry, const char *name, size_t length)
+{
+ if (archive_mstring_update_utf8_len(entry->archive,
+ &entry->ae_pathname, name, length) == 0)
return (1);
if (errno == ENOMEM)
__archive_errx(1, "No memory");
@@ -1282,8 +1299,8 @@ archive_entry_update_symlink_utf8(struct archive_entry *entry, const char *linkn
entry->ae_set |= AE_SET_SYMLINK;
else
entry->ae_set &= ~AE_SET_SYMLINK;
- if (archive_mstring_update_utf8(entry->archive,
- &entry->ae_symlink, linkname) == 0)
+ if (archive_mstring_update_utf8_len(entry->archive,
+ &entry->ae_symlink, linkname, linkname == NULL ? 0 : strlen(linkname)) == 0)
return (1);
if (errno == ENOMEM)
__archive_errx(1, "No memory");
@@ -1339,8 +1356,8 @@ archive_entry_copy_uname_w(struct archive_entry *entry, const wchar_t *name)
int
archive_entry_update_uname_utf8(struct archive_entry *entry, const char *name)
{
- if (archive_mstring_update_utf8(entry->archive,
- &entry->ae_uname, name) == 0)
+ if (archive_mstring_update_utf8_len(entry->archive,
+ &entry->ae_uname, name, name == NULL ? 0 : strlen(name)) == 0)
return (1);
if (errno == ENOMEM)
__archive_errx(1, "No memory");
diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h
index 06740926..ecd02a79 100644
--- a/libarchive/archive_entry.h
+++ b/libarchive/archive_entry.h
@@ -305,8 +305,10 @@ __LA_DECL void archive_entry_set_nlink(struct archive_entry *, unsigned int);
__LA_DECL void archive_entry_set_pathname(struct archive_entry *, const char *);
__LA_DECL void archive_entry_set_pathname_utf8(struct archive_entry *, const char *);
__LA_DECL void archive_entry_copy_pathname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_pathname_len(struct archive_entry *, const char *, size_t);
__LA_DECL void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
__LA_DECL int archive_entry_update_pathname_utf8(struct archive_entry *, const char *);
+__LA_DECL int archive_entry_update_pathname_utf8_len(struct archive_entry *, const char *, size_t);
__LA_DECL void archive_entry_set_perm(struct archive_entry *, __LA_MODE_T);
__LA_DECL void archive_entry_set_rdev(struct archive_entry *, dev_t);
__LA_DECL void archive_entry_set_rdevmajor(struct archive_entry *, dev_t);
diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c
index c0b47c86..a446b81c 100644
--- a/libarchive/archive_read_support_format_zip.c
+++ b/libarchive/archive_read_support_format_zip.c
@@ -410,7 +410,7 @@ zip_time(const char *p)
* triplets. id and size are 2 bytes each.
*/
static void
-process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry)
+process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry, struct archive_entry *entry)
{
unsigned offset = 0;
@@ -626,6 +626,11 @@ process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry)
}
break;
}
+ case 0x7075:
+ if (entry != NULL) {
+ archive_entry_update_pathname_utf8_len(entry, p + offset, datasize);
+ }
+ break;
case 0x7855:
/* Info-ZIP Unix Extra Field (type 2) "Ux". */
#ifdef DEBUG
@@ -780,33 +785,27 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry,
return (ARCHIVE_FATAL);
}
if (zip_entry->zip_flags & ZIP_UTF8_NAME) {
- /* The filename is stored to be UTF-8. */
- if (zip->sconv_utf8 == NULL) {
- zip->sconv_utf8 =
- archive_string_conversion_from_charset(
- &a->archive, "UTF-8", 1);
- if (zip->sconv_utf8 == NULL)
- return (ARCHIVE_FATAL);
- }
- sconv = zip->sconv_utf8;
- } else if (zip->sconv != NULL)
- sconv = zip->sconv;
- else
- sconv = zip->sconv_default;
+ archive_entry_update_pathname_utf8_len(entry, h, filename_length);
+ } else {
+ if (zip->sconv != NULL)
+ sconv = zip->sconv;
+ else
+ sconv = zip->sconv_default;
- if (archive_entry_copy_pathname_l(entry,
- h, filename_length, sconv) != 0) {
- if (errno == ENOMEM) {
- archive_set_error(&a->archive, ENOMEM,
- "Can't allocate memory for Pathname");
- return (ARCHIVE_FATAL);
+ if (archive_entry_copy_pathname_l(entry,
+ h, filename_length, sconv) != 0) {
+ if (errno == ENOMEM) {
+ archive_set_error(&a->archive, ENOMEM,
+ "Can't allocate memory for Pathname");
+ return (ARCHIVE_FATAL);
+ }
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Pathname cannot be converted "
+ "from %s to current locale.",
+ archive_string_conversion_charset_name(sconv));
+ ret = ARCHIVE_WARN;
}
- archive_set_error(&a->archive,
- ARCHIVE_ERRNO_FILE_FORMAT,
- "Pathname cannot be converted "
- "from %s to current locale.",
- archive_string_conversion_charset_name(sconv));
- ret = ARCHIVE_WARN;
}
__archive_read_consume(a, filename_length);
@@ -850,7 +849,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry,
return (ARCHIVE_FATAL);
}
- process_extra(h, extra_length, zip_entry);
+ process_extra(h, extra_length, zip_entry, entry);
__archive_read_consume(a, extra_length);
if (zip_entry->flags & LA_FROM_CENTRAL_DIRECTORY) {
@@ -2630,7 +2629,7 @@ slurp_central_directory(struct archive_read *a, struct zip *zip)
"Truncated ZIP file header");
return ARCHIVE_FATAL;
}
- process_extra(p + filename_length, extra_length, zip_entry);
+ process_extra(p + filename_length, extra_length, zip_entry, NULL);
/*
* Mac resource fork files are stored under the
diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c
index 3d4be825..4db842fb 100644
--- a/libarchive/archive_string.c
+++ b/libarchive/archive_string.c
@@ -4152,8 +4152,8 @@ archive_mstring_copy_mbs_len_l(struct archive_mstring *aes,
* usable values even if some of the character conversions are failing.)
*/
int
-archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,
- const char *utf8)
+archive_mstring_update_utf8_len(struct archive *a, struct archive_mstring *aes,
+ const char *utf8, size_t length)
{
struct archive_string_conv *sc;
int r;
@@ -4164,7 +4164,7 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,
}
/* Save the UTF8 string. */
- archive_strcpy(&(aes->aes_utf8), utf8);
+ archive_strncpy(&(aes->aes_utf8), utf8, length);
/* Empty the mbs and wcs strings. */
archive_string_empty(&(aes->aes_mbs));
@@ -4176,7 +4176,7 @@ archive_mstring_update_utf8(struct archive *a, struct archive_mstring *aes,
sc = archive_string_conversion_from_charset(a, "UTF-8", 1);
if (sc == NULL)
return (-1);/* Couldn't allocate memory for sc. */
- r = archive_strcpy_l(&(aes->aes_mbs), utf8, sc);
+ r = archive_strncpy_l(&(aes->aes_mbs), utf8, length, sc);
if (a == NULL)
free_sconv_object(sc);
if (r != 0)
diff --git a/libarchive/archive_string.h b/libarchive/archive_string.h
index 23f49165..6f7d2e5a 100644
--- a/libarchive/archive_string.h
+++ b/libarchive/archive_string.h
@@ -233,7 +233,7 @@ int archive_mstring_copy_wcs_len(struct archive_mstring *,
const wchar_t *wcs, size_t);
int archive_mstring_copy_mbs_len_l(struct archive_mstring *,
const char *mbs, size_t, struct archive_string_conv *);
-int archive_mstring_update_utf8(struct archive *, struct archive_mstring *aes, const char *utf8);
+int archive_mstring_update_utf8_len(struct archive *, struct archive_mstring *aes, const char *utf8, size_t length);
#endif
diff --git a/libarchive/test/test_read_format_zip_utf8_paths.c b/libarchive/test/test_read_format_zip_utf8_paths.c
index a7034162..ea4738b4 100644
--- a/libarchive/test/test_read_format_zip_utf8_paths.c
+++ b/libarchive/test/test_read_format_zip_utf8_paths.c
@@ -26,68 +26,243 @@
#include "test.h"
__FBSDID("$FreeBSD$");
-static void
-verify(struct archive *a) {
+/*
+ * This collection of tests tries to verify that libarchive correctly
+ * handles Zip UTF-8 filenames stored in various fashions, including
+ * boundary cases where the different copies of the filename don't
+ * agree with each other.
+ *
+ * A UTF8 filename can appear in a Zip file in three different fashions.
+ *
+ * Unmarked: If bit 11 of the GP bit flag is not set, then the
+ * filename is stored in an unspecified encoding which may or may not
+ * be UTF-8. Practically speaking, decoders can make no assumptions
+ * about the filename encoding.
+ *
+ * GP bit flag #11: If this bit is set, then the Filename and File
+ * comment should be stored in UTF-8.
+ *
+ * Extra field 0x7075: This field was added by Info-ZIP. It stores a
+ * second copy of the filename in UTF-8. Note this second filename
+ * may not be the same encoding -- or even the same name -- as the primary
+ * filename. It makes no assertion about the character set used by
+ * the file comment.
+ *
+ * Also note that the above can appear in the local file header or the
+ * central directory or both and may or may not agree in any of those
+ * cases. In the worst case, we may have four different filenames for
+ * a single entry: The local file header can have both a regular filename
+ * (in UTF-8 or not) and the 0x7075 extension, the central directory
+ * would also have both, and all four names could be different.
+ */
+
+/*
+ * Case 1: Use GP#11 to flag UTF-8 filename in local file header,
+ * but central directory has a different name.
+ */
+static const unsigned char case1[] = {
+ /* Local file header */
+ 0x50, 0x4b, 0x03, 0x04, /* PK\003\004 */
+ 0x20, 0x00, /* Version needed to extract: 2.0 */
+ 0x00, 0x08, /* General purpose bit flag: 0x0800 == UTF8 filename */
+ 0x00, 0x00, /* Compression method: None */
+ 0x00, 0x00, /* Last mod time */
+ 0x00, 0x00, /* Last mod date */
+ 0x00, 0x00, 0x00, 0x00, /* CRC32 */
+ 0x04, 0x00, 0x00, 0x00, /* Compressed size: 4 */
+ 0x04, 0x00, 0x00, 0x00, /* Uncompressed size: 4 */
+ 0x0a, 0x00, /* Filename length: 10 */
+ 0x00, 0x00, /* Extra field length: 0 */
+ 0x41, 0x42, 0x43, 0xE2, 0x86, 0x92, 0x2e, 0x74, 0x78, 0x74, /* Filename: ABC<right arrow>.txt */
+ /* Extra field: Not present */
+
+ /* File data */
+ 0x41, 0x42, 0x43, 0x0a, /* "ABC\n" */
+
+ /* Central directory header */
+ 0x50, 0x4b, 0x01, 0x02, /* PK\001\002 */
+ 0x20, 0x00, /* Version made by: 2.0 for MSDOS */
+ 0x20, 0x00, /* Version needed to extract: 2.0 */
+ 0x00, 0x08, /* General purpose bit flag: bit 11 = UTF8 filename */
+ 0x00, 0x00, /* Compression method: None */
+ 0x00, 0x00, /* Last mod time */
+ 0x00, 0x00, /* Last mod date */
+ 0x00, 0x00, 0x00, 0x00, /* CRC32 */
+ 0x04, 0x00, 0x00, 0x00, /* Compressed size: 4 */
+ 0x04, 0x00, 0x00, 0x00, /* Uncompressed size: 4 */
+ 0x05, 0x00, /* Filename length */
+ 0x00, 0x00, /* Extra field length: 0 */
+ 0x00, 0x00, /* Comment length: 0 */
+ 0x00, 0x00, /* Disk number start: 0 */
+ 0x00, 0x00, /* Internal file attributes */
+ 0x00, 0x00, 0x00, 0x00, /* External file attributes */
+ 0x00, 0x00, 0x00, 0x00, /* Offset of local header */
+ 0x41, 0x2e, 0x74, 0x78, 0x74, /* File name */
+ /* Extra field: not present */
+ /* File comment: not present */
+
+ /* End of central directory record */
+ 0x50, 0x4b, 0x05, 0x06, /* PK\005\006 */
+ 0x00, 0x00, /* Number of this disk: 0 */
+ 0x00, 0x00, /* Central directory starts on this disk: 0 */
+ 0x01, 0x00, /* Total CD entries on this disk: 1 */
+ 0x01, 0x00, /* Total CD entries: 1 */
+ 0x33, 0x00, 0x00, 0x00, /* Size of CD in bytes */
+ 0x2c, 0x00, 0x00, 0x00, /* Offset of start of CD */
+ 0x00, 0x00, /* Length of archive comment: 0 */
+ /* Archive comment: not present */
+};
+
+DEFINE_TEST(test_read_format_zip_utf8_paths_case1_seeking)
+{
+ struct archive *a;
struct archive_entry *ae;
- const wchar_t *wp;
- int file, i;
-
- /*
- * Test file has a pattern to all names: They all have a
- * number followed by " - " and an accented character. This
- * archive was created by Windows and has regular filenames in
- * some MBCS and uses the Zip 0x7075 extension to hold UTF-8
- * pathnames. The code below checks that the correct
- * (Unicode) characters are decoded by comparing the number to
- * the expected accented character.
- */
-
- for (file = 0; file < 20; ++file) {
- assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
- assert((wp = archive_entry_pathname_w(ae)) != NULL);
- if (wp) {
- for (i = 0; wp[i] != 0; ++i) {
- if (wp[i] == '2') {
- failure("Unicode 'o with umlaut' expected");
- assertEqualInt(wp[i + 4], 0xF6);
- } else if (wp[i] == '3') {
- failure("Unicode 'a with umlaut' expected");
- assertEqualInt(wp[i + 4], 0xE4);
- } else if (wp[i] == '4') {
- failure("Unicode 'a with ring' expected");
- assertEqualInt(wp[i + 4], 0xE5);
- }
- }
- }
- }
- assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
+
+ /* Verify with seeking reader. */
+ assert((a = archive_read_new()) != NULL);
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+ assertEqualIntA(a, ARCHIVE_OK, read_open_memory_seek(a, case1, sizeof(case1), 7));
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+ assertEqualString(archive_entry_pathname(ae), NULL);
+ assertEqualString(archive_entry_pathname_utf8(ae), "ABC\xe2\x86\x92.txt");
+
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a));
}
-DEFINE_TEST(test_read_format_zip_utf8_paths)
+DEFINE_TEST(test_read_format_zip_utf8_paths_case1_streaming)
{
- const char *refname = "test_read_format_zip_utf8_paths.zip";
struct archive *a;
- char *p;
- size_t s;
+ struct archive_entry *ae;
- extract_reference_file(refname);
+ /* Verify with streaming reader. */
+ assert((a = archive_read_new()) != NULL);
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+ assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, case1, sizeof(case1), 31));
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+ assertEqualString(archive_entry_pathname(ae), NULL);
+ assertEqualString(archive_entry_pathname_utf8(ae), "ABC\xe2\x86\x92.txt");
+
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
+ assertEqualIntA(a, ARCHIVE_OK, archive_free(a));
+}
+
+/*
+ * TODO: Case 2: GP#11 is used, but filename is not valid UTF-8.
+ * This should always cause an error; malformed UTF-8 should never happen.
+ */
+
+/*
+ * Case 3: Store UTF-8 filename using extra field 0x7075.
+ * The 0x7075 filename and the regular filename have identical bytes, but
+ * the regular filename is not marked with the GP#11 bit.
+ *
+ * Note: Central dir entry has only "A.txt" and no 0x7075 extension.
+ */
+static const unsigned char case3[] = {
+ /* Local file header */
+ 0x50, 0x4b, 0x03, 0x04, /* PK\003\004 */
+ 0x20, 0x00, /* Version needed to extract: 2.0 */
+ 0x00, 0x00, /* General purpose bit flag: 0x0000 */
+ 0x00, 0x00, /* Compression method: None */
+ 0x00, 0x00, /* Last mod time */
+ 0x00, 0x00, /* Last mod date */
+ 0x00, 0x00, 0x00, 0x00, /* CRC32 */
+ 0x04, 0x00, 0x00, 0x00, /* Compressed size: 4 */
+ 0x04, 0x00, 0x00, 0x00, /* Uncompressed size: 4 */
+ 0x0a, 0x00, /* Filename length: 10 */
+ 0x0e, 0x00, /* Extra field length: 14 */
+ 0x41, 0x42, 0x43, 0xE2, 0x86, 0x92, 0x2e, 0x74, 0x78, 0x74, /* Filename: ABC<right arrow>.txt */
+ 0x75, 0x70, 0x0a, 0x00, 0x41, 0x42, 0x43, 0xE2, 0x86, 0x92, 0x2e, 0x74, 0x78, 0x74, /* Extra field 0x7075, data size 10: ABC<right arrow>.txt */
+
+ /* File data */
+ 0x41, 0x42, 0x43, 0x0a, /* "ABC\n" */
+
+ /* Central directory header */
+ 0x50, 0x4b, 0x01, 0x02, /* PK\001\002 */
+ 0x20, 0x00, /* Version made by: 2.0 for MSDOS */
+ 0x20, 0x00, /* Version needed to extract: 2.0 */
+ 0x00, 0x08, /* General purpose bit flag: bit 11 = UTF8 filename */
+ 0x00, 0x00, /* Compression method: None */
+ 0x00, 0x00, /* Last mod time */
+ 0x00, 0x00, /* Last mod date */
+ 0x00, 0x00, 0x00, 0x00, /* CRC32 */
+ 0x04, 0x00, 0x00, 0x00, /* Compressed size: 4 */
+ 0x04, 0x00, 0x00, 0x00, /* Uncompressed size: 4 */
+ 0x05, 0x00, /* Filename length */
+ 0x00, 0x00, /* Extra field length: 0 */
+ 0x00, 0x00, /* Comment length: 0 */
+ 0x00, 0x00, /* Disk number start: 0 */
+ 0x00, 0x00, /* Internal file attributes */
+ 0x00, 0x00, 0x00, 0x00, /* External file attributes */
+ 0x00, 0x00, 0x00, 0x00, /* Offset of local header */
+ 0x41, 0x2e, 0x74, 0x78, 0x74, /* File name */
+ /* No extra fields */
+ /* File comment: not present */
+
+ /* End of central directory record */
+ 0x50, 0x4b, 0x05, 0x06, /* PK\005\006 */
+ 0x00, 0x00, /* Number of this disk: 0 */
+ 0x00, 0x00, /* Central directory starts on this disk: 0 */
+ 0x01, 0x00, /* Total CD entries on this disk: 1 */
+ 0x01, 0x00, /* Total CD entries: 1 */
+ 0x33, 0x00, 0x00, 0x00, /* Size of CD in bytes */
+ 0x3a, 0x00, 0x00, 0x00, /* Offset of start of CD */
+ 0x00, 0x00, /* Length of archive comment: 0 */
+ /* Archive comment: not present */
+};
+
+DEFINE_TEST(test_read_format_zip_utf8_paths_case3_seeking)
+{
+ struct archive *a;
+ struct archive_entry *ae;
/* Verify with seeking reader. */
assert((a = archive_read_new()) != NULL);
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
- assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 10240));
- verify(a);
+ assertEqualIntA(a, ARCHIVE_OK, read_open_memory_seek(a, case3, sizeof(case3), 7));
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+ assertEqualString(archive_entry_pathname(ae), NULL);
+ assertEqualString(archive_entry_pathname_utf8(ae), "ABC\xe2\x86\x92.txt");
+
assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a));
+}
+
+DEFINE_TEST(test_read_format_zip_utf8_paths_case3_streaming)
+{
+ struct archive *a;
+ struct archive_entry *ae;
/* Verify with streaming reader. */
- p = slurpfile(&s, refname);
assert((a = archive_read_new()) != NULL);
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
- assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, p, s, 31));
- verify(a);
+ assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, case3, sizeof(case3), 31));
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+ assertEqualString(archive_entry_pathname(ae), NULL);
+ assertEqualString(archive_entry_pathname_utf8(ae), "ABC\xe2\x86\x92.txt");
+
assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
assertEqualIntA(a, ARCHIVE_OK, archive_free(a));
}
+
+
+/*
+ * TODO: Case 4: As with Case 3, but the two filenames are not
+ * the same.
+ */
+
+/*
+ * TODO: Case 5: GP#11 and extra field 0x7075 both used, but
+ * store different names.
+ */
+
+/*
+ * TODO: Similar cases where the local file header and central directory
+ * disagree. Seeking reader should always use the CD version, streaming
+ * reader must necessarily always use the local file header version.
+ */
diff --git a/libarchive/test/test_read_format_zip_utf8_paths.zip.uu b/libarchive/test/test_read_format_zip_utf8_paths.zip.uu
deleted file mode 100644
index 7e6cd742..00000000
--- a/libarchive/test/test_read_format_zip_utf8_paths.zip.uu
+++ /dev/null
@@ -1,62 +0,0 @@
-begin 644 test_read_format_zip_utf8_paths.zip
-M4$L#!!0``````,(^9D5BZ95P"0````D````.````1FEL92`S("T@A"YT>'14
-M97-T(&9I;&502P,$%```````PCYF16+IE7`)````"0````X```!&:6QE(#0@
-M+2"&+G1X=%1E<W0@9FEL95!+`P04``````#"/F9%8NF5<`D````)````$P``
-M`$9O;&1E<B`Q+T9I;&4@,2YT>'1497-T(&9I;&502P,$%```````PCYF16+I
-ME7`)````"0```!<```!&;VQD97(@,2]&:6QE(#(@+2"4+G1X=%1E<W0@9FEL
-M95!+`P04``````#"/F9%8NF5<`D````)````%P```$9O;&1E<B`Q+T9I;&4@
-M,R`M((0N='AT5&5S="!F:6QE4$L#!!0``````,(^9D5BZ95P"0````D````7
-M````1F]L9&5R(#$O1FEL92`T("T@ABYT>'1497-T(&9I;&502P,$%```````
-MPCYF16+IE7`)````"0```!<```!&;VQD97(@,B`M()0O1FEL92`Q+G1X=%1E
-M<W0@9FEL95!+`P04``````#"/F9%8NF5<`D````)````&P```$9O;&1E<B`R
-M("T@E"]&:6QE(#(@+2"4+G1X=%1E<W0@9FEL95!+`P04``````#"/F9%8NF5
-M<`D````)````&P```$9O;&1E<B`R("T@E"]&:6QE(#,@+2"$+G1X=%1E<W0@
-M9FEL95!+`P04``````#"/F9%8NF5<`D````)````&P```$9O;&1E<B`R("T@
-ME"]&:6QE(#0@+2"&+G1X=%1E<W0@9FEL95!+`P04``````#"/F9%8NF5<`D`
-M```)````%P```$9O;&1E<B`S("T@A"]&:6QE(#$N='AT5&5S="!F:6QE4$L#
-M!!0``````,(^9D5BZ95P"0````D````;````1F]L9&5R(#,@+2"$+T9I;&4@
-M,B`M()0N='AT5&5S="!F:6QE4$L#!!0``````,(^9D5BZ95P"0````D````;
-M````1F]L9&5R(#,@+2"$+T9I;&4@,R`M((0N='AT5&5S="!F:6QE4$L#!!0`
-M`````,(^9D5BZ95P"0````D````;````1F]L9&5R(#,@+2"$+T9I;&4@-"`M
-M((8N='AT5&5S="!F:6QE4$L#!!0``````,(^9D5BZ95P"0````D````7````
-M1F]L9&5R(#0@+2"&+T9I;&4@,2YT>'1497-T(&9I;&502P,$%```````PCYF
-M16+IE7`)````"0```!L```!&;VQD97(@-"`M((8O1FEL92`R("T@E"YT>'14
-M97-T(&9I;&502P,$%```````PCYF16+IE7`)````"0```!L```!&;VQD97(@
-M-"`M((8O1FEL92`S("T@A"YT>'1497-T(&9I;&502P,$%```````PCYF16+I
-ME7`)````"0```!L```!&;VQD97(@-"`M((8O1FEL92`T("T@ABYT>'1497-T
-M(&9I;&502P,$%```````PCYF16+IE7`)````"0````H```!&:6QE(#$N='AT
-M5&5S="!F:6QE4$L#!!0``````,(^9D5BZ95P"0````D````.````1FEL92`R
-M("T@E"YT>'1497-T(&9I;&502P$"%``4``````#"/F9%8NF5<`D````)````
-M#@`````````!`"``````````1FEL92`S("T@A"YT>'102P$"%``4``````#"
-M/F9%8NF5<`D````)````#@`````````!`"`````U````1FEL92`T("T@ABYT
-M>'102P$"%``4``````#"/F9%8NF5<`D````)````$P`````````!`"````!J
-M````1F]L9&5R(#$O1FEL92`Q+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0``
-M``D````7``````````$`(````*0```!&;VQD97(@,2]&:6QE(#(@+2"4+G1X
-M=%!+`0(4`!0``````,(^9D5BZ95P"0````D````7``````````$`(````.(`
-M``!&;VQD97(@,2]&:6QE(#,@+2"$+G1X=%!+`0(4`!0``````,(^9D5BZ95P
-M"0````D````7``````````$`(````"`!``!&;VQD97(@,2]&:6QE(#0@+2"&
-M+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0````D````7``````````$`(```
-M`%X!``!&;VQD97(@,B`M()0O1FEL92`Q+G1X=%!+`0(4`!0``````,(^9D5B
-MZ95P"0````D````;``````````$`(````)P!``!&;VQD97(@,B`M()0O1FEL
-M92`R("T@E"YT>'102P$"%``4``````#"/F9%8NF5<`D````)````&P``````
-M```!`"````#>`0``1F]L9&5R(#(@+2"4+T9I;&4@,R`M((0N='AT4$L!`A0`
-M%```````PCYF16+IE7`)````"0```!L``````````0`@````(`(``$9O;&1E
-M<B`R("T@E"]&:6QE(#0@+2"&+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0``
-M``D````7``````````$`(````&("``!&;VQD97(@,R`M((0O1FEL92`Q+G1X
-M=%!+`0(4`!0``````,(^9D5BZ95P"0````D````;``````````$`(````*`"
-M``!&;VQD97(@,R`M((0O1FEL92`R("T@E"YT>'102P$"%``4``````#"/F9%
-M8NF5<`D````)````&P`````````!`"````#B`@``1F]L9&5R(#,@+2"$+T9I
-M;&4@,R`M((0N='AT4$L!`A0`%```````PCYF16+IE7`)````"0```!L`````
-M`````0`@````)`,``$9O;&1E<B`S("T@A"]&:6QE(#0@+2"&+G1X=%!+`0(4
-M`!0``````,(^9D5BZ95P"0````D````7``````````$`(````&8#``!&;VQD
-M97(@-"`M((8O1FEL92`Q+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0````D`
-M```;``````````$`(````*0#``!&;VQD97(@-"`M((8O1FEL92`R("T@E"YT
-M>'102P$"%``4``````#"/F9%8NF5<`D````)````&P`````````!`"````#F
-M`P``1F]L9&5R(#0@+2"&+T9I;&4@,R`M((0N='AT4$L!`A0`%```````PCYF
-M16+IE7`)````"0```!L``````````0`@````*`0``$9O;&1E<B`T("T@AB]&
-M:6QE(#0@+2"&+G1X=%!+`0(4`!0``````,(^9D5BZ95P"0````D````*````
-M``````$`(````&H$``!&:6QE(#$N='AT4$L!`A0`%```````PCYF16+IE7`)
-M````"0````X``````````0`@````FP0``$9I;&4@,B`M()0N='AT4$L%!@``
-0```4`!0`7`4``-`$````````
-`
-end