summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Reiser <jreiser@BitWagon.com>2022-08-26 09:51:19 -0700
committerJohn Reiser <jreiser@BitWagon.com>2022-08-26 09:51:19 -0700
commitd6248d2640efe3e40a1a9c0bb7bd8903f6beef98 (patch)
tree4e6b33faaf5b582fe0a225504f456a2609aa5dbb
parent66d6e8a9166d3a340a16e1bd861fea9e1c186af3 (diff)
downloadlibarchive-d6248d2640efe3e40a1a9c0bb7bd8903f6beef98.tar.gz
archive_entry_pathname() tries UTF-8 if MBS returns EILSEQ
For better pathname portability across operating systems, in particular from Windows to Linux. Original bug (reported against unrar): https://bugzilla.redhat.com/show_bug.cgi?id=2120926 Modified: libarchive/archive_entry.c
-rw-r--r--libarchive/archive_entry.c14
1 files changed, 14 insertions, 0 deletions
diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c
index ca7a4bdb..ae6dc333 100644
--- a/libarchive/archive_entry.c
+++ b/libarchive/archive_entry.c
@@ -568,6 +568,13 @@ archive_entry_nlink(struct archive_entry *entry)
return (entry->ae_stat.aest_nlink);
}
+/* Our caller could instead have chosen a specific encoding
+ * (archive_mstring_get_mbs, archive_mstring_get_utf8,
+ * archive_mstring_get_wcs).  Because it did not, we should try
+ * multiple encodings.  Try mbs first for historical reasons, even
+ * though utf8 might be better for pathname portability.
+ * wcs is omitted because of the type mismatch (char * versus wchar_t *).
+ */
const char *
archive_entry_pathname(struct archive_entry *entry)
{
@@ -575,6 +582,13 @@ archive_entry_pathname(struct archive_entry *entry)
if (archive_mstring_get_mbs(
entry->archive, &entry->ae_pathname, &p) == 0)
return (p);
+#if HAVE_EILSEQ /*{*/
+ if (errno == EILSEQ) {
+ if (archive_mstring_get_utf8(
+ entry->archive, &entry->ae_pathname, &p) == 0)
+ return (p);
+ }
+#endif /*}*/
if (errno == ENOMEM)
__archive_errx(1, "No memory");
return (NULL);