summaryrefslogtreecommitdiff
path: root/src/fileio.c
diff options
context:
space:
mode:
author: Eli Zaretskii <eliz@gnu.org> 2018-05-18 16:34:19 +0300
committer: Eli Zaretskii <eliz@gnu.org> 2018-06-02 12:37:19 +0300
commit: 6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c (patch)
tree: 406a66a7ed14dca5884fb5001473c6a9a624e71e /src/fileio.c
parent: 35c1ab1419174f72010c745d963a55b6c183443c (diff)
download: emacs-6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c.tar.gz
Fix decoding of directories when "~" includes non-ASCII chars
* src/fileio.c (Fexpand_file_name): Don't build multibyte strings from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY have different multibyteness, as this adds bytes to the byte sequence and, in some situations (e.g., when the home directory includes non-ASCII characters), can cause file APIs to fail. (Bug#30755)
* lisp/startup.el (normal-top-level): Make sure default-directory is set to a multibyte string when decoded on MS-Windows.
(cherry picked from commit 3aab8626ba5080bb04d0fdae52d99c850a842a52)
Diffstat (limited to 'src/fileio.c')
-rw-r--r-- src/fileio.c 75
1 files changed, 60 insertions, 15 deletions
diff --git a/src/fileio.c b/src/fileio.c
index c4a10000bc3..9dbe3ad788e 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -864,33 +864,78 @@ the root directory. */)
}
}
multibyte = STRING_MULTIBYTE (name);
- if (multibyte != STRING_MULTIBYTE (default_directory))
+ bool defdir_multibyte = STRING_MULTIBYTE (default_directory);
+ if (multibyte != defdir_multibyte)
{
+ /* We want to make both NAME and DEFAULT_DIRECTORY have the same
+ multibyteness. Strategy:
+ . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they
+ can be converted to the multibyteness of the other one
+ while keeping the same byte sequence.
+ . If both are non-ASCII, the only safe conversion is to
+ convert the multibyte one to be unibyte, because the
+ reverse conversion potentially adds bytes while raw bytes
+ are converted to their multibyte forms, which we will be
+ unable to account for, since the information about the
+ original multibyteness is lost. If those additional bytes
+ later leak to system APIs because they are not encoded or
+ because they are converted to unibyte strings by keeping
+ the data, file APIs will fail.
+
+ Note: One could argue that if we see a multibyte string, it
+ is evidence that file-name decoding was already set up, and
+ we could convert unibyte strings to multibyte using
+ DECODE_FILE. However, this is risky, because the likes of
+ string_to_multibyte are capable of creating multibyte strings
+ without any decoding. */
if (multibyte)
{
- unsigned char *p = SDATA (name);
+ bool name_ascii_p = SCHARS (name) == SBYTES (name);
+ unsigned char *p = SDATA (default_directory);
- while (*p && ASCII_CHAR_P (*p))
- p++;
- if (*p == '\0')
+ if (!name_ascii_p)
+ while (*p && ASCII_CHAR_P (*p))
+ p++;
+ if (name_ascii_p || *p != '\0')
{
- /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is
- unibyte. Do not convert DEFAULT_DIRECTORY to
- multibyte; instead, convert NAME to a unibyte string,
- so that the result of this function is also a unibyte
- string. This is needed during bootstrapping and
- dumping, when Emacs cannot decode file names, because
- the locale environment is not set up. */
+ /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII.
+ Make a unibyte string out of NAME, and arrange for
+ the result of this function to be a unibyte string.
+ This is needed during bootstrapping and dumping, when
+ Emacs cannot decode file names, because the locale
+ environment is not set up. */
name = make_unibyte_string (SSDATA (name), SBYTES (name));
multibyte = 0;
}
else
- default_directory = string_to_multibyte (default_directory);
+ {
+ /* NAME is non-ASCII and multibyte, and
+ DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a
+ multibyte string out of DEFAULT_DIRECTORY's data. */
+ default_directory =
+ make_multibyte_string (SSDATA (default_directory),
+ SCHARS (default_directory),
+ SCHARS (default_directory));
+ }
}
else
{
- name = string_to_multibyte (name);
- multibyte = 1;
+ unsigned char *p = SDATA (name);
+
+ while (*p && ASCII_CHAR_P (*p))
+ p++;
+ if (*p == '\0')
+ {
+ /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte
+ and pure-ASCII. Make a multibyte string out of
+ NAME's data. */
+ name = make_multibyte_string (SSDATA (name),
+ SCHARS (name), SCHARS (name));
+ multibyte = 1;
+ }
+ else
+ default_directory = make_unibyte_string (SSDATA (default_directory),
+ SBYTES (default_directory));
}
}