diff options
author | Eli Zaretskii <eliz@gnu.org> | 2014-08-29 22:18:06 +0300 |
---|---|---|
committer | Eli Zaretskii <eliz@gnu.org> | 2014-08-29 22:18:06 +0300 |
commit | 21ba51de76390907ca86b1e7715f472dd740fbc3 (patch) | |
tree | 0b3418f0563a5da979cacf6894120840b56b8456 /src | |
parent | 2ae366c73e27dc695b6bc1cd03d93f48b3db76d4 (diff) | |
download | emacs-21ba51de76390907ca86b1e7715f472dd740fbc3.tar.gz |
Implement case-insensitive and Unicode-compliant collation on MS-Windows.
src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix.
src/w32proc.c (w32_compare_strings): Accept additional argument
IGNORE_CASE. Set up the flags for CompareStringW to ignore case
if requested. If w32-collate-ignore-punctuation is non-nil, add
NORM_IGNORESYMBOLS to the flags.
(LINGUISTIC_IGNORECASE): Define if not already defined.
(syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable.
src/sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface
change.
src/w32.h: Adjust prototype of w32_compare_strings.
etc/NEWS: Mention w32-collate-ignore-punctuation.
Fixes: debbugs:18051
Diffstat (limited to 'src')
-rw-r--r-- | src/ChangeLog | 14 | ||||
-rw-r--r-- | src/fns.c | 23 | ||||
-rw-r--r-- | src/sysdep.c | 2 | ||||
-rw-r--r-- | src/w32.h | 2 | ||||
-rw-r--r-- | src/w32proc.c | 42 |
5 files changed, 69 insertions, 14 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 66588bc3e67..181a43d058f 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,17 @@ +2014-08-29 Eli Zaretskii <eliz@gnu.org> + + * fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix. + + * w32proc.c (w32_compare_strings): Accept additional argument + IGNORE_CASE. Set up the flags for CompareStringW to ignore case + if requested. If w32-collate-ignore-punctuation is non-nil, add + NORM_IGNORESYMBOLS to the flags. + (LINGUISTIC_IGNORECASE): Define if not already defined. + (syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable. + + * sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface + change. + 2014-08-29 Michael Albinus <michael.albinus@gmx.de> * sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l): diff --git a/src/fns.c b/src/fns.c index 3cca40df50f..f838599230b 100644 --- a/src/fns.c +++ b/src/fns.c @@ -350,7 +350,7 @@ Symbols are also allowed; their print names are used instead. This function obeys the conventions for collation order in your locale settings. For example, punctuation and whitespace characters -are considered less significant for sorting: +might be considered less significant for sorting: \(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp) => \("11" "1 1" "1.1" "12" "1 2" "1.2") @@ -358,11 +358,15 @@ are considered less significant for sorting: The optional argument LOCALE, a string, overrides the setting of your current locale identifier for collation. The value is system dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, -while it would be \"English_USA.1252\" on MS Windows systems. +while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems. If IGNORE-CASE is non-nil, characters are converted to lower-case before comparing them. +To emulate Unicode-compliant collation on MS-Windows systems, +bind `w32-collate-ignore-punctuation' to a non-nil value, since +the codeset part of the locale cannot be \"UTF-8\" on MS-Windows. 
+ If your system does not support a locale environment, this function behaves like `string-lessp'. */) (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) @@ -391,8 +395,8 @@ Symbols are also allowed; their print names are used instead. This function obeys the conventions for collation order in your locale settings. For example, characters with different coding points but -the same meaning are considered as equal, like different grave accent -unicode characters: +the same meaning might be considered as equal, like different grave +accent Unicode characters: \(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF)) => t @@ -400,13 +404,20 @@ unicode characters: The optional argument LOCALE, a string, overrides the setting of your current locale identifier for collation. The value is system dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems, -while it would be \"English_USA.1252\" on MS Windows systems. +while it would be \"enu_USA.1252\" on MS Windows systems. If IGNORE-CASE is non-nil, characters are converted to lower-case before comparing them. +To emulate Unicode-compliant collation on MS-Windows systems, +bind `w32-collate-ignore-punctuation' to a non-nil value, since +the codeset part of the locale cannot be \"UTF-8\" on MS-Windows. + If your system does not support a locale environment, this function -behaves like `string-equal'. */) +behaves like `string-equal'. + +Do NOT use this function to compare file names for equality, only +for sorting them. */) (Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case) { #if defined __STDC_ISO_10646__ || defined WINDOWSNT diff --git a/src/sysdep.c b/src/sysdep.c index 7993a59e721..52a72385f46 100644 --- a/src/sysdep.c +++ b/src/sysdep.c @@ -3796,6 +3796,6 @@ str_collate (Lisp_Object s1, Lisp_Object s2, char *loc = STRINGP (locale) ? 
SSDATA (locale) : NULL; - return w32_compare_strings (SDATA (s1), SDATA (s2), loc); + return w32_compare_strings (SDATA (s1), SDATA (s2), loc, !NILP (ignore_case)); } #endif /* WINDOWSNT */ diff --git a/src/w32.h b/src/w32.h index 68ee14c70e3..2cc179a0c36 100644 --- a/src/w32.h +++ b/src/w32.h @@ -211,7 +211,7 @@ extern int w32_memory_info (unsigned long long *, unsigned long long *, unsigned long long *, unsigned long long *); /* Compare 2 UTF-8 strings in locale-dependent fashion. */ -extern int w32_compare_strings (const char *, const char *, char *); +extern int w32_compare_strings (const char *, const char *, char *, int); #ifdef HAVE_GNUTLS #include <gnutls/gnutls.h> diff --git a/src/w32proc.c b/src/w32proc.c index ed62de02433..0b441d45186 100644 --- a/src/w32proc.c +++ b/src/w32proc.c @@ -3213,15 +3213,20 @@ get_lcid (const char *locale_name) #ifndef _NSLCMPERROR # define _NSLCMPERROR INT_MAX #endif +#ifndef LINGUISTIC_IGNORECASE +# define LINGUISTIC_IGNORECASE 0x00000010 +#endif int -w32_compare_strings (const char *s1, const char *s2, char *locname) +w32_compare_strings (const char *s1, const char *s2, char *locname, + int ignore_case) { LCID lcid = GetThreadLocale (); wchar_t *string1_w, *string2_w; int val, needed; extern BOOL g_b_init_compare_string_w; static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int); + DWORD flags = 0; USE_SAFE_ALLOCA; @@ -3284,11 +3289,22 @@ w32_compare_strings (const char *s1, const char *s2, char *locname) lcid = new_lcid; } - /* FIXME: Need a way to control the FLAGS argument, perhaps via the - CODESET part of LOCNAME. In particular, ls-lisp will want - NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or - NORM_IGNORECASE. */ - val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1); + if (ignore_case) + { + /* NORM_IGNORECASE ignores any tertiary distinction, not just + case variants. 
LINGUISTIC_IGNORECASE is more selective, and + is sensitive to the locale's language, but it is not + available before Vista. */ + if (w32_major_version >= 6) + flags |= LINGUISTIC_IGNORECASE; + else + flags |= NORM_IGNORECASE; + } + /* This approximates what glibc collation functions do when the + locale's codeset is UTF-8. */ + if (!NILP (Vw32_collate_ignore_punctuation)) + flags |= NORM_IGNORESYMBOLS; + val = pCompareStringW (lcid, flags, string1_w, -1, string2_w, -1); SAFE_FREE (); if (!val) { @@ -3408,6 +3424,20 @@ Any other non-nil value means do this even on remote and removable drives where the performance impact may be noticeable even on modern hardware. */); Vw32_get_true_file_attributes = Qlocal; + DEFVAR_LISP ("w32-collate-ignore-punctuation", + Vw32_collate_ignore_punctuation, + doc: /* Non-nil causes string collation functions to ignore punctuation on MS-Windows. +On POSIX platforms, `string-collate-lessp' and `string-collate-equalp' +ignore punctuation characters when they compare strings, if the +locale's codeset is UTF-8, as in \"en_US.UTF-8\". Binding this option +to a non-nil value will achieve a similar effect on MS-Windows, where +locales with UTF-8 codeset are not supported. + +Note that setting this to non-nil will also ignore blanks and symbols +in the strings. So do NOT use this option when comparing file names +for equality, only when you need to sort them. */); + Vw32_collate_ignore_punctuation = Qnil; + staticpro (&Vw32_valid_locale_ids); staticpro (&Vw32_valid_codepages); } |