summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Eli Zaretskii <eliz@gnu.org> 2014-08-29 22:18:06 +0300
committer Eli Zaretskii <eliz@gnu.org> 2014-08-29 22:18:06 +0300
commit 21ba51de76390907ca86b1e7715f472dd740fbc3 (patch)
tree 0b3418f0563a5da979cacf6894120840b56b8456 /src
parent 2ae366c73e27dc695b6bc1cd03d93f48b3db76d4 (diff)
download emacs-21ba51de76390907ca86b1e7715f472dd740fbc3.tar.gz
Implement case-insensitive and Unicode-compliant collation on MS-Windows.
src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix.
src/w32proc.c (w32_compare_strings): Accept additional argument IGNORE_CASE. Set up the flags for CompareStringW to ignore case if requested. If w32-collate-ignore-punctuation is non-nil, add NORM_IGNORESYMBOLS to the flags.
(LINGUISTIC_IGNORECASE): Define if not already defined.
(syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable.
src/sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface change.
src/w32.h: Adjust prototype of w32_compare_strings.
etc/NEWS: Mention w32-collate-ignore-punctuation.
Fixes: debbugs:18051
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 14
-rw-r--r-- src/fns.c 23
-rw-r--r-- src/sysdep.c 2
-rw-r--r-- src/w32.h 2
-rw-r--r-- src/w32proc.c 42
5 files changed, 69 insertions, 14 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 66588bc3e67..181a43d058f 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,17 @@
+2014-08-29 Eli Zaretskii <eliz@gnu.org>
+
+ * fns.c (Fstring_collate_lessp, Fstring_collate_equalp): Doc fix.
+
+ * w32proc.c (w32_compare_strings): Accept additional argument
+ IGNORE_CASE. Set up the flags for CompareStringW to ignore case
+ if requested. If w32-collate-ignore-punctuation is non-nil, add
+ NORM_IGNORESYMBOLS to the flags.
+ (LINGUISTIC_IGNORECASE): Define if not already defined.
+ (syms_of_ntproc) <Vw32_collate_ignore_punctuation>: New variable.
+
+ * sysdep.c (str_collate) [WINDOWSNT]: Adapt to the interface
+ change.
+
2014-08-29 Michael Albinus <michael.albinus@gmx.de>
* sysdep.c (LC_CTYPE, LC_CTYPE_MASK, towlower_l):
diff --git a/src/fns.c b/src/fns.c
index 3cca40df50f..f838599230b 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -350,7 +350,7 @@ Symbols are also allowed; their print names are used instead.
This function obeys the conventions for collation order in your
locale settings. For example, punctuation and whitespace characters
-are considered less significant for sorting:
+might be considered less significant for sorting:
\(sort '\("11" "12" "1 1" "1 2" "1.1" "1.2") 'string-collate-lessp)
=> \("11" "1 1" "1.1" "12" "1 2" "1.2")
@@ -358,11 +358,15 @@ are considered less significant for sorting:
The optional argument LOCALE, a string, overrides the setting of your
current locale identifier for collation. The value is system
dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems,
-while it would be \"English_USA.1252\" on MS Windows systems.
+while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems.
If IGNORE-CASE is non-nil, characters are converted to lower-case
before comparing them.
+To emulate Unicode-compliant collation on MS-Windows systems,
+bind `w32-collate-ignore-punctuation' to a non-nil value, since
+the codeset part of the locale cannot be \"UTF-8\" on MS-Windows.
+
If your system does not support a locale environment, this function
behaves like `string-lessp'. */)
(Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case)
@@ -391,8 +395,8 @@ Symbols are also allowed; their print names are used instead.
This function obeys the conventions for collation order in your locale
settings. For example, characters with different coding points but
-the same meaning are considered as equal, like different grave accent
-unicode characters:
+the same meaning might be considered as equal, like different grave
+accent Unicode characters:
\(string-collate-equalp \(string ?\\uFF40) \(string ?\\u1FEF))
=> t
@@ -400,13 +404,20 @@ unicode characters:
The optional argument LOCALE, a string, overrides the setting of your
current locale identifier for collation. The value is system
dependent; a LOCALE \"en_US.UTF-8\" is applicable on POSIX systems,
-while it would be \"English_USA.1252\" on MS Windows systems.
+while it would be, e.g., \"enu_USA.1252\" on MS-Windows systems.
If IGNORE-CASE is non-nil, characters are converted to lower-case
before comparing them.
+To emulate Unicode-compliant collation on MS-Windows systems,
+bind `w32-collate-ignore-punctuation' to a non-nil value, since
+the codeset part of the locale cannot be \"UTF-8\" on MS-Windows.
+
If your system does not support a locale environment, this function
-behaves like `string-equal'. */)
+behaves like `string-equal'.
+
+Do NOT use this function to compare file names for equality, only
+for sorting them. */)
(Lisp_Object s1, Lisp_Object s2, Lisp_Object locale, Lisp_Object ignore_case)
{
#if defined __STDC_ISO_10646__ || defined WINDOWSNT
diff --git a/src/sysdep.c b/src/sysdep.c
index 7993a59e721..52a72385f46 100644
--- a/src/sysdep.c
+++ b/src/sysdep.c
@@ -3796,6 +3796,6 @@ str_collate (Lisp_Object s1, Lisp_Object s2,
char *loc = STRINGP (locale) ? SSDATA (locale) : NULL;
- return w32_compare_strings (SDATA (s1), SDATA (s2), loc);
+ return w32_compare_strings (SDATA (s1), SDATA (s2), loc, !NILP (ignore_case));
}
#endif /* WINDOWSNT */
diff --git a/src/w32.h b/src/w32.h
index 68ee14c70e3..2cc179a0c36 100644
--- a/src/w32.h
+++ b/src/w32.h
@@ -211,7 +211,7 @@ extern int w32_memory_info (unsigned long long *, unsigned long long *,
unsigned long long *, unsigned long long *);
/* Compare 2 UTF-8 strings in locale-dependent fashion. */
-extern int w32_compare_strings (const char *, const char *, char *);
+extern int w32_compare_strings (const char *, const char *, char *, int);
#ifdef HAVE_GNUTLS
#include <gnutls/gnutls.h>
diff --git a/src/w32proc.c b/src/w32proc.c
index ed62de02433..0b441d45186 100644
--- a/src/w32proc.c
+++ b/src/w32proc.c
@@ -3213,15 +3213,20 @@ get_lcid (const char *locale_name)
#ifndef _NSLCMPERROR
# define _NSLCMPERROR INT_MAX
#endif
+#ifndef LINGUISTIC_IGNORECASE
+# define LINGUISTIC_IGNORECASE 0x00000010
+#endif
int
-w32_compare_strings (const char *s1, const char *s2, char *locname)
+w32_compare_strings (const char *s1, const char *s2, char *locname,
+ int ignore_case)
{
LCID lcid = GetThreadLocale ();
wchar_t *string1_w, *string2_w;
int val, needed;
extern BOOL g_b_init_compare_string_w;
static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int);
+ DWORD flags = 0;
USE_SAFE_ALLOCA;
@@ -3284,11 +3289,22 @@ w32_compare_strings (const char *s1, const char *s2, char *locname)
lcid = new_lcid;
}
- /* FIXME: Need a way to control the FLAGS argument, perhaps via the
- CODESET part of LOCNAME. In particular, ls-lisp will want
- NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or
- NORM_IGNORECASE. */
- val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1);
+ if (ignore_case)
+ {
+ /* NORM_IGNORECASE ignores any tertiary distinction, not just
+ case variants. LINGUISTIC_IGNORECASE is more selective, and
+ is sensitive to the locale's language, but it is not
+ available before Vista. */
+ if (w32_major_version >= 6)
+ flags |= LINGUISTIC_IGNORECASE;
+ else
+ flags |= NORM_IGNORECASE;
+ }
+ /* This approximates what glibc collation functions do when the
+ locale's codeset is UTF-8. */
+ if (!NILP (Vw32_collate_ignore_punctuation))
+ flags |= NORM_IGNORESYMBOLS;
+ val = pCompareStringW (lcid, flags, string1_w, -1, string2_w, -1);
SAFE_FREE ();
if (!val)
{
@@ -3408,6 +3424,20 @@ Any other non-nil value means do this even on remote and removable drives
where the performance impact may be noticeable even on modern hardware. */);
Vw32_get_true_file_attributes = Qlocal;
+ DEFVAR_LISP ("w32-collate-ignore-punctuation",
+ Vw32_collate_ignore_punctuation,
+ doc: /* Non-nil causes string collation functions to ignore punctuation on MS-Windows.
+On Posix platforms, `string-collate-lessp' and `string-collate-equalp'
+ignore punctuation characters when they compare strings, if the
+locale's codeset is UTF-8, as in \"en_US.UTF-8\". Binding this option
+to a non-nil value will achieve a similar effect on MS-Windows, where
+locales with UTF-8 codeset are not supported.
+
+Note that setting this to non-nil will also ignore blanks and symbols
+in the strings. So do NOT use this option when comparing file names
+for equality, only when you need to sort them. */);
+ Vw32_collate_ignore_punctuation = Qnil;
+
staticpro (&Vw32_valid_locale_ids);
staticpro (&Vw32_valid_codepages);
}