summaryrefslogtreecommitdiff
path: root/locale
diff options
context:
space:
mode:
authorLeonhard Holz <leonhard.holz@web.de>2015-05-12 11:37:52 +0200
committerOndřej Bílka <neleai@seznam.cz>2015-05-12 11:37:52 +0200
commitf13c2a8dff2329c6692a80176262ceaaf8a6f74e (patch)
treedd2443fba95dd55830d0d0e745a055ef3981c506 /locale
parent34cb304e5a6df706e186d504b69af974bfc15a2f (diff)
downloadglibc-f13c2a8dff2329c6692a80176262ceaaf8a6f74e.tar.gz
Improve strcoll with strdiff.
This patch improves strcoll hot case by finding first byte that mismatches. That is in likely case enough to determine comparison result.
Diffstat (limited to 'locale')
-rw-r--r--locale/C-collate.c4
-rw-r--r--locale/categories.def1
-rw-r--r--locale/langinfo.h1
-rw-r--r--locale/localeinfo.h8
-rw-r--r--locale/programs/ld-collate.c9
5 files changed, 22 insertions, 1 deletions
diff --git a/locale/C-collate.c b/locale/C-collate.c
index 06dfdfaad5..d7f3c550a5 100644
--- a/locale/C-collate.c
+++ b/locale/C-collate.c
@@ -144,6 +144,8 @@ const struct __locale_data _nl_C_LC_COLLATE attribute_hidden =
/* _NL_COLLATE_COLLSEQWC */
{ .string = (const char *) collseqwc },
/* _NL_COLLATE_CODESET */
- { .string = _nl_C_codeset }
+ { .string = _nl_C_codeset },
+ /* _NL_COLLATE_ENCODING_TYPE */
+ { .word = __cet_8bit }
}
};
diff --git a/locale/categories.def b/locale/categories.def
index a8dda53007..045489d741 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -58,6 +58,7 @@ DEFINE_CATEGORY
DEFINE_ELEMENT (_NL_COLLATE_COLLSEQMB, "collate-collseqmb", std, wstring)
DEFINE_ELEMENT (_NL_COLLATE_COLLSEQWC, "collate-collseqwc", std, wstring)
DEFINE_ELEMENT (_NL_COLLATE_CODESET, "collate-codeset", std, string)
+ DEFINE_ELEMENT (_NL_COLLATE_ENCODING_TYPE, "collate-encoding-type", std, word)
), NO_POSTLOAD)
diff --git a/locale/langinfo.h b/locale/langinfo.h
index a565d9d120..ffc5c7f471 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -255,6 +255,7 @@ enum
_NL_COLLATE_COLLSEQMB,
_NL_COLLATE_COLLSEQWC,
_NL_COLLATE_CODESET,
+ _NL_COLLATE_ENCODING_TYPE,
_NL_NUM_LC_COLLATE,
/* LC_CTYPE category: character classification.
diff --git a/locale/localeinfo.h b/locale/localeinfo.h
index 1d2ee00876..bdab9fe745 100644
--- a/locale/localeinfo.h
+++ b/locale/localeinfo.h
@@ -110,6 +110,14 @@ enum coll_sort_rule
sort_mask
};
+/* Collation encoding type. */
+enum collation_encoding_type
+{
+ __cet_other,
+ __cet_8bit,
+ __cet_utf8
+};
+
/* We can map the types of the entries into a few categories. */
enum value_type
{
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index dc0fe30a13..a39a94f2cc 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -32,6 +32,7 @@
#include "linereader.h"
#include "locfile.h"
#include "elem-hash.h"
+#include "../localeinfo.h"
/* Uncomment the following line in the production version. */
/* #define NDEBUG 1 */
@@ -2130,6 +2131,8 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
/* The words have to be handled specially. */
if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
add_locale_uint32 (&file, 0);
+ else if (idx == _NL_ITEM_INDEX (_NL_COLLATE_ENCODING_TYPE))
+ add_locale_uint32 (&file, __cet_other);
else
add_locale_empty (&file);
}
@@ -2493,6 +2496,12 @@ collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
add_locale_raw_data (&file, collate->mbseqorder, 256);
add_locale_collseq_table (&file, &collate->wcseqorder);
add_locale_string (&file, charmap->code_set_name);
+ if (strcmp (charmap->code_set_name, "UTF-8") == 0)
+ add_locale_uint32 (&file, __cet_utf8);
+ else if (charmap->mb_cur_max == 1)
+ add_locale_uint32 (&file, __cet_8bit);
+ else
+ add_locale_uint32 (&file, __cet_other);
write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
obstack_free (&weightpool, NULL);