summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Fulton <jim@zope.com>2005-12-07 00:50:04 +0000
committerJim Fulton <jim@zope.com>2005-12-07 00:50:04 +0000
commit1c68a055132756af3f6100dbf62a1cf41b7387f8 (patch)
treedaee3e44e3bac3776b068945af2a13ea17b7c9c5
parent68bc0cce74b1a3933f458cb385c19be1f22b64ef (diff)
parent1ce588f06a2f35ac13cdc8c446ac816ec62c9a47 (diff)
downloadzope-i18n-1c68a055132756af3f6100dbf62a1cf41b7387f8.tar.gz
Added an API for collating text and a fallback implementation.
(Apps that really care will probably use an ICU-based adapter that we will provide soonish. Watch the zope.ucol project.)
-rw-r--r--interfaces/locales.py19
-rw-r--r--locales/fallbackcollator.py33
-rw-r--r--locales/fallbackcollator.txt63
-rw-r--r--locales/tests/test_fallbackcollator.py25
4 files changed, 140 insertions, 0 deletions
diff --git a/interfaces/locales.py b/interfaces/locales.py
index 7ecad7c..c0f77d1 100644
--- a/interfaces/locales.py
+++ b/interfaces/locales.py
@@ -618,3 +618,22 @@ class IDictionaryInheritance(ILocaleInheritance):
If an key is not found or is None, the next higher up Locale
object is consulted.
"""
+
+class ICollator(Interface):
+ """Provide support for collating text strings
+
+ This interface will typically be provided by adapting a locale.
+ """
+
+ def key(text):
+ """Return a collation key for the given text.
+ """
+
+ def cmp(text1, text2):
+ """Compare two text strings.
+
+ The return value is negative if text1 < text2, 0 if they are
+ equal, and positive if text1 > text2.
+ """
+
+
diff --git a/locales/fallbackcollator.py b/locales/fallbackcollator.py
new file mode 100644
index 0000000..ab1ddda
--- /dev/null
+++ b/locales/fallbackcollator.py
@@ -0,0 +1,33 @@
+##############################################################################
+#
+# Copyright (c) 2004 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+"""Fallback collator
+
+$Id$
+"""
+
+from unicodedata import normalize
+
+class FallbackCollator:
+
+ def __init__(self, locale):
+ pass
+
+ def key(self, s):
+ s = normalize('NFKC', s)
+ return s.lower(), s
+
+ def cmp(self, s1, s2):
+ return cmp(self.key(s1), self.key(s2))
+
+
diff --git a/locales/fallbackcollator.txt b/locales/fallbackcollator.txt
new file mode 100644
index 0000000..9457be2
--- /dev/null
+++ b/locales/fallbackcollator.txt
@@ -0,0 +1,63 @@
+Fallback Collator
+=================
+
+The zope.i18n.interfaces.locales.ICollator interface defines an API
+for collating text. Why is this important? Simply sorting unicode
+strings doesn't provide an ordering that users in a given locale will
+find useful. Various languages have text sorting conventions that
+don't agree with the ordering of unicode code points. (This is even
+true for English. :)
+
+Text collation is a fairly involved process. Systems that need this
+will likely use something like ICU
+(http://www-306.ibm.com/software/globalization/icu,
+http://pyicu.osafoundation.org/). We don't want to introduce a
+dependency on ICU at this time, so we are providing a fallback
+collator that:
+
+- Provides an implementation of the ICollator interface that can be
+ used for development, and
+
+- Provides a small amount of value, at least for English speakers. :)
+
+Application code should obtain a collator by adapting a locale to
+ICollator. Here we just call the collator factory with None. The
+fallback collator doesn't actually use the locale, although
+application code should certainly *not* count on this.
+
+ >>> import zope.i18n.locales.fallbackcollator
+ >>> collator = zope.i18n.locales.fallbackcollator.FallbackCollator(None)
+
+Now, we can pass the collator's key method to sort functions to sort
+strings in a slightly friendly way:
+
+ >>> sorted([u'Sam', u'sally', u'Abe', u'alice', u'Terry', u'tim'],
+ ... key=collator.key)
+ [u'Abe', u'alice', u'sally', u'Sam', u'Terry', u'tim']
+
+
+The collator has a very simple algorithm. It normalizes strings and
+then returns a tuple with the result of lower-casing the normalized
+string and the normalized string. We can see this by calling the key
+method, which converts unicode strings to collation keys:
+
+ >>> collator.key(u'Sam')
+ (u'sam', u'Sam')
+
+ >>> collator.key(u'\xc6\xf8a\u030a')
+ (u'\xe6\xf8\xe5', u'\xc6\xf8\xe5')
+
+There is also a cmp function for comparing strings:
+
+ >>> collator.cmp(u'Terry', u'sally')
+ 1
+
+
+ >>> collator.cmp(u'sally', u'Terry')
+ -1
+
+ >>> collator.cmp(u'terry', u'Terry')
+ 1
+
+ >>> collator.cmp(u'terry', u'terry')
+ 0
diff --git a/locales/tests/test_fallbackcollator.py b/locales/tests/test_fallbackcollator.py
new file mode 100644
index 0000000..071652d
--- /dev/null
+++ b/locales/tests/test_fallbackcollator.py
@@ -0,0 +1,25 @@
+##############################################################################
+#
+# Copyright (c) 2004 Zope Corporation and Contributors.
+# All Rights Reserved.
+#
+# This software is subject to the provisions of the Zope Public License,
+# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
+# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
+# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
+# FOR A PARTICULAR PURPOSE.
+#
+##############################################################################
+
+import unittest
+from zope.testing import doctest
+
+def test_suite():
+ return unittest.TestSuite((
+ doctest.DocFileSuite('../fallbackcollator.txt'),
+ ))
+
+if __name__ == '__main__':
+ unittest.main(defaultTest='test_suite')
+