diff options
author | Jim Fulton <jim@zope.com> | 2005-12-07 00:50:04 +0000 |
---|---|---|
committer | Jim Fulton <jim@zope.com> | 2005-12-07 00:50:04 +0000 |
commit | 1c68a055132756af3f6100dbf62a1cf41b7387f8 (patch) | |
tree | daee3e44e3bac3776b068945af2a13ea17b7c9c5 | |
parent | 68bc0cce74b1a3933f458cb385c19be1f22b64ef (diff) | |
parent | 1ce588f06a2f35ac13cdc8c446ac816ec62c9a47 (diff) | |
download | zope-i18n-1c68a055132756af3f6100dbf62a1cf41b7387f8.tar.gz |
Added an API for collating text and a fallback implementation.
(Apps that really care will probably use an ICU-based adapter that
we will provide soonish. Watch the zope.ucol project.)
-rw-r--r-- | interfaces/locales.py | 19 | ||||
-rw-r--r-- | locales/fallbackcollator.py | 33 | ||||
-rw-r--r-- | locales/fallbackcollator.txt | 63 | ||||
-rw-r--r-- | locales/tests/test_fallbackcollator.py | 25 |
4 files changed, 140 insertions, 0 deletions
diff --git a/interfaces/locales.py b/interfaces/locales.py index 7ecad7c..c0f77d1 100644 --- a/interfaces/locales.py +++ b/interfaces/locales.py @@ -618,3 +618,22 @@ class IDictionaryInheritance(ILocaleInheritance): If a key is not found or is None, the next higher up Locale object is consulted. """ + +class ICollator(Interface): + """Provide support for collating text strings + + This interface will typically be provided by adapting a locale. + """ + + def key(text): + """Return a collation key for the given text. + """ + + def cmp(text1, text2): + """Compare two text strings. + + The return value is negative if text1 < text2, 0 if they are + equal, and positive if text1 > text2. + """ + + diff --git a/locales/fallbackcollator.py b/locales/fallbackcollator.py new file mode 100644 index 0000000..ab1ddda --- /dev/null +++ b/locales/fallbackcollator.py @@ -0,0 +1,33 @@ +############################################################################## +# +# Copyright (c) 2004 Zope Corporation and Contributors. +# All Rights Reserved. +# +# This software is subject to the provisions of the Zope Public License, +# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. +# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED +# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS +# FOR A PARTICULAR PURPOSE. 
+# +############################################################################## +"""Fallback collator + +$Id$ +""" + +from unicodedata import normalize + +class FallbackCollator: + + def __init__(self, locale): + pass + + def key(self, s): + s = normalize('NFKC', s) + return s.lower(), s + + def cmp(self, s1, s2): + return cmp(self.key(s1), self.key(s2)) + + diff --git a/locales/fallbackcollator.txt b/locales/fallbackcollator.txt new file mode 100644 index 0000000..9457be2 --- /dev/null +++ b/locales/fallbackcollator.txt @@ -0,0 +1,63 @@ +Fallback Collator +================= + +The zope.i18n.interfaces.locales.ICollator interface defines an API +for collating text. Why is this important? Simply sorting unicode +strings doesn't provide an ordering that users in a given locale will +find useful. Various languages have text sorting conventions that +don't agree with the ordering of unicode code points. (This is even +true for English. :) + +Text collation is a fairly involved process. Systems that need this, +will likely use something like ICU +(http://www-306.ibm.com/software/globalization/icu, +http://pyicu.osafoundation.org/). We don't want to introduce a +dependency on ICU at this time, so we are providing a fallback +collator that: + +- Provides an implementation of the ICollator interface that can be + used for development, and + +- Provides a small amount of value, at least for English speakers. :) + +Application code should obtain a collator by adapting a locale to +ICollator. Here we just call the collator factory with None. The +fallback collator doesn't actually use the locale, although +application code should certainly *not* count on this. + + >>> import zope.i18n.locales.fallbackcollator + >>> collator = zope.i18n.locales.fallbackcollator.FallbackCollator(None) + +Now, we can pass the collator's key method to sort functions to sort +strings in a slightly friendly way: + + >>> sorted([u'Sam', u'sally', u'Abe', u'alice', u'Terry', u'tim'], + ... 
key=collator.key) + [u'Abe', u'alice', u'sally', u'Sam', u'Terry', u'tim'] + + +The collator has a very simple algorithm. It normalizes strings and +then returns a tuple with the result of lower-casing the normalized +string and the normalized string. We can see this by calling the key +method, which converts unicode strings to collation keys: + + >>> collator.key(u'Sam') + (u'sam', u'Sam') + + >>> collator.key(u'\xc6\xf8a\u030a') + (u'\xe6\xf8\xe5', u'\xc6\xf8\xe5') + +There is also a cmp function for comparing strings: + + >>> collator.cmp(u'Terry', u'sally') + 1 + + + >>> collator.cmp(u'sally', u'Terry') + -1 + + >>> collator.cmp(u'terry', u'Terry') + 1 + + >>> collator.cmp(u'terry', u'terry') + 0 diff --git a/locales/tests/test_fallbackcollator.py b/locales/tests/test_fallbackcollator.py new file mode 100644 index 0000000..071652d --- /dev/null +++ b/locales/tests/test_fallbackcollator.py @@ -0,0 +1,25 @@ +############################################################################## +# +# Copyright (c) 2004 Zope Corporation and Contributors. +# All Rights Reserved. +# +# This software is subject to the provisions of the Zope Public License, +# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. +# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED +# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS +# FOR A PARTICULAR PURPOSE. +# +############################################################################## + +import unittest +from zope.testing import doctest + +def test_suite(): + return unittest.TestSuite(( + doctest.DocFileSuite('../fallbackcollator.txt'), + )) + +if __name__ == '__main__': + unittest.main(defaultTest='test_suite') + |