summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christopher Lenz <cmlenz@gmail.com> 2007-08-01 12:32:20 +0000
committer Christopher Lenz <cmlenz@gmail.com> 2007-08-01 12:32:20 +0000
commit bdbfb31cd2eb8264cd94a011dd1229dccdeb1fac (patch)
tree 54b57de654fa35dc8d3e9fef68b899627a9f5f55
parent a7a5d86b1f6d60c64e1a150cd789b2985f09ac5c (diff)
download babel-bdbfb31cd2eb8264cd94a011dd1229dccdeb1fac.tar.gz
Upgraded to CLDR 1.5 and improved timezone formatting.
-rw-r--r-- ChangeLog 2
-rw-r--r-- MANIFEST.in 1
-rw-r--r-- babel/core.py 72
-rw-r--r-- babel/dates.py 311
-rw-r--r-- babel/localedata.py 2
-rw-r--r-- babel/numbers.py 4
-rw-r--r-- babel/tests/dates.py 28
-rw-r--r-- doc/dates.txt 4
-rwxr-xr-x scripts/dump_data.py 5
-rwxr-xr-x scripts/dump_global.py 33
-rwxr-xr-x scripts/import_cldr.py 70
-rwxr-xr-x setup.py 2
12 files changed, 454 insertions, 80 deletions
diff --git a/ChangeLog b/ChangeLog
index c9e05fb..c01652d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -21,6 +21,8 @@ http://svn.edgewall.org/repos/babel/tags/0.9.0/
for multi-line function calls, and other small fixes (tickets #38 and #39).
* Improved support for detecting Python string formatting fields in message
strings (ticket #57).
+ * CLDR upgraded to the 1.5 release.
+ * Improved timezone formatting.
Version 0.8.1
diff --git a/MANIFEST.in b/MANIFEST.in
index 4fa7a34..d11a949 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,4 @@
+include babel/global.dat
include babel/localedata/*.dat
include doc/api/*.*
include doc/*.html
diff --git a/babel/core.py b/babel/core.py
index f0cc0d1..7d530ff 100644
--- a/babel/core.py
+++ b/babel/core.py
@@ -14,6 +14,7 @@
"""Core locale representation and locale data access."""
import os
+import pickle
from babel import localedata
@@ -21,6 +22,33 @@ __all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
'parse_locale']
__docformat__ = 'restructuredtext en'
+_global_data = None
+
+def get_global(key):
+    """Look up one section of the locale-independent global data.
+
+    The data is stored as a pickle in the ``babel/global.dat`` file. That
+    file is read on the first call; the unpickled content is kept in the
+    module-level ``_global_data`` variable and reused by later calls.
+
+    >>> get_global('zone_aliases')['UTC']
+    'Etc/GMT'
+    >>> get_global('zone_territories')['Europe/Berlin']
+    'DE'
+
+    :param key: the name of the section to return
+    :return: the data stored under `key`, or an empty dictionary if the
+             global data has no such section
+    :since: version 0.9
+    """
+    global _global_data
+    if _global_data is not None:
+        return _global_data.get(key, {})
+
+    # First use: load the data file that sits next to this module
+    datafile = open(os.path.join(os.path.dirname(__file__), 'global.dat'),
+                    'rb')
+    try:
+        _global_data = pickle.load(datafile)
+    finally:
+        datafile.close()
+    return _global_data.get(key, {})
+
class UnknownLocaleError(Exception):
"""Exception thrown when a locale is requested for which no locale data
@@ -305,10 +333,10 @@ class Locale(object):
currency_symbols = property(currency_symbols, doc="""\
Mapping of currency codes to symbols.
- >>> Locale('en').currency_symbols['USD']
- u'US$'
>>> Locale('en', 'US').currency_symbols['USD']
u'$'
+ >>> Locale('es', 'CO').currency_symbols['USD']
+ u'US$'
:type: `dict`
""")
@@ -432,25 +460,41 @@ class Locale(object):
time_zones = property(time_zones, doc="""\
Locale display names for time zones.
- >>> Locale('en', 'US').time_zones['America/Los_Angeles']['long']['standard']
- u'Pacific Standard Time'
- >>> Locale('en', 'US').time_zones['Europe/Dublin']['city']
- u'Dublin'
+ >>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
+ u'British Summer Time'
+ >>> Locale('en', 'US').time_zones['America/St_Johns']['city']
+ u'St. John\u2019s'
+
+ :type: `dict`
+ """)
+
+ def meta_zones(self):
+ return self._data['meta_zones']
+ meta_zones = property(meta_zones, doc="""\
+ Locale display names for meta time zones.
+
+ Meta time zones are basically groups of different Olson time zones that
+ have the same GMT offset and daylight savings time.
+
+ >>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
+ u'Central European Summer Time'
:type: `dict`
+ :since: version 0.9
""")
- def zone_aliases(self):
- return self._data['zone_aliases']
- zone_aliases = property(zone_aliases, doc="""\
- Mapping of time zone aliases to their respective canonical identifer.
+ def zone_formats(self):
+ return self._data['zone_formats']
+ zone_formats = property(zone_formats, doc=r"""\
+ Patterns related to the formatting of time zones.
- >>> Locale('en').zone_aliases['UTC']
- 'Etc/GMT'
+ >>> Locale('en', 'US').zone_formats['fallback']
+ u'%(1)s (%(0)s)'
+ >>> Locale('pt', 'BR').zone_formats['region']
+ u'Hor\xe1rio %s'
:type: `dict`
- :note: this doesn't really belong here, as it does not change between
- locales
+ :since: version 0.9
""")
def first_week_day(self):
diff --git a/babel/dates.py b/babel/dates.py
index 5d6e246..f6a0298 100644
--- a/babel/dates.py
+++ b/babel/dates.py
@@ -24,11 +24,11 @@ following environment variables, in that order:
from datetime import date, datetime, time, timedelta, tzinfo
import re
-from babel.core import default_locale, Locale
+from babel.core import default_locale, get_global, Locale
from babel.util import UTC
-__all__ = ['format_date', 'format_datetime', 'format_time', 'parse_date',
- 'parse_datetime', 'parse_time']
+__all__ = ['format_date', 'format_datetime', 'format_time',
+ 'get_timezone_name', 'parse_date', 'parse_datetime', 'parse_time']
__docformat__ = 'restructuredtext en'
LC_TIME = default_locale('LC_TIME')
@@ -110,7 +110,7 @@ def get_era_names(width='wide', locale=LC_TIME):
>>> get_era_names('abbreviated', locale='de_DE')[1]
u'n. Chr.'
- :param width: the width to use, either "wide" or "abbreviated"
+ :param width: the width to use, either "wide", "abbreviated", or "narrow"
:param locale: the `Locale` object, or a locale string
:return: the dictionary of era names
:rtype: `dict`
@@ -159,7 +159,7 @@ def get_time_format(format='medium', locale=LC_TIME):
>>> get_time_format(locale='en_US')
<DateTimePattern u'h:mm:ss a'>
>>> get_time_format('full', locale='de_DE')
- <DateTimePattern u"H:mm' Uhr 'z">
+ <DateTimePattern u'HH:mm:ss v'>
:param format: the format to use, one of "full", "long", "medium", or
"short"
@@ -169,6 +169,249 @@ def get_time_format(format='medium', locale=LC_TIME):
"""
return Locale.parse(locale).time_formats[format]
+def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME):
+    """Return the timezone associated with the given `datetime` object formatted
+    as string indicating the offset from GMT.
+
+    >>> dt = datetime(2007, 4, 1, 15, 30)
+    >>> get_timezone_gmt(dt)
+    u'GMT+00:00'
+
+    >>> from pytz import timezone
+    >>> tz = timezone('America/Los_Angeles')
+    >>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
+    >>> get_timezone_gmt(dt)
+    u'GMT-08:00'
+    >>> get_timezone_gmt(dt, 'short')
+    u'-0800'
+
+    The long format depends on the locale, for example in France a different
+    string is used for GMT:
+
+    >>> get_timezone_gmt(dt, 'long', locale='fr_FR')
+    u'HMG-08:00'
+
+    :param datetime: the ``datetime`` object; if `None`, the current date and
+                     time are used; an integer is interpreted as a POSIX
+                     timestamp; a value without timezone information is
+                     treated as being in UTC
+    :param width: either "long" or "short"
+    :param locale: the `Locale` object, or a locale string
+    :return: the GMT offset representation of the timezone
+    :rtype: `unicode`
+    :since: version 0.9
+    """
+    # The ``datetime`` parameter shadows the class of the same name, so the
+    # class is imported under an alias for use inside this function.
+    from datetime import datetime as datetime_
+
+    if datetime is None:
+        datetime = datetime_.now()
+    elif isinstance(datetime, (int, long)):
+        datetime = datetime_.fromtimestamp(datetime).time()
+    if datetime.tzinfo is None:
+        datetime = datetime.replace(tzinfo=UTC)
+    locale = Locale.parse(locale)
+
+    offset = datetime.utcoffset()
+    seconds = offset.days * 24 * 60 * 60 + offset.seconds
+    # Split off the sign before dividing: ``divmod`` rounds towards negative
+    # infinity, which would render an offset of -03:30 as -04:30.
+    if seconds < 0:
+        sign = u'-'
+    else:
+        sign = u'+'
+    hours, seconds = divmod(abs(seconds), 3600)
+    if width == 'short':
+        pattern = u'%s%02d%02d'
+    else:
+        pattern = locale.zone_formats['gmt'] % '%s%02d:%02d'
+    return pattern % (sign, hours, seconds // 60)
+
+def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME):
+    """Return a representation of the given timezone using "location format".
+
+    The result depends on both the local display name of the country and the
+    city associated with the time zone:
+
+    >>> from pytz import timezone
+    >>> tz = timezone('America/St_Johns')
+    >>> get_timezone_location(tz, locale='de_DE')
+    u"Kanada (St. John's)"
+    >>> tz = timezone('America/Mexico_City')
+    >>> get_timezone_location(tz, locale='de_DE')
+    u'Mexiko (Mexiko-Stadt)'
+
+    If the timezone is associated with a country that uses only a single
+    timezone, just the localized country name is returned:
+
+    >>> tz = timezone('Europe/Berlin')
+    >>> get_timezone_location(tz, locale='de_DE')
+    u'Deutschland'
+
+    If no localized territory name can be determined for the timezone, only
+    the city name is used.
+
+    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
+                         the timezone; if `None`, the current date and time in
+                         UTC is assumed
+    :param locale: the `Locale` object, or a locale string
+    :return: the localized timezone name using location format
+    :rtype: `unicode`
+    :since: version 0.9
+    """
+    if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)):
+        dt = None
+        tzinfo = UTC
+    elif isinstance(dt_or_tzinfo, (datetime, time)):
+        dt = dt_or_tzinfo
+        if dt.tzinfo is not None:
+            tzinfo = dt.tzinfo
+        else:
+            tzinfo = UTC
+    else:
+        dt = None
+        tzinfo = dt_or_tzinfo
+    locale = Locale.parse(locale)
+
+    if hasattr(tzinfo, 'zone'):
+        zone = tzinfo.zone
+    else:
+        zone = tzinfo.tzname(dt or datetime.utcnow())
+
+    # Get the canonical time-zone code
+    zone = get_global('zone_aliases').get(zone, zone)
+
+    metainfo = {}
+    info = locale.time_zones.get(zone, {})
+    if 'use_metazone' in info:
+        metainfo = locale.meta_zones.get(info['use_metazone'], {})
+
+    region_format = locale.zone_formats['region']
+    # The zone may be missing from the global data (for example when a custom
+    # ``tzinfo`` reports a name that is not a known zone identifier), and the
+    # locale may lack a name for the territory; both cases yield `None` here
+    # instead of raising a `KeyError`.
+    territory = get_global('zone_territories').get(zone)
+    territory_name = locale.territories.get(territory)
+
+    # If there is only one timezone for the country, return the localized
+    # country name
+    if territory_name is not None \
+            and len(get_global('territory_zones').get(territory, [])) == 1:
+        return region_format % (territory_name)
+
+    # Otherwise, include the city in the output
+    fallback_format = locale.zone_formats['fallback']
+    if 'city' in info:
+        city_name = info['city']
+    elif 'city' in metainfo:
+        city_name = metainfo['city']
+    elif '/' in zone:
+        city_name = zone.split('/', 1)[1].replace('_', ' ')
+    else:
+        city_name = zone.replace('_', ' ')
+
+    # Without a territory name there is nothing to combine the city with
+    if territory_name is None:
+        return region_format % (city_name)
+
+    return region_format % (fallback_format % {
+        '0': city_name,
+        '1': territory_name
+    })
+
+def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False,
+                      locale=LC_TIME):
+    r"""Return the localized display name for the given timezone. The timezone
+    may be specified using a ``datetime`` or `tzinfo` object.
+
+    >>> from pytz import timezone
+    >>> dt = time(15, 30, tzinfo=timezone('America/Los_Angeles'))
+    >>> get_timezone_name(dt, locale='en_US')
+    u'Pacific Standard Time'
+    >>> get_timezone_name(dt, width='short', locale='en_US')
+    u'PST'
+
+    If this function gets passed only a `tzinfo` object and no concrete
+    `datetime`, the returned display name is independent of daylight savings
+    time. This can be used for example for selecting timezones, or to set the
+    time of events that recur across DST changes:
+
+    >>> tz = timezone('America/Los_Angeles')
+    >>> get_timezone_name(tz, locale='en_US')
+    u'Pacific Time'
+    >>> get_timezone_name(tz, 'short', locale='en_US')
+    u'PT'
+
+    If no localized display name for the timezone is available, and the timezone
+    is associated with a country that uses only a single timezone, the name of
+    that country is returned, formatted according to the locale:
+
+    >>> tz = timezone('Europe/Berlin')
+    >>> get_timezone_name(tz, locale='de_DE')
+    u'Deutschland'
+    >>> get_timezone_name(tz, locale='pt_BR')
+    u'Hor\xe1rio Alemanha'
+
+    On the other hand, if the country uses multiple timezones, the city is also
+    included in the representation:
+
+    >>> tz = timezone('America/St_Johns')
+    >>> get_timezone_name(tz, locale='de_DE')
+    u"Kanada (St. John's)"
+
+    The `uncommon` parameter can be set to `True` to enable the use of timezone
+    representations that are not commonly used by the requested locale. For
+    example, while in French the Central European timezone is usually
+    abbreviated as "HEC", in Canadian French this abbreviation is not in common
+    use, so a generic name would be chosen by default:
+
+    >>> tz = timezone('Europe/Paris')
+    >>> get_timezone_name(tz, 'short', locale='fr_CA')
+    u'France'
+    >>> get_timezone_name(tz, 'short', uncommon=True, locale='fr_CA')
+    u'HEC'
+
+    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
+                         the timezone; if a ``tzinfo`` object is used, the
+                         resulting display name will be generic, i.e.
+                         independent of daylight savings time; if `None`, the
+                         current date in UTC is assumed
+    :param width: either "long" or "short"
+    :param uncommon: whether even uncommon timezone abbreviations should be used
+    :param locale: the `Locale` object, or a locale string
+    :return: the timezone display name
+    :rtype: `unicode`
+    :since: version 0.9
+    :see: `LDML Appendix J: Time Zone Display Names
+          <http://www.unicode.org/reports/tr35/#Time_Zone_Fallback>`_
+    """
+    if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)):
+        dt = None
+        tzinfo = UTC
+    elif isinstance(dt_or_tzinfo, (datetime, time)):
+        dt = dt_or_tzinfo
+        if dt.tzinfo is not None:
+            tzinfo = dt.tzinfo
+        else:
+            tzinfo = UTC
+    else:
+        dt = None
+        tzinfo = dt_or_tzinfo
+    locale = Locale.parse(locale)
+
+    if hasattr(tzinfo, 'zone'):
+        zone = tzinfo.zone
+    else:
+        zone = tzinfo.tzname(dt or datetime.utcnow())
+
+    # Get the canonical time-zone code
+    zone = get_global('zone_aliases').get(zone, zone)
+
+    metainfo = {}
+    info = locale.time_zones.get(zone, {})
+    # Try explicitly translated zone names first
+    if width in info:
+        if dt is None:
+            field = 'generic'
+        else:
+            field = tzinfo.dst(dt) and 'daylight' or 'standard'
+        if field in info[width]:
+            return info[width][field]
+
+    # Next, try the names of the meta zone the zone belongs to; names that are
+    # not marked as commonly used are only considered on request
+    if 'use_metazone' in info:
+        metainfo = locale.meta_zones.get(info['use_metazone'], {})
+        if width in metainfo and (uncommon or metainfo.get('common')):
+            if dt is None:
+                field = 'generic'
+            else:
+                field = tzinfo.dst(dt) and 'daylight' or 'standard'
+            if field in metainfo[width]:
+                return metainfo[width][field]
+
+    # If we have a concrete datetime, we assume that the result can't be
+    # independent of daylight savings time, so we return the GMT offset
+    if dt is not None:
+        # Pass the value itself, not the ``time`` class imported by the module
+        return get_timezone_gmt(dt, width=width, locale=locale)
+
+    return get_timezone_location(dt_or_tzinfo, locale=locale)
+
def format_date(date=None, format='medium', locale=LC_TIME):
"""Return a date formatted according to the given pattern.
@@ -219,9 +462,9 @@ def format_datetime(datetime=None, format='medium', tzinfo=None,
``pytz`` package is needed to explicitly specify the time-zone:
>>> from pytz import timezone
- >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Berlin'),
- ... locale='de_DE')
- u'Sonntag, 1. April 2007 17:30 Uhr MESZ'
+ >>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Paris'),
+ ... locale='fr_FR')
+ u'dimanche 1 avril 2007 17:30:00 HEC'
>>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
... tzinfo=timezone('US/Eastern'), locale='en')
u'2007.04.01 AD at 11:30:00 EDT'
@@ -276,9 +519,9 @@ def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME):
>>> from pytz import timezone
>>> t = time(15, 30)
- >>> format_time(t, format='full', tzinfo=timezone('Europe/Berlin'),
- ... locale='de_DE')
- u'17:30 Uhr MESZ'
+ >>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'),
+ ... locale='fr_FR')
+ u'17:30:00 HEC'
>>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'),
... locale='en')
u"11 o'clock AM, Eastern Daylight Time"
@@ -474,7 +717,7 @@ class DateTimeFormat(object):
return self.format_frac_seconds(num)
elif char == 'A':
return self.format_milliseconds_in_day(num)
- elif char in ('z', 'Z', 'v'):
+ elif char in ('z', 'Z', 'v', 'V'):
return self.format_timezone(char, num)
else:
raise KeyError('Unsupported date/time field %r' % char)
@@ -538,37 +781,19 @@ class DateTimeFormat(object):
return self.format(msecs, num)
def format_timezone(self, char, num):
- if char in ('z', 'v'):
- if hasattr(self.value.tzinfo, 'zone'):
- zone = self.value.tzinfo.zone
- else:
- zone = self.value.tzinfo.tzname(self.value)
-
- # Get the canonical time-zone code
- zone = self.locale.zone_aliases.get(zone, zone)
-
- # Try explicitly translated zone names first
- display = self.locale.time_zones.get(zone)
- if display:
- if 'long' in display:
- width = {3: 'short', 4: 'long'}[max(3, num)]
- if char == 'v':
- dst = 'generic'
- else:
- dst = self.value.dst() and 'daylight' or 'standard'
- return display[width][dst]
- elif 'city' in display:
- return display['city']
-
- else:
- return zone.split('/', 1)[1]
-
+ """Render the timezone field `char` for a field length of `num`.
+
+ ``z`` yields the zone name for the concrete value, ``Z`` the GMT
+ offset, ``v`` the generic zone name, and ``V`` either the zone name
+ including uncommon abbreviations (one letter) or the location format.
+ """
+ # Field lengths of up to three letters select the short width
+ width = {3: 'short', 4: 'long'}[max(3, num)]
+ if char == 'z':
+ return get_timezone_name(self.value, width, locale=self.locale)
elif char == 'Z':
- offset = self.value.utcoffset()
- seconds = offset.days * 24 * 60 * 60 + offset.seconds
- hours, seconds = divmod(seconds, 3600)
- pattern = {3: '%+03d%02d', 4: 'GMT %+03d:%02d'}[max(3, num)]
- return pattern % (hours, seconds // 60)
+ # Pass the formatter's locale so that the localized GMT pattern is
+ # used instead of the one of the process default locale
+ return get_timezone_gmt(self.value, width, locale=self.locale)
+ elif char == 'v':
+ return get_timezone_name(self.value.tzinfo, width,
+ locale=self.locale)
+ elif char == 'V':
+ if num == 1:
+ return get_timezone_name(self.value.tzinfo, width,
+ uncommon=True, locale=self.locale)
+ return get_timezone_location(self.value.tzinfo, locale=self.locale)
def format(self, value, length):
return ('%%0%dd' % length) % value
@@ -586,7 +811,7 @@ PATTERN_CHARS = {
'h': [1, 2], 'H': [1, 2], 'K': [1, 2], 'k': [1, 2], # hour
'm': [1, 2], # minute
's': [1, 2], 'S': None, 'A': None, # second
- 'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4] # zone
+ 'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4], 'V': [1, 4] # zone
}
def parse_pattern(pattern):
diff --git a/babel/localedata.py b/babel/localedata.py
index b32690c..40ac6aa 100644
--- a/babel/localedata.py
+++ b/babel/localedata.py
@@ -115,7 +115,7 @@ def merge(dict1, dict2):
:param dict2: the dictionary containing the data that should be merged
"""
for key, value in dict2.items():
- if value:
+ if value is not None:
if type(value) is dict:
dict1[key] = dict1.get(key, {}).copy()
merge(dict1[key], value)
diff --git a/babel/numbers.py b/babel/numbers.py
index 767c241..fde47cf 100644
--- a/babel/numbers.py
+++ b/babel/numbers.py
@@ -127,7 +127,7 @@ def format_currency(number, currency, format=None, locale=LC_NUMERIC):
>>> format_currency(1099.98, 'USD', locale='en_US')
u'$1,099.98'
>>> format_currency(1099.98, 'USD', locale='es_CO')
- u'US$1.099,98'
+ u'US$ 1.099,98'
>>> format_currency(1099.98, 'EUR', locale='de_DE')
u'1.099,98 \\u20ac'
@@ -156,7 +156,7 @@ def format_percent(number, format=None, locale=LC_NUMERIC):
>>> format_percent(25.1234, locale='en_US')
u'2,512%'
>>> format_percent(25.1234, locale='sv_SE')
- u'2\\xa0512 %'
+ u'2\\xa0512\\xa0%'
The format pattern can also be specified explicitly:
diff --git a/babel/tests/dates.py b/babel/tests/dates.py
index 4dfdfe7..87e8e9a 100644
--- a/babel/tests/dates.py
+++ b/babel/tests/dates.py
@@ -111,19 +111,37 @@ class DateTimeFormatTestCase(unittest.TestCase):
tz = timezone('Europe/Berlin')
t = time(15, 30, tzinfo=tz)
fmt = dates.DateTimeFormat(t, locale='de_DE')
- self.assertEqual('GMT +01:00', fmt['ZZZZ'])
+ self.assertEqual('GMT+01:00', fmt['ZZZZ'])
+
+    def test_timezone_no_uncommon(self):
+        # The ``v`` field must not use abbreviations that are uncommon in the
+        # locale, so the country name is expected for fr_CA.
+        paris = timezone('Europe/Paris')
+        moment = datetime(2007, 4, 1, 15, 30, tzinfo=paris)
+        formatter = dates.DateTimeFormat(moment, locale='fr_CA')
+        self.assertEqual('France', formatter['v'])
+
+    def test_timezone_with_uncommon(self):
+        # The ``V`` field allows abbreviations that are uncommon in the
+        # locale, so "HEC" is expected even for fr_CA.
+        paris = timezone('Europe/Paris')
+        moment = datetime(2007, 4, 1, 15, 30, tzinfo=paris)
+        formatter = dates.DateTimeFormat(moment, locale='fr_CA')
+        self.assertEqual('HEC', formatter['V'])
+
+    def test_timezone_location_format(self):
+        # Four ``V`` letters select the location format of the timezone.
+        paris = timezone('Europe/Paris')
+        moment = datetime(2007, 4, 1, 15, 30, tzinfo=paris)
+        formatter = dates.DateTimeFormat(moment, locale='fr_FR')
+        self.assertEqual('France', formatter['VVVV'])
def test_timezone_walltime_short(self):
tz = timezone('Europe/Paris')
t = time(15, 30, tzinfo=tz)
- fmt = dates.DateTimeFormat(t, locale='en_US')
- self.assertEqual('CET', fmt['v'])
+ fmt = dates.DateTimeFormat(t, locale='fr_FR')
+ self.assertEqual('HEC', fmt['v'])
def test_timezone_walltime_long(self):
tz = timezone('Europe/Paris')
t = time(15, 30, tzinfo=tz)
- fmt = dates.DateTimeFormat(t, locale='en_US')
- self.assertEqual('Central European Time', fmt['vvvv'])
+ fmt = dates.DateTimeFormat(t, locale='fr_FR')
+ self.assertEqual(u'Heure de l’Europe centrale', fmt['vvvv'])
class FormatDateTestCase(unittest.TestCase):
diff --git a/doc/dates.txt b/doc/dates.txt
index 1d82320..6ed88e6 100644
--- a/doc/dates.txt
+++ b/doc/dates.txt
@@ -190,6 +190,10 @@ Time Fields
| +--------+--------------------------------------------------------+
| | ``v`` | Use one letter for short wall (generic) time, four for |
| | | long wall time. |
+ | +--------+--------------------------------------------------------+
+ | | ``V`` | Same as ``z``, except that timezone abbreviations |
+ | | | should be used regardless of whether they are in |
+ | | | common use by the locale. |
+----------+--------+--------------------------------------------------------+
diff --git a/scripts/dump_data.py b/scripts/dump_data.py
index 67c1396..efa6d0f 100755
--- a/scripts/dump_data.py
+++ b/scripts/dump_data.py
@@ -17,4 +17,7 @@ import sys
from babel.localedata import load
-pprint(load(sys.argv[1]))
+if len(sys.argv) > 2:
+ pprint(load(sys.argv[1]).get(sys.argv[2]))
+else:
+ pprint(load(sys.argv[1]))
diff --git a/scripts/dump_global.py b/scripts/dump_global.py
new file mode 100755
index 0000000..db722f5
--- /dev/null
+++ b/scripts/dump_global.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://babel.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://babel.edgewall.org/log/.
+
+import os
+import pickle
+from pprint import pprint
+import sys
+
+import babel
+
+# Read the pickled global data file from the directory of the babel package.
+datapath = os.path.join(os.path.dirname(babel.__file__), 'global.dat')
+stream = open(datapath, 'rb')
+try:
+    global_data = pickle.load(stream)
+finally:
+    stream.close()
+
+# Print a single section if its key was given on the command line, and the
+# complete data otherwise.
+keys = sys.argv[1:]
+if not keys:
+    pprint(global_data)
+else:
+    pprint(global_data.get(keys[0]))
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 8f3bed4..26d58e0 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -51,10 +51,30 @@ def main():
srcdir = args[0]
destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
- '..', 'babel', 'localedata')
+ '..', 'babel')
sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
+ # import global data from the supplemental files
+ global_data = {}
+
+ territory_zones = global_data.setdefault('territory_zones', {})
+ zone_aliases = global_data.setdefault('zone_aliases', {})
+ zone_territories = global_data.setdefault('zone_territories', {})
+ for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
+ tzid = elem.attrib['type']
+ territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
+ zone_territories[tzid] = elem.attrib['territory']
+ if 'aliases' in elem.attrib:
+ for alias in elem.attrib['aliases'].split():
+ zone_aliases[alias] = tzid
+
+ outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
+ try:
+ pickle.dump(global_data, outfile, 2)
+ finally:
+ outfile.close()
+
# build a territory containment mapping for inheritance
regions = {}
for elem in sup.findall('//territoryContainment/group'):
@@ -76,8 +96,6 @@ def main():
filenames.sort(lambda a,b: len(a)-len(b))
filenames.insert(0, 'root.xml')
- dicts = {}
-
for filename in filenames:
print>>sys.stderr, 'Processing input file %r' % filename
stem, ext = os.path.splitext(filename)
@@ -154,6 +172,21 @@ def main():
if territory in territories or any([r in territories for r in regions]):
week_data['weekend_end'] = weekdays[elem.attrib['day']]
+ zone_formats = data.setdefault('zone_formats', {})
+ for elem in tree.findall('//timeZoneNames/gmtFormat'):
+ if 'draft' not in elem.attrib:
+ zone_formats['gmt'] = unicode(elem.text).replace('{0}', '%s')
+ break
+ for elem in tree.findall('//timeZoneNames/regionFormat'):
+ if 'draft' not in elem.attrib:
+ zone_formats['region'] = unicode(elem.text).replace('{0}', '%s')
+ break
+ for elem in tree.findall('//timeZoneNames/fallbackFormat'):
+ if 'draft' not in elem.attrib:
+ zone_formats['fallback'] = unicode(elem.text) \
+ .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+ break
+
time_zones = data.setdefault('time_zones', {})
for elem in tree.findall('//timeZoneNames/zone'):
info = {}
@@ -164,15 +197,23 @@ def main():
info.setdefault('long', {})[child.tag] = unicode(child.text)
for child in elem.findall('short/*'):
info.setdefault('short', {})[child.tag] = unicode(child.text)
+ for child in elem.findall('usesMetazone'):
+ if 'to' not in child.attrib: # FIXME: support old mappings
+ info['use_metazone'] = child.attrib['mzone']
time_zones[elem.attrib['type']] = info
- zone_aliases = data.setdefault('zone_aliases', {})
- if stem == 'root':
- for elem in sup.findall('//timezoneData/zoneFormatting/zoneItem'):
- if 'aliases' in elem.attrib:
- canonical_id = elem.attrib['type']
- for alias in elem.attrib['aliases'].split():
- zone_aliases[alias] = canonical_id
+ meta_zones = data.setdefault('meta_zones', {})
+ for elem in tree.findall('//timeZoneNames/metazone'):
+ info = {}
+ city = elem.findtext('exemplarCity')
+ if city:
+ info['city'] = unicode(city)
+ for child in elem.findall('long/*'):
+ info.setdefault('long', {})[child.tag] = unicode(child.text)
+ for child in elem.findall('short/*'):
+ info.setdefault('short', {})[child.tag] = unicode(child.text)
+ info['common'] = elem.findtext('commonlyUsed') == 'true'
+ meta_zones[elem.attrib['type']] = info
for calendar in tree.findall('//calendars/calendar'):
if calendar.attrib['type'] != 'gregorian':
@@ -212,7 +253,11 @@ def main():
eras = data.setdefault('eras', {})
for width in calendar.findall('eras/*'):
- ewidth = {'eraNames': 'wide', 'eraAbbr': 'abbreviated'}[width.tag]
+ ewidth = {
+ 'eraAbbr': 'abbreviated',
+ 'eraNames': 'wide',
+ 'eraNarrow': 'narrow',
+ }[width.tag]
widths = eras.setdefault(ewidth, {})
for elem in width.findall('era'):
if 'draft' in elem.attrib and int(elem.attrib['type']) in widths:
@@ -304,8 +349,7 @@ def main():
if symbol:
currency_symbols[elem.attrib['type']] = unicode(symbol)
- dicts[stem] = data
- outfile = open(os.path.join(destdir, stem + '.dat'), 'wb')
+ outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
try:
pickle.dump(data, outfile, 2)
finally:
diff --git a/setup.py b/setup.py
index 4ef5fcf..fd4a120 100755
--- a/setup.py
+++ b/setup.py
@@ -130,7 +130,7 @@ setup(
'Topic :: Software Development :: Libraries :: Python Modules',
],
packages = ['babel', 'babel.messages'],
- package_data = {'babel': ['localedata/*.dat']},
+ package_data = {'babel': ['global.dat', 'localedata/*.dat']},
test_suite = 'babel.tests.suite',
entry_points = """