Made babel work with the latest version of the CLDR (23)

author: Armin Ronacher <armin.ronacher@active-4.com> 2013-07-04 18:19:51 +0200
committer: Armin Ronacher <armin.ronacher@active-4.com> 2013-07-04 18:19:51 +0200
commit: e8168071afb3f68e1e366073ca73b10a2e2f0b93 (patch)
tree: e028f726887b3348ea18481842aacfeca2005770 /scripts
parent: 874e39cb47cbba9b458b7bfe28fd4a4fc4d91844 (diff)
download: babel-e8168071afb3f68e1e366073ca73b10a2e2f0b93.tar.gz
2 files changed, 43 insertions, 22 deletions
diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py
index 27e37ac..da4c1ef 100755
--- a/scripts/download_import_cldr.py
+++ b/scripts/download_import_cldr.py
@@ -9,9 +9,9 @@ import urllib
 import subprocess
 
 
-URL = 'http://unicode.org/Public/cldr/1.9.1/core.zip'
-FILENAME = 'core-1.9.1.zip'
-FILESUM = '1c506cd7a30bf5b4f3cbb8a5b382d96c'
+URL = 'http://unicode.org/Public/cldr/23/core.zip'
+FILENAME = 'core-23.zip'
+FILESUM = '800373f275df21bb2c569ddee2d05dd5'
 BLKSIZE = 131072
 
 
@@ -72,7 +72,7 @@ def main():
         print
     common_path = os.path.join(cldr_path, 'common')
 
-    if changed:
+    if changed or not os.path.isdir(common_path):
         if os.path.isdir(common_path):
             log('Deleting old CLDR checkout in \'%s\'', cldr_path)
             shutil.rmtree(common_path)
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index b550494..67f8265 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -103,6 +103,9 @@ def main():
                            '..', 'babel')
 
     sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
+    bcp47_timezone = parse(os.path.join(srcdir, 'bcp47', 'timezone.xml'))
+    sup_windows_zones = parse(os.path.join(srcdir, 'supplemental',
+                                           'windowsZones.xml'))
     sup = parse(sup_filename)
 
     # Import global data from the supplemental files
@@ -112,13 +115,27 @@ def main():
         territory_zones = global_data.setdefault('territory_zones', {})
         zone_aliases = global_data.setdefault('zone_aliases', {})
         zone_territories = global_data.setdefault('zone_territories', {})
-        for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
-            tzid = elem.attrib['type']
-            territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
-            zone_territories[tzid] = elem.attrib['territory']
-            if 'aliases' in elem.attrib:
-                for alias in elem.attrib['aliases'].split():
-                    zone_aliases[alias] = tzid
+
+        # create auxiliary zone->territory map from the windows zones (we don't
+        # set the 'zone_territories' map directly here, because there are some
+        # zone aliases listed and we defer the decision of which ones to choose
+        # to the 'bcp47' data)
+        _zone_territory_map = {}
+        for map_zone in sup_windows_zones.findall('.//windowsZones/mapTimezones/mapZone'):
+            for tzid in map_zone.attrib['type'].split():
+                _zone_territory_map[tzid] = map_zone.attrib['territory']
+
+        for key_elem in bcp47_timezone.findall('.//keyword/key'):
+            if key_elem.attrib['name'] == 'tz':
+                for elem in key_elem.findall('type'):
+                    aliases = elem.attrib['alias'].split()
+                    tzid = aliases.pop(0)
+                    territory = _zone_territory_map.get(tzid, '001')
+                    territory_zones.setdefault(territory, []).append(tzid)
+                    zone_territories[tzid] = territory
+                    for alias in aliases:
+                        zone_aliases[alias] = tzid
+                break
 
         # Import Metazone mapping
         meta_zones = global_data.setdefault('meta_zones', {})
@@ -273,6 +290,11 @@ def main():
                 zone_formats['fallback'] = unicode(elem.text) \
                     .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
                 break
+        for elem in tree.findall('.//timeZoneNames/fallbackRegionFormat'):
+            if 'draft' not in elem.attrib and 'alt' not in elem.attrib:
+                zone_formats['fallback_region'] = unicode(elem.text) \
+                    .replace('{0}', '%(0)s').replace('{1}', '%(1)s')
+                break
 
         time_zones = data.setdefault('time_zones', {})
         for elem in tree.findall('.//timeZoneNames/zone'):
@@ -380,16 +402,13 @@ def main():
 
             # AM/PM
             periods = data.setdefault('periods', {})
-            for elem in calendar.findall('am'):
-                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-                        and elem.tag in periods:
-                    continue
-                periods[elem.tag] = unicode(elem.text)
-            for elem in calendar.findall('pm'):
-                if ('draft' in elem.attrib or 'alt' in elem.attrib) \
-                        and elem.tag in periods:
-                    continue
-                periods[elem.tag] = unicode(elem.text)
+            for day_period_width in calendar.findall(
+                'dayPeriods/dayPeriodContext/dayPeriodWidth'):
+                if day_period_width.attrib['type'] == 'wide':
+                    for day_period in day_period_width.findall('dayPeriod'):
+                        if 'alt' not in day_period.attrib:
+                            periods[day_period.attrib['type']] = unicode(
+                                day_period.text)
 
             date_formats = data.setdefault('date_formats', {})
             for format in calendar.findall('dateFormats'):
@@ -455,7 +474,9 @@ def main():
             if ('draft' in elem.attrib or 'alt' in elem.attrib) \
                     and elem.attrib.get('type') in decimal_formats:
                 continue
-            pattern = unicode(elem.findtext('decimalFormat/pattern'))
+            pattern = unicode(elem.findtext('./decimalFormat/pattern'))
+            if pattern == 'None':
+                continue
             decimal_formats[elem.attrib.get('type')] = numbers.parse_pattern(pattern)
 
         scientific_formats = data.setdefault('scientific_formats', {})
author	Armin Ronacher <armin.ronacher@active-4.com>	2013-07-04 18:19:51 +0200
committer	Armin Ronacher <armin.ronacher@active-4.com>	2013-07-04 18:19:51 +0200
commit	e8168071afb3f68e1e366073ca73b10a2e2f0b93 (patch)
tree	e028f726887b3348ea18481842aacfeca2005770 /scripts
parent	874e39cb47cbba9b458b7bfe28fd4a4fc4d91844 (diff)
download	babel-e8168071afb3f68e1e366073ca73b10a2e2f0b93.tar.gz