-rw-r--r--   Makefile                         |  12
-rw-r--r--   babel/global.dat                 | bin 16203 -> 15987 bytes
-rwxr-xr-x   scripts/download_import_cldr.py  |  75
-rwxr-xr-x   scripts/import_cldr.py           | 108
4 files changed, 136 insertions, 59 deletions
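
The patch below wires CLDR acquisition into the Makefile (`make test` now depends on a new `import-cldr` target) and teaches both scripts to skip work that is already up to date. As a rough sketch of the resulting workflow, not part of the patch itself and assuming it is run from the repository root, the `import-cldr` target amounts to:

    import subprocess
    import sys

    # download_import_cldr.py verifies core-1.9.1.zip against the expected
    # MD5 checksum, re-downloads it on mismatch, re-extracts it only when it
    # changed, and finally invokes scripts/import_cldr.py to regenerate the
    # pickled locale data.
    subprocess.check_call([sys.executable, 'scripts/download_import_cldr.py'])
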
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,14 @@
-test:
-	python setup.py test
+test: import-cldr
+	@python setup.py test
+
+import-cldr:
+	@./scripts/download_import_cldr.py
+
+clean-cldr:
+	@rm babel/localedata/*.dat
 
 develop:
-	pip install --editable .
+	@pip install --editable .
 
 tox-test:
 	@tox
diff --git a/babel/global.dat b/babel/global.dat
index 49db8db..4eb6099 100644
--- a/babel/global.dat
+++ b/babel/global.dat
Binary files differ
diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py
index 3b21117..a3e5d79 100755
--- a/scripts/download_import_cldr.py
+++ b/scripts/download_import_cldr.py
@@ -3,6 +3,7 @@
 import os
 import sys
 import shutil
+import hashlib
 import zipfile
 import urllib
 import subprocess
@@ -10,32 +11,76 @@ import subprocess
 
 URL = 'http://unicode.org/Public/cldr/1.9.1/core.zip'
 FILENAME = 'core-1.9.1.zip'
+FILESUM = '1c506cd7a30bf5b4f3cbb8a5b382d96c'
 BLKSIZE = 131072
 
 
+def get_terminal_width():
+    import fcntl
+    import termios
+    import struct
+    fd = sys.stdin.fileno()
+    cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
+    return cr[1]
+
+
+def reporthook(block_count, block_size, total_size):
+    bytes_transmitted = block_count * block_size
+    cols = get_terminal_width()
+    buffer = 6
+    percent = float(bytes_transmitted) / (total_size or 1)
+    done = int(percent * (cols - buffer))
+    sys.stdout.write('\r')
+    sys.stdout.write(' ' + '=' * done + ' ' * (cols - done - buffer))
+    sys.stdout.write('% 4d%%' % (percent * 100))
+    sys.stdout.flush()
+
+
+def log(message, *args):
+    if args:
+        message = message % args
+    print >> sys.stderr, message
+
+
+def is_good_file(filename):
+    if not os.path.isfile(filename):
+        log('\'%s\' not found', filename)
+        return False
+    h = hashlib.md5()
+    with open(filename, 'rb') as f:
+        while 1:
+            blk = f.read(BLKSIZE)
+            if not blk:
+                break
+            h.update(blk)
+    return h.hexdigest() == FILESUM
+
+
 def main():
     scripts_path = os.path.dirname(os.path.abspath(__file__))
     repo = os.path.dirname(scripts_path)
     cldr_path = os.path.join(repo, 'cldr')
     zip_path = os.path.join(cldr_path, FILENAME)
+    changed = False
 
-    if not os.path.isfile(zip_path):
-        with open(zip_path, 'wb') as f:
-            conn = urllib.urlopen(URL)
-            while True:
-                buf = conn.read(BLKSIZE)
-                if not buf:
-                    break
-                f.write(buf)
-            conn.close()
-
+    while not is_good_file(zip_path):
+        log('Downloading \'%s\'', FILENAME)
+        if os.path.isfile(zip_path):
+            os.remove(zip_path)
+        urllib.urlretrieve(URL, zip_path, reporthook)
+        changed = True
+        print
 
     common_path = os.path.join(cldr_path, 'common')
-    if os.path.isdir(common_path):
-        shutil.rmtree(common_path)
 
-    z = zipfile.ZipFile(zip_path)
-    z.extractall(cldr_path)
-    z.close()
+    if changed:
+        if os.path.isdir(common_path):
+            log('Deleting old CLDR checkout in \'%s\'', cldr_path)
+            shutil.rmtree(common_path)
+
+        log('Extracting CLDR to \'%s\'', cldr_path)
+        z = zipfile.ZipFile(zip_path)
+        z.extractall(cldr_path)
+        z.close()
 
     subprocess.check_call([
         sys.executable,
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 595556b..0717f22 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -17,12 +17,7 @@ from optparse import OptionParser
 import os
 import re
 import sys
-# don't put the ElementTree import in babel/compat.py as this will add a new
-# dependency (elementtree) for Python 2.4 users.
-try:
-    from xml.etree import ElementTree
-except ImportError:
-    from elementtree import ElementTree
+from xml.etree import ElementTree
 
 # Make sure we're using Babel source, and not some previously installed version
 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
@@ -57,6 +52,30 @@ NAME_MAP = {
     'timeFormats': 'time_formats'
 }
 
+def log(message, *args):
+    if args:
+        message = message % args
+    print >> sys.stderr, message
+
+
+def error(message, *args):
+    log('ERROR: %s' % message, args)
+
+
+def need_conversion(dst_filename, data_dict, source_filename):
+    with open(source_filename, 'rb') as f:
+        blob = f.read(4096)
+        version = int(re.search(r'version number="\$Revision: (\d+)', blob).group(1))
+
+    data_dict['_version'] = version
+    if not os.path.isfile(dst_filename):
+        return True
+
+    with open(dst_filename, 'rb') as f:
+        data = pickle.load(f)
+        return data.get('_version') != version
+
+
 def _translate_alias(ctxt, path):
     parts = path.split('/')
     keys = ctxt[:]
@@ -83,35 +102,37 @@ def main():
     destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                            '..', 'babel')
 
-    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
+    sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
+    sup = parse(sup_filename)
 
     # Import global data from the supplemental files
+    global_path = os.path.join(destdir, 'global.dat')
     global_data = {}
-
-    territory_zones = global_data.setdefault('territory_zones', {})
-    zone_aliases = global_data.setdefault('zone_aliases', {})
-    zone_territories = global_data.setdefault('zone_territories', {})
-    for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
-        tzid = elem.attrib['type']
-        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
-        zone_territories[tzid] = elem.attrib['territory']
-        if 'aliases' in elem.attrib:
-            for alias in elem.attrib['aliases'].split():
-                zone_aliases[alias] = tzid
-
-    # Import Metazone mapping
-    meta_zones = global_data.setdefault('meta_zones', {})
-    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
-    for elem in tzsup.findall('.//timezone'):
-        for child in elem.findall('usesMetazone'):
-            if 'to' not in child.attrib: # FIXME: support old mappings
-                meta_zones[elem.attrib['type']] = child.attrib['mzone']
-
-    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
-    try:
-        pickle.dump(global_data, outfile, 2)
-    finally:
-        outfile.close()
+    if need_conversion(global_path, global_data, sup_filename):
+        territory_zones = global_data.setdefault('territory_zones', {})
+        zone_aliases = global_data.setdefault('zone_aliases', {})
+        zone_territories = global_data.setdefault('zone_territories', {})
+        for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
+            tzid = elem.attrib['type']
+            territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
+            zone_territories[tzid] = elem.attrib['territory']
+            if 'aliases' in elem.attrib:
+                for alias in elem.attrib['aliases'].split():
+                    zone_aliases[alias] = tzid
+
+        # Import Metazone mapping
+        meta_zones = global_data.setdefault('meta_zones', {})
+        tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
+        for elem in tzsup.findall('.//timezone'):
+            for child in elem.findall('usesMetazone'):
+                if 'to' not in child.attrib: # FIXME: support old mappings
+                    meta_zones[elem.attrib['type']] = child.attrib['mzone']
+
+        outfile = open(global_path, 'wb')
+        try:
+            pickle.dump(global_data, outfile, 2)
+        finally:
+            outfile.close()
 
     # build a territory containment mapping for inheritance
     regions = {}
@@ -150,15 +171,19 @@ def main():
         if ext != '.xml':
             continue
 
-        print>>sys.stderr, 'Processing input file %r' % filename
-        tree = parse(os.path.join(srcdir, 'main', filename))
+        full_filename = os.path.join(srcdir, 'main', filename)
+        data_filename = os.path.join(destdir, 'localedata', stem + '.dat')
+
+        data = {}
+        if not need_conversion(data_filename, data, full_filename):
+            continue
+
+        tree = parse(full_filename)
 
         language = None
         elem = tree.find('.//identity/language')
         if elem is not None:
            language = elem.attrib['type']
-        print>>sys.stderr, '  Language:  %r' % language
 
         territory = None
         elem = tree.find('.//identity/territory')
@@ -166,9 +191,10 @@
             territory = elem.attrib['type']
         else:
             territory = '001' # world
-        print>>sys.stderr, '  Territory: %r' % territory
         regions = territory_containment.get(territory, [])
-        print>>sys.stderr, '  Regions:   %r' % regions
+
+        log('Processing %s (Language = %s; Territory = %s)',
+            filename, language, territory)
 
         # plural rules
         locale_id = '_'.join(filter(None, [
@@ -376,7 +402,7 @@ def main():
                         date_formats[elem.attrib.get('type')] = \
                             dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
                     except ValueError, e:
-                        print>>sys.stderr, 'ERROR: %s' % e
+                        error(e)
                 elif elem.tag == 'alias':
                     date_formats = Alias(_translate_alias(
                         ['date_formats'], elem.attrib['path'])
@@ -393,7 +419,7 @@ def main():
                         time_formats[elem.attrib.get('type')] = \
                             dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
                     except ValueError, e:
-                        print>>sys.stderr, 'ERROR: %s' % e
+                        error(e)
                 elif elem.tag == 'alias':
                     time_formats = Alias(_translate_alias(
                         ['time_formats'], elem.attrib['path'])
@@ -410,7 +436,7 @@ def main():
                         datetime_formats[elem.attrib.get('type')] = \
                             unicode(elem.findtext('dateTimeFormat/pattern'))
                     except ValueError, e:
-                        print>>sys.stderr, 'ERROR: %s' % e
+                        error(e)
                 elif elem.tag == 'alias':
                     datetime_formats = Alias(_translate_alias(
                         ['datetime_formats'], elem.attrib['path'])
@@ -482,7 +508,7 @@ def main():
                     unit_patterns[unit_type][pattern.attrib['count']] = \
                         unicode(pattern.text)
 
-        outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
+        outfile = open(data_filename, 'wb')
         try:
             pickle.dump(data, outfile, 2)
         finally:
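
The skip logic above keys off a `_version` entry that `need_conversion()` stores in every pickled `.dat` file, derived from the `$Revision:` number embedded in the corresponding CLDR XML; regeneration only happens when the stored number and the source number disagree. A minimal sketch of inspecting that marker after an import (the path is an example, not something the patch adds):

    import pickle

    # Read back the pickled global data and print the CLDR revision it was
    # generated from; import_cldr.py compares this value against the
    # $Revision number in supplementalData.xml to decide whether to rebuild.
    with open('babel/global.dat', 'rb') as f:
        data = pickle.load(f)

    print(data.get('_version'))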