-rw-r--r--   Makefile                         |  12
-rw-r--r--   babel/global.dat                 | bin 16203 -> 15987 bytes
-rwxr-xr-x   scripts/download_import_cldr.py  |  75
-rwxr-xr-x   scripts/import_cldr.py           | 108
4 files changed, 136 insertions, 59 deletions
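
The patch below wires CLDR acquisition into the Makefile (`make test` now depends on a new `import-cldr` target) and teaches both scripts to skip work that is already up to date. As a rough sketch of the resulting workflow, not part of the patch itself and assuming it is run from the repository root, the `import-cldr` target amounts to:

    import subprocess
    import sys

    # download_import_cldr.py verifies core-1.9.1.zip against the expected
    # MD5 checksum, re-downloads it on mismatch, re-extracts it only when it
    # changed, and finally invokes scripts/import_cldr.py to regenerate the
    # pickled locale data.
    subprocess.check_call([sys.executable, 'scripts/download_import_cldr.py'])
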
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,14 @@
-test:
-	python setup.py test
+test: import-cldr
+	@python setup.py test
+
+import-cldr:
+	@./scripts/download_import_cldr.py
+
+clean-cldr:
+	@rm babel/localedata/*.dat
 
 develop:
-	pip install --editable .
+	@pip install --editable .
 
 tox-test:
 	@tox
diff --git a/babel/global.dat b/babel/global.dat
index 49db8db..4eb6099 100644
--- a/babel/global.dat
+++ b/babel/global.dat
Binary files differ
diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py
index 3b21117..a3e5d79 100755
--- a/scripts/download_import_cldr.py
+++ b/scripts/download_import_cldr.py
@@ -3,6 +3,7 @@
 import os
 import sys
 import shutil
+import hashlib
 import zipfile
 import urllib
 import subprocess
@@ -10,32 +11,76 @@ import subprocess
 
 URL = 'http://unicode.org/Public/cldr/1.9.1/core.zip'
 FILENAME = 'core-1.9.1.zip'
+FILESUM = '1c506cd7a30bf5b4f3cbb8a5b382d96c'
 BLKSIZE = 131072
 
 
+def get_terminal_width():
+    import fcntl
+    import termios
+    import struct
+    fd = sys.stdin.fileno()
+    cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
+    return cr[1]
+
+
+def reporthook(block_count, block_size, total_size):
+    bytes_transmitted = block_count * block_size
+    cols = get_terminal_width()
+    buffer = 6
+    percent = float(bytes_transmitted) / (total_size or 1)
+    done = int(percent * (cols - buffer))
+    sys.stdout.write('\r')
+    sys.stdout.write(' ' + '=' * done + ' ' * (cols - done - buffer))
+    sys.stdout.write('% 4d%%' % (percent * 100))
+    sys.stdout.flush()
+
+
+def log(message, *args):
+    if args:
+        message = message % args
+    print >> sys.stderr, message
+
+
+def is_good_file(filename):
+    if not os.path.isfile(filename):
+        log('\'%s\' not found', filename)
+        return False
+    h = hashlib.md5()
+    with open(filename, 'rb') as f:
+        while 1:
+            blk = f.read(BLKSIZE)
+            if not blk:
+                break
+            h.update(blk)
+    return h.hexdigest() == FILESUM
+
+
 def main():
     scripts_path = os.path.dirname(os.path.abspath(__file__))
     repo = os.path.dirname(scripts_path)
     cldr_path = os.path.join(repo, 'cldr')
     zip_path = os.path.join(cldr_path, FILENAME)
+    changed = False
 
-    if not os.path.isfile(zip_path):
-        with open(zip_path, 'wb') as f:
-            conn = urllib.urlopen(URL)
-            while True:
-                buf = conn.read(BLKSIZE)
-                if not buf:
-                    break
-                f.write(buf)
-            conn.close()
-
+    while not is_good_file(zip_path):
+        log('Downloading \'%s\'', FILENAME)
+        if os.path.isfile(zip_path):
+            os.remove(zip_path)
+        urllib.urlretrieve(URL, zip_path, reporthook)
+        changed = True
+        print
 
     common_path = os.path.join(cldr_path, 'common')
-    if os.path.isdir(common_path):
-        shutil.rmtree(common_path)
 
-    z = zipfile.ZipFile(zip_path)
-    z.extractall(cldr_path)
-    z.close()
+    if changed:
+        if os.path.isdir(common_path):
+            log('Deleting old CLDR checkout in \'%s\'', cldr_path)
+            shutil.rmtree(common_path)
+
+        log('Extracting CLDR to \'%s\'', cldr_path)
+        z = zipfile.ZipFile(zip_path)
+        z.extractall(cldr_path)
+        z.close()
 
     subprocess.check_call([
         sys.executable,
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 595556b..0717f22 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -17,12 +17,7 @@ from optparse import OptionParser
 import os
 import re
 import sys
-# don't put the ElementTree import in babel/compat.py as this will add a new
-# dependency (elementtree) for Python 2.4 users.
-try:
-    from xml.etree import ElementTree
-except ImportError:
-    from elementtree import ElementTree
+from xml.etree import ElementTree
 
 # Make sure we're using Babel source, and not some previously installed version
 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
@@ -57,6 +52,30 @@ NAME_MAP = {
     'timeFormats': 'time_formats'
 }
 
+def log(message, *args):
+    if args:
+        message = message % args
+    print >> sys.stderr, message
+
+
+def error(message, *args):
+    log('ERROR: %s' % message, args)
+
+
+def need_conversion(dst_filename, data_dict, source_filename):
+    with open(source_filename, 'rb') as f:
+        blob = f.read(4096)
+        version = int(re.search(r'version number="\$Revision: (\d+)', blob).group(1))
+
+    data_dict['_version'] = version
+    if not os.path.isfile(dst_filename):
+        return True
+
+    with open(dst_filename, 'rb') as f:
+        data = pickle.load(f)
+        return data.get('_version') != version
+
+
 def _translate_alias(ctxt, path):
     parts = path.split('/')
     keys = ctxt[:]
@@ -83,35 +102,37 @@ def main():
     destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                            '..', 'babel')
 
-    sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
+    sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
+    sup = parse(sup_filename)
 
     # Import global data from the supplemental files
+    global_path = os.path.join(destdir, 'global.dat')
     global_data = {}
-
-    territory_zones = global_data.setdefault('territory_zones', {})
-    zone_aliases = global_data.setdefault('zone_aliases', {})
-    zone_territories = global_data.setdefault('zone_territories', {})
-    for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
-        tzid = elem.attrib['type']
-        territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
-        zone_territories[tzid] = elem.attrib['territory']
-        if 'aliases' in elem.attrib:
-            for alias in elem.attrib['aliases'].split():
-                zone_aliases[alias] = tzid
-
-    # Import Metazone mapping
-    meta_zones = global_data.setdefault('meta_zones', {})
-    tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
-    for elem in tzsup.findall('.//timezone'):
-        for child in elem.findall('usesMetazone'):
-            if 'to' not in child.attrib: # FIXME: support old mappings
-                meta_zones[elem.attrib['type']] = child.attrib['mzone']
-
-    outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
-    try:
-        pickle.dump(global_data, outfile, 2)
-    finally:
-        outfile.close()
+    if need_conversion(global_path, global_data, sup_filename):
+        territory_zones = global_data.setdefault('territory_zones', {})
+        zone_aliases = global_data.setdefault('zone_aliases', {})
+        zone_territories = global_data.setdefault('zone_territories', {})
+        for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
+            tzid = elem.attrib['type']
+            territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
+            zone_territories[tzid] = elem.attrib['territory']
+            if 'aliases' in elem.attrib:
+                for alias in elem.attrib['aliases'].split():
+                    zone_aliases[alias] = tzid
+
+        # Import Metazone mapping
+        meta_zones = global_data.setdefault('meta_zones', {})
+        tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
+        for elem in tzsup.findall('.//timezone'):
+            for child in elem.findall('usesMetazone'):
+                if 'to' not in child.attrib: # FIXME: support old mappings
+                    meta_zones[elem.attrib['type']] = child.attrib['mzone']
+
+        outfile = open(global_path, 'wb')
+        try:
+            pickle.dump(global_data, outfile, 2)
+        finally:
+            outfile.close()
 
     # build a territory containment mapping for inheritance
     regions = {}
@@ -150,15 +171,19 @@ def main():
         if ext != '.xml':
             continue
 
-        print>>sys.stderr, 'Processing input file %r' % filename
-        tree = parse(os.path.join(srcdir, 'main', filename))
+        full_filename = os.path.join(srcdir, 'main', filename)
+        data_filename = os.path.join(destdir, 'localedata', stem + '.dat')
+
+        data = {}
+        if not need_conversion(data_filename, data, full_filename):
+            continue
+
+        tree = parse(full_filename)
 
         language = None
         elem = tree.find('.//identity/language')
         if elem is not None:
            language = elem.attrib['type']
-        print>>sys.stderr, '  Language:  %r' % language
 
         territory = None
         elem = tree.find('.//identity/territory')
@@ -166,9 +191,10 @@
             territory = elem.attrib['type']
         else:
             territory = '001' # world
-        print>>sys.stderr, '  Territory: %r' % territory
         regions = territory_containment.get(territory, [])
-        print>>sys.stderr, '  Regions:   %r' % regions
+
+        log('Processing %s (Language = %s; Territory = %s)',
+            filename, language, territory)
 
         # plural rules
         locale_id = '_'.join(filter(None, [
@@ -376,7 +402,7 @@ def main():
                         date_formats[elem.attrib.get('type')] = \
                             dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
                     except ValueError, e:
-                        print>>sys.stderr, 'ERROR: %s' % e
+                        error(e)
                 elif elem.tag == 'alias':
                     date_formats = Alias(_translate_alias(
                         ['date_formats'], elem.attrib['path'])
@@ -393,7 +419,7 @@ def main():
                         time_formats[elem.attrib.get('type')] = \
                             dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
                     except ValueError, e:
-                        print>>sys.stderr, 'ERROR: %s' % e
+                        error(e)
                 elif elem.tag == 'alias':
                     time_formats = Alias(_translate_alias(
                         ['time_formats'], elem.attrib['path'])
@@ -410,7 +436,7 @@ def main():
                         datetime_formats[elem.attrib.get('type')] = \
                             unicode(elem.findtext('dateTimeFormat/pattern'))
                     except ValueError, e:
-                        print>>sys.stderr, 'ERROR: %s' % e
+                        error(e)
                 elif elem.tag == 'alias':
                     datetime_formats = Alias(_translate_alias(
                         ['datetime_formats'], elem.attrib['path'])
@@ -482,7 +508,7 @@ def main():
                     unit_patterns[unit_type][pattern.attrib['count']] = \
                         unicode(pattern.text)
 
-        outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
+        outfile = open(data_filename, 'wb')
         try:
             pickle.dump(data, outfile, 2)
         finally:
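
The skip logic above keys off a `_version` entry that `need_conversion()` stores in every pickled `.dat` file, derived from the `$Revision:` number embedded in the corresponding CLDR XML; regeneration only happens when the stored number and the source number disagree. A minimal sketch of inspecting that marker after an import (the path is an example, not something the patch adds):

    import pickle

    # Read back the pickled global data and print the CLDR revision it was
    # generated from; import_cldr.py compares this value against the
    # $Revision number in supplementalData.xml to decide whether to rebuild.
    with open('babel/global.dat', 'rb') as f:
        data = pickle.load(f)

    print(data.get('_version'))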