summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 12
-rw-r--r-- babel/global.dat bin 16203 -> 15987 bytes
-rwxr-xr-x scripts/download_import_cldr.py 75
-rwxr-xr-x scripts/import_cldr.py 108
4 files changed, 136 insertions, 59 deletions
diff --git a/Makefile b/Makefile
index e96ef8b..199374b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,14 @@
-test:
- python setup.py test
+test: import-cldr
+ @python setup.py test
+
+import-cldr:
+ @./scripts/download_import_cldr.py
+
+# Delete the generated locale data files. -f keeps the target from failing
+# when there is nothing to delete (e.g. on an already-clean tree).
+clean-cldr:
+	@rm -f babel/localedata/*.dat
develop:
- pip install --editable .
+ @pip install --editable .
tox-test:
@tox
diff --git a/babel/global.dat b/babel/global.dat
index 49db8db..4eb6099 100644
--- a/babel/global.dat
+++ b/babel/global.dat
Binary files differ
diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py
index 3b21117..a3e5d79 100755
--- a/scripts/download_import_cldr.py
+++ b/scripts/download_import_cldr.py
@@ -3,6 +3,7 @@
import os
import sys
import shutil
+import hashlib
import zipfile
import urllib
import subprocess
@@ -10,32 +11,76 @@ import subprocess
URL = 'http://unicode.org/Public/cldr/1.9.1/core.zip'
FILENAME = 'core-1.9.1.zip'
+FILESUM = '1c506cd7a30bf5b4f3cbb8a5b382d96c'
BLKSIZE = 131072
+def get_terminal_width(default=80):
+    """Return the terminal width in columns, or *default* if unknown.
+
+    The width is queried with a ``TIOCGWINSZ`` ioctl on stdin.  When that
+    is not possible (stdin is not a terminal, stdin has no usable file
+    descriptor, or ``fcntl``/``termios`` are unavailable on this
+    platform) *default* is returned instead of raising, so progress
+    reporting never aborts a download.
+
+    :param default: width to report when the terminal cannot be queried
+    :return: number of columns as an integer
+    """
+    import struct
+    try:
+        import fcntl
+        import termios
+        fd = sys.stdin.fileno()
+        # Two shorts are unpacked from the ioctl result; the second one is
+        # used as the column count.
+        cr = struct.unpack('hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
+    except (ImportError, AttributeError, ValueError, IOError):
+        return default
+    # A reported width of 0 is useless for drawing; fall back as well.
+    return cr[1] or default
+
+
+def reporthook(block_count, block_size, total_size):
+    """Draw a one-line text progress bar; used as ``urlretrieve`` hook.
+
+    :param block_count: number of blocks transferred so far
+    :param block_size: size of one block in bytes
+    :param total_size: total size of the download in bytes; values <= 0
+                       are treated as "unknown" and rendered as 0%
+    """
+    bytes_transmitted = block_count * block_size
+    cols = get_terminal_width()
+    # Columns reserved for the leading space and the "NNN%" suffix.
+    margin = 6
+    if total_size > 0:
+        # The last block is usually only partially filled, so the product
+        # above can overshoot the real size; never report more than 100%.
+        percent = min(float(bytes_transmitted) / total_size, 1.0)
+    else:
+        percent = 0.0
+    width = max(cols - margin, 0)
+    done = int(percent * width)
+    sys.stdout.write('\r')
+    sys.stdout.write(' ' + '=' * done + ' ' * (width - done))
+    sys.stdout.write('% 4d%%' % (percent * 100))
+    sys.stdout.flush()
+
+
+def log(message, *args):
+    """Print *message* to stderr, %-formatted with *args* if any are given."""
+    text = message % args if args else message
+    print >> sys.stderr, text
+
+
+def is_good_file(filename):
+    """Check that *filename* exists and has the expected MD5 checksum.
+
+    A missing file is logged and reported as ``False``.  Otherwise the
+    file is hashed in ``BLKSIZE`` chunks and the hex digest is compared
+    against ``FILESUM``.
+    """
+    if not os.path.isfile(filename):
+        log('\'%s\' not found', filename)
+        return False
+    digest = hashlib.md5()
+    with open(filename, 'rb') as stream:
+        # iter() with a sentinel stops at the first empty read (EOF).
+        for chunk in iter(lambda: stream.read(BLKSIZE), b''):
+            digest.update(chunk)
+    return digest.hexdigest() == FILESUM
+
+
def main():
scripts_path = os.path.dirname(os.path.abspath(__file__))
repo = os.path.dirname(scripts_path)
cldr_path = os.path.join(repo, 'cldr')
zip_path = os.path.join(cldr_path, FILENAME)
+ changed = False
- if not os.path.isfile(zip_path):
- with open(zip_path, 'wb') as f:
- conn = urllib.urlopen(URL)
- while True:
- buf = conn.read(BLKSIZE)
- if not buf:
- break
- f.write(buf)
- conn.close()
-
+ while not is_good_file(zip_path):
+ log('Downloading \'%s\'', FILENAME)
+ if os.path.isfile(zip_path):
+ os.remove(zip_path)
+ urllib.urlretrieve(URL, zip_path, reporthook)
+ changed = True
+ print
common_path = os.path.join(cldr_path, 'common')
- if os.path.isdir(common_path):
- shutil.rmtree(common_path)
- z = zipfile.ZipFile(zip_path)
- z.extractall(cldr_path)
- z.close()
+ if changed:
+ if os.path.isdir(common_path):
+ log('Deleting old CLDR checkout in \'%s\'', cldr_path)
+ shutil.rmtree(common_path)
+
+ log('Extracting CLDR to \'%s\'', cldr_path)
+ z = zipfile.ZipFile(zip_path)
+ z.extractall(cldr_path)
+ z.close()
subprocess.check_call([
sys.executable,
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 595556b..0717f22 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -17,12 +17,7 @@ from optparse import OptionParser
import os
import re
import sys
-# don't put the ElementTree import in babel/compat.py as this will add a new
-# dependency (elementtree) for Python 2.4 users.
-try:
- from xml.etree import ElementTree
-except ImportError:
- from elementtree import ElementTree
+from xml.etree import ElementTree
# Make sure we're using Babel source, and not some previously installed version
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
@@ -57,6 +52,30 @@ NAME_MAP = {
'timeFormats': 'time_formats'
}
+def log(message, *args):
+    """Print *message* to stderr, %-formatted with *args* if any are given."""
+    text = message % args if args else message
+    print >> sys.stderr, text
+
+
+def error(message, *args):
+    """Log *message* to stderr with an ``ERROR: `` prefix.
+
+    *message* may be any object (e.g. an exception instance); it is
+    converted with ``%s``.  Optional *args* are forwarded to ``log`` and
+    used there to %-format the prefixed message.
+    """
+    # Unpack args: passing the tuple itself would hand ``log`` a single
+    # extra argument and make it fail with a formatting TypeError even
+    # for a plain ``error(exc)`` call.
+    log('ERROR: %s' % (message,), *args)
+
+
+def need_conversion(dst_filename, data_dict, source_filename):
+    """Tell whether *source_filename* has to be converted again.
+
+    The revision number is read from the first 4096 bytes of the source
+    file and always stored in ``data_dict['_version']``.  Conversion is
+    needed when the destination file does not exist, cannot be loaded,
+    or carries a different ``_version``.
+
+    :param dst_filename: path of the pickled output file
+    :param data_dict: dictionary that receives the ``_version`` key
+    :param source_filename: path of the source XML file
+    :return: ``True`` if the destination must be (re)generated
+    :raises AttributeError: if no revision marker is found in the first
+                            4096 bytes of the source file
+    """
+    with open(source_filename, 'rb') as f:
+        blob = f.read(4096)
+    version = int(re.search(r'version number="\$Revision: (\d+)', blob).group(1))
+
+    data_dict['_version'] = version
+    if not os.path.isfile(dst_filename):
+        return True
+
+    try:
+        with open(dst_filename, 'rb') as f:
+            data = pickle.load(f)
+        return data.get('_version') != version
+    except Exception:
+        # Unpickling a truncated or foreign file can raise many different
+        # exception types; whatever went wrong, the destination is not
+        # usable and has to be regenerated.
+        return True
+
+
def _translate_alias(ctxt, path):
parts = path.split('/')
keys = ctxt[:]
@@ -83,35 +102,37 @@ def main():
destdir = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
'..', 'babel')
- sup = parse(os.path.join(srcdir, 'supplemental', 'supplementalData.xml'))
+ sup_filename = os.path.join(srcdir, 'supplemental', 'supplementalData.xml')
+ sup = parse(sup_filename)
# Import global data from the supplemental files
+ global_path = os.path.join(destdir, 'global.dat')
global_data = {}
-
- territory_zones = global_data.setdefault('territory_zones', {})
- zone_aliases = global_data.setdefault('zone_aliases', {})
- zone_territories = global_data.setdefault('zone_territories', {})
- for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
- tzid = elem.attrib['type']
- territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
- zone_territories[tzid] = elem.attrib['territory']
- if 'aliases' in elem.attrib:
- for alias in elem.attrib['aliases'].split():
- zone_aliases[alias] = tzid
-
- # Import Metazone mapping
- meta_zones = global_data.setdefault('meta_zones', {})
- tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
- for elem in tzsup.findall('.//timezone'):
- for child in elem.findall('usesMetazone'):
- if 'to' not in child.attrib: # FIXME: support old mappings
- meta_zones[elem.attrib['type']] = child.attrib['mzone']
-
- outfile = open(os.path.join(destdir, 'global.dat'), 'wb')
- try:
- pickle.dump(global_data, outfile, 2)
- finally:
- outfile.close()
+ if need_conversion(global_path, global_data, sup_filename):
+ territory_zones = global_data.setdefault('territory_zones', {})
+ zone_aliases = global_data.setdefault('zone_aliases', {})
+ zone_territories = global_data.setdefault('zone_territories', {})
+ for elem in sup.findall('.//timezoneData/zoneFormatting/zoneItem'):
+ tzid = elem.attrib['type']
+ territory_zones.setdefault(elem.attrib['territory'], []).append(tzid)
+ zone_territories[tzid] = elem.attrib['territory']
+ if 'aliases' in elem.attrib:
+ for alias in elem.attrib['aliases'].split():
+ zone_aliases[alias] = tzid
+
+ # Import Metazone mapping
+ meta_zones = global_data.setdefault('meta_zones', {})
+ tzsup = parse(os.path.join(srcdir, 'supplemental', 'metaZones.xml'))
+ for elem in tzsup.findall('.//timezone'):
+ for child in elem.findall('usesMetazone'):
+ if 'to' not in child.attrib: # FIXME: support old mappings
+ meta_zones[elem.attrib['type']] = child.attrib['mzone']
+
+ outfile = open(global_path, 'wb')
+ try:
+ pickle.dump(global_data, outfile, 2)
+ finally:
+ outfile.close()
# build a territory containment mapping for inheritance
regions = {}
@@ -150,15 +171,19 @@ def main():
if ext != '.xml':
continue
- print>>sys.stderr, 'Processing input file %r' % filename
- tree = parse(os.path.join(srcdir, 'main', filename))
+ full_filename = os.path.join(srcdir, 'main', filename)
+ data_filename = os.path.join(destdir, 'localedata', stem + '.dat')
+
data = {}
+ if not need_conversion(data_filename, data, full_filename):
+ continue
+
+ tree = parse(full_filename)
language = None
elem = tree.find('.//identity/language')
if elem is not None:
language = elem.attrib['type']
- print>>sys.stderr, ' Language: %r' % language
territory = None
elem = tree.find('.//identity/territory')
@@ -166,9 +191,10 @@ def main():
territory = elem.attrib['type']
else:
territory = '001' # world
- print>>sys.stderr, ' Territory: %r' % territory
regions = territory_containment.get(territory, [])
- print>>sys.stderr, ' Regions: %r' % regions
+
+ log('Processing %s (Language = %s; Territory = %s)',
+ filename, language, territory)
# plural rules
locale_id = '_'.join(filter(None, [
@@ -376,7 +402,7 @@ def main():
date_formats[elem.attrib.get('type')] = \
dates.parse_pattern(unicode(elem.findtext('dateFormat/pattern')))
except ValueError, e:
- print>>sys.stderr, 'ERROR: %s' % e
+ error(e)
elif elem.tag == 'alias':
date_formats = Alias(_translate_alias(
['date_formats'], elem.attrib['path'])
@@ -393,7 +419,7 @@ def main():
time_formats[elem.attrib.get('type')] = \
dates.parse_pattern(unicode(elem.findtext('timeFormat/pattern')))
except ValueError, e:
- print>>sys.stderr, 'ERROR: %s' % e
+ error(e)
elif elem.tag == 'alias':
time_formats = Alias(_translate_alias(
['time_formats'], elem.attrib['path'])
@@ -410,7 +436,7 @@ def main():
datetime_formats[elem.attrib.get('type')] = \
unicode(elem.findtext('dateTimeFormat/pattern'))
except ValueError, e:
- print>>sys.stderr, 'ERROR: %s' % e
+ error(e)
elif elem.tag == 'alias':
datetime_formats = Alias(_translate_alias(
['datetime_formats'], elem.attrib['path'])
@@ -482,7 +508,7 @@ def main():
unit_patterns[unit_type][pattern.attrib['count']] = \
unicode(pattern.text)
- outfile = open(os.path.join(destdir, 'localedata', stem + '.dat'), 'wb')
+ outfile = open(data_filename, 'wb')
try:
pickle.dump(data, outfile, 2)
finally: