summaryrefslogtreecommitdiff
path: root/util
diff options
context:
space:
mode:
author Konstantin Ritt <ritt.ks@gmail.com> 2012-11-21 06:08:24 +0200
committer The Qt Project <gerrit-noreply@qt-project.org> 2012-11-21 18:23:59 +0100
commit fe8962d3a506a850878401309f81247c80f3d203 (patch)
tree ef90091eb6f7d6eaaddf0ff357f76cf5ddcf768f /util
parent e7c79face6ec361e4ad0ff8303f1becafac8b2b6 (diff)
download qtbase-fe8962d3a506a850878401309f81247c80f3d203.tar.gz
Use likelySubtags to instantiate a locale id from its short form
...just as described in http://www.unicode.org/reports/tr35/#Likely_Subtags. This is much more effective than the current "guessing" algorithm, and it makes it possible to instantiate a locale from the script or territory code only. Change-Id: I674f8476e65b01c56960b6e83a1a346df0715274 Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Diffstat (limited to 'util')
-rwxr-xr-x util/local_database/cldr2qlocalexml.py 55
-rwxr-xr-x util/local_database/qlocalexml2cpp.py 97
2 files changed, 120 insertions, 32 deletions
diff --git a/util/local_database/cldr2qlocalexml.py b/util/local_database/cldr2qlocalexml.py
index 893b662f25..1604d0e14b 100755
--- a/util/local_database/cldr2qlocalexml.py
+++ b/util/local_database/cldr2qlocalexml.py
@@ -48,6 +48,7 @@ from xpathlite import DraftResolution
from dateconverter import convert_date
import re
+findAlias = xpathlite.findAlias
findEntry = xpathlite.findEntry
findEntryInFile = xpathlite._findEntryInFile
findTagsInFile = xpathlite.findTagsInFile
@@ -116,6 +117,12 @@ def generateLocaleInfo(path):
if not path.endswith(".xml"):
return {}
+
+ # skip legacy/compatibility ones
+ alias = findAlias(path)
+ if alias:
+ raise xpathlite.Error("alias to \"%s\"" % alias)
+
language_code = findEntryInFile(path, "identity/language", attribute="type")[0]
if language_code == 'root':
# just skip it
@@ -128,18 +135,16 @@ def generateLocaleInfo(path):
# ### actually there is only one locale with variant: en_US_POSIX
# does anybody care about it at all?
if variant_code:
- return {}
+ raise xpathlite.Error("we do not support variants (\"%s\")" % variant_code)
language_id = enumdata.languageCodeToId(language_code)
if language_id <= 0:
- sys.stderr.write("unknown language code \"" + language_code + "\"\n")
- return {}
+ raise xpathlite.Error("unknown language code \"%s\"" % language_code)
language = enumdata.language_list[language_id][0]
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- sys.stderr.write("unknown script code \"" + script_code + "\"\n")
- return {}
+ raise xpathlite.Error("unknown script code \"%s\"" % script_code)
script = enumdata.script_list[script_id][0]
# we should handle fully qualified names with the territory
@@ -147,8 +152,7 @@ def generateLocaleInfo(path):
return {}
country_id = enumdata.countryCodeToId(country_code)
if country_id <= 0:
- sys.stderr.write("unknown country code \"" + country_code + "\"\n")
- return {}
+ raise xpathlite.Error("unknown country code \"%s\"" % country_code)
country = enumdata.country_list[country_id][0]
# So we say we accept only those values that have "contributed" or
@@ -557,9 +561,13 @@ cldr_files = os.listdir(cldr_dir)
locale_database = {}
for file in cldr_files:
- l = generateLocaleInfo(cldr_dir + "/" + file)
- if not l:
- sys.stderr.write("skipping file \"" + file + "\"\n")
+ try:
+ l = generateLocaleInfo(cldr_dir + "/" + file)
+ if not l:
+ sys.stderr.write("skipping file \"" + file + "\"\n")
+ continue
+ except xpathlite.Error as e:
+ sys.stderr.write("skipping file \"%s\" (%s)\n" % (file, str(e)))
continue
locale_database[(l['language_id'], l['script_id'], l['country_id'], l['variant_code'])] = l
@@ -611,16 +619,15 @@ def _parseLocale(l):
script = "AnyScript"
country = "AnyCountry"
- if l == "und": # we are treating unknown locale like C
- return (None, None, None)
+ if l == "und":
+ raise xpathlite.Error("we are treating unknown locale like C")
items = l.split("_")
language_code = items[0]
if language_code != "und":
language_id = enumdata.languageCodeToId(language_code)
if language_id == -1:
- sys.stderr.write("unknown language code \"" + language_code + "\"\n")
- return (None, None, None)
+ raise xpathlite.Error("unknown language code \"%s\"" % language_code)
language = enumdata.language_list[language_id][0]
if len(items) > 1:
@@ -631,16 +638,14 @@ def _parseLocale(l):
if len(script_code) == 4:
script_id = enumdata.scriptCodeToId(script_code)
if script_id == -1:
- sys.stderr.write("unknown script code \"" + script_code + "\"\n")
- return (None, None, None)
+ raise xpathlite.Error("unknown script code \"%s\"" % script_code)
script = enumdata.script_list[script_id][0]
else:
country_code = script_code
if country_code:
country_id = enumdata.countryCodeToId(country_code)
if country_id == -1:
- sys.stderr.write("unknown country code \"" + country_code + "\"\n")
- return (None, None, None)
+ raise xpathlite.Error("unknown country code \"%s\"" % country_code)
country = enumdata.country_list[country_id][0]
return (language, script, country)
@@ -651,13 +656,15 @@ for ns in findTagsInFile(cldr_dir + "/../supplemental/likelySubtags.xml", "likel
for data in ns[1:][0]: # ns looks like this: [u'likelySubtag', [(u'from', u'aa'), (u'to', u'aa_Latn_ET')]]
tmp[data[0]] = data[1]
- (from_language, from_script, from_country) = _parseLocale(tmp[u"from"])
- if not from_language:
- sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n")
+ try:
+ (from_language, from_script, from_country) = _parseLocale(tmp[u"from"])
+ except xpathlite.Error as e:
+ sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
continue
- (to_language, to_script, to_country) = _parseLocale(tmp[u"to"])
- if not to_language:
- sys.stderr.write("skipping likelySubtag " + tmp[u"from"] + " -> " + tmp[u"to"] + "\n")
+ try:
+ (to_language, to_script, to_country) = _parseLocale(tmp[u"to"])
+ except xpathlite.Error as e:
+ sys.stderr.write("skipping likelySubtag \"%s\" -> \"%s\" (%s)\n" % (tmp[u"from"], tmp[u"to"], str(e)))
continue
# substitute according to http://www.unicode.org/reports/tr35/#Likely_Subtags
if to_country == "AnyCountry" and from_country != to_country:
diff --git a/util/local_database/qlocalexml2cpp.py b/util/local_database/qlocalexml2cpp.py
index b82e1516b4..06fabcc77e 100755
--- a/util/local_database/qlocalexml2cpp.py
+++ b/util/local_database/qlocalexml2cpp.py
@@ -291,7 +291,7 @@ class Locale:
self.currencyFormat = eltText(firstChildElt(elt, "currencyFormat"))
self.currencyNegativeFormat = eltText(firstChildElt(elt, "currencyNegativeFormat"))
-def loadLocaleMap(doc, language_map, script_map, country_map):
+def loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map):
result = {}
locale_list_elt = firstChildElt(doc.documentElement, "localeList")
@@ -307,6 +307,28 @@ def loadLocaleMap(doc, language_map, script_map, country_map):
country_id = countryNameToId(locale.country, country_map)
if country_id == -1:
sys.stderr.write("Cannot find a country id for '%s'\n" % locale.country)
+
+ if language_id != 1: # C
+ if country_id == 0:
+ sys.stderr.write("loadLocaleMap: No country id for '%s'\n" % locale.language)
+
+ if script_id == 0:
+ # find default script for a given language and country (see http://www.unicode.org/reports/tr35/#Likely_Subtags)
+ for key in likely_subtags_map.keys():
+ tmp = likely_subtags_map[key]
+ if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == locale.country:
+ locale.script = tmp["to"][1]
+ script_id = scriptNameToId(locale.script, script_map)
+ break
+ if script_id == 0 and country_id != 0:
+ # try with no country
+ for key in likely_subtags_map.keys():
+ tmp = likely_subtags_map[key]
+ if tmp["from"][0] == locale.language and tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry":
+ locale.script = tmp["to"][1]
+ script_id = scriptNameToId(locale.script, script_map)
+ break
+
result[(language_id, script_id, country_id)] = locale
locale_elt = nextSiblingElt(locale_elt, "locale")
@@ -321,14 +343,22 @@ def compareLocaleKeys(key1, key2):
l1 = compareLocaleKeys.locale_map[key1]
l2 = compareLocaleKeys.locale_map[key2]
- if l1.language in compareLocaleKeys.default_map:
- default = compareLocaleKeys.default_map[l1.language]
- if l1.country == default and key1[1] == 0:
+ if (l1.language, l1.script) in compareLocaleKeys.default_map.keys():
+ default = compareLocaleKeys.default_map[(l1.language, l1.script)]
+ if l1.country == default:
return -1
- if l2.country == default and key2[1] == 0:
+ if l2.country == default:
return 1
if key1[1] != key2[1]:
+ if (l2.language, l2.script) in compareLocaleKeys.default_map.keys():
+ default = compareLocaleKeys.default_map[(l2.language, l2.script)]
+ if l2.country == default:
+ return 1
+ if l1.country == default:
+ return -1
+
+ if key1[1] != key2[1]:
return key1[1] - key2[1]
else:
return key1[0] - key2[0]
@@ -476,9 +506,9 @@ def main():
default_map = {}
for key in likely_subtags_map.keys():
tmp = likely_subtags_map[key]
- if tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry" and tmp["from"][1] == "AnyScript":
- default_map[tmp["to"][0]] = tmp["to"][2]
- locale_map = loadLocaleMap(doc, language_map, script_map, country_map)
+ if tmp["from"][1] == "AnyScript" and tmp["from"][2] == "AnyCountry" and tmp["to"][2] != "AnyCountry":
+ default_map[(tmp["to"][0], tmp["to"][1])] = tmp["to"][2]
+ locale_map = loadLocaleMap(doc, language_map, script_map, country_map, likely_subtags_map)
dupes = findDupes(language_map, country_map)
cldr_version = eltText(firstChildElt(doc.documentElement, "version"))
@@ -495,6 +525,57 @@ def main():
*/\n\n\n\
" % (str(datetime.date.today()), cldr_version) )
+ # Likely subtags map
+ data_temp_file.write("static const QLocaleId likely_subtags[] = {\n")
+ index = 0
+ for key in likely_subtags_map.keys():
+ tmp = likely_subtags_map[key]
+ from_language = languageNameToId(tmp["from"][0], language_map)
+ from_script = scriptNameToId(tmp["from"][1], script_map)
+ from_country = countryNameToId(tmp["from"][2], country_map)
+ to_language = languageNameToId(tmp["to"][0], language_map)
+ to_script = scriptNameToId(tmp["to"][1], script_map)
+ to_country = countryNameToId(tmp["to"][2], country_map)
+
+ cmnt_from = ""
+ if from_language != 0:
+ cmnt_from = cmnt_from + language_map[from_language][1]
+ else:
+ cmnt_from = cmnt_from + "und"
+ if from_script != 0:
+ if cmnt_from:
+ cmnt_from = cmnt_from + "_"
+ cmnt_from = cmnt_from + script_map[from_script][1]
+ if from_country != 0:
+ if cmnt_from:
+ cmnt_from = cmnt_from + "_"
+ cmnt_from = cmnt_from + country_map[from_country][1]
+ cmnt_to = ""
+ if to_language != 0:
+ cmnt_to = cmnt_to + language_map[to_language][1]
+ else:
+ cmnt_from = cmnt_from + "und"
+ if to_script != 0:
+ if cmnt_to:
+ cmnt_to = cmnt_to + "_"
+ cmnt_to = cmnt_to + script_map[to_script][1]
+ if to_country != 0:
+ if cmnt_to:
+ cmnt_to = cmnt_to + "_"
+ cmnt_to = cmnt_to + country_map[to_country][1]
+
+ data_temp_file.write(" ")
+ data_temp_file.write("{ %3d, %2d, %3d }, { %3d, %2d, %3d }" % (from_language, from_script, from_country, to_language, to_script, to_country))
+ index += 1
+ if index != len(likely_subtags_map):
+ data_temp_file.write(",")
+ else:
+ data_temp_file.write(" ")
+ data_temp_file.write(" // %s -> %s\n" % (cmnt_from, cmnt_to))
+ data_temp_file.write("};\n")
+
+ data_temp_file.write("\n")
+
# Locale index
data_temp_file.write("static const quint16 locale_index[] = {\n")
index = 0