fixed missing '-' in block names, enhanced the hack for ABI aliasing.

* genUnicode.py, xmlunicode.c, include/libxml/xmlunicode.h: fixed missing '-' in block names, enhanced the hack for ABI aliasing.
author: William M. Brack <wbrack@src.gnome.org> 2003-11-10 15:49:27 +0000
committer: William M. Brack <wbrack@src.gnome.org> 2003-11-10 15:49:27 +0000
commit: 8763df8dc8fbe7f86eb2f1439683157e1f1898eb (patch)
tree: 1bfebe605fae9d76682f7784e3b593501022a1ca /genUnicode.py
parent: ea939087b982cffc39521369632fe34243f3153a (diff)
download: libxml2-8763df8dc8fbe7f86eb2f1439683157e1f1898eb.tar.gz
1 files changed, 41 insertions, 74 deletions
diff --git a/genUnicode.py b/genUnicode.py
index daee97b2..d3da5a0c 100755
--- a/genUnicode.py
+++ b/genUnicode.py
@@ -6,9 +6,8 @@
 #
 # NOTE: there is an 'alias' facility for blocks which are not present in
 #	the current release, but are needed for ABI compatibility.  This
-#	must be accomplished MANUALLY!  Define the alias in the variable
-#	'blockAliases', then MANUALLY provide a function to return the
-#	appropriate value.
+#	must be accomplished MANUALLY!  Please see the comments below under
+#	'blockAliases'.
 #
 import sys
 import string
@@ -17,7 +16,15 @@ import time
 webpage = "http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1d5b.html"
 sources = "Blocks-4.0.1d1b.txt UnicodeData-4.0.1d1b.txt"
 
-blockAliases = "CombiningMarksforSymbols Greek PrivateUse"
+#
+# blockAliases is a small hack - it is used for mapping block names which
+# were used in the 3.1 release, but are missing or changed in the current
+# release.  The format is "OldBlockName:NewBlockName1[,NewBlockName2[,...]]"
+blockAliases = []
+blockAliases.append("CombiningMarksforSymbols:CombiningDiacriticalMarksforSymbols")
+blockAliases.append("Greek:GreekandCoptic")
+blockAliases.append("PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A," + 
+	"SupplementaryPrivateUseArea-B")
 
 # minTableSize gives the minimum number of ranges which must be present
 # before a range table is produced.  If there are less than this
@@ -26,24 +33,13 @@ minTableSize = 8
 
 (blockfile, catfile) = string.split(sources)
 
-#
-# First create a dictionary for the block names
-#
-BlockNames = {}
-
-#
-# Next put in aliases for blocks not currently present, but needed
-# for ABI compatibility (THIS IS A HORRIBLE HACK!)
-#
-aliases = string.split(blockAliases, ' ')
-for name in aliases:
-    BlockNames[name] = []
 
 #
 # Now process the "blocks" file, reducing it to a dictionary
 # indexed by blockname, containing a tuple with the applicable
 # block range
 #
+BlockNames = {}
 try:
     blocks = open(blockfile, "r")
 except:
@@ -65,10 +61,28 @@ for line in blocks.readlines():
     except:
         print "Failed to process line: %s" % (line)
         continue
-    BlockNames[name] = ("0x"+start, "0x"+end)
+    start = "0x" + start
+    end = "0x" + end
+    try:
+        BlockNames[name].append((start, end))
+    except:
+        BlockNames[name] = [(start, end)]
 blocks.close()
 print "Parsed %d blocks descriptions" % (len(BlockNames.keys()))
 
+for block in blockAliases:
+    alias = string.split(block,':')
+    alist = string.split(alias[1],',')
+    for comp in alist:
+        if BlockNames.has_key(comp):
+            if alias[0] not in BlockNames:
+                BlockNames[alias[0]] = []
+            for r in BlockNames[comp]:
+                BlockNames[alias[0]].append(r)
+        else:
+            print "Alias %s: %s not in Blocks" % (alias[0], comp)
+            continue
+
 #
 # Next process the Categories file. This is more complex, since
 # the file is in code sequence, and we need to invert it.  We use
@@ -267,7 +281,7 @@ for block in bkeys:
         output.write(',\n')
     else:
         flag = 1
-    output.write('  {"%s", xmlUCSIs%s}' % (name, name))
+    output.write('  {"%s", xmlUCSIs%s}' % (block, name))
 output.write('};\n\n')
 
 output.write('static xmlUnicodeRange xmlUnicodeCats[] = {\n')
@@ -355,16 +369,19 @@ static xmlIntFunc
 for block in bkeys:
     name = string.replace(block, '-', '')
     header.write("XMLPUBFUN int XMLCALL xmlUCSIs%s\t(int code);\n" % name)
-    if len(BlockNames[block]) == 0:	# ignore aliases
-        continue
-    (start, end) = BlockNames[block]
     output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
     output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
                  (block))
     output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
-    output.write("int\nxmlUCSIs%s(int code) {\n" % name)
-    output.write("    return((code >= %s) && (code <= %s));\n" % (start, end))
-    output.write("}\n\n")
+    output.write("int\nxmlUCSIs%s(int code) {\n    return(" % name)
+    flag = 0
+    for (start, end) in BlockNames[block]:
+        if flag:
+            output.write(" ||\n           ")
+        else:
+            flag = 1
+        output.write("((code >= %s) && (code <= %s))" % (start, end))
+    output.write(");\n}\n\n")
 
 header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code, const char *block);\n\n")
 output.write(
@@ -437,56 +454,6 @@ xmlUCSIsCat(int code, const char *cat) {
     return (func(code));
 }
 
-/*
-    The following routines are an UGLY HACK to provide aliases for block
-    names which are not in the current release, but are needed for ABI
-    compatibility.
-*/
-
-/**
- * xmlUCSIsCombiningMarksforSymbols:
- * @code: UCS code point
- *
- * Check whether the character is part of CombiningMarksforSymbols UCS Block
- *
- * Returns 1 if true 0 otherwise
- */
-int
-xmlUCSIsCombiningMarksforSymbols(int code) {
-    return((code >= 0x20D0) && (code <= 0x20FF));
-}
-
-/**
- * xmlUCSIsGreek:
- * @code: UCS code point
- *
- * Check whether the character is part of Greek UCS Block
- *
- * Returns 1 if true 0 otherwise
- */
-int
-xmlUCSIsGreek(int code) {
-    return((code >= 0x370) && (code <= 0x3FF));
-}
-
-/**
- * xmlUCSIsPrivateUse:
- * @code: UCS code point
- *
- * Check whether the character is part of PrivateUse UCS Block
- *
- * Returns 1 if true 0 otherwise
- */
-int
-xmlUCSIsPrivateUse(int code) {
-    if ( ((code >= 0xE000)  && (code <= 0xF8FF)) ||
-	 ((code >= 0xF0000) && (code <= 0xFFFFD))||
-	 ((code >= 0x100000)&& (code <= 0x10FFFD)) )
-	return (1);
-    else
-	return (0);
-}
-
 
 #endif /* LIBXML_UNICODE_ENABLED */
 """)
author	William M. Brack <wbrack@src.gnome.org>	2003-11-10 15:49:27 +0000
committer	William M. Brack <wbrack@src.gnome.org>	2003-11-10 15:49:27 +0000
commit	8763df8dc8fbe7f86eb2f1439683157e1f1898eb (patch)
tree	1bfebe605fae9d76682f7784e3b593501022a1ca /genUnicode.py
parent	ea939087b982cffc39521369632fe34243f3153a (diff)
download	libxml2-8763df8dc8fbe7f86eb2f1439683157e1f1898eb.tar.gz