diff options
Diffstat (limited to 'Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js')
-rw-r--r-- | Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js new file mode 100644 index 000000000..dc578cfec --- /dev/null +++ b/Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2012, 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +function printHeader() +{ + var copyright = ( + "/*" + "\n" + + " * Copyright (C) 2012-2013, 2015-2016 Apple Inc. All rights reserved." + "\n" + + " *" + "\n" + + " * Redistribution and use in source and binary forms, with or without" + "\n" + + " * modification, are permitted provided that the following conditions" + "\n" + + " * are met:" + "\n" + + " * 1. Redistributions of source code must retain the above copyright" + "\n" + + " * notice, this list of conditions and the following disclaimer." + "\n" + + " * 2. Redistributions in binary form must reproduce the above copyright" + "\n" + + " * notice, this list of conditions and the following disclaimer in the" + "\n" + + " * documentation and/or other materials provided with the distribution." + "\n" + + " *" + "\n" + + " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY" + "\n" + + " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" + "\n" + + " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR" + "\n" + + " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR" + "\n" + + " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL," + "\n" + + " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO," + "\n" + + " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR" + "\n" + + " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY" + "\n" + + " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT" + "\n" + + " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE" + "\n" + + " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. " + "\n" + + " */"); + + print(copyright); + print(); + print("// DO NOT EDIT! - this file autogenerated by YarrCanonicalize.js"); + print(); + print('#include "config.h"'); + print('#include "YarrCanonicalize.h"'); + print(); + print("namespace JSC { namespace Yarr {"); + print(); +} + +function printFooter() +{ + print("} } // JSC::Yarr"); + print(); +} + +// Helper function to convert a number to a fixed width hex representation of a UChar32. +function hex(x) +{ + var s = Number(x).toString(16); + while (s.length < 4) + s = 0 + s; + return "0x" + s; +} + +// See ES 6.0, 21.2.2.8.2 Steps 3 +function canonicalize(ch) +{ + var u = String.fromCharCode(ch).toUpperCase(); + if (u.length > 1) + return ch; + var cu = u.charCodeAt(0); + if (ch >= 128 && cu < 128) + return ch; + return cu; +} + +var MAX_UCS2 = 0xFFFF; + +function createUCS2CanonicalGroups() +{ + var groupedCanonically = []; + // Pass 1: populate groupedCanonically - this is mapping from canonicalized + // values back to the set of character code that canonicalize to them. + for (var i = 0; i <= MAX_UCS2; ++i) { + var ch = canonicalize(i); + if (!groupedCanonically[ch]) + groupedCanonically[ch] = []; + groupedCanonically[ch].push(i); + } + + return groupedCanonically; +} + +function createTables(prefix, maxValue, canonicalGroups) +{ + var prefixLower = prefix.toLowerCase(); + var prefixUpper = prefix.toUpperCase(); + var typeInfo = []; + var characterSetInfo = []; + // Pass 2: populate typeInfo & characterSetInfo. For every character calculate + // a typeInfo value, described by the types above, and a value payload. + for (cu in canonicalGroups) { + // The set of characters that canonicalize to cu + var characters = canonicalGroups[cu]; + + // If there is only one, it is unique. + if (characters.length == 1) { + typeInfo[characters[0]] = "CanonicalizeUnique:0"; + continue; + } + + // Sort the array. + characters.sort(function(x,y){return x-y;}); + + // If there are more than two characters, create an entry in characterSetInfo. + if (characters.length > 2) { + for (i in characters) + typeInfo[characters[i]] = "CanonicalizeSet:" + characterSetInfo.length; + characterSetInfo.push(characters); + + continue; + } + + // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner. + var lo = characters[0]; + var hi = characters[1]; + var delta = hi - lo; + if (delta == 1) { + var type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0"; + typeInfo[lo] = type; + typeInfo[hi] = type; + } else { + typeInfo[lo] = "CanonicalizeRangeLo:" + delta; + typeInfo[hi] = "CanonicalizeRangeHi:" + delta; + } + } + + var rangeInfo = []; + // Pass 3: coallesce types into ranges. + for (var end = 0; end <= maxValue; ++end) { + var begin = end; + var type = typeInfo[end]; + while (end < maxValue && typeInfo[end + 1] == type) + ++end; + rangeInfo.push({begin:begin, end:end, type:type}); + } + + for (i in characterSetInfo) { + var characters = "" + var set = characterSetInfo[i]; + for (var j in set) + characters += hex(set[j]) + ", "; + print("const UChar32 " + prefixLower + "CharacterSet" + i + "[] = { " + characters + "0 };"); + } + print(); + print("static const size_t " + prefixUpper + "_CANONICALIZATION_SETS = " + characterSetInfo.length + ";"); + print("const UChar32* const " + prefixLower + "CharacterSetInfo[" + prefixUpper + "_CANONICALIZATION_SETS] = {"); + for (i in characterSetInfo) + print(" " + prefixLower + "CharacterSet" + i + ","); + print("};"); + print(); + print("const size_t " + prefixUpper + "_CANONICALIZATION_RANGES = " + rangeInfo.length + ";"); + print("const CanonicalizationRange " + prefixLower + "RangeInfo[" + prefixUpper + "_CANONICALIZATION_RANGES] = {"); + for (i in rangeInfo) { + var info = rangeInfo[i]; + var typeAndValue = info.type.split(':'); + print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },"); + } + print("};"); + print(); +} + +printHeader(); + +createTables("UCS2", MAX_UCS2, createUCS2CanonicalGroups()); + +printFooter(); + |