/*
 * Copyright (C) 2012-2018 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// This script writes a generated C++ source file, one line at a time, through
// the host shell's print() function. Run it in a shell that provides print().

// Emits the preamble of the generated file: the license comment, the
// "DO NOT EDIT" banner, the #include lines and the opening of the
// JSC::Yarr namespaces.
function printHeader()
{
    // One entry per emitted line; joined with "\n" so that the whole license
    // is handed to print() as a single string.
    var copyright = [
        "/*",
        " * Copyright (C) 2012-2018 Apple Inc. All rights reserved.",
        " *",
        " * Redistribution and use in source and binary forms, with or without",
        " * modification, are permitted provided that the following conditions",
        " * are met:",
        " * 1. Redistributions of source code must retain the above copyright",
        " * notice, this list of conditions and the following disclaimer.",
        " * 2. Redistributions in binary form must reproduce the above copyright",
        " * notice, this list of conditions and the following disclaimer in the",
        " * documentation and/or other materials provided with the distribution.",
        " *",
        " * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY",
        " * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE",
        " * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR",
        " * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR",
        " * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,",
        " * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,",
        " * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR",
        " * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY",
        " * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT",
        " * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE",
        " * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ",
        " */"
    ].join("\n");

    print(copyright);
    print();
    print("// DO NOT EDIT! - this file autogenerated by YarrCanonicalize.js");
    print();
    print('#include "config.h"');
    print('#include "YarrCanonicalize.h"');
    print();
    print("namespace JSC { namespace Yarr {");
    print();
}

// Emits the closing braces of the namespaces opened by printHeader().
function printFooter()
{
    print("} } // JSC::Yarr");
    print();
}

// Helper function to convert a number to a fixed width hex representation of a UChar32.
// The argument is passed through Number(), so numeric strings are accepted too.
// The result is "0x" followed by lower-case hex digits, left-padded with zeros
// to at least four digits; wider values are not truncated.
function hex(x)
{
    var s = Number(x).toString(16);
    while (s.length < 4)
        s = "0" + s;
    return "0x" + s;
}

// See ES 6.0, 21.2.2.8.2 Steps 3
// Returns the canonical code unit for the UTF-16 code unit ch:
//  - ch itself when its upper-case form is more than one code unit long;
//  - ch itself when ch is non-ASCII (>= 128) but its upper-case form is ASCII;
//  - otherwise the code unit of its upper-case form.
function canonicalize(ch)
{
    var u = String.fromCharCode(ch).toUpperCase();
    if (u.length > 1)
        return ch;
    var cu = u.charCodeAt(0);
    if (ch >= 128 && cu < 128)
        return ch;
    return cu;
}

// Largest code unit value covered by the generated UCS2 tables.
var MAX_UCS2 = 0xFFFF;

// Builds the reverse mapping of canonicalize() over 0..MAX_UCS2.
// Returns a sparse array indexed by canonical value; each present entry is the
// ascending list of code units that canonicalize to that value.
function createUCS2CanonicalGroups()
{
    var groupedCanonically = [];
    // Pass 1: populate groupedCanonically - this is mapping from canonicalized
    // values back to the set of character code that canonicalize to them.
    for (var i = 0; i <= MAX_UCS2; ++i) {
        var ch = canonicalize(i);
        if (!groupedCanonically[ch])
            groupedCanonically[ch] = [];
        groupedCanonically[ch].push(i);
    }
    return groupedCanonically;
}

// Prints the canonicalization tables for one character domain.
//
// prefix          - table name prefix; its lower-case form prefixes the emitted
//                   array names and its upper-case form the emitted size constants.
// maxValue        - highest character value covered by the range table.
// canonicalGroups - mapping from canonical value to the list of characters that
//                   canonicalize to it (as built by createUCS2CanonicalGroups()).
//                   The group arrays are not modified.
//
// Also prints the 256-entry canonicalTableLChar table, whose contents do not
// depend on the arguments.
function createTables(prefix, maxValue, canonicalGroups)
{
    var prefixLower = prefix.toLowerCase();
    var prefixUpper = prefix.toUpperCase();
    var typeInfo = [];
    var characterSetInfo = [];

    // Pass 2: populate typeInfo & characterSetInfo. For every character calculate
    // a "TypeName:payload" string. The type name is printed verbatim into the
    // generated table below (presumably naming enumerators declared in
    // YarrCanonicalize.h - confirm against that header).
    // Object.keys() visits only the populated slots of the sparse mapping.
    for (const cu of Object.keys(canonicalGroups)) {
        // The set of characters that canonicalize to cu
        const group = canonicalGroups[cu];

        // If there is only one, it is unique.
        if (group.length == 1) {
            typeInfo[group[0]] = "CanonicalizeUnique:0";
            continue;
        }

        // Sort a copy, so that the caller's arrays are left untouched.
        const characters = group.slice().sort(function(x, y) { return x - y; });

        // If there are more than two characters, create an entry in characterSetInfo.
        if (characters.length > 2) {
            for (const ch of characters)
                typeInfo[ch] = "CanonicalizeSet:" + characterSetInfo.length;
            characterSetInfo.push(characters);
            continue;
        }

        // We have a pair, mark alternating ranges, otherwise track whether this is the low or high partner.
        const lo = characters[0];
        const hi = characters[1];
        const delta = hi - lo;
        if (delta == 1) {
            // Adjacent partners: the type records whether the lower one is odd or even.
            const type = lo & 1 ? "CanonicalizeAlternatingUnaligned:0" : "CanonicalizeAlternatingAligned:0";
            typeInfo[lo] = type;
            typeInfo[hi] = type;
        } else {
            typeInfo[lo] = "CanonicalizeRangeLo:" + delta;
            typeInfo[hi] = "CanonicalizeRangeHi:" + delta;
        }
    }

    var rangeInfo = [];
    // Pass 3: coalesce types into ranges. Consecutive characters sharing the
    // same "TypeName:payload" string are merged into one { begin, end, type } entry.
    for (let end = 0; end <= maxValue; ++end) {
        const begin = end;
        const type = typeInfo[end];
        while (end < maxValue && typeInfo[end + 1] == type)
            ++end;
        rangeInfo.push({begin:begin, end:end, type:type});
    }

    // One zero-terminated array per set of three or more mutually equivalent characters.
    for (let setIndex = 0; setIndex < characterSetInfo.length; ++setIndex) {
        const members = characterSetInfo[setIndex].map(function(ch) { return hex(ch) + ", "; }).join("");
        print("const UChar32 " + prefixLower + "CharacterSet" + setIndex + "[] = { " + members + "0 };");
    }
    print();

    // Index of the character set arrays; a "CanonicalizeSet" payload is an index into this table.
    print("static const size_t " + prefixUpper + "_CANONICALIZATION_SETS = " + characterSetInfo.length + ";");
    print("const UChar32* const " + prefixLower + "CharacterSetInfo[" + prefixUpper + "_CANONICALIZATION_SETS] = {");
    for (let setIndex = 0; setIndex < characterSetInfo.length; ++setIndex)
        print(" " + prefixLower + "CharacterSet" + setIndex + ",");
    print("};");
    print();

    // The range table: { begin, end, payload, type } for every coalesced range.
    print("const size_t " + prefixUpper + "_CANONICALIZATION_RANGES = " + rangeInfo.length + ";");
    print("const CanonicalizationRange " + prefixLower + "RangeInfo[" + prefixUpper + "_CANONICALIZATION_RANGES] = {");
    for (const info of rangeInfo) {
        const typeAndValue = info.type.split(':');
        print(" { " + hex(info.begin) + ", " + hex(info.end) + ", " + hex(typeAndValue[1]) + ", " + typeAndValue[0] + " },");
    }
    print("};");
    print();

    // Create canonical table for LChar domain: 256 entries, printed 16 per row.
    // `line` holds the text waiting to be printed; it starts out as the declaration.
    let line = "const uint16_t canonicalTableLChar[256] = {";
    for (let i = 0; i < 256; i++) {
        if (!(i % 16)) {
            // Start of a new row: flush the declaration or the previous row.
            print(line);
            line = " ";
        }
        const canonicalChar = canonicalize(i);
        // Pad single-digit values to two hex digits.
        line = line + (canonicalChar < 16 ? "0x0" : "0x") + canonicalChar.toString(16);
        if ((i % 16) != 15)
            line += ", ";
        else if (i != 255)
            line += ",";
    }
    print(line);
    print("};");
    print();
}

printHeader();
createTables("UCS2", MAX_UCS2, createUCS2CanonicalGroups());
printFooter();