encode-URI-test.html [plain text]

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">

<html>

<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>test of JavaScript URI encoding and decoding methods</title>
</head>

<body>

<script type="text/javascript">

// --------

// Helper functions.

// Converts a small integer into one uppercase hexadecimal digit.
// Values below 10 are returned unchanged (as numbers); callers turn them into
// text by concatenating them onto a string. Values of 10 and above are mapped
// to letters, so 10 becomes "A" and 15 becomes "F".
function hexDigit(number)
{
    var letterOffset = 55; // Character code of "A" minus 10.
    return number >= 10 ? String.fromCharCode(number + letterOffset) : number;
}

// Returns a quoted, human-readable rendering of s for use in failure messages.
//
// An empty string is reported as the words "empty string". Otherwise the result
// is wrapped in double quotes, with:
//   - backslash, double quote, newline, carriage return and tab written as
//     backslash escapes;
//   - printable ASCII (0x20 through 0x7E) written as-is;
//   - every other code up to 0xFF written as \xHH;
//   - every other code up to 0xFFFF written as \uHHHH.
function printable(s)
{
    // Loose comparison is kept on purpose so existing callers see the same result.
    if (s == "")
        return "empty string";
    var p = "";
    for (var i = 0; i < s.length; i++) {
        var c = s.charAt(i);
        var cc = s.charCodeAt(i);
        if (c == "\\") {
            p += "\\\\";
        } else if (c == "\"") {
            p += "\\\"";
        } else if (c == "\n") {
            p += "\\n";
        } else if (c == "\r") {
            p += "\\r";
        } else if (c == "\t") {
            p += "\\t";
        } else if (cc >= 0x20 && cc < 0x7F) {
            // Printable ASCII starts at space (0x20). The lower bound used to be
            // decimal 20, which let control characters 0x14-0x1F through unescaped.
            p += c;
        } else if (cc <= 0xFF) {
            p += "\\x" + hexDigit(cc >> 4) + hexDigit(cc & 0xF);
        } else if (cc <= 0xFFFF) {
            p += "\\u" + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
        } else {
            // charCodeAt reports UTF-16 code units, which never exceed 0xFFFF, so this
            // branch is not expected to run for ordinary strings; it is kept for completeness.
            p += "\\U" + hexDigit((cc >> 28) & 0xF) + hexDigit((cc >> 24) & 0xF) + hexDigit((cc >> 20) & 0xF) + hexDigit((cc >> 16) & 0xF)
                       + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
        }
    }
    return "\"" + p + "\"";
}

// Returns the percent-escaped form this test expects for character code c.
//
// The expected escapes are UTF-8 based. That matches Gecko rather than WinIE 6,
// and is preferred because it yields encodings that actually work in URLs; for
// JavaScriptCore the intent is to follow Gecko here and WinIE on most other things.
//
// Rather than implementing UTF-8 escaping in JavaScript, the non-ASCII cases the
// test needs are listed explicitly. ASCII codes (0 through 0x7F) are formatted as
// "%" followed by two hex digits. Any other code raises an alert and yields "?".
function escapedCharacter(c)
{
    var knownEscapes = [
        [   0x80, "%C2%80"],
        [  0x7FF, "%DF%BF"],
        [  0x800, "%E0%A0%80"],
        [ 0x2022, "%E2%80%A2"],
        [ 0xD7FF, "%ED%9F%BF"],
        [ 0xD800, "%ED%A0%80"],
        [ 0xE000, "%EE%80%80"],
        [ 0xFFFC, "%EF%BF%BC"],
        [ 0xFFFD, "%EF%BF%BD"],
        [ 0xFFFE, "%EF%BF%BE"],
        [ 0xFFFF, "%EF%BF%BF"],
        [0x10000, "%F0%90%80%80"]
    ];
    for (var i = 0; i < knownEscapes.length; i++) {
        // Strict comparison, matching how a switch statement selects its case.
        if (knownEscapes[i][0] === c)
            return knownEscapes[i][1];
    }

    if (c < 0 || c > 0x7F) {
        window.alert("escapedCharacter doesn't know how to escape character code " + c);
        return "?";
    }

    var highNibble = c >> 4;
    var lowNibble = c - (highNibble << 4);
    return "%" + hexDigit(highNibble) + hexDigit(lowNibble);
}

// Calls f once for each position in s, in order, passing the character code
// found at that position.
function forEachCharacterCode(f, s)
{
    var index = 0;
    while (index < s.length) {
        f(s.charCodeAt(index));
        index += 1;
    }
}

// Invokes the function named by functionName with parameter as its only argument.
// Returns whatever the function returns, or the string "exception" if the
// invocation throws.
//
// NOTE: the call is made through eval so that the function is looked up by name
// at run time. The names passed in come from this test file only; do not route
// external input through here.
function call(functionName, parameter)
{
    // Declared locally; without this declaration the assignments below would
    // create (or, in strict code, fail to create) a global named "result".
    var result;
    try {
        // The evaluated source refers to the local variable "parameter" directly.
        result = eval(functionName + "(parameter)");
    } catch (e) {
        result = "exception";
    }
    return result;
}

// --------

// Build up tables with expected results.

// Table of expectations that take precedence over the ones passed to test().
// Keys have the form functionName + "(" + argument + ")"; values are the
// expected return values.
var expectedResult = {};

// Records that calling the function named f on the single character with code c
// is expected to return that character unchanged.
function addExpectedNonEscaped(f, c)
{
    var character = String.fromCharCode(c);
    var key = f + "(" + character + ")";
    expectedResult[key] = character;
}

// Records that none of escape, encodeURI and encodeURIComponent is expected to
// change the character with code c.
function addNoEscape(c)
{
    var encoders = ["escape", "encodeURI", "encodeURIComponent"];
    for (var i = 0; i < encoders.length; i++)
        addExpectedNonEscaped(encoders[i], c);
}

// Records that escape is expected to leave the character with code c unchanged.
// Nothing is recorded here for encodeURI or encodeURIComponent.
function addEscapeNoEscape(c)
{
    addExpectedNonEscaped("escape", c);
}

// Records that both encodeURI and encodeURIComponent are expected to leave the
// character with code c unchanged. Nothing is recorded here for escape.
function addURIComponentNoEscape(c)
{
    var encoders = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < encoders.length; i++)
        addExpectedNonEscaped(encoders[i], c);
}

// Records the expectations for a character that encodeURI leaves alone:
//   - encodeURI is expected to return the character with code c unchanged;
//   - decodeURI is expected to return the percent-escaped form of that character
//     unchanged, for both the upper-case and lower-case spellings of the escape.
function addURINoEscape(c)
{
    addExpectedNonEscaped("encodeURI", c);

    // Compute the escape once instead of once per use; every use needs the same
    // value, and an unknown code would otherwise raise the same alert repeatedly.
    var upperCaseEscape = escapedCharacter(c);
    var lowerCaseEscape = upperCaseEscape.toLowerCase();
    expectedResult["decodeURI(" + upperCaseEscape + ")"] = upperCaseEscape;
    expectedResult["decodeURI(" + lowerCaseEscape + ")"] = lowerCaseEscape;
}

// Characters that escape, encodeURI and encodeURIComponent are all expected to leave unchanged.
forEachCharacterCode(addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_");
// Characters that escape is expected to leave unchanged.
forEachCharacterCode(addEscapeNoEscape, "+/");
// Characters that encodeURI is expected to leave unchanged, and whose percent-escaped
// forms decodeURI is expected to leave unchanged.
forEachCharacterCode(addURINoEscape, "@#$&+,/:;=?");
// Characters that encodeURI and encodeURIComponent are both expected to leave unchanged.
forEachCharacterCode(addURIComponentNoEscape, "!'()~");

// WinIE 6's escape function does not escape @, although Gecko's does.
expectedResult["escape(@)"] = "@";

// --------

// Run tests.

// Number of checks that have failed so far.
var failureCount = 0;

// Runs a single check: calls the function named functionName on parameter and
// compares the outcome with the expected value. The expected value is
// desiredResult unless expectedResult holds an entry for this exact call, in
// which case that entry wins. On a mismatch, a paragraph describing the failure
// is written to the document and failureCount is incremented.
function test(functionName, parameter, desiredResult)
{
    var override = expectedResult[functionName + "(" + parameter + ")"];
    var expected = override ? override : desiredResult;

    var actual = call(functionName, parameter);
    if (actual == expected)
        return;

    var message = "called " + functionName
        + " on " + printable(parameter)
        + " and got " + printable(actual)
        + " instead of " + printable(expected);
    document.writeln("<p>" + message + "</p>");
    failureCount += 1;
}

// Checks parameter against expected with each of the three encoding functions,
// in the order escape, encodeURI, encodeURIComponent.
function testEscape(parameter, expected)
{
    var encoders = ["escape", "encodeURI", "encodeURIComponent"];
    for (var i = 0; i < encoders.length; i++)
        test(encoders[i], parameter, expected);
}

// Checks that unescape, called on parameter, produces expected.
function testUnescape(parameter, expected)
{
    test("unescape", parameter, expected);
}

// Checks parameter against expected with decodeURI and then decodeURIComponent.
// When the caller expects U+FFFE or U+FFFF, the check is made against U+FFFD
// instead.
function testDecode(parameter, expected)
{
    var expectsNoncharacter = expected == "\uFFFE" || expected == "\uFFFF";
    var adjusted = expectsNoncharacter ? "\uFFFD" : expected;

    var decoders = ["decodeURI", "decodeURIComponent"];
    for (var i = 0; i < decoders.length; i++)
        test(decoders[i], parameter, adjusted);
}

// Checks one input against unescape and against the decodeURI family, which may
// have different expected outcomes: expectedUnescape is used for unescape and
// expectedDecode for the checks made by testDecode.
function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
{
    testUnescape(parameter, expectedUnescape);
    testDecode(parameter, expectedDecode);
}

// Runs the full round-trip set of checks for the character with code c:
// encoding the character must give its expected escape, and unescaping or
// decoding that escape (in both upper-case and lower-case spelling) must give
// the character back.
function testCharacter(c)
{
    var character = String.fromCharCode(c);
    var upperCaseEscape = escapedCharacter(c);
    var lowerCaseEscape = upperCaseEscape.toLowerCase();

    testEscape(character, upperCaseEscape);
    testUnescape(upperCaseEscape, character);
    testUnescape(lowerCaseEscape, character);
    testDecode(upperCaseEscape, character);
    testDecode(lowerCaseEscape, character);
}

// Every character code from 0 through 128 inclusive.
var c = 0;
while (c <= 128) {
    testCharacter(c);
    c += 1;
}

// Selected larger codes for which escapedCharacter has an explicit expected escape.
testCharacter(0x7FF);
testCharacter(0x800);
testCharacter(0x2022);
testCharacter(0xD7FF);
testCharacter(0xE000);
testCharacter(0xFFFC);
testCharacter(0xFFFD);

// These tests are currently turned off because it's not yet entirely clear what correct behavior
// is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns
// U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same.
// And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater
// than U+FFFF (outside the BMP) from working properly.

//testCharacter(0xD800);
//testCharacter(0xDBFF);
//testCharacter(0xDC00);
//testCharacter(0xDFFF);
//testCharacter(0xFFFE);
//testCharacter(0xFFFF);
//testCharacter(0x10000);

// Incomplete or malformed percent sequences. unescape is expected to return the
// input unchanged, except for the complete "%u0041" form, which it is expected to
// turn into "A". decodeURI and decodeURIComponent are expected to throw for all
// of these, which call() reports as the string "exception".
testUnescapeAndDecode("%", "%", "exception");
testUnescapeAndDecode("%0", "%0", "exception");
testUnescapeAndDecode("%a", "%a", "exception");
testUnescapeAndDecode("%u", "%u", "exception");
testUnescapeAndDecode("%xx", "%xx", "exception");
testUnescapeAndDecode("%u004", "%u004", "exception");
testUnescapeAndDecode("%u0041", "A", "exception");
testUnescapeAndDecode("%uxxxx", "%uxxxx", "exception");

// Raw (not percent-escaped) non-ASCII input is expected to come back unchanged
// from unescape, decodeURI and decodeURIComponent alike.
testUnescapeAndDecode(String.fromCharCode(0x80), String.fromCharCode(0x80), String.fromCharCode(0x80));
testUnescapeAndDecode(String.fromCharCode(0xD800), String.fromCharCode(0xD800), String.fromCharCode(0xD800));

// A complete two-byte sequence is expected to yield character 0x80. A lead byte
// on its own, or one followed by a raw character instead of an escaped
// continuation byte, is expected to be left alone by unescape and to make the
// decode functions throw.
testUnescapeAndDecode("%C2%80", String.fromCharCode(0x80), String.fromCharCode(0x80));
testUnescapeAndDecode("%C2", "%C2", "exception");
testUnescapeAndDecode("%C2" + String.fromCharCode(0x80), "%C2" + String.fromCharCode(0x80), "exception");

// The characters below have to be literal because String.fromCharCode will make a single character
// and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF.
// For most JavaScript engines, this will turn into two characters because they use UTF-16
// instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16
// behavior, forbids it, or doesn't say either way.
//
// Each literal is paired with its expected four-byte percent-escaped form: the encoding
// functions are checked in one direction, unescape and the decode functions in the other.
testEscape("𐀀", "%F0%90%80%80");
testUnescapeAndDecode("%F0%90%80%80", "𐀀", "𐀀");
testEscape("𦏵", "%F0%A6%8F%B5");
testUnescapeAndDecode("%F0%A6%8F%B5", "𦏵", "𦏵");
testEscape("𯿿", "%F0%AF%BF%BF");
testUnescapeAndDecode("%F0%AF%BF%BF", "𯿿", "𯿿");

// --------

// Summarize.

// Report the outcome at the end of the page: the number of failed checks, or a
// note that there were none.
var failuresMessage = failureCount
    ? failureCount + " tests failed."
    : "No failures.";
document.writeln("<p>Testing complete. " + failuresMessage + "</p>");

// --------

</script>

</body>

</html>