#include "config.h"
#include "URL.h"
#include "DecodeEscapeSequences.h"
#include "MIMETypeRegistry.h"
#include "TextEncoding.h"
#include "UUID.h"
#include <stdio.h>
#include <unicode/uidna.h>
#include <wtf/HashMap.h>
#include <wtf/HexNumber.h>
#include <wtf/StdLibExtras.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/text/StringHash.h>
using namespace WTF;
namespace WebCore {
// Scratch buffer types. CharBuffer is what URL::init() and URL::parse() use
// while assembling the byte string that gets parsed.
typedef Vector<char, 512> CharBuffer;
typedef Vector<UChar, 512> UCharBuffer;
// URL::port() returns invalidPortNumber when the port text does not parse as
// an unsigned integer or is larger than maximumValidPortNumber.
static const unsigned maximumValidPortNumber = 0xFFFE;
static const unsigned invalidPortNumber = 0xFFFF;
// Returns true when |character| is |lowercaseLetter| or its uppercase ASCII
// form. |lowercaseLetter| must be a lowercase ASCII letter (asserted).
static inline bool isLetterMatchIgnoringCase(UChar character, char lowercaseLetter)
{
    ASSERT(isASCIILower(lowercaseLetter));
    // Setting bit 0x20 maps an uppercase ASCII letter onto its lowercase form.
    const int foldedCharacter = character | 0x20;
    return foldedCharacter == lowercaseLetter;
}
// Scheme names and default-port digit strings stored as char arrays WITHOUT a
// terminating NUL, so each array's extent equals its text length. The equal()
// templates in this file compare exactly that many characters.
static const char wsScheme[] = {'w', 's'};
static const char ftpScheme[] = {'f', 't', 'p'};
static const char ftpPort[] = {'2', '1'};
static const char wssScheme[] = {'w', 's', 's'};
static const char fileScheme[] = {'f', 'i', 'l', 'e'};
static const char httpScheme[] = {'h', 't', 't', 'p'};
static const char httpPort[] = {'8', '0'};
static const char httpsScheme[] = {'h', 't', 't', 'p', 's'};
static const char httpsPort[] = {'4', '4', '3'};
static const char gopherScheme[] = {'g', 'o', 'p', 'h', 'e', 'r'};
static const char gopherPort[] = {'7', '0'};
// 8-bit overload: returns true when |character| is |lowercaseLetter| or its
// uppercase ASCII form. |lowercaseLetter| must be a lowercase ASCII letter.
static inline bool isLetterMatchIgnoringCase(char character, char lowercaseLetter)
{
    ASSERT(isASCIILower(lowercaseLetter));
    // Setting bit 0x20 maps an uppercase ASCII letter onto its lowercase form.
    const int foldedCharacter = character | 0x20;
    return foldedCharacter == lowercaseLetter;
}
// Bit flags OR-ed together in the entries of characterClassTable. Each
// is*Char() helper in this file tests exactly one of these flags.
enum URLCharacterClasses {
// In characterClassTable: ASCII letters only.
SchemeFirstChar = 1 << 0,
// In characterClassTable: ASCII letters, digits, '+', '-' and '.'.
SchemeChar = 1 << 1,
// Bytes the parser accepts while scanning the user-info part of the authority.
UserInfoChar = 1 << 2,
// In characterClassTable: ASCII letters, digits, '%', '-', '.' and '_'.
HostnameChar = 1 << 3,
// In characterClassTable: hex digits (either case), '%', '.' and ':'.
IPv6Char = 1 << 4,
// In characterClassTable: NUL, '#', '/' and '?'.
PathSegmentEndChar = 1 << 5,
// In characterClassTable: control characters other than NUL, space, '"', '#',
// '%', '<', '>', '?', DEL and every byte >= 0x80.
BadChar = 1 << 6
};
// Maps every byte value (0-255) to the set of URLCharacterClasses flags that
// apply to it. Entries are listed in ascending byte order; the comments give
// the byte(s) described by the line(s) that follow.
static const unsigned char characterClassTable[256] = {
// 0x00 NUL, 0x01
PathSegmentEndChar, BadChar,
// 0x02-0x03
BadChar, BadChar,
// 0x04-0x1F (control characters, four per line)
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
// 0x20 space, 0x21 '!'
BadChar, UserInfoChar,
// 0x22 '"', 0x23 '#'
BadChar, PathSegmentEndChar | BadChar,
// 0x24 '$', 0x25 '%'
UserInfoChar, UserInfoChar | HostnameChar | IPv6Char | BadChar,
// 0x26 '&', 0x27 '\''
UserInfoChar, UserInfoChar,
// 0x28 '(', 0x29 ')'
UserInfoChar, UserInfoChar,
// 0x2A '*', 0x2B '+'
UserInfoChar, SchemeChar | UserInfoChar,
// 0x2C ','
UserInfoChar,
// 0x2D '-'
SchemeChar | UserInfoChar | HostnameChar,
// 0x2E '.'
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
// 0x2F '/'
PathSegmentEndChar,
// 0x30-0x39 '0'-'9'
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
// 0x3A ':', 0x3B ';'
UserInfoChar | IPv6Char, UserInfoChar,
// 0x3C '<', 0x3D '='
BadChar, UserInfoChar,
// 0x3E '>', 0x3F '?'
BadChar, PathSegmentEndChar | BadChar,
// 0x40 '@'
0,
// 0x41-0x46 'A'-'F' (hex digits, so also IPv6Char)
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
// 0x47-0x5A 'G'-'Z'
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
// 0x5B '['
0,
// 0x5C '\\', 0x5D ']'
0, 0,
// 0x5E '^'
0,
// 0x5F '_'
UserInfoChar | HostnameChar,
// 0x60 '`'
0,
// 0x61-0x66 'a'-'f' (hex digits, so also IPv6Char)
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
// 0x67-0x7A 'g'-'z'
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
// 0x7B '{'
0,
// 0x7C '|', 0x7D '}', 0x7E '~', 0x7F DEL
0, 0, UserInfoChar, BadChar,
// 0x80-0xFF (non-ASCII bytes, four per line)
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar,
BadChar, BadChar, BadChar, BadChar
};
// Class values stored in percentEncodeClassTable and passed as the
// |whatToEncode| argument of encodeWithURLEscapeSequences(). The numeric
// values decrease from Simple (255) to Username (31).
// NOTE(review): how a table entry is compared against the requested class is
// decided by encodeWithURLEscapeSequences(), which is not visible here.
enum PercentEncodeCharacterClass {
PercentEncodeSimple = 255,
PercentEncodeDefault = 127,
PercentEncodePassword = 63,
PercentEncodeUsername = 31,
};
// Maps every byte value (0-255) to a PercentEncodeCharacterClass value, or 0
// for bytes that belong to no class. Entries are in ascending byte order; the
// comments give the byte(s) described by the line(s) that follow.
static const unsigned char percentEncodeClassTable[256] = {
// 0x00-0x1F (control characters, four per line)
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
// 0x20 space
PercentEncodeDefault,
// 0x21 '!'
0,
// 0x22 '"'
PercentEncodeDefault,
// 0x23 '#'
PercentEncodeDefault,
// 0x24 '$'
0,
// 0x25 '%'
0,
// 0x26 '&'
0,
// 0x27 '\''
0,
// 0x28 '('
0,
// 0x29 ')'
0,
// 0x2A '*'
0,
// 0x2B '+'
0,
// 0x2C ','
0,
// 0x2D '-'
0,
// 0x2E '.'
0,
// 0x2F '/'
PercentEncodePassword,
// 0x30-0x39 '0'-'9'
0, 0, 0, 0,
0, 0, 0, 0,
0, 0,
// 0x3A ':'
PercentEncodeUsername,
// 0x3B ';'
0,
// 0x3C '<'
PercentEncodeDefault,
// 0x3D '='
0,
// 0x3E '>'
PercentEncodeDefault,
// 0x3F '?'
PercentEncodeDefault,
// 0x40 '@'
PercentEncodePassword,
// 0x41-0x5A 'A'-'Z'
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0,
// 0x5B '['
0,
// 0x5C '\\'
PercentEncodePassword,
// 0x5D ']'
0,
// 0x5E '^'
0,
// 0x5F '_'
0,
// 0x60 '`'
PercentEncodeDefault,
// 0x61-0x7A 'a'-'z'
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0,
// 0x7B '{'
0,
// 0x7C '|'
0,
// 0x7D '}'
0,
// 0x7E '~'
0,
// 0x7F DEL
PercentEncodeSimple,
// 0x80-0xFF (non-ASCII bytes, four per line)
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple,
PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple, PercentEncodeSimple
};
// Forward declarations of file-local helpers that URL::init() calls.
static int copyPathRemovingDots(char* dst, const char* src, int srcStart, int srcEnd);
static void encodeRelativeString(const String& rel, const TextEncoding&, CharBuffer& ouput);
static String substituteBackslashes(const String&);
// Character-class predicates. Each one looks the byte up in
// characterClassTable and tests a single URLCharacterClasses flag. The UChar
// overloads report false for any code unit above 0xff, which the table does
// not cover.
static inline bool isSchemeFirstChar(char c)
{
    const unsigned char index = static_cast<unsigned char>(c);
    return characterClassTable[index] & SchemeFirstChar;
}

static inline bool isSchemeFirstChar(UChar c)
{
    if (c > 0xff)
        return false;
    return characterClassTable[c] & SchemeFirstChar;
}

static inline bool isSchemeChar(char c)
{
    const unsigned char index = static_cast<unsigned char>(c);
    return characterClassTable[index] & SchemeChar;
}

static inline bool isSchemeChar(UChar c)
{
    if (c > 0xff)
        return false;
    return characterClassTable[c] & SchemeChar;
}

static inline bool isUserInfoChar(unsigned char c)
{
    return characterClassTable[c] & UserInfoChar;
}

static inline bool isHostnameChar(unsigned char c)
{
    return characterClassTable[c] & HostnameChar;
}

static inline bool isIPv6Char(unsigned char c)
{
    return characterClassTable[c] & IPv6Char;
}

static inline bool isPathSegmentEndChar(char c)
{
    const unsigned char index = static_cast<unsigned char>(c);
    return characterClassTable[index] & PathSegmentEndChar;
}

static inline bool isPathSegmentEndChar(UChar c)
{
    if (c > 0xff)
        return false;
    return characterClassTable[c] & PathSegmentEndChar;
}

static inline bool isBadChar(unsigned char c)
{
    return characterClassTable[c] & BadChar;
}
// Compares one scheme character of a URL against one character of an
// expected scheme, ignoring ASCII case. |schemeCharacter| must already have
// bit 0x20 set (asserted), so only |character| needs folding.
static inline bool isSchemeCharacterMatchIgnoringCase(char character, char schemeCharacter)
{
    ASSERT(isSchemeChar(character));
    ASSERT(schemeCharacter & 0x20);
    ASSERT(isASCIILower(schemeCharacter) || (!isASCIIUpper(schemeCharacter) && isSchemeChar(schemeCharacter)));
    const int foldedCharacter = character | 0x20;
    return foldedCharacter == schemeCharacter;
}
// Forward declaration of the overload that takes an explicit percent-encoding
// class; URL::setUser() and URL::setPass() call it.
String encodeWithURLEscapeSequences(const String& notEncodedString, PercentEncodeCharacterClass whatToEncode);
static void copyASCII(const String& string, char* dest)
{
if (string.isEmpty())
return;
if (string.is8Bit())
memcpy(dest, string.characters8(), string.length());
else {
const UChar* src = string.characters16();
size_t length = string.length();
for (size_t i = 0; i < length; i++)
dest[i] = static_cast<char>(src[i]);
}
}
// Fills |buffer| with |base| followed by the first |len| bytes of |rel| and a
// terminating NUL. Any previous contents of |buffer| are replaced.
static void appendASCII(const String& base, const char* rel, size_t len, CharBuffer& buffer)
{
    const size_t baseLength = base.length();
    buffer.resize(baseLength + len + 1);

    char* out = buffer.data();
    copyASCII(base, out);
    memcpy(out + baseLength, rel, len);
    buffer[buffer.size() - 1] = '\0';
}
// Returns the index of the first character at or after |startPosition| that
// equals any character of the NUL-terminated set |target|, or -1 if there is
// none.
static int findFirstOf(StringView string, unsigned startPosition, const char* target)
{
    const unsigned length = string.length();
    for (unsigned position = startPosition; position < length; ++position) {
        for (const char* candidate = target; *candidate; ++candidate) {
            if (string[position] == *candidate)
                return position;
        }
    }
    return -1;
}
// Debug-only sanity check for strings handed to URL::parse(const String&):
// asserts that |url| is pure ASCII and is either empty or starts with a
// character that may begin a scheme. ASSERT_UNUSED names |url| as its first
// argument in both checks.
static inline void checkEncodedString(const String& url)
{
ASSERT_UNUSED(url, url.containsOnlyASCII());
ASSERT_UNUSED(url, url.isEmpty() || isSchemeFirstChar(url[0]));
}
// Member overload that forwards to the namespace-level
// WebCore::protocolIs(string, protocol).
inline bool URL::protocolIs(const String& string, const char* protocol)
{
    const bool matches = WebCore::protocolIs(string, protocol);
    return matches;
}
// Marks this URL as invalid and resets every component offset to 0.
// m_string is left untouched; callers set it before calling this.
void URL::invalidate()
{
    m_isValid = false;
    m_protocolIsInHTTPFamily = false;

    // Scheme and authority offsets.
    m_schemeEnd = 0;
    m_userStart = 0;
    m_userEnd = 0;
    m_passwordEnd = 0;
    m_hostEnd = 0;
    m_portEnd = 0;

    // Path, query and fragment offsets.
    m_pathAfterLastSlash = 0;
    m_pathEnd = 0;
    m_queryEnd = 0;
    m_fragmentEnd = 0;
}
// Constructs a URL from a string that is expected to already be in parsed
// (canonical) form; the assertion checks that parsing left it unchanged.
URL::URL(ParsedURLStringTag, const String& url)
{
parse(url);
ASSERT(url == m_string);
}
// Resolves |relative| against |base|, passing UTF8Encoding() to init() as the
// encoding for non-ASCII input.
URL::URL(const URL& base, const String& relative)
{
init(base, relative, UTF8Encoding());
}
// Resolves |relative| against |base|, passing init() the encoding that
// |encoding|.encodingForFormSubmission() returns.
URL::URL(const URL& base, const String& relative, const TextEncoding& encoding)
{
init(base, relative, encoding.encodingForFormSubmission());
}
// Returns true for the space character and every byte below it. Because |c|
// is unsigned, bytes >= 0x80 are never reported as trimmable.
static bool shouldTrimFromURL(unsigned char c)
{
    return !(c > ' ');
}
// Resolves |relative| against |base| and parses the result into this URL.
//
// - If |base| is non-empty but invalid, |relative| is stored verbatim and this
//   URL is marked invalid.
// - Non-ASCII input is first converted to bytes by encodeRelativeString()
//   using |encoding|.
// - Leading and trailing bytes <= ' ' are trimmed.
// - Input with its own scheme is parsed as an absolute URL, unless the scheme
//   equals the base's, the colon is not followed by '/', and the base is
//   hierarchical; then the "scheme:" prefix is dropped and the rest is
//   resolved relative to the base.
void URL::init(const URL& base, const String& relative, const TextEncoding& encoding)
{
    // Allow resolution against an empty base, but not against an invalid one.
    if (!base.m_isValid && !base.isEmpty()) {
        m_string = relative;
        invalidate();
        return;
    }

    // Backslashes are rewritten by substituteBackslashes(), except in
    // javascript: and data: URLs.
    String rel = relative;
    if (rel.contains('\\') && !(protocolIsJavaScript(rel) || protocolIs(rel, "data")))
        rel = substituteBackslashes(rel);

    bool allASCII = rel.containsOnlyASCII();
    CharBuffer strBuffer;
    char* str;
    size_t len;
    if (allASCII) {
        len = rel.length();
        strBuffer.resize(len + 1);
        copyASCII(rel, strBuffer.data());
        strBuffer[len] = 0;
        str = strBuffer.data();
    } else {
        encodeRelativeString(rel, encoding, strBuffer);
        str = strBuffer.data();
        len = strlen(str);
    }

    // Trim leading whitespace and control characters.
    while (len && shouldTrimFromURL(*str)) {
        str++;
        --len;
    }

    // Trim trailing whitespace and control characters.
    while (len && shouldTrimFromURL(str[len - 1]))
        str[--len] = '\0';

    // Detect a leading "scheme:".
    bool absolute = false;
    char* p = str;
    if (isSchemeFirstChar(*p)) {
        ++p;
        while (isSchemeChar(*p)) {
            ++p;
        }
        if (*p == ':') {
            if (p[1] != '/' && equalIgnoringCase(base.protocol(), String(str, p - str)) && base.isHierarchical()) {
                // Same scheme as the base with no slash after the colon: drop
                // the "scheme:" prefix and resolve the remainder relatively.
                // len must shrink together with str; otherwise the
                // appendASCII() calls below would copy bytes that lie past the
                // string's terminator (and possibly past strBuffer).
                len -= static_cast<size_t>(p + 1 - str);
                str = p + 1;
            } else
                absolute = true;
        }
    }

    CharBuffer parseBuffer;

    if (absolute) {
        parse(str, &relative);
    } else {
        // A non-hierarchical base can only be combined with a fragment.
        if (!base.isHierarchical()) {
            if (str[0] == '#') {
                appendASCII(base.m_string.left(base.m_queryEnd), str, len, parseBuffer);
                parse(parseBuffer.data(), &relative);
            } else {
                m_string = relative;
                invalidate();
            }
            return;
        }

        switch (str[0]) {
        case '\0':
            // Empty reference: the base itself, minus its fragment.
            *this = base;
            removeFragmentIdentifier();
            break;
        case '#': {
            // Fragment only: keep the base up to the end of its query.
            appendASCII(base.m_string.left(base.m_queryEnd), str, len, parseBuffer);
            parse(parseBuffer.data(), &relative);
            break;
        }
        case '?': {
            // Query (and maybe fragment): keep the base up to the end of its path.
            appendASCII(base.m_string.left(base.m_pathEnd), str, len, parseBuffer);
            parse(parseBuffer.data(), &relative);
            break;
        }
        case '/':
            if (str[1] == '/') {
                // Network-path reference: keep only "scheme:".
                appendASCII(base.m_string.left(base.m_schemeEnd + 1), str, len, parseBuffer);
                parse(parseBuffer.data(), &relative);
            } else {
                // Absolute-path reference: keep the base up to the end of its port.
                appendASCII(base.m_string.left(base.m_portEnd), str, len, parseBuffer);
                parse(parseBuffer.data(), &relative);
            }
            break;
        default:
            {
                // Relative-path reference: merge with the base's directory.
                const size_t bufferSize = base.m_pathEnd + 1 + len + 1;
                parseBuffer.resize(bufferSize);

                char* bufferPos = parseBuffer.data();
                char* bufferStart = bufferPos;

                // Copy the base up to (not including) its path.
                CharBuffer baseStringBuffer(base.m_string.length());
                copyASCII(base.m_string, baseStringBuffer.data());

                const char* baseString = baseStringBuffer.data();
                const char* baseStringStart = baseString;
                const char* pathStart = baseStringStart + base.m_portEnd;
                while (baseStringStart < pathStart)
                    *bufferPos++ = *baseStringStart++;
                char* bufferPathStart = bufferPos;

                // Find the end of the base's directory: everything up to and
                // including the last '/' of its path.
                const char* baseStringEnd = baseString + base.m_pathEnd;
                while (baseStringEnd > baseStringStart && baseStringEnd[-1] != '/')
                    baseStringEnd--;

                if (baseStringEnd == baseStringStart) {
                    // The base path has no '/'. Insert one unless the base is
                    // just "scheme:" or the reference has no path part.
                    if (base.m_schemeEnd + 1 != base.m_pathEnd && *str && *str != '?' && *str != '#')
                        *bufferPos++ = '/';
                } else {
                    bufferPos += copyPathRemovingDots(bufferPos, baseStringStart, 0, baseStringEnd - baseStringStart);
                }

                const char* relStringStart = str;
                const char* relStringPos = relStringStart;

                // Append the reference's path, resolving "." and ".." segments
                // that start right after a '/' in the output.
                while (*relStringPos && *relStringPos != '?' && *relStringPos != '#') {
                    if (relStringPos[0] == '.' && bufferPos[-1] == '/') {
                        if (isPathSegmentEndChar(relStringPos[1])) {
                            // "." segment: skip it and a following '/'.
                            relStringPos += 1;
                            if (relStringPos[0] == '/')
                                relStringPos++;
                            continue;
                        } else if (relStringPos[1] == '.' && isPathSegmentEndChar(relStringPos[2])) {
                            // ".." segment: skip it and rewind the output by
                            // one segment, never past the first path character.
                            relStringPos += 2;
                            if (relStringPos[0] == '/')
                                relStringPos++;
                            if (bufferPos > bufferPathStart + 1)
                                bufferPos--;
                            while (bufferPos > bufferPathStart + 1 && bufferPos[-1] != '/')
                                bufferPos--;
                            continue;
                        }
                    }

                    *bufferPos = *relStringPos;
                    relStringPos++;
                    bufferPos++;
                }

                // Copy the remaining query/fragment (if any); strncpy also
                // zero-fills the rest of the buffer, which terminates it.
                strncpy(bufferPos, relStringPos, bufferSize - (bufferPos - bufferStart));

                parse(parseBuffer.data(), &relative);

                ASSERT(strlen(parseBuffer.data()) + 1 <= parseBuffer.size());
                break;
            }
        }
    }
}
// Returns a copy of this URL whose string is replaced by the result of
// String::isolatedCopy().
URL URL::isolatedCopy() const
{
    URL copy = *this;
    String isolatedString = copy.m_string.isolatedCopy();
    copy.m_string = isolatedString;
    return copy;
}
// Returns the text after the last '/' of the path, ignoring one trailing
// slash. Returns a null String when there is no path or when the slash found
// lies before the start of the path.
String URL::lastPathComponent() const
{
    if (!hasPath())
        return String();

    // Index of the last path character, stepping back over one trailing '/'.
    unsigned lastIndex = m_pathEnd - 1;
    if (m_string[lastIndex] == '/')
        --lastIndex;

    size_t slashIndex = m_string.reverseFind('/', lastIndex);
    if (slashIndex < static_cast<unsigned>(m_portEnd))
        return String();

    const size_t componentStart = slashIndex + 1;
    return m_string.substring(componentStart, lastIndex - componentStart + 1);
}
// Returns the scheme: the first m_schemeEnd characters of the URL string
// (the ':' is not included).
String URL::protocol() const
{
return m_string.left(m_schemeEnd);
}
// Returns the substring between hostStart() and m_hostEnd.
String URL::host() const
{
    const int begin = hostStart();
    const int hostLength = m_hostEnd - begin;
    return m_string.substring(begin, hostLength);
}
// Returns the port number. Yields 0 when no port is given (no ':' after the
// host, or a ':' with nothing after it), and invalidPortNumber when the port
// text does not parse or exceeds maximumValidPortNumber.
unsigned short URL::port() const
{
    if (m_hostEnd == m_portEnd || m_hostEnd == m_portEnd - 1)
        return 0;

    // The port digits start one past the ':' that sits at m_hostEnd.
    bool parsedOk = false;
    unsigned portValue;
    if (m_string.is8Bit())
        portValue = charactersToUIntStrict(m_string.characters8() + m_hostEnd + 1, m_portEnd - m_hostEnd - 1, &parsedOk);
    else
        portValue = charactersToUIntStrict(m_string.characters16() + m_hostEnd + 1, m_portEnd - m_hostEnd - 1, &parsedOk);

    if (parsedOk && portValue <= maximumValidPortNumber)
        return portValue;
    return invalidPortNumber;
}
// Returns the user name with URL escape sequences decoded.
String URL::user() const
{
return decodeURLEscapeSequences(m_string.substring(m_userStart, m_userEnd - m_userStart));
}
// Returns the password with URL escape sequences decoded, or a null String
// when the URL has no password part.
String URL::pass() const
{
    if (m_passwordEnd != m_userEnd) {
        // Skip the ':' that separates user and password.
        return decodeURLEscapeSequences(m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1));
    }
    return String();
}
// Returns the user name exactly as it appears in the URL string (not decoded).
String URL::encodedUser() const
{
return m_string.substring(m_userStart, m_userEnd - m_userStart);
}
// Returns the password exactly as it appears in the URL string (not decoded),
// or a null String when the URL has no password part.
String URL::encodedPass() const
{
    if (m_passwordEnd != m_userEnd) {
        // Skip the ':' that separates user and password.
        return m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1);
    }
    return String();
}
// Returns the fragment without its leading '#', or a null String when the URL
// has no fragment.
String URL::fragmentIdentifier() const
{
    if (m_fragmentEnd != m_queryEnd) {
        // m_queryEnd is the position of the '#'.
        return m_string.substring(m_queryEnd + 1, m_fragmentEnd - (m_queryEnd + 1));
    }
    return String();
}
// True when the fragment part is non-empty in the string, i.e. the fragment
// end differs from the query end (a lone '#' counts).
bool URL::hasFragmentIdentifier() const
{
return m_fragmentEnd != m_queryEnd;
}
// Returns the URL string truncated at m_pathAfterLastSlash.
String URL::baseAsString() const
{
return m_string.left(m_pathAfterLastSlash);
}
#if !USE(CF)
// Returns the decoded path of a valid local-file URL, or a null String for
// any other URL. Only compiled when CF is not in use.
String URL::fileSystemPath() const
{
    if (isValid() && isLocalFile())
        return decodeURLEscapeSequences(path());
    return String();
}
#endif
#ifdef NDEBUG
// Release builds: no checking.
static inline void assertProtocolIsGood(const char*)
{
}
#else
// Debug builds: asserts that every character of |protocol| is printable
// ASCII (above space, below DEL) and is not an uppercase letter.
static void assertProtocolIsGood(const char* protocol)
{
    for (const char* p = protocol; *p; ++p) {
        ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z'));
    }
}
#endif
// Returns true when this URL is valid and its scheme equals |protocol|,
// ignoring ASCII case. |protocol| must be lowercase and must not be
// "javascript" (both asserted).
bool URL::protocolIs(const char* protocol) const
{
    assertProtocolIsGood(protocol);
    ASSERT(!equalIgnoringCase(protocol, String("javascript")));

    if (!m_isValid)
        return false;

    // Every scheme character must match, and |protocol| must not end early.
    int index = 0;
    while (index < m_schemeEnd) {
        const char expected = protocol[index];
        if (!expected)
            return false;
        if (!isSchemeCharacterMatchIgnoringCase(m_string[index], expected))
            return false;
        ++index;
    }

    // |protocol| must end exactly where the scheme ends.
    return !protocol[m_schemeEnd];
}
// Returns the query without its leading '?', or a null String when the URL
// has no query part.
String URL::query() const
{
    if (m_queryEnd != m_pathEnd) {
        // m_pathEnd is the position of the '?'.
        return m_string.substring(m_pathEnd + 1, m_queryEnd - (m_pathEnd + 1));
    }
    return String();
}
// Returns the path: the characters from the end of the port to m_pathEnd.
String URL::path() const
{
return m_string.substring(m_portEnd, m_pathEnd - m_portEnd);
}
// Replaces the scheme with the part of |s| before its first ':'. Returns
// false, leaving the URL unchanged, when that part is not a valid protocol.
// For an invalid URL the new scheme and a ':' are prepended to the whole
// stored string.
bool URL::setProtocol(const String& s)
{
    String newProtocol = s.substring(0, s.find(':'));
    if (!isValidProtocol(newProtocol))
        return false;

    if (m_isValid)
        parse(newProtocol + m_string.substring(m_schemeEnd));
    else
        parse(newProtocol + ':' + m_string);
    return true;
}
// Replaces the host with |s| and re-parses. Does nothing on an invalid URL.
void URL::setHost(const String& s)
{
    if (!m_isValid)
        return;

    // When the authority starts right after "scheme:", there is no "//" yet.
    const char* authorityPrefix = (m_userStart == m_schemeEnd + 1) ? "//" : "";
    parse(m_string.left(hostStart()) + authorityPrefix + s + m_string.substring(m_hostEnd));
}
// Removes the ":port" part, if any, and re-parses.
void URL::removePort()
{
    if (m_hostEnd != m_portEnd)
        parse(m_string.left(m_hostEnd) + m_string.substring(m_portEnd));
}
// Sets the port to |i| and re-parses, adding the ':' separator when the URL
// had none. Does nothing on an invalid URL.
void URL::setPort(unsigned short i)
{
    if (!m_isValid)
        return;

    const bool hasColon = m_portEnd != m_hostEnd;
    // With an existing ':' the digits start one past m_hostEnd.
    const int portStart = hasColon ? m_hostEnd + 1 : m_hostEnd;
    parse(m_string.left(portStart) + (hasColon ? "" : ":") + String::number(i) + m_string.substring(m_portEnd));
}
// Replaces everything from the start of the host to the end of the port with
// |hostAndPort| and re-parses. Does nothing on an invalid URL.
void URL::setHostAndPort(const String& hostAndPort)
{
    if (!m_isValid)
        return;

    // When the authority starts right after "scheme:", there is no "//" yet.
    const char* authorityPrefix = (m_userStart == m_schemeEnd + 1) ? "//" : "";
    parse(m_string.left(hostStart()) + authorityPrefix + hostAndPort + m_string.substring(m_portEnd));
}
// Sets or clears the user name and re-parses. A non-empty |user| is
// percent-encoded with the PercentEncodeUsername class. Does nothing on an
// invalid URL.
void URL::setUser(const String& user)
{
    if (!m_isValid)
        return;

    int end = m_userEnd;

    if (user.isEmpty()) {
        // Also remove the '@' when neither user nor password remains.
        if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@')
            end += 1;
        // Avoid re-parsing when nothing changes.
        if (m_userStart != end)
            parse(m_string.left(m_userStart) + m_string.substring(end));
        return;
    }

    String u = encodeWithURLEscapeSequences(user, PercentEncodeUsername);
    if (m_userStart == m_schemeEnd + 1)
        u = "//" + u;
    // Add the '@' if there was none before.
    if (end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@'))
        u.append('@');
    parse(m_string.left(m_userStart) + u + m_string.substring(end));
}
// Sets or clears the password and re-parses. A non-empty |password| is
// percent-encoded with the PercentEncodePassword class. Does nothing on an
// invalid URL.
void URL::setPass(const String& password)
{
    if (!m_isValid)
        return;

    int end = m_passwordEnd;

    if (password.isEmpty()) {
        // Also remove the '@' when neither user nor password remains.
        if (m_userStart == m_userEnd && end != m_hostEnd && m_string[end] == '@')
            end += 1;
        // Avoid re-parsing when nothing changes.
        if (m_userEnd != end)
            parse(m_string.left(m_userEnd) + m_string.substring(end));
        return;
    }

    String p = ":" + encodeWithURLEscapeSequences(password, PercentEncodePassword) + "@";
    if (m_userEnd == m_schemeEnd + 1)
        p = "//" + p;
    // Drop the existing '@'; |p| already ends with one.
    if (end != m_hostEnd && m_string[end] == '@')
        end += 1;
    parse(m_string.left(m_userEnd) + p + m_string.substring(end));
}
// Replaces the fragment with "#" + |s| and re-parses. Does nothing on an
// invalid URL.
void URL::setFragmentIdentifier(const String& s)
{
    if (m_isValid)
        parse(m_string.left(m_queryEnd) + "#" + s);
}
// Drops everything after the query and re-parses. Does nothing on an invalid
// URL.
void URL::removeFragmentIdentifier()
{
    if (m_isValid)
        parse(m_string.left(m_queryEnd));
}
// Replaces the query and re-parses. A '?' is inserted in front of a non-null
// |query| that does not already start with one; a null |query| removes the
// query part. Does nothing on an invalid URL.
void URL::setQuery(const String& query)
{
    if (!m_isValid)
        return;

    const bool needsQuestionMark = !query.isNull() && (query.isEmpty() || query[0] != '?');
    if (needsQuestionMark)
        parse(m_string.left(m_pathEnd) + "?" + query + m_string.substring(m_queryEnd));
    else
        parse(m_string.left(m_pathEnd) + query + m_string.substring(m_queryEnd));
}
// Replaces the path with the percent-encoded form of |s| and re-parses. A
// leading '/' is added when |s| lacks one. Does nothing on an invalid URL.
void URL::setPath(const String& s)
{
    if (!m_isValid)
        return;

    String path = s;
    const bool startsWithSlash = !path.isEmpty() && path[0] == '/';
    if (!startsWithSlash)
        path = "/" + path;

    parse(m_string.left(m_portEnd) + encodeWithURLEscapeSequences(path) + m_string.substring(m_pathEnd));
}
// Decodes URL escape sequences in |string|, passing UTF8Encoding() as the
// text encoding.
String decodeURLEscapeSequences(const String& string)
{
return decodeEscapeSequences<URLEscapeSequence>(string, UTF8Encoding());
}
// Decodes URL escape sequences in |string| using the caller-supplied
// |encoding|.
String decodeURLEscapeSequences(const String& string, const TextEncoding& encoding)
{
return decodeEscapeSequences<URLEscapeSequence>(string, encoding);
}
// Writes '%' at |buffer|, advances |buffer| past it, then lets
// placeByteAsHex() append the hex form of |c|.
static void appendEscapedChar(char*& buffer, unsigned char c)
{
    *buffer = '%';
    ++buffer;
    placeByteAsHex(c, buffer);
}
// Appends |length| bytes starting at |strStart| to |buffer|, advancing
// |buffer| past the output. Bytes flagged BadChar are percent-escaped, except
// that '%' and '?' are copied unchanged and tab, LF and CR are dropped.
static void appendEscapingBadChars(char*& buffer, const char* strStart, size_t length)
{
    char* out = buffer;
    const char* const end = strStart + length;

    for (const char* in = strStart; in < end; ++in) {
        const unsigned char c = *in;

        if (!isBadChar(c) || c == '%' || c == '?') {
            *out++ = c;
            continue;
        }

        const bool isDroppedWhitespace = c == 0x09 || c == 0x0a || c == 0x0d;
        if (!isDroppedWhitespace)
            appendEscapedChar(out, c);
    }

    buffer = out;
}
// Appends |length| bytes starting at |strStart| to |buffer|, advancing
// |buffer| past the output. Tab, LF and CR are dropped; other bytes below
// 0x20 or at/above 127 are percent-escaped; everything else is copied as is.
static void escapeAndAppendNonHierarchicalPart(char*& buffer, const char* strStart, size_t length)
{
    char* out = buffer;
    const char* const end = strStart + length;

    for (const char* in = strStart; in < end; ++in) {
        const unsigned char c = *in;

        const bool isDroppedWhitespace = c == 0x09 || c == 0x0a || c == 0x0d;
        if (isDroppedWhitespace)
            continue;

        if (c >= 0x20 && c < 127)
            *out++ = c;
        else
            appendEscapedChar(out, c);
    }

    buffer = out;
}
// Copies the path src[srcStart, srcEnd) to |dst|, dropping "." segments and
// resolving ".." segments against the output written so far. ".." segments
// that would climb above the root are dropped.
//
// The output is always NUL-terminated. Returns the number of characters
// written, not counting the terminator. A non-empty input must start with
// '/' (asserted). |dst| needs room for (srcEnd - srcStart) + 1 bytes.
static int copyPathRemovingDots(char* dst, const char* src, int srcStart, int srcEnd)
{
    char* bufferPathStart = dst;

    // An empty path stays empty.
    if (srcStart != srcEnd) {
        const char* baseStringStart = src + srcStart;
        const char* baseStringEnd = src + srcEnd;
        const char* baseStringPos = baseStringStart;

        // Copy the leading slash into the destination.
        ASSERT(baseStringPos[0] == '/');
        *dst = *baseStringPos;
        baseStringPos++;
        dst++;

        while (baseStringPos < baseStringEnd) {
            if (baseStringPos[0] == '.' && dst[-1] == '/') {
                // The end-of-range tests come first so that src[srcEnd] is
                // never read; the outcome is the same as testing them second.
                if (baseStringPos + 1 == baseStringEnd || baseStringPos[1] == '/') {
                    // Skip over a "." segment.
                    baseStringPos += 2;
                    continue;
                }
                if (baseStringPos[1] == '.' && (baseStringPos + 2 == baseStringEnd || baseStringPos[2] == '/')) {
                    // Skip over a ".." segment and rewind the output by one
                    // segment, never past the leading slash.
                    baseStringPos += 3;
                    if (dst > bufferPathStart + 1)
                        dst--;
                    while (dst > bufferPathStart && dst[-1] != '/')
                        dst--;
                    continue;
                }
            }

            *dst = *baseStringPos;
            baseStringPos++;
            dst++;
        }
    }

    *dst = '\0';
    return dst - bufferPathStart;
}
// Returns true when the NUL-terminated |str| contains "/." or ".." anywhere,
// i.e. a '.' directly preceded by '/' or '.'.
static inline bool hasSlashDotOrDotDot(const char* str)
{
    const unsigned char* cursor = reinterpret_cast<const unsigned char*>(str);
    if (!*cursor)
        return false;

    // |previous| trails |cursor| by exactly one character.
    unsigned char previous = *cursor;
    for (++cursor; *cursor; ++cursor) {
        const unsigned char current = *cursor;
        if (current == '.' && (previous == '/' || previous == '.'))
            return true;
        previous = current;
    }
    return false;
}
// Parses |string|, which must already be ASCII-only (checked in debug builds
// by checkEncodedString). The characters are copied into a NUL-terminated
// buffer and handed to the const char* overload, with |string| passed along
// as the original string.
void URL::parse(const String& string)
{
    checkEncodedString(string);

    const unsigned length = string.length();
    CharBuffer buffer(length + 1);
    copyASCII(string, buffer.data());
    buffer[length] = '\0';

    parse(buffer.data(), &string);
}
#if PLATFORM(IOS)
// iOS-only switch, on by default. When it is off, the equal() template in
// this file lowercases the candidate before comparing, and URL::parse()
// lowercases the scheme only for http-family URLs.
static bool shouldCanonicalizeScheme = true;
// Sets the switch above.
void enableURLSchemeCanonicalization(bool enableSchemeCanonicalization)
{
shouldCanonicalizeScheme = enableSchemeCanonicalization;
}
#endif
// Compares the first |length| characters of |a| with the char array |b|.
// The caller must guarantee that |a| has at least |length| characters. On
// iOS with scheme canonicalization disabled, |a| is lowercased character by
// character before comparing.
template<size_t length>
static inline bool equal(const char* a, const char (&b)[length])
{
#if PLATFORM(IOS)
    if (!shouldCanonicalizeScheme) {
        size_t matched = 0;
        while (matched < length && toASCIILower(a[matched]) == b[matched])
            ++matched;
        return matched == length;
    }
#endif
    size_t matched = 0;
    while (matched < length && a[matched] == b[matched])
        ++matched;
    return matched == length;
}
// Length-checked comparison: true when |stringA| has exactly |lengthB|
// characters and they match the char array |stringB|.
template<size_t lengthB>
static inline bool equal(const char* stringA, size_t lengthA, const char (&stringB)[lengthB])
{
    if (lengthA != lengthB)
        return false;
    return equal(stringA, stringB);
}
// Returns true when |port| (digits, |portLength| long) is the default port
// of |scheme| (|schemeLength| long): ws/http -> 80, wss/https -> 443,
// ftp -> 21, gopher -> 70. Any other scheme yields false.
static inline bool isDefaultPortForScheme(const char* port, size_t portLength, const char* scheme, size_t schemeLength)
{
    // Dispatch on the scheme length first so that equal() never reads more
    // characters than the scheme has.
    if (schemeLength == 2)
        return equal(scheme, wsScheme) && equal(port, portLength, httpPort);

    if (schemeLength == 3) {
        if (equal(scheme, ftpScheme))
            return equal(port, portLength, ftpPort);
        if (equal(scheme, wssScheme))
            return equal(port, portLength, httpsPort);
        return false;
    }

    if (schemeLength == 4)
        return equal(scheme, httpScheme) && equal(port, portLength, httpPort);

    if (schemeLength == 5)
        return equal(scheme, httpsScheme) && equal(port, portLength, httpsPort);

    if (schemeLength == 6)
        return equal(scheme, gopherScheme) && equal(port, portLength, gopherPort);

    return false;
}
// True when the user-info part was closed by '@' but nothing lies between
// |hostStart| and |portEnd|, i.e. credentials are present without a host or
// port.
static inline bool hostPortIsEmptyButCredentialsArePresent(int hostStart, int portEnd, char userinfoEndChar)
{
    if (userinfoEndChar != '@')
        return false;
    return hostStart == portEnd;
}
// Returns true when |scheme| (|schemeLength| long) is one of ws, ftp, wss,
// http, https or gopher.
static bool isNonFileHierarchicalScheme(const char* scheme, size_t schemeLength)
{
    // Dispatch on the length first so that equal() never reads more
    // characters than the scheme has.
    if (schemeLength == 2)
        return equal(scheme, wsScheme);
    if (schemeLength == 3)
        return equal(scheme, ftpScheme) || equal(scheme, wssScheme);
    if (schemeLength == 4)
        return equal(scheme, httpScheme);
    if (schemeLength == 5)
        return equal(scheme, httpsScheme);
    if (schemeLength == 6)
        return equal(scheme, gopherScheme);
    return false;
}
// Returns true when |scheme| (|schemeLength| long) is one of ws, ftp, wss,
// http, file, https or gopher.
static bool isCanonicalHostnameLowercaseForScheme(const char* scheme, size_t schemeLength)
{
    // Dispatch on the length first so that equal() never reads more
    // characters than the scheme has.
    if (schemeLength == 2)
        return equal(scheme, wsScheme);
    if (schemeLength == 3)
        return equal(scheme, ftpScheme) || equal(scheme, wssScheme);
    if (schemeLength == 4)
        return equal(scheme, httpScheme) || equal(scheme, fileScheme);
    if (schemeLength == 5)
        return equal(scheme, httpsScheme);
    if (schemeLength == 6)
        return equal(scheme, gopherScheme);
    return false;
}
void URL::parse(const char* url, const String* originalString)
{
if (!url || url[0] == '\0') {
m_string = originalString ? *originalString : url;
invalidate();
return;
}
if (!isSchemeFirstChar(url[0])) {
m_string = originalString ? *originalString : url;
invalidate();
return;
}
int schemeEnd = 0;
while (isSchemeChar(url[schemeEnd]))
schemeEnd++;
if (url[schemeEnd] != ':') {
m_string = originalString ? *originalString : url;
invalidate();
return;
}
int userStart = schemeEnd + 1;
int userEnd;
int passwordStart;
int passwordEnd;
int hostStart;
int hostEnd;
int portStart;
int portEnd;
bool hierarchical = url[schemeEnd + 1] == '/';
bool hasSecondSlash = hierarchical && url[schemeEnd + 2] == '/';
bool isFile = schemeEnd == 4
&& isLetterMatchIgnoringCase(url[0], 'f')
&& isLetterMatchIgnoringCase(url[1], 'i')
&& isLetterMatchIgnoringCase(url[2], 'l')
&& isLetterMatchIgnoringCase(url[3], 'e');
m_protocolIsInHTTPFamily = isLetterMatchIgnoringCase(url[0], 'h')
&& isLetterMatchIgnoringCase(url[1], 't')
&& isLetterMatchIgnoringCase(url[2], 't')
&& isLetterMatchIgnoringCase(url[3], 'p')
&& (url[4] == ':' || (isLetterMatchIgnoringCase(url[4], 's') && url[5] == ':'));
if ((hierarchical && hasSecondSlash) || isNonFileHierarchicalScheme(url, schemeEnd)) {
if (hierarchical) {
userStart++;
if (hasSecondSlash) {
userStart++;
if (isNonFileHierarchicalScheme(url, schemeEnd)) {
while (url[userStart] == '/')
userStart++;
}
}
}
userEnd = userStart;
int colonPos = 0;
while (isUserInfoChar(url[userEnd])) {
if (url[userEnd] == ':' && colonPos == 0)
colonPos = userEnd;
userEnd++;
}
if (url[userEnd] == '@') {
if (colonPos != 0) {
passwordEnd = userEnd;
userEnd = colonPos;
passwordStart = colonPos + 1;
} else
passwordStart = passwordEnd = userEnd;
hostStart = passwordEnd + 1;
} else if (url[userEnd] == '[' || isPathSegmentEndChar(url[userEnd])) {
userEnd = userStart;
passwordStart = passwordEnd = userEnd;
hostStart = userStart;
} else {
m_string = originalString ? *originalString : url;
invalidate();
return;
}
hostEnd = hostStart;
if (url[hostEnd] == '[') {
hostEnd++;
while (isIPv6Char(url[hostEnd]))
hostEnd++;
if (url[hostEnd] == ']')
hostEnd++;
else {
m_string = originalString ? *originalString : url;
invalidate();
return;
}
} else {
while (isHostnameChar(url[hostEnd]))
hostEnd++;
}
if (url[hostEnd] == ':') {
portStart = portEnd = hostEnd + 1;
portEnd = portStart;
while (isASCIIDigit(url[portEnd]))
portEnd++;
} else
portStart = portEnd = hostEnd;
if (!isPathSegmentEndChar(url[portEnd])) {
m_string = originalString ? *originalString : url;
invalidate();
return;
}
if (hostPortIsEmptyButCredentialsArePresent(hostStart, portEnd, url[passwordEnd])) {
m_string = originalString ? *originalString : url;
invalidate();
return;
}
if (userStart == portEnd && !m_protocolIsInHTTPFamily && !isFile) {
userStart -= 2;
userEnd = userStart;
passwordStart = userEnd;
passwordEnd = passwordStart;
hostStart = passwordEnd;
hostEnd = hostStart;
portStart = hostEnd;
portEnd = hostEnd;
}
} else {
userEnd = userStart;
passwordStart = passwordEnd = userEnd;
hostStart = hostEnd = passwordEnd;
portStart = portEnd = hostEnd;
}
int pathStart = portEnd;
int pathEnd = pathStart;
while (url[pathEnd] && url[pathEnd] != '?' && url[pathEnd] != '#')
pathEnd++;
int queryStart = pathEnd;
int queryEnd = queryStart;
if (url[queryStart] == '?') {
while (url[queryEnd] && url[queryEnd] != '#')
queryEnd++;
}
int fragmentStart = queryEnd;
int fragmentEnd = fragmentStart;
if (url[fragmentStart] == '#') {
fragmentStart++;
fragmentEnd = fragmentStart;
while (url[fragmentEnd])
fragmentEnd++;
}
Vector<char, 4096> buffer(fragmentEnd * 3 + 1);
char *p = buffer.data();
const char *strPtr = url;
const char *schemeEndPtr = url + schemeEnd;
#if PLATFORM(IOS)
if (shouldCanonicalizeScheme || m_protocolIsInHTTPFamily) {
while (strPtr < schemeEndPtr)
*p++ = toASCIILower(*strPtr++);
} else {
while (strPtr < schemeEndPtr)
*p++ = *strPtr++;
}
#else
while (strPtr < schemeEndPtr)
*p++ = toASCIILower(*strPtr++);
#endif
m_schemeEnd = p - buffer.data();
bool hostIsLocalHost = portEnd - userStart == 9
&& isLetterMatchIgnoringCase(url[userStart], 'l')
&& isLetterMatchIgnoringCase(url[userStart+1], 'o')
&& isLetterMatchIgnoringCase(url[userStart+2], 'c')
&& isLetterMatchIgnoringCase(url[userStart+3], 'a')
&& isLetterMatchIgnoringCase(url[userStart+4], 'l')
&& isLetterMatchIgnoringCase(url[userStart+5], 'h')
&& isLetterMatchIgnoringCase(url[userStart+6], 'o')
&& isLetterMatchIgnoringCase(url[userStart+7], 's')
&& isLetterMatchIgnoringCase(url[userStart+8], 't');
bool degenerateFilePath = pathStart == pathEnd && (hostStart == hostEnd || hostIsLocalHost);
bool haveNonHostAuthorityPart = userStart != userEnd || passwordStart != passwordEnd || hostEnd != portEnd;
*p++ = ':';
if (isFile ? !degenerateFilePath : (haveNonHostAuthorityPart || hostStart != hostEnd)) {
*p++ = '/';
*p++ = '/';
m_userStart = p - buffer.data();
strPtr = url + userStart;
const char* userEndPtr = url + userEnd;
while (strPtr < userEndPtr) {
char c = *strPtr++;
ASSERT(isUserInfoChar(c));
*p++ = c;
}
m_userEnd = p - buffer.data();
if (passwordEnd != passwordStart) {
*p++ = ':';
strPtr = url + passwordStart;
const char* passwordEndPtr = url + passwordEnd;
while (strPtr < passwordEndPtr) {
char c = *strPtr++;
ASSERT(isUserInfoChar(c));
*p++ = c;
}
}
m_passwordEnd = p - buffer.data();
if (p - buffer.data() != m_userStart)
*p++ = '@';
if (!(isFile && hostIsLocalHost && !haveNonHostAuthorityPart)) {
strPtr = url + hostStart;
const char* hostEndPtr = url + hostEnd;
if (isCanonicalHostnameLowercaseForScheme(buffer.data(), m_schemeEnd)) {
while (strPtr < hostEndPtr) {
char c = toASCIILower(*strPtr++);
ASSERT(isHostnameChar(c) || c == '[' || c == ']' || c == ':');
*p++ = c;
}
} else {
while (strPtr < hostEndPtr) {
char c = *strPtr++;
ASSERT(isHostnameChar(c) || c == '[' || c == ']' || c == ':');
*p++ = c;
}
}
}
m_hostEnd = p - buffer.data();
if (hostEnd != portStart) {
const char* portStr = url + portStart;
size_t portLength = portEnd - portStart;
if ((portLength && !isDefaultPortForScheme(portStr, portLength, buffer.data(), m_schemeEnd))
|| (hostStart == hostEnd && hostEnd != portStart)) {
*p++ = ':';
const char* portEndPtr = url + portEnd;
while (portStr < portEndPtr)
*p++ = *portStr++;
}
}
m_portEnd = p - buffer.data();
} else {
if (isFile) {
ASSERT(degenerateFilePath);
*p++ = '/';
*p++ = '/';
}
m_userStart = m_userEnd = m_passwordEnd = m_hostEnd = m_portEnd = p - buffer.data();
}
if ((m_protocolIsInHTTPFamily || isFile) && pathEnd == pathStart)
*p++ = '/';
if (!hierarchical)
escapeAndAppendNonHierarchicalPart(p, url + pathStart, pathEnd - pathStart);
else if (!hasSlashDotOrDotDot(url))
appendEscapingBadChars(p, url + pathStart, pathEnd - pathStart);
else {
CharBuffer pathBuffer(pathEnd - pathStart + 1);
size_t length = copyPathRemovingDots(pathBuffer.data(), url, pathStart, pathEnd);
appendEscapingBadChars(p, pathBuffer.data(), length);
}
m_pathEnd = p - buffer.data();
int i;
for (i = m_pathEnd; i > m_portEnd; --i) {
if (buffer[i - 1] == '/')
break;
}
m_pathAfterLastSlash = i;
appendEscapingBadChars(p, url + queryStart, queryEnd - queryStart);
m_queryEnd = p - buffer.data();
if (fragmentEnd != queryEnd) {
*p++ = '#';
escapeAndAppendNonHierarchicalPart(p, url + fragmentStart, fragmentEnd - fragmentStart);
}
m_fragmentEnd = p - buffer.data();
ASSERT(p - buffer.data() <= static_cast<int>(buffer.size()));
ASSERT(buffer.size() > 0);
if (originalString && equal(originalString->impl(), buffer.data(), m_fragmentEnd))
m_string = *originalString;
else
m_string = String(buffer.data(), m_fragmentEnd);
m_isValid = true;
}
bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b)
{
if (a.m_queryEnd != b.m_queryEnd)
return false;
unsigned queryLength = a.m_queryEnd;
for (unsigned i = 0; i < queryLength; ++i)
if (a.string()[i] != b.string()[i])
return false;
return true;
}
bool protocolHostAndPortAreEqual(const URL& a, const URL& b)
{
if (a.m_schemeEnd != b.m_schemeEnd)
return false;
int hostStartA = a.hostStart();
int hostLengthA = a.hostEnd() - hostStartA;
int hostStartB = b.hostStart();
int hostLengthB = b.hostEnd() - b.hostStart();
if (hostLengthA != hostLengthB)
return false;
for (int i = 0; i < a.m_schemeEnd; ++i)
if (a.string()[i] != b.string()[i])
return false;
for (int i = 0; i < hostLengthA; ++i)
if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
return false;
if (a.port() != b.port())
return false;
return true;
}
// Converts |notEncodedString| to UTF-8 and escapes (via appendEscapedChar)
// every byte whose entry in percentEncodeClassTable is at least
// |whatToEncode|; all other bytes are copied through unchanged.
String encodeWithURLEscapeSequences(const String& notEncodedString, PercentEncodeCharacterClass whatToEncode)
{
    const CString utf8 = notEncodedString.utf8();
    const size_t inputLength = utf8.length();

    // Sized at three output bytes per input byte, plus one.
    CharBuffer encoded(inputLength * 3 + 1);
    char* out = encoded.data();

    const char* in = utf8.data();
    for (size_t i = 0; i < inputLength; ++i) {
        const unsigned char byte = in[i];
        if (percentEncodeClassTable[byte] >= whatToEncode)
            appendEscapedChar(out, byte);
        else
            *out++ = byte;
    }

    ASSERT(out - encoded.data() <= static_cast<int>(encoded.size()));
    return String(encoded.data(), out - encoded.data());
}
// Converts |notEncodedString| to UTF-8 and escapes (via appendEscapedChar)
// every byte for which isBadChar() is true; all other bytes are copied
// through unchanged.
String encodeWithURLEscapeSequences(const String& notEncodedString)
{
    const CString utf8 = notEncodedString.utf8();
    const size_t inputLength = utf8.length();

    // Sized at three output bytes per input byte, plus one.
    CharBuffer encoded(inputLength * 3 + 1);
    char* out = encoded.data();

    const char* in = utf8.data();
    for (size_t i = 0; i < inputLength; ++i) {
        const unsigned char byte = in[i];
        if (isBadChar(byte))
            appendEscapedChar(out, byte);
        else
            *out++ = byte;
    }

    ASSERT(out - encoded.data() <= static_cast<int>(encoded.size()));
    return String(encoded.data(), out - encoded.data());
}
// Returns whether every character of |string| is ASCII, dispatching on the
// view's 8-bit or 16-bit character storage.
static bool containsOnlyASCII(StringView string)
{
    return string.is8Bit()
        ? charactersAreAllASCII(string.characters8(), string.length())
        : charactersAreAllASCII(string.characters16(), string.length());
}
// Returns true when |stringURL| begins with |protocol| (letters compared
// ignoring case) immediately followed by ':'. Never reads past the end of
// the view: a view that ends before the ':' yields false.
static bool protocolIs(StringView stringURL, const char* protocol)
{
    assertProtocolIsGood(protocol);

    const unsigned urlLength = stringURL.length();
    unsigned index = 0;
    while (index < urlLength && protocol[index]) {
        if (!isLetterMatchIgnoringCase(stringURL[index], protocol[index]))
            return false;
        ++index;
    }

    // Either the view ran out (no match) or the whole protocol matched and
    // the next character decides.
    return index < urlLength && stringURL[index] == ':';
}
// Appends the host name |string| to |buffer|, converting it with ICU's
// uidna_IDNToASCII when it contains non-ASCII characters.
// Host names that are all ASCII, or longer than the fixed conversion buffer,
// are appended verbatim. If the conversion reports anything other than
// U_ZERO_ERROR, nothing is appended.
static void appendEncodedHostname(UCharBuffer& buffer, StringView string)
{
    // Capacity of the on-stack buffer that receives the converted name.
    const unsigned hostnameBufferLength = 2048;

    const bool needsConversion = string.length() <= hostnameBufferLength && !containsOnlyASCII(string);
    if (!needsConversion) {
        append(buffer, string);
        return;
    }

    UChar hostnameBuffer[hostnameBufferLength];
    UErrorCode error = U_ZERO_ERROR;
    int32_t numCharactersConverted = uidna_IDNToASCII(string.upconvertedCharacters(), string.length(), hostnameBuffer,
        hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, 0, &error);
    if (error != U_ZERO_ERROR)
        return;

    buffer.append(hostnameBuffer, numCharactersConverted);
}
// Collects the [start, end) character offsets of every host name found in
// |string| into |nameRanges| (which is cleared first).
// A host name starts right after an '@' and runs up to the next '>', ',' or
// '?', or to the end of the string. Double-quoted sections are skipped so
// that characters inside them are not mistaken for delimiters, and scanning
// stops at the first '?' outside a quoted section.
static void findHostnamesInMailToURL(StringView string, Vector<std::pair<int, int>>& nameRanges)
{
nameRanges.clear();
// p is the offset at which the next search begins.
int p = 0;
while (1) {
// Find whichever comes first: a quoted string, a host name marker, or the '?' that ends the scan.
int hostnameOrStringStart = findFirstOf(string, p, "\"@?");
if (hostnameOrStringStart == -1)
return;
UChar c = string[hostnameOrStringStart];
p = hostnameOrStringStart + 1;
if (c == '?')
return;
if (c == '@') {
// The host name begins right after the '@'; look for its terminator.
int hostnameStart = p;
int hostnameEnd = findFirstOf(string, p, ">,?");
bool done;
if (hostnameEnd == -1) {
// No terminator: the host name extends to the end of the string.
hostnameEnd = string.length();
done = true;
} else {
// Resume ON the terminator (not after it) so that a '?' here is seen by the outer search.
p = hostnameEnd;
done = false;
}
nameRanges.append(std::make_pair(hostnameStart, hostnameEnd));
if (done)
return;
} else {
// Skip over a quoted string, honoring backslash escapes.
ASSERT(c == '"');
while (1) {
int escapedCharacterOrStringEnd = findFirstOf(string, p, "\"\\");
// Unterminated quoted string: nothing more to find.
if (escapedCharacterOrStringEnd == -1)
return;
c = string[escapedCharacterOrStringEnd];
p = escapedCharacterOrStringEnd + 1;
// Closing quote: go back to looking for host names.
if (c == '"')
break;
ASSERT(c == '\\');
// A backslash as the very last character has nothing to escape.
if (p == static_cast<int>(string.length()))
return;
// Step over the escaped character.
++p;
}
}
}
}
// Locates the host name of a URL of the form "scheme://[userinfo@]host...".
// The first ':' in |string| must be followed by "//" and be preceded only by
// valid scheme characters; otherwise false is returned and the output
// offsets are left untouched.
// On success |startOffset| / |endOffset| receive the [start, end) range of
// the host name: it ends at the first ':' or non-null path-segment-end
// character after "://" (or at the end of the string), and starts after an
// '@' when one occurs at or before that end.
static bool findHostnameInHierarchicalURL(StringView string, int& startOffset, int& endOffset)
{
    const int stringLength = static_cast<int>(string.length());

    const int separator = findFirstOf(string, 0, ":");
    if (separator == -1 || separator + 2 >= stringLength)
        return false;
    if (string[separator + 1] != '/' || string[separator + 2] != '/')
        return false;

    // Everything before the separator has to form a valid scheme.
    if (!isSchemeFirstChar(string[0]))
        return false;
    for (int schemeIndex = 1; schemeIndex < separator; ++schemeIndex) {
        if (!isSchemeChar(string[schemeIndex]))
            return false;
    }

    const int authorityStart = separator + 3;

    // Advance to the character that terminates the host name, if any.
    int hostnameEnd = authorityStart;
    while (hostnameEnd < stringLength) {
        const UChar c = string[hostnameEnd];
        if (c == ':' || (isPathSegmentEndChar(c) && c != 0))
            break;
        ++hostnameEnd;
    }

    // An '@' located after the host name's end does not delimit user info.
    const int userInfoTerminator = findFirstOf(string, authorityStart, "@");
    const bool hasUserInfo = userInfoTerminator != -1 && userInfoTerminator <= hostnameEnd;

    startOffset = hasUserInfo ? userInfoTerminator + 1 : authorityStart;
    endOffset = hostnameEnd;
    return true;
}
// Rebuilds |string| into |buffer| (cleared first), passing every host name
// through appendEncodedHostname and copying all other text unchanged.
// mailto: URLs may contain several host names; other URLs contain at most
// one, located by findHostnameInHierarchicalURL.
static void encodeHostnames(StringView string, UCharBuffer& buffer)
{
    buffer.clear();

    if (!protocolIs(string, "mailto")) {
        int hostStart = 0;
        int hostEnd = 0;
        if (!findHostnameInHierarchicalURL(string, hostStart, hostEnd)) {
            // No host name found: copy the input through untouched.
            append(buffer, string);
            return;
        }

        append(buffer, string.substring(0, hostStart));
        appendEncodedHostname(buffer, string.substring(hostStart, hostEnd - hostStart));
        append(buffer, string.substring(hostEnd));
        return;
    }

    Vector<std::pair<int, int>> hostnameRanges;
    findHostnamesInMailToURL(string, hostnameRanges);

    // Alternate between verbatim text and encoded host names.
    int copiedUpTo = 0;
    for (const std::pair<int, int>& range : hostnameRanges) {
        append(buffer, string.substring(copiedUpTo, range.first - copiedUpTo));
        appendEncodedHostname(buffer, string.substring(range.first, range.second - range.first));
        copiedUpTo = range.second;
    }

    // Whatever follows the last host name.
    append(buffer, string.substring(copiedUpTo));
}
static void encodeRelativeString(const String& rel, const TextEncoding& encoding, CharBuffer& output)
{
UCharBuffer s;
encodeHostnames(rel, s);
TextEncoding pathEncoding(UTF8Encoding());
int pathEnd = -1;
if (encoding != pathEncoding && encoding.isValid() && !protocolIs(rel, "mailto") && !protocolIs(rel, "data") && !protocolIsJavaScript(rel)) {
pathEnd = findFirstOf(StringView(s.data(), s.size()), 0, "#?");
}
if (pathEnd == -1) {
CString decoded = pathEncoding.encode(StringView(s.data(), s.size()), URLEncodedEntitiesForUnencodables);
output.resize(decoded.length());
memcpy(output.data(), decoded.data(), decoded.length());
} else {
CString pathDecoded = pathEncoding.encode(StringView(s.data(), pathEnd), URLEncodedEntitiesForUnencodables);
CString otherDecoded = encoding.encode(StringView(s.data() + pathEnd, s.size() - pathEnd), URLEncodedEntitiesForUnencodables);
output.resize(pathDecoded.length() + otherDecoded.length());
memcpy(output.data(), pathDecoded.data(), pathDecoded.length());
memcpy(output.data() + pathDecoded.length(), otherDecoded.data(), otherDecoded.length());
}
output.append('\0'); }
// Returns |string| with every backslash that appears before the first '?' or
// '#' replaced by a forward slash; text from that delimiter on is unchanged.
static String substituteBackslashes(const String& string)
{
    const size_t questionPos = string.find('?');
    const size_t hashPos = string.find('#');

    // The replaced prefix ends at whichever delimiter comes first, or at the
    // end of the string when neither is present.
    unsigned pathEnd = string.length();
    const bool hashComesFirst = hashPos != notFound && (questionPos == notFound || questionPos > hashPos);
    if (hashComesFirst)
        pathEnd = hashPos;
    else if (questionPos != notFound)
        pathEnd = questionPos;

    String path = string.left(pathEnd);
    path.replace('\\', '/');
    return path + string.substring(pathEnd);
}
// Returns true for a valid URL whose scheme separator ':' is immediately
// followed by '/'. Invalid URLs are never hierarchical.
bool URL::isHierarchical() const
{
    if (m_isValid) {
        ASSERT(m_string[m_schemeEnd] == ':');
        const bool slashFollowsScheme = m_string[m_schemeEnd + 1] == '/';
        return slashFollowsScheme;
    }
    return false;
}
// Resizes |buffer| to the length of the URL string and fills it using
// copyASCII. No terminating null is added.
// NOTE(review): presumably copyASCII narrows each character to one byte,
// which would be lossy for non-ASCII content - confirm.
void URL::copyToBuffer(Vector<char, 512>& buffer) const
{
    const unsigned stringLength = m_string.length();
    buffer.resize(stringLength);
    copyASCII(m_string, buffer.data());
}
// Returns true when |url| begins with |protocol| (letters compared ignoring
// case) immediately followed by ':'.
// NOTE(review): |url| is indexed without a length check, so this relies on
// String::operator[] tolerating out-of-range indices - confirm.
bool protocolIs(const String& url, const char* protocol)
{
    assertProtocolIsGood(protocol);

    int index = 0;
    while (protocol[index]) {
        if (!isLetterMatchIgnoringCase(url[index], protocol[index]))
            return false;
        ++index;
    }

    // The whole protocol matched; it must be terminated by a colon.
    return url[index] == ':';
}
// Returns true when |protocol| is non-empty, its first character satisfies
// isSchemeFirstChar and every following character satisfies isSchemeChar.
bool isValidProtocol(const String& protocol)
{
    if (protocol.isEmpty() || !isSchemeFirstChar(protocol[0]))
        return false;

    const unsigned protocolLength = protocol.length();
    unsigned index = 1;
    while (index < protocolLength && isSchemeChar(protocol[index]))
        ++index;

    // Reaching the end means no invalid character was encountered.
    return index == protocolLength;
}
#ifndef NDEBUG
void URL::print() const
{
printf("%s\n", m_string.utf8().data());
}
#endif
// Returns the string of a copy of this URL after its user and password have
// been set to null strings and its fragment identifier removed. This URL
// itself is not modified.
String URL::strippedForUseAsReferrer() const
{
    URL stripped(*this);

    // Clear the credentials, then the fragment.
    stripped.setUser(String());
    stripped.setPass(String());
    stripped.removeFragmentIdentifier();

    return stripped.string();
}
bool URL::isLocalFile() const
{
return protocolIs("file");
}
// Returns true when |url| starts with the "javascript" protocol followed by
// ':' (see protocolIs).
bool protocolIsJavaScript(const String& url)
{
    const char* const javaScriptProtocol = "javascript";
    return protocolIs(url, javaScriptProtocol);
}
bool protocolIsInHTTPFamily(const String& url)
{
return isLetterMatchIgnoringCase(url[0], 'h')
&& isLetterMatchIgnoringCase(url[1], 't')
&& isLetterMatchIgnoringCase(url[2], 't')
&& isLetterMatchIgnoringCase(url[3], 'p')
&& (url[4] == ':' || (isLetterMatchIgnoringCase(url[4], 's') && url[5] == ':'));
}
// Returns a reference to a shared URL object constructed from "about:blank".
// NOTE(review): presumably DEPRECATED_DEFINE_STATIC_LOCAL creates the object
// once, on first call - confirm against the macro definition.
const URL& blankURL()
{
DEPRECATED_DEFINE_STATIC_LOCAL(URL, staticBlankURL, (ParsedURLString, "about:blank"));
return staticBlankURL;
}
bool URL::isBlankURL() const
{
return protocolIs("about");
}
// Returns whether |port| is the port registered below for |protocol|
// (http 80, https 443, ftp 21, ftps 990). An empty protocol never matches.
// The map is keyed with CaseFoldingHash.
// NOTE(review): presumably get() yields 0 for a protocol that is not in the
// map, in which case port 0 would compare equal for unknown protocols -
// confirm against HashMap::get.
bool isDefaultPortForProtocol(unsigned short port, const String& protocol)
{
if (protocol.isEmpty())
return false;
typedef HashMap<String, unsigned, CaseFoldingHash> DefaultPortsMap;
DEPRECATED_DEFINE_STATIC_LOCAL(DefaultPortsMap, defaultPorts, ());
// Populate the table lazily, the first time it is found empty.
if (defaultPorts.isEmpty()) {
defaultPorts.set("http", 80);
defaultPorts.set("https", 443);
defaultPorts.set("ftp", 21);
defaultPorts.set("ftps", 990);
}
return defaultPorts.get(protocol) == port;
}
// Returns whether the port of |url| may be used.
// A port of 0 is always allowed, as is any port missing from the blocked
// list. Blocked ports are still allowed in two cases: ports 21 and 22 for
// ftp URLs, and any port for file URLs.
bool portAllowed(const URL& url)
{
    const unsigned short port = url.port();
    if (!port)
        return true;

    // Must stay in ascending order: it is searched with binary_search.
    static const unsigned short blockedPortList[] = {
        1, 7, 9, 11, 13, 15, 17, 19, 20, 21, 22, 23, 25, 37, 42, 43,
        53, 77, 79, 87, 95, 101, 102, 103, 104, 109, 110, 111, 113, 115, 117, 119,
        123, 135, 139, 143, 179, 389, 465, 512, 513, 514, 515, 526, 530, 531, 532, 540,
        556, 563, 587, 601, 636, 993, 995, 2049, 3659, 4045, 6000, 6665, 6666, 6667, 6668, 6669,
        invalidPortNumber,
    };
    const unsigned short* const blockedPortListEnd = blockedPortList + WTF_ARRAY_LENGTH(blockedPortList);

#ifndef NDEBUG
    // Verify once, in debug builds, that the list is strictly ascending.
    static bool checkedPortList = false;
    if (!checkedPortList) {
        for (const unsigned short* entry = blockedPortList; entry != blockedPortListEnd - 1; ++entry)
            ASSERT(entry[0] < entry[1]);
        checkedPortList = true;
    }
#endif

    const bool isBlocked = std::binary_search(blockedPortList, blockedPortListEnd, port);
    if (!isBlocked)
        return true;

    const bool isPort21Or22 = port == 21 || port == 22;
    if (isPort21Or22 && url.protocolIs("ftp"))
        return true;

    return url.protocolIs("file");
}
// Extracts the lowercased MIME type from a data: URL of the form
// "data:[<mediatype>][;<parameter>...],<payload>".
// Returns "text/plain" when the media type is empty, and an empty string
// when the URL contains neither ';' nor ','.
String mimeTypeFromDataURL(const String& url)
{
    ASSERT(protocolIs(url, "data"));

    // The media type ends at the first ';' (start of the parameters) or the
    // first ',' (start of the payload), whichever comes first. Looking for
    // ';' alone would pick up a semicolon inside the payload, e.g. in
    // "data:text/html,a;b".
    size_t index = url.find(';');
    const size_t commaIndex = url.find(',');
    if (commaIndex != notFound && (index == notFound || commaIndex < index))
        index = commaIndex;

    if (index == notFound)
        return "";

    // 5 is the length of the "data:" prefix.
    if (index > 5)
        return url.substring(5, index - 5).lower();

    // Data URLs with no MIME type are considered text/plain.
    return "text/plain";
}
// Looks up a MIME type in MIMETypeRegistry using the extension of the URL's
// path: the text following the last '.' of the escape-decoded path.
String mimeTypeFromURL(const URL& url)
{
    const String decodedPath = decodeURLEscapeSequences(url.path());

    // The extension begins one character past the last dot.
    const size_t extensionStart = decodedPath.reverseFind('.') + 1;
    const String extension = decodedPath.substring(extensionStart);

    return MIMETypeRegistry::getMIMETypeForExtension(extension);
}
// Reports whether the underlying URL string says it is safe to send to
// another thread; the answer is delegated entirely to the string.
bool URL::isSafeToSendToAnotherThread() const
{
    const bool stringIsSafe = m_string.isSafeToSendToAnotherThread();
    return stringIsSafe;
}
// Returns the URL string shortened by replacing its middle with "...".
// Strings no longer than |length| are returned unchanged. Otherwise the
// result is the first (length / 2 - 1) characters, "...", and the last
// (length / 2 - 2) characters.
// For |length| below 4 those counts would be negative; they are clamped to
// zero, so the result is just "..." and may exceed |length|.
String URL::stringCenterEllipsizedToLength(unsigned length) const
{
    const String& fullString = string();
    if (fullString.length() <= length)
        return fullString;

    // Clamp explicitly: these are unsigned, so a plain subtraction would
    // wrap around to a huge count for small |length|.
    const unsigned half = length / 2;
    const unsigned prefixLength = half > 1 ? half - 1 : 0;
    const unsigned suffixLength = half > 2 ? half - 2 : 0;

    return fullString.left(prefixLength) + "..." + fullString.right(suffixLength);
}
// Builds a URL of the form "webkit-fake-url://<uuid>/<relativePart>", where
// <uuid> comes from createCanonicalUUIDString().
URL URL::fakeURLWithRelativePart(const String& relativePart)
{
    const String fakeURLString = "webkit-fake-url://" + createCanonicalUUIDString() + '/' + relativePart;
    return URL(URL(), fakeURLString);
}
// Builds a file URL by appending |filePath| to "file:///".
// The path is used as given; no leading slash is stripped here.
URL URL::fileURLWithFileSystemPath(const String& filePath)
{
    const String fileURLString = "file:///" + filePath;
    return URL(URL(), fileURLString);
}
}