#include "config.h"
#include "URL.h"
#include "DecodeEscapeSequences.h"
#include "TextEncoding.h"
#include "URLParser.h"
#include <stdio.h>
#include <unicode/uidna.h>
#include <wtf/HashMap.h>
#include <wtf/HexNumber.h>
#include <wtf/NeverDestroyed.h>
#include <wtf/StdLibExtras.h>
#include <wtf/UUID.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
#include <wtf/text/StringHash.h>
#include <wtf/text/TextStream.h>
namespace WebCore {
using namespace WTF;
// Buffer aliases: WTF Vectors of 8-bit and 16-bit code units, both
// instantiated with 512 as the second template argument.
using CharBuffer = Vector<char, 512>;
using UCharBuffer = Vector<UChar, 512>;

// Largest 16-bit value; it is the final entry of the blocked-port table in
// portAllowed().
static const unsigned invalidPortNumber = 0xFFFF;
static void copyASCII(const String& string, char* dest)
{
if (string.isEmpty())
return;
if (string.is8Bit())
memcpy(dest, string.characters8(), string.length());
else {
const UChar* src = string.characters16();
size_t length = string.length();
for (size_t i = 0; i < length; i++)
dest[i] = static_cast<char>(src[i]);
}
}
// Puts this URL into the invalid state: clears the three status flags and
// zeroes every component offset. m_string is not modified here.
void URL::invalidate()
{
    // Status flags.
    m_cannotBeABaseURL = false;
    m_protocolIsInHTTPFamily = false;
    m_isValid = false;

    // Component boundaries within m_string.
    m_queryEnd = 0;
    m_pathAfterLastSlash = 0;
    m_pathEnd = 0;
    m_portEnd = 0;
    m_hostEnd = 0;
    m_passwordEnd = 0;
    m_userEnd = 0;
    m_userStart = 0;
    m_schemeEnd = 0;
}
// Constructs a URL by running |url| through URLParser. Debug builds assert
// that the parser's output string equals the input, i.e. that the caller
// really passed an already-parsed string.
URL::URL(ParsedURLStringTag, const String& url)
{
    *this = URLParser { url }.result();

#if OS(WINDOWS)
    // On Windows the round-trip check is skipped for local file URLs.
    ASSERT(isLocalFile() || url == m_string);
#else
    ASSERT(url == m_string);
#endif
}
// Constructs a URL by parsing |relative| against |base| with URLParser.
URL::URL(const URL& base, const String& relative)
{
    *this = URLParser { relative, base }.result();
}
// Constructs a URL by parsing |relative| against |base|, handing the parser
// the form-submission variant of |encoding|.
URL::URL(const URL& base, const String& relative, const TextEncoding& encoding)
{
    *this = URLParser { relative, base, encoding.encodingForFormSubmission() }.result();
}
// True for the space character and every code unit below it (the C0 controls).
static bool shouldTrimFromURL(UChar c)
{
    return !(c > ' ');
}
// Returns a copy of this URL whose m_string has been replaced by the result
// of String::isolatedCopy(); all other members are copied as-is.
URL URL::isolatedCopy() const
{
    URL copy(*this);
    copy.m_string = copy.m_string.isolatedCopy();
    return copy;
}
// Returns the text after the last '/' of the path, ignoring one trailing '/'.
// Returns a null String when there is no path or when the nearest '/' lies
// before the start of the path (m_portEnd).
String URL::lastPathComponent() const
{
    if (!hasPath())
        return String();

    // Index of the last path character, stepping back over a trailing slash.
    unsigned lastIndex = m_pathEnd - 1;
    if (m_string[lastIndex] == '/')
        lastIndex -= 1;

    size_t slashIndex = m_string.reverseFind('/', lastIndex);
    if (slashIndex < static_cast<unsigned>(m_portEnd))
        return String();

    // NOTE(review): if reverseFind() reports "not found", slashIndex is not
    // below m_portEnd and the increment below wraps it around. Confirm that
    // this case cannot occur for URLs that have a path.
    ++slashIndex;
    return m_string.substring(slashIndex, lastIndex - slashIndex + 1);
}
// Returns a view of the first m_schemeEnd code units of m_string (the scheme,
// without the ':' that isHierarchical() asserts is at m_schemeEnd).
StringView URL::protocol() const
{
    StringView whole(m_string);
    return whole.substring(0, m_schemeEnd);
}
// Returns the substring of m_string between hostStart() and m_hostEnd.
String URL::host() const
{
    const unsigned begin = hostStart();
    const unsigned hostLength = m_hostEnd - begin;
    return m_string.substring(begin, hostLength);
}
// Parses the digits between the host and m_portEnd as a port number.
// Returns std::nullopt when there are no port digits, when strict parsing
// fails, or when the value does not fit in 16 bits.
std::optional<uint16_t> URL::port() const
{
    // A port needs at least one digit after the ':' that follows the host.
    if (!m_portEnd || m_hostEnd >= m_portEnd - 1)
        return std::nullopt;

    const unsigned digitsStart = m_hostEnd + 1;
    const unsigned digitCount = m_portEnd - m_hostEnd - 1;

    bool parsed = false;
    unsigned value = m_string.is8Bit()
        ? charactersToUIntStrict(m_string.characters8() + digitsStart, digitCount, &parsed)
        : charactersToUIntStrict(m_string.characters16() + digitsStart, digitCount, &parsed);

    if (!parsed || value > std::numeric_limits<uint16_t>::max())
        return std::nullopt;
    return value;
}
// Returns "host:port" when port() yields a value, otherwise just the host.
String URL::hostAndPort() const
{
    auto portNumber = port();
    if (!portNumber)
        return host();
    return host() + ':' + String::number(portNumber.value());
}
// Returns everything up to m_portEnd, with the user/password section (and the
// '@' that follows it) removed when one is present.
String URL::protocolHostAndPort() const
{
    String prefix = m_string.substring(0, m_portEnd);

    const bool hasCredentials = m_passwordEnd - m_userStart > 0;
    if (hasCredentials) {
        // One extra character is removed to cover the trailing '@'.
        const int trailingAtSign = 1;
        prefix.remove(m_userStart, m_passwordEnd - m_userStart + trailingAtSign);
    }
    return prefix;
}
// Returns the user component with URL escape sequences decoded.
String URL::user() const
{
    String rawUser = m_string.substring(m_userStart, m_userEnd - m_userStart);
    return decodeURLEscapeSequences(rawUser);
}
// Returns the password component with URL escape sequences decoded, or a null
// String when the password range is empty (m_passwordEnd == m_userEnd).
String URL::pass() const
{
    if (m_passwordEnd == m_userEnd)
        return String();

    // Skip the separator character that sits at m_userEnd.
    String rawPassword = m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1);
    return decodeURLEscapeSequences(rawPassword);
}
// Returns the user component exactly as stored in m_string (not decoded).
String URL::encodedUser() const
{
    const unsigned userLength = m_userEnd - m_userStart;
    return m_string.substring(m_userStart, userLength);
}
// Returns the password component exactly as stored in m_string (not decoded),
// or a null String when the password range is empty.
String URL::encodedPass() const
{
    const bool hasPassword = m_passwordEnd != m_userEnd;
    if (!hasPassword)
        return String();

    // Skip the separator character that sits at m_userEnd.
    return m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1);
}
// Returns the text after the character at m_queryEnd (the fragment without
// its leading delimiter), or a null String when there is no fragment.
String URL::fragmentIdentifier() const
{
    if (hasFragmentIdentifier())
        return m_string.substring(m_queryEnd + 1);
    return String();
}
// A valid URL has a fragment when m_string extends beyond m_queryEnd.
bool URL::hasFragmentIdentifier() const
{
    if (!m_isValid)
        return false;
    return m_string.length() != m_queryEnd;
}
// Returns the leading m_pathAfterLastSlash code units of m_string, i.e. the
// URL text up to the offset recorded as "path after last slash".
String URL::baseAsString() const
{
return m_string.left(m_pathAfterLastSlash);
}
#if !USE(CF)
// Non-CF implementation: for a valid local-file URL, returns the path with
// URL escape sequences decoded; otherwise returns a null String.
String URL::fileSystemPath() const
{
    const bool isUsableFileURL = isValid() && isLocalFile();
    if (!isUsableFileURL)
        return String();

    return decodeURLEscapeSequences(path());
}
#endif
// Debug-only sanity check for protocol names passed to the protocolIs()
// family: every code unit must be ASCII, above the space character, and not
// an uppercase letter. Compiles to an empty function when NDEBUG is defined.
#ifdef NDEBUG
static inline void assertProtocolIsGood(StringView)
{
}
#else
static void assertProtocolIsGood(StringView protocol)
{
    for (auto codeUnit : protocol.codeUnits()) {
        ASSERT(isASCII(codeUnit));
        ASSERT(codeUnit > ' ');
        ASSERT(!isASCIIUpper(codeUnit));
        ASSERT(toASCIILowerUnchecked(codeUnit) == codeUnit);
    }
}
#endif
// Lock held by the register/clear/lookup functions below while they touch the
// testing override map.
static StaticLock defaultPortForProtocolMapForTestingLock;
// Maps a protocol name to the default port registered for it by tests.
using DefaultPortForProtocolMapForTesting = HashMap<String, uint16_t>;
// Returns a reference to the function-local pointer that holds the testing
// override map. The pointer stays null until
// ensureDefaultPortForProtocolMapForTesting() allocates the map.
static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting()
{
    static DefaultPortForProtocolMapForTesting* overrideMap = nullptr;
    return overrideMap;
}
// Returns the testing override map, allocating it on first use. The map is
// never deleted by the code in this file.
static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting()
{
    auto*& mapSlot = defaultPortForProtocolMapForTesting();
    if (mapSlot)
        return *mapSlot;

    mapSlot = new DefaultPortForProtocolMapForTesting;
    return *mapSlot;
}
// Registers |port| as the default port for |protocol| in the testing override
// map, under the map lock. Uses HashMap::add(), so the call goes through the
// map's add semantics for keys that are already present.
void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol)
{
    auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
    auto& overrides = ensureDefaultPortForProtocolMapForTesting();
    overrides.add(protocol, port);
}
// Empties the testing override map, under the map lock. Does nothing when the
// map was never allocated; the map object itself is kept.
void clearDefaultPortForProtocolMapForTesting()
{
    auto locker = holdLock(defaultPortForProtocolMapForTestingLock);

    auto* overrides = defaultPortForProtocolMapForTesting();
    if (!overrides)
        return;
    overrides->clear();
}
// Returns the default port for |protocol|. An entry in the testing override
// map wins; otherwise the answer comes from URLParser::defaultPortForProtocol().
std::optional<uint16_t> defaultPortForProtocol(StringView protocol)
{
    // The pointer is read before taking the lock; the lock is only taken when
    // an override map exists.
    auto* testingOverrides = defaultPortForProtocolMapForTesting();
    if (testingOverrides) {
        auto locker = holdLock(defaultPortForProtocolMapForTestingLock);
        ASSERT(testingOverrides);
        auto match = testingOverrides->find(protocol.toStringWithoutCopying());
        if (match != testingOverrides->end())
            return match->value;
    }

    return URLParser::defaultPortForProtocol(protocol);
}
// True when |protocol| has a default port and that port equals |port|.
bool isDefaultPortForProtocol(uint16_t port, StringView protocol)
{
    auto defaultPort = defaultPortForProtocol(protocol);
    return defaultPort && *defaultPort == port;
}
// Returns true when this URL is valid and its scheme matches the
// NUL-terminated |protocol| (compared with isASCIIAlphaCaselessEqual).
// Debug builds assert that |protocol| is well-formed and is not "javascript".
bool URL::protocolIs(const char* protocol) const
{
    assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));
    ASSERT(!equalLettersIgnoringASCIICase(StringView(protocol), "javascript"));

    if (!m_isValid)
        return false;

    unsigned index = 0;
    while (index < m_schemeEnd) {
        // |protocol| ended before the scheme did.
        if (!protocol[index])
            return false;
        if (!isASCIIAlphaCaselessEqual(m_string[index], protocol[index]))
            return false;
        ++index;
    }

    // Matches only when |protocol| ends exactly where the scheme ends.
    return !protocol[m_schemeEnd];
}
// StringView overload: returns true when this URL is valid, |protocol| has
// the same length as the scheme, and every code unit matches under
// isASCIIAlphaCaselessEqual.
bool URL::protocolIs(StringView protocol) const
{
    assertProtocolIsGood(protocol);

    if (!m_isValid || m_schemeEnd != protocol.length())
        return false;

    unsigned index = 0;
    while (index < m_schemeEnd) {
        if (!isASCIIAlphaCaselessEqual(m_string[index], protocol[index]))
            return false;
        ++index;
    }
    return true;
}
// Returns the query text without the delimiter that sits at m_pathEnd, or a
// null String when the query range is empty.
String URL::query() const
{
    if (m_queryEnd == m_pathEnd)
        return String();

    const unsigned queryStart = m_pathEnd + 1;
    return m_string.substring(queryStart, m_queryEnd - queryStart);
}
// Returns the substring of m_string between m_portEnd and m_pathEnd.
String URL::path() const
{
    const unsigned pathLength = m_pathEnd - m_portEnd;
    return m_string.substring(m_portEnd, pathLength);
}
bool URL::setProtocol(const String& s)
{
size_t separatorPosition = s.find(':');
String newProtocol = s.substring(0, separatorPosition);
auto canonicalized = URLParser::maybeCanonicalizeScheme(newProtocol);
if (!canonicalized)
return false;
if (!m_isValid) {
URLParser parser(makeString(*canonicalized, ":", m_string));
*this = parser.result();
return true;
}
URLParser parser(makeString(*canonicalized, m_string.substring(m_schemeEnd)));
*this = parser.result();
return true;
}
// True when every code unit of |string| is ASCII, dispatching on whether the
// view holds 8-bit or 16-bit characters.
static bool isAllASCII(StringView string)
{
    return string.is8Bit()
        ? charactersAreAllASCII(string.characters8(), string.length())
        : charactersAreAllASCII(string.characters16(), string.length());
}
// Appends |string| to |buffer| as a hostname. All-ASCII input, and input
// longer than the 2048-unit conversion buffer, is appended unchanged. Anything
// else is converted with ICU's uidna_IDNToASCII(). Returns false, appending
// nothing, when ICU reports any status other than U_ZERO_ERROR.
static bool appendEncodedHostname(UCharBuffer& buffer, StringView string)
{
    const unsigned hostnameBufferLength = 2048;

    const bool copyVerbatim = string.length() > hostnameBufferLength || isAllASCII(string);
    if (copyVerbatim) {
        append(buffer, string);
        return true;
    }

    UChar hostnameBuffer[hostnameBufferLength];
    UErrorCode error = U_ZERO_ERROR;

    // uidna_IDNToASCII is marked deprecated; silence the warning for this call only.
#if COMPILER(GCC_OR_CLANG)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
    int32_t numCharactersConverted = uidna_IDNToASCII(string.upconvertedCharacters(), string.length(), hostnameBuffer,
        hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, 0, &error);
#if COMPILER(GCC_OR_CLANG)
#pragma GCC diagnostic pop
#endif

    if (error != U_ZERO_ERROR)
        return false;

    buffer.append(hostnameBuffer, numCharactersConverted);
    return true;
}
void URL::setHost(const String& s)
{
if (!m_isValid)
return;
auto colonIndex = s.find(':');
if (colonIndex != notFound)
return;
UCharBuffer encodedHostName;
if (!appendEncodedHostname(encodedHostName, s))
return;
bool slashSlashNeeded = m_userStart == m_schemeEnd + 1;
StringBuilder builder;
builder.append(m_string.left(hostStart()));
if (slashSlashNeeded)
builder.appendLiteral("//");
builder.append(StringView(encodedHostName.data(), encodedHostName.size()));
builder.append(m_string.substring(m_hostEnd));
URLParser parser(builder.toString());
*this = parser.result();
}
void URL::removePort()
{
if (m_hostEnd == m_portEnd)
return;
URLParser parser(m_string.left(m_hostEnd) + m_string.substring(m_portEnd));
*this = parser.result();
}
// Sets the port to |i| and reparses. Does nothing for an invalid URL. A ':'
// is inserted when the URL currently has no port section.
void URL::setPort(unsigned short i)
{
    if (!m_isValid)
        return;

    const bool hasNoPortSection = m_portEnd == m_hostEnd;

    // With an existing port section, keep the ':' that is already in place.
    const unsigned prefixLength = hasNoPortSection ? m_hostEnd : m_hostEnd + 1;
    const char* separator = hasNoPortSection ? ":" : "";

    URLParser parser(makeString(m_string.left(prefixLength), separator, String::number(i), m_string.substring(m_portEnd)));
    *this = parser.result();
}
void URL::setHostAndPort(const String& hostAndPort)
{
if (!m_isValid)
return;
StringView hostName(hostAndPort);
StringView port;
auto colonIndex = hostName.find(':');
if (colonIndex != notFound) {
port = hostName.substring(colonIndex + 1);
bool ok;
int portInt = port.toIntStrict(ok);
if (!ok || portInt < 0)
return;
hostName = hostName.substring(0, colonIndex);
}
if (hostName.isEmpty())
return;
UCharBuffer encodedHostName;
if (!appendEncodedHostname(encodedHostName, hostName))
return;
bool slashSlashNeeded = m_userStart == m_schemeEnd + 1;
StringBuilder builder;
builder.append(m_string.left(hostStart()));
if (slashSlashNeeded)
builder.appendLiteral("//");
builder.append(StringView(encodedHostName.data(), encodedHostName.size()));
if (!port.isEmpty()) {
builder.appendLiteral(":");
builder.append(port);
}
builder.append(m_string.substring(m_portEnd));
URLParser parser(builder.toString());
*this = parser.result();
}
// Percent-encodes |input| using |shouldEncode| as the predicate. When no code
// unit of |input| needs encoding, |input| is returned as-is. Otherwise the
// string is converted to UTF-8 and every byte for which |shouldEncode| returns
// true is written as '%' followed by two hex digits.
static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar))
{
    // First pass: find out whether any work is needed at all.
    bool needsEncoding = false;
    for (size_t i = 0; i < input.length(); ++i) {
        if (UNLIKELY(shouldEncode(input[i]))) {
            needsEncoding = true;
            break;
        }
    }
    if (!needsEncoding)
        return input;

    // Second pass: encode byte by byte over the UTF-8 form.
    CString utf8 = input.utf8();
    auto* bytes = utf8.data();
    auto byteCount = utf8.length();

    StringBuilder encoded;
    for (unsigned j = 0; j < byteCount; j++) {
        auto byte = bytes[j];
        if (!shouldEncode(byte)) {
            encoded.append(byte);
            continue;
        }
        encoded.append('%');
        encoded.append(upperNibbleToASCIIHexDigit(byte));
        encoded.append(lowerNibbleToASCIIHexDigit(byte));
    }
    return encoded.toString();
}
void URL::setUser(const String& user)
{
if (!m_isValid)
return;
unsigned end = m_userEnd;
if (!user.isEmpty()) {
String u = percentEncodeCharacters(user, URLParser::isInUserInfoEncodeSet);
if (m_userStart == m_schemeEnd + 1)
u = "//" + u;
if (end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@'))
u.append('@');
URLParser parser(makeString(StringView(m_string).left(m_userStart), u, StringView(m_string).substring(end)));
*this = parser.result();
} else {
if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@')
end += 1;
if (m_userStart != end) {
URLParser parser(makeString(StringView(m_string).left(m_userStart), StringView(m_string).substring(end)));
*this = parser.result();
}
}
}
void URL::setPass(const String& password)
{
if (!m_isValid)
return;
unsigned end = m_passwordEnd;
if (!password.isEmpty()) {
String p = ":" + percentEncodeCharacters(password, URLParser::isInUserInfoEncodeSet) + "@";
if (m_userEnd == m_schemeEnd + 1)
p = "//" + p;
if (end != m_hostEnd && m_string[end] == '@')
end += 1;
URLParser parser(makeString(StringView(m_string).left(m_userEnd), p, StringView(m_string).substring(end)));
*this = parser.result();
} else {
if (m_userStart == m_userEnd && end != m_hostEnd && m_string[end] == '@')
end += 1;
if (m_userEnd != end) {
URLParser parser(makeString(StringView(m_string).left(m_userEnd), StringView(m_string).substring(end)));
*this = parser.result();
}
}
}
// Replaces everything after m_queryEnd with '#' followed by |identifier| and
// reparses. Does nothing for an invalid URL.
void URL::setFragmentIdentifier(StringView identifier)
{
    if (!m_isValid)
        return;

    StringView beforeFragment = StringView { m_string }.substring(0, m_queryEnd);
    URLParser parser(makeString(beforeFragment, '#', identifier));
    *this = parser.result();
}
// Truncates m_string at m_queryEnd, discarding any fragment. The string is
// edited directly; the URL is not reparsed. Does nothing for an invalid URL.
void URL::removeFragmentIdentifier()
{
    if (!m_isValid) {
        ASSERT(!m_queryEnd);
        return;
    }

    const bool hasTrailingText = m_string.length() > m_queryEnd;
    if (hasTrailingText)
        m_string = m_string.left(m_queryEnd);
}
// Truncates m_string at m_pathEnd and sets m_queryEnd to match, discarding
// query and fragment without reparsing. Does nothing for an invalid URL.
void URL::removeQueryAndFragmentIdentifier()
{
    if (!m_isValid)
        return;

    m_queryEnd = m_pathEnd;
    m_string = m_string.left(m_pathEnd);
}
void URL::setQuery(const String& query)
{
if (!m_isValid)
return;
if ((query.isEmpty() || query[0] != '?') && !query.isNull()) {
URLParser parser(makeString(StringView(m_string).left(m_pathEnd), "?", query, StringView(m_string).substring(m_queryEnd)));
*this = parser.result();
} else {
URLParser parser(makeString(StringView(m_string).left(m_pathEnd), query, StringView(m_string).substring(m_queryEnd)));
*this = parser.result();
}
}
void URL::setPath(const String& s)
{
if (!m_isValid)
return;
String path = s;
if (path.isEmpty() || path[0] != '/')
path = "/" + path;
auto questionMarkOrNumberSign = [] (UChar character) {
return character == '?' || character == '#';
};
URLParser parser(makeString(StringView(m_string).left(m_portEnd), percentEncodeCharacters(path, questionMarkOrNumberSign), StringView(m_string).substring(m_pathEnd)));
*this = parser.result();
}
// Decodes URL escape sequences in |string| using UTF8Encoding(). An empty
// input is returned unchanged.
String decodeURLEscapeSequences(const String& string)
{
    if (!string.isEmpty())
        return decodeEscapeSequences<URLEscapeSequence>(string, UTF8Encoding());

    return string;
}
// Decodes URL escape sequences in |string| using the caller-supplied
// |encoding|. An empty input is returned unchanged.
String decodeURLEscapeSequences(const String& string, const TextEncoding& encoding)
{
    if (!string.isEmpty())
        return decodeEscapeSequences<URLEscapeSequence>(string, encoding);

    return string;
}
#if PLATFORM(IOS)
// iOS-only switch read by the equal() helper in this file: when false, that
// helper lowercases its first argument before comparing.
static bool shouldCanonicalizeScheme = true;
// Sets the iOS-only scheme canonicalization switch.
void enableURLSchemeCanonicalization(bool enableSchemeCanonicalization)
{
shouldCanonicalizeScheme = enableSchemeCanonicalization;
}
#endif
// Compares the first |length| chars of |a| with the array |b|. |length| is
// the full array extent, so for a string literal the terminating NUL is part
// of the comparison. On iOS, when scheme canonicalization is switched off,
// |a| is lowercased with toASCIILower() before each comparison.
template<size_t length>
static inline bool equal(const char* a, const char (&b)[length])
{
#if PLATFORM(IOS)
    if (!shouldCanonicalizeScheme) {
        size_t matched = 0;
        while (matched < length && toASCIILower(a[matched]) == b[matched])
            ++matched;
        return matched == length;
    }
#endif
    size_t matched = 0;
    while (matched < length && a[matched] == b[matched])
        ++matched;
    return matched == length;
}
// Length-checked variant: |lengthA| must equal the array extent |lengthB|
// before the contents are compared with the two-argument equal().
template<size_t lengthB>
static inline bool equal(const char* stringA, size_t lengthA, const char (&stringB)[lengthB])
{
    if (lengthA != lengthB)
        return false;
    return equal(stringA, stringB);
}
bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b)
{
if (a.m_queryEnd != b.m_queryEnd)
return false;
unsigned queryLength = a.m_queryEnd;
for (unsigned i = 0; i < queryLength; ++i)
if (a.string()[i] != b.string()[i])
return false;
return true;
}
bool protocolHostAndPortAreEqual(const URL& a, const URL& b)
{
if (a.m_schemeEnd != b.m_schemeEnd)
return false;
unsigned hostStartA = a.hostStart();
unsigned hostLengthA = a.hostEnd() - hostStartA;
unsigned hostStartB = b.hostStart();
unsigned hostLengthB = b.hostEnd() - b.hostStart();
if (hostLengthA != hostLengthB)
return false;
for (unsigned i = 0; i < a.m_schemeEnd; ++i) {
if (a.string()[i] != b.string()[i])
return false;
}
for (unsigned i = 0; i < hostLengthA; ++i) {
if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
return false;
}
if (a.port() != b.port())
return false;
return true;
}
bool hostsAreEqual(const URL& a, const URL& b)
{
unsigned hostStartA = a.hostStart();
unsigned hostLengthA = a.hostEnd() - hostStartA;
unsigned hostStartB = b.hostStart();
unsigned hostLengthB = b.hostEnd() - hostStartB;
if (hostLengthA != hostLengthB)
return false;
for (unsigned i = 0; i < hostLengthA; ++i) {
if (a.string()[hostStartA + i] != b.string()[hostStartB + i])
return false;
}
return true;
}
// Percent-encodes |input| using URLParser::isInUserInfoEncodeSet as the
// predicate that selects which characters to escape.
String encodeWithURLEscapeSequences(const String& input)
{
return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet);
}
// True when the URL is valid and the character right after the scheme's ':'
// is '/'.
bool URL::isHierarchical() const
{
    if (!m_isValid)
        return false;

    ASSERT(m_string[m_schemeEnd] == ':');
    const unsigned afterColon = m_schemeEnd + 1;
    return m_string[afterColon] == '/';
}
// Resizes |buffer| to the length of m_string and fills it via copyASCII(),
// which narrows 16-bit code units to char. No terminator is appended.
void URL::copyToBuffer(Vector<char, 512>& buffer) const
{
    const unsigned characterCount = m_string.length();
    buffer.resize(characterCount);
    copyASCII(m_string, buffer.data());
}
// Returns true when |url| starts with |protocol| followed by ':'. The
// comparison is done in place, without building a new string:
//  - leading code units at or below the space character are skipped;
//  - tab, carriage return and newline are skipped wherever they appear;
//  - letters are compared with isASCIIAlphaCaselessEqual.
// |protocol| must be a NUL-terminated name that passes assertProtocolIsGood.
//
// The scan is bounded by url.length(). This template is also instantiated
// with StringView, which is not guaranteed to be NUL-terminated, so relying
// on a zero code unit alone could index past the end of the view. A zero code
// unit still ends the scan, which keeps the String behaviour unchanged.
template<typename StringClass>
bool protocolIsInternal(const StringClass& url, const char* protocol)
{
    assertProtocolIsGood(StringView(reinterpret_cast<const LChar*>(protocol), strlen(protocol)));

    const unsigned urlLength = url.length();
    bool isLeading = true;
    for (unsigned i = 0, j = 0; i < urlLength && url[i]; ++i) {
        // Skip leading whitespace and control characters.
        if (isLeading && shouldTrimFromURL(url[i]))
            continue;
        isLeading = false;

        // Skip any tabs and newlines inside the scheme.
        if (url[i] == '\t' || url[i] == '\r' || url[i] == '\n')
            continue;

        // The whole protocol matched; the next character must be the ':'.
        if (!protocol[j])
            return url[i] == ':';
        if (!isASCIIAlphaCaselessEqual(url[i], protocol[j]))
            return false;
        ++j;
    }

    // Ran out of input before seeing the ':' delimiter.
    return false;
}
// Free-function form: forwards to protocolIsInternal(), which checks whether
// the string |url| begins with |protocol| followed by ':'.
bool protocolIs(const String& url, const char* protocol)
{
return protocolIsInternal(url, protocol);
}
// Member form taking a raw string: forwards to WebCore::protocolIsInternal(),
// which checks whether |string| begins with |protocol| followed by ':'.
inline bool URL::protocolIs(const String& string, const char* protocol)
{
return WebCore::protocolIsInternal(string, protocol);
}
#ifndef NDEBUG
void URL::print() const
{
printf("%s\n", m_string.utf8().data());
}
#endif
// Returns the string of a copy of this URL with user, password and fragment
// removed. This URL itself is not modified.
String URL::strippedForUseAsReferrer() const
{
    URL stripped = *this;

    // Credentials first, then the fragment.
    stripped.setUser(String());
    stripped.setPass(String());
    stripped.removeFragmentIdentifier();

    return stripped.string();
}
// True when this URL's scheme is "file" (as decided by protocolIs()).
bool URL::isLocalFile() const
{
return protocolIs("file");
}
// True when the string |url| begins with the "javascript" scheme, as decided
// by protocolIsInternal().
bool protocolIsJavaScript(const String& url)
{
return protocolIsInternal(url, "javascript");
}
// StringView overload: true when |url| begins with the "javascript" scheme,
// as decided by protocolIsInternal().
bool protocolIsJavaScript(StringView url)
{
return protocolIsInternal(url, "javascript");
}
// True when |url| starts with "http:" or "https:" (letters matched with
// isASCIIAlphaCaselessEqual). The check runs in place, without building a new
// string; unlike protocolIsInternal(), nothing is skipped.
bool protocolIsInHTTPFamily(const String& url)
{
    const auto length = url.length();

    // "http:" is the shortest possible match.
    if (length < 5)
        return false;

    const bool startsWithHTTP = isASCIIAlphaCaselessEqual(url[0], 'h')
        && isASCIIAlphaCaselessEqual(url[1], 't')
        && isASCIIAlphaCaselessEqual(url[2], 't')
        && isASCIIAlphaCaselessEqual(url[3], 'p');
    if (!startsWithHTTP)
        return false;

    if (url[4] == ':')
        return true;

    // Otherwise it must be "https:".
    return isASCIIAlphaCaselessEqual(url[4], 's') && length >= 6 && url[5] == ':';
}
// Returns a reference to a function-local static URL for "about:blank", held
// in a NeverDestroyed wrapper.
const URL& blankURL()
{
    static NeverDestroyed<URL> sharedBlankURL(ParsedURLString, "about:blank");
    return sharedBlankURL.get();
}
// True when this URL's scheme is "about". Only the scheme is examined; the
// rest of the URL is not checked against "blank".
bool URL::isBlankURL() const
{
return protocolIs("about");
}
// Decides whether |url| may be used given its port. URLs without a port, and
// ports missing from the blocked list, are allowed. A blocked port is still
// allowed for ports 21/22 on "ftp" URLs and for any "file" URL.
bool portAllowed(const URL& url)
{
    const std::optional<uint16_t> port = url.port();

    // No explicit port: nothing to block.
    if (!port)
        return true;

    // Must stay sorted: it is searched with std::binary_search below.
    static const uint16_t blockedPortList[] = {
        1, 7, 9, 11, 13, 15, 17, 19, 20, 21, 22, 23, 25, 37, 42, 43, 53, 77, 79, 87, 95, 101, 102, 103, 104, 109, 110, 111, 113, 115, 117, 119, 123, 135, 139, 143, 179, 389, 465, 512, 513, 514, 515, 526, 530, 531, 532, 540, 556, 563, 587, 601, 636, 993, 995, 2049, 3659, 4045, 4190, 6000, 6665, 6666, 6667, 6668, 6669, 6679, 6697, invalidPortNumber, };
    ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList)));

    const bool isBlocked = std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value());
    if (!isBlocked)
        return true;

    // Exception: ports 21 and 22 are permitted for ftp URLs.
    const bool isFTPControlPort = port.value() == 21 || port.value() == 22;
    if (isFTPControlPort && url.protocolIs("ftp"))
        return true;

    // Exception: file URLs are permitted on any port.
    return url.protocolIs("file");
}
// Extracts the MIME type from a data: URL string. The type is the text
// between offset 5 (just past "data:") and the first ';', or the first ','
// when there is no ';'. Returns the empty string when neither delimiter is
// found, "text/plain" when the type is empty, and otherwise the type
// converted to ASCII lowercase.
String mimeTypeFromDataURL(const String& url)
{
    ASSERT(protocolIsInternal(url, "data"));

    // A ';' anywhere after the prefix wins over ','.
    auto typeEnd = url.find(';', 5);
    if (typeEnd == notFound) {
        typeEnd = url.find(',', 5);
        if (typeEnd == notFound)
            return emptyString();
    }

    // Delimiter directly after "data:" means no type was given.
    if (typeEnd == 5)
        return ASCIILiteral("text/plain");

    ASSERT(typeEnd >= 5);
    return url.substring(5, typeEnd - 5).convertToASCIILowercase();
}
// Returns the URL string shortened to roughly |length| characters by
// replacing its middle with "...". Strings that already fit are returned
// unchanged.
//
// The head keeps length / 2 - 1 characters and the tail length / 2 - 2. Both
// subtractions are clamped at zero: with unsigned arithmetic they would
// otherwise wrap around for |length| below 4, and for a |length| of 0 or 1
// the wrapped head length selected the entire string. Results for
// |length| >= 4 are unchanged.
String URL::stringCenterEllipsizedToLength(unsigned length) const
{
    if (string().length() <= length)
        return string();

    const unsigned half = length / 2;
    const unsigned headLength = half > 1 ? half - 1 : 0;
    const unsigned tailLength = half > 2 ? half - 2 : 0;

    return string().left(headLength) + "..." + string().right(tailLength);
}
// Builds a "webkit-fake-url://<uuid>/<relativePart>" URL, using a freshly
// created canonical UUID string as the host.
URL URL::fakeURLWithRelativePart(const String& relativePart)
{
    String spec = "webkit-fake-url://" + createCanonicalUUIDString() + '/' + relativePart;
    return URL(URL(), spec);
}
// Builds a file URL by prefixing |filePath| with "file:///" and parsing the
// result. |filePath| is passed to the parser as given.
URL URL::fileURLWithFileSystemPath(const String& filePath)
{
    String spec = "file:///" + filePath;
    return URL(URL(), spec);
}
// Streams the URL's string form into |ts| and returns |ts| for chaining.
TextStream& operator<<(TextStream& ts, const URL& url)
{
    return ts << url.string();
}
}