/* * Copyright (C) 2005, 2007, 2014 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #import "config.h" #import "NSURLExtras.h" #import <wtf/Function.h> #import <wtf/RetainPtr.h> #import <wtf/URLHelpers.h> #import <wtf/URLParser.h> #import <wtf/Vector.h> #import <wtf/cf/CFURLExtras.h> namespace WTF { using namespace URLHelpers; constexpr unsigned urlBytesBufferLength = 2048; static BOOL readIDNAllowedScriptListFile(NSString *filename) { if (!filename) return NO; FILE *file = fopen([filename fileSystemRepresentation], "r"); if (!file) return NO; // Read a word at a time. // Allow comments, starting with # character to the end of the line. while (1) { // Skip a comment if present. if (fscanf(file, " #%*[^\n\r]%*[\n\r]") == EOF) break; // Read a script name if present. char word[33]; int result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word); if (result == EOF) break; if (result == 1) { // Got a word, map to script code and put it into the array. addScriptToIDNAllowedScriptList(word); } } fclose(file); return YES; } namespace URLHelpers { void loadIDNAllowedScriptList() { static dispatch_once_t flag; dispatch_once(&flag, ^{ for (NSString *directory in NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES)) { if (readIDNAllowedScriptListFile([directory stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"])) return; } initializeDefaultIDNAllowedScriptList(); }); } } // namespace URLHelpers static String decodePercentEscapes(const String& string) { NSString *substring = CFBridgingRelease(CFURLCreateStringByReplacingPercentEscapes(nullptr, string.createCFString().get(), CFSTR(""))); if (!substring) return string; return substring; } NSString *decodeHostName(NSString *string) { Optional<String> host = mapHostName(string, nullptr); if (!host) return nil; return !*host ? string : (NSString *)*host; } NSString *encodeHostName(NSString *string) { Optional<String> host = mapHostName(string, decodePercentEscapes); if (!host) return nil; return !*host ? string : (NSString *)*host; } static RetainPtr<NSString> stringByTrimmingWhitespace(NSString *string) { auto trimmed = adoptNS([string mutableCopy]); CFStringTrimWhitespace((__bridge CFMutableStringRef)trimmed.get()); return trimmed; } NSURL *URLByTruncatingOneCharacterBeforeComponent(NSURL *URL, CFURLComponentType component) { if (!URL) return nil; CFRange fragRg = CFURLGetByteRangeForComponent((__bridge CFURLRef)URL, component, nullptr); if (fragRg.location == kCFNotFound) return URL; Vector<UInt8, urlBytesBufferLength> urlBytes(urlBytesBufferLength); CFIndex numBytes = CFURLGetBytes((__bridge CFURLRef)URL, urlBytes.data(), urlBytes.size()); if (numBytes == -1) { numBytes = CFURLGetBytes((__bridge CFURLRef)URL, nullptr, 0); urlBytes.grow(numBytes); CFURLGetBytes((__bridge CFURLRef)URL, urlBytes.data(), numBytes); } CFURLRef result = CFURLCreateWithBytes(nullptr, urlBytes.data(), fragRg.location - 1, kCFStringEncodingUTF8, nullptr); if (!result) result = CFURLCreateWithBytes(nullptr, urlBytes.data(), fragRg.location - 1, kCFStringEncodingISOLatin1, nullptr); return result ? CFBridgingRelease(result) : URL; } static NSURL *URLByRemovingResourceSpecifier(NSURL *URL) { return URLByTruncatingOneCharacterBeforeComponent(URL, kCFURLComponentResourceSpecifier); } NSURL *URLWithData(NSData *data, NSURL *baseURL) { if (!data) return nil; NSURL *result = nil; size_t length = [data length]; if (length > 0) { // work around <rdar://4470771>: CFURLCreateAbsoluteURLWithBytes(.., TRUE) doesn't remove non-path components. baseURL = URLByRemovingResourceSpecifier(baseURL); const UInt8 *bytes = static_cast<const UInt8*>([data bytes]); // CFURLCreateAbsoluteURLWithBytes would complain to console if we passed a path to it. if (bytes[0] == '/' && !baseURL) return nil; // NOTE: We use UTF-8 here since this encoding is used when computing strings when returning URL components // (e.g calls to NSURL -path). However, this function is not tolerant of illegal UTF-8 sequences, which // could either be a malformed string or bytes in a different encoding, like shift-jis, so we fall back // onto using ISO Latin 1 in those cases. result = CFBridgingRelease(CFURLCreateAbsoluteURLWithBytes(nullptr, bytes, length, kCFStringEncodingUTF8, (__bridge CFURLRef)baseURL, YES)); if (!result) result = CFBridgingRelease(CFURLCreateAbsoluteURLWithBytes(nullptr, bytes, length, kCFStringEncodingISOLatin1, (__bridge CFURLRef)baseURL, YES)); } else result = [NSURL URLWithString:@""]; return result; } static NSData *dataWithUserTypedString(NSString *string) { NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding]; ASSERT(userTypedData); const UInt8* inBytes = static_cast<const UInt8 *>([userTypedData bytes]); int inLength = [userTypedData length]; if (!inLength) return nil; Checked<int, RecordOverflow> mallocLength = inLength; mallocLength *= 3; // large enough to %-escape every character if (mallocLength.hasOverflowed()) return nil; char* outBytes = static_cast<char *>(malloc(mallocLength.unsafeGet())); char* p = outBytes; int outLength = 0; for (int i = 0; i < inLength; i++) { UInt8 c = inBytes[i]; if (c <= 0x20 || c >= 0x7f) { *p++ = '%'; *p++ = upperNibbleToASCIIHexDigit(c); *p++ = lowerNibbleToASCIIHexDigit(c); outLength += 3; } else { *p++ = c; outLength++; } } return [NSData dataWithBytesNoCopy:outBytes length:outLength]; // adopts outBytes } NSURL *URLWithUserTypedString(NSString *string, NSURL *nsURL) { if (!string) return nil; auto mappedString = mapHostNames(stringByTrimmingWhitespace(string).get(), decodePercentEscapes); if (!mappedString) return nil; // Let's check whether the URL is bogus. URL url { URL { nsURL }, mappedString }; if (!url.createCFURL()) return nil; // FIXME: https://bugs.webkit.org/show_bug.cgi?id=186057 // We should be able to use url.createCFURL instead of using directly CFURL parsing routines. NSData *data = dataWithUserTypedString(mappedString); if (!data) return [NSURL URLWithString:@""]; return URLWithData(data, nsURL); } NSURL *URLWithUserTypedStringDeprecated(NSString *string, NSURL *URL) { if (!string) return nil; NSURL *result = URLWithUserTypedString(string, URL); if (!result) { NSData *resultData = dataWithUserTypedString(string); if (!resultData) return [NSURL URLWithString:@""]; result = URLWithData(resultData, URL); } return result; } static BOOL hasQuestionMarkOnlyQueryString(NSURL *URL) { CFRange rangeWithSeparators; CFURLGetByteRangeForComponent((__bridge CFURLRef)URL, kCFURLComponentQuery, &rangeWithSeparators); if (rangeWithSeparators.location != kCFNotFound && rangeWithSeparators.length == 1) return YES; return NO; } NSData *dataForURLComponentType(NSURL *URL, CFURLComponentType componentType) { Vector<UInt8, urlBytesBufferLength> allBytesBuffer(urlBytesBufferLength); CFIndex bytesFilled = CFURLGetBytes((__bridge CFURLRef)URL, allBytesBuffer.data(), allBytesBuffer.size()); if (bytesFilled == -1) { CFIndex bytesToAllocate = CFURLGetBytes((__bridge CFURLRef)URL, nullptr, 0); allBytesBuffer.grow(bytesToAllocate); bytesFilled = CFURLGetBytes((__bridge CFURLRef)URL, allBytesBuffer.data(), bytesToAllocate); } const CFURLComponentType completeURL = (CFURLComponentType)-1; CFRange range; if (componentType != completeURL) { range = CFURLGetByteRangeForComponent((__bridge CFURLRef)URL, componentType, nullptr); if (range.location == kCFNotFound) return nil; } else { range.location = 0; range.length = bytesFilled; } NSData *componentData = [NSData dataWithBytes:allBytesBuffer.data() + range.location length:range.length]; const unsigned char *bytes = static_cast<const unsigned char *>([componentData bytes]); NSMutableData *resultData = [NSMutableData data]; // NOTE: add leading '?' to query strings non-zero length query strings. // NOTE: retain question-mark only query strings. if (componentType == kCFURLComponentQuery) { if (range.length > 0 || hasQuestionMarkOnlyQueryString(URL)) [resultData appendBytes:"?" length:1]; } for (int i = 0; i < range.length; i++) { unsigned char c = bytes[i]; if (c <= 0x20 || c >= 0x7f) { char escaped[3]; escaped[0] = '%'; escaped[1] = upperNibbleToASCIIHexDigit(c); escaped[2] = lowerNibbleToASCIIHexDigit(c); [resultData appendBytes:escaped length:3]; } else { char b[1]; b[0] = c; [resultData appendBytes:b length:1]; } } return resultData; } static NSURL *URLByRemovingComponentAndSubsequentCharacter(NSURL *URL, CFURLComponentType component) { CFRange range = CFURLGetByteRangeForComponent((__bridge CFURLRef)URL, component, 0); if (range.location == kCFNotFound) return URL; // Remove one subsequent character. range.length++; Vector<UInt8, urlBytesBufferLength> buffer(urlBytesBufferLength); CFIndex numBytes = CFURLGetBytes((__bridge CFURLRef)URL, buffer.data(), buffer.size()); if (numBytes == -1) { numBytes = CFURLGetBytes((__bridge CFURLRef)URL, nullptr, 0); buffer.grow(numBytes); CFURLGetBytes((__bridge CFURLRef)URL, buffer.data(), numBytes); } UInt8* urlBytes = buffer.data(); if (numBytes < range.location) return URL; if (numBytes < range.location + range.length) range.length = numBytes - range.location; memmove(urlBytes + range.location, urlBytes + range.location + range.length, numBytes - range.location + range.length); CFURLRef result = CFURLCreateWithBytes(nullptr, urlBytes, numBytes - range.length, kCFStringEncodingUTF8, nullptr); if (!result) result = CFURLCreateWithBytes(nullptr, urlBytes, numBytes - range.length, kCFStringEncodingISOLatin1, nullptr); return result ? CFBridgingRelease(result) : URL; } NSURL *URLByRemovingUserInfo(NSURL *URL) { return URLByRemovingComponentAndSubsequentCharacter(URL, kCFURLComponentUserInfo); } NSData *originalURLData(NSURL *URL) { UInt8 *buffer = (UInt8 *)malloc(urlBytesBufferLength); CFIndex bytesFilled = CFURLGetBytes((__bridge CFURLRef)URL, buffer, urlBytesBufferLength); if (bytesFilled == -1) { CFIndex bytesToAllocate = CFURLGetBytes((__bridge CFURLRef)URL, nullptr, 0); buffer = (UInt8 *)realloc(buffer, bytesToAllocate); bytesFilled = CFURLGetBytes((__bridge CFURLRef)URL, buffer, bytesToAllocate); ASSERT(bytesFilled == bytesToAllocate); } // buffer is adopted by the NSData NSData *data = [NSData dataWithBytesNoCopy:buffer length:bytesFilled freeWhenDone:YES]; NSURL *baseURL = (__bridge NSURL *)CFURLGetBaseURL((__bridge CFURLRef)URL); if (baseURL) return originalURLData(URLWithData(data, baseURL)); return data; } NSString *userVisibleString(NSURL *URL) { NSData *data = originalURLData(URL); CString string(static_cast<const char*>([data bytes]), [data length]); return userVisibleURL(string); } BOOL isUserVisibleURL(NSString *string) { BOOL valid = YES; // get buffer char static_buffer[1024]; const char *p; BOOL success = CFStringGetCString((__bridge CFStringRef)string, static_buffer, 1023, kCFStringEncodingUTF8); p = success ? static_buffer : [string UTF8String]; int length = strlen(p); // check for characters <= 0x20 or >=0x7f, %-escape sequences of %7f, and xn--, these // are the things that will lead _web_userVisibleString to actually change things. for (int i = 0; i < length; i++) { unsigned char c = p[i]; // escape control characters, space, and delete if (c <= 0x20 || c == 0x7f) { valid = NO; break; } else if (c == '%' && (i + 1 < length && isASCIIHexDigit(p[i + 1])) && i + 2 < length && isASCIIHexDigit(p[i + 2])) { auto u = toASCIIHexValue(p[i + 1], p[i + 2]); if (u > 0x7f) { valid = NO; break; } i += 2; } else { // Check for "xn--" in an efficient, non-case-sensitive, way. if (c == '-' && i >= 3 && (p[i - 3] | 0x20) == 'x' && (p[i - 2] | 0x20) == 'n' && p[i - 1] == '-') { valid = NO; break; } } } return valid; } NSRange rangeOfURLScheme(NSString *string) { NSRange colon = [string rangeOfString:@":"]; if (colon.location != NSNotFound && colon.location > 0) { NSRange scheme = {0, colon.location}; /* This stuff is very expensive. 10-15 msec on a 2x1.2GHz. If not cached it swamps everything else when adding items to the autocomplete DB. Makes me wonder if we even need to enforce the character set here. */ NSString *acceptableCharacters = @"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"; static NeverDestroyed<RetainPtr<NSCharacterSet>> InverseSchemeCharacterSet([[NSCharacterSet characterSetWithCharactersInString:acceptableCharacters] invertedSet]); NSRange illegals = [string rangeOfCharacterFromSet:InverseSchemeCharacterSet.get().get() options:0 range:scheme]; if (illegals.location == NSNotFound) return scheme; } return NSMakeRange(NSNotFound, 0); } BOOL looksLikeAbsoluteURL(NSString *string) { // Trim whitespace because _web_URLWithString allows whitespace. return rangeOfURLScheme(stringByTrimmingWhitespace(string).get()).location != NSNotFound; } } // namespace WebCore