#include <stdlib.h>
#include <stdio.h>
#include <security_utilities/threading.h>
#include <Carbon/Carbon.h>
#include <time.h>
#include <string.h>
#include <pthread.h>
#include "cfSimpleGet.h"
/* Size in bytes of the per-thread path buffer (ThreadParams.path). */
#define MAX_PATH_LEN 256
/* Size in bytes of the stack buffer fetchUrl() formats the full URL into. */
#define MAX_URL_LEN 1024
/* Number of ThreadParams slots allocated by fetchImages(); also the cap on image threads. */
#define MAX_THREADS 100
/* Set to 1 to make dprintf() forward to printf(); 0 compiles the calls away. */
#define DEBUG_PRINT 0
/* NOTE(review): this macro reuses the name of POSIX dprintf() - confirm no caller expects the fd-based function. */
#if DEBUG_PRINT
#define dprintf(args...) printf(args)
#else
#define dprintf(args...)
#endif
/* When nonzero, fetchImages() skips any image whose path contains a '|' character. */
#define SKIP_MULTI_QUERIES 1
/*
 * NULL-terminated list of host names that fetchImages() downgrades to
 * plain http even when the enclosing page was fetched via SSL.
 */
static const char *nonSslSites[] = {
	"cover2.cduniverse.com",
	"a248.e.akamai.net",
	NULL
};

/*
 * Report whether host appears in nonSslSites.
 * The comparison is an exact, case-sensitive string match.
 * Returns 1 when found, 0 otherwise.
 */
static int isHostNonSsl(
	const char *host)
{
	for(const char **entry = nonSslSites; *entry != NULL; entry++) {
		if(strcmp(*entry, host) == 0) {
			return 1;
		}
	}
	return 0;
}
/*
 * Process-wide lock; fetchUrl() holds it around the download call when
 * its singleThread argument is nonzero, serializing those fetches.
 */
static Mutex urlLock;

/* Acquire urlLock. */
static void urlThreadLock()
{
urlLock.lock();
}

/* Release urlLock. */
static void urlThreadUnlock()
{
urlLock.unlock();
}
/*
 * Per-image-thread state. fetchImages() fills in the request fields and
 * passes a pointer to imageThread(), which stores the results of its
 * fetchUrl() call back into ortn and bytesRead.
 */
typedef struct {
const char *host;           /* host to fetch from: the page's host, or a malloc'd host parsed from an absolute image URL */
char path[MAX_PATH_LEN];    /* NUL-terminated path of the image on host */
bool isSsl;                 /* passed to fetchUrl(): true selects https://, false http:// */
bool useCfNet;              /* passed to fetchUrl(): true uses cfSimpleGet(), false URLSimpleDownload() */
int singleThread;           /* passed to fetchUrl(): nonzero serializes the download under urlLock */
int quiet;                  /* copy of the caller's quiet flag */
pthread_t pthr;             /* thread handle filled in by pthread_create() */
unsigned threadNum;         /* index of this entry in the params array */
OSStatus ortn;              /* fetchUrl() result; preset to -1 before the thread starts */
unsigned bytesRead;         /* byte count reported by fetchUrl() */
} ThreadParams;
/*
 * Print the command-line synopsis and option list to stdout, then
 * terminate the process with exit status 1. Does not return.
 */
static void usage(char **argv)
{
	const char *progName = argv[0];

	printf("%s hostname path [options]\n", progName);
	/* The fixed option text is emitted in one write. */
	fputs("Options:\n"
		" u (URLAccess; default is CFNetwork)\n"
		" s connect via SSL\n"
		" t single thread access to URLSimpleDownload\n"
		" q quiet\n",
		stdout);
	exit(1);
}
/*
 * Write the URL formed from host and path to stdout, with no trailing
 * newline. The scheme is "https" when isSsl is nonzero, else "http".
 */
static void printUrl(
	const char *host,
	const char *path,
	int isSsl)
{
	const char *schemeName = isSsl ? "https" : "http";

	printf("%s://%s%s", schemeName, host, path);
}
/*
 * Fetch the resource at scheme://host/path.
 *
 * host, path    - concatenated after the scheme to form the URL; path is
 *                 expected to start with '/'.
 * isSsl         - true selects "https://", false "http://".
 * useCfNet      - true fetches via cfSimpleGet(); false via
 *                 URLSimpleDownload() into a temporary Handle.
 * singleThread  - nonzero holds urlLock for the duration of the download.
 * bytesRead     - out: number of bytes obtained (0 on early failure).
 * cfData        - optional out: on success receives a CFDataRef holding
 *                 the data, which the caller must CFRelease(). Set to
 *                 NULL on entry so it is well defined on failure. Pass
 *                 NULL to discard the data.
 *
 * Returns noErr on success; ioErr when cfSimpleGet() yields nothing; the
 * URLSimpleDownload() status on that path; paramErr when the assembled
 * URL does not fit in MAX_URL_LEN bytes.
 */
static OSStatus fetchUrl(
	const char *host,
	const char *path,
	bool isSsl,
	bool useCfNet,
	int singleThread,
	unsigned *bytesRead,
	CFDataRef *cfData)
{
	char url[MAX_URL_LEN];
	const char *scheme = isSsl ? "https://" : "http://";
	OSStatus ortn;
	int urlLen;

	*bytesRead = 0;
	if(cfData != NULL) {
		*cfData = NULL;
	}

	/*
	 * host and path can originate from fetched HTML, so the URL must be
	 * bounded; refuse to proceed rather than fetch a truncated URL.
	 */
	urlLen = snprintf(url, sizeof(url), "%s%s%s", scheme, host, path);
	if((urlLen < 0) || ((size_t)urlLen >= sizeof(url))) {
		printf("***URL too long (limit %d bytes)\n", MAX_URL_LEN - 1);
		return paramErr;
	}

	if(singleThread) {
		urlThreadLock();
	}
	if(useCfNet) {
		CFDataRef cd = cfSimpleGet(url);
		if(cd) {
			*bytesRead = (unsigned)CFDataGetLength(cd);
			if(cfData) {
				/* ownership of cd passes to the caller */
				*cfData = cd;
			}
			else {
				CFRelease(cd);
			}
			ortn = noErr;
		}
		else {
			printf("implied ioErr from cfnet\n");
			ortn = ioErr;
		}
	}
	else {
		Handle h = NewHandle(0);
		ortn = URLSimpleDownload(url,
			NULL,
			h,
			0, NULL, NULL);
		*bytesRead = GetHandleSize(h);
		if((cfData != NULL) && (ortn == noErr)) {
			/* CFDataCreate copies the bytes, so h can be disposed below */
			CFDataRef cd = CFDataCreate(NULL, (UInt8 *)*h, *bytesRead);
			*cfData = cd;
		}
		if(ortn) {
			printf("%d returned from URLSimpleDownload\n", (int)ortn);
		}
		DisposeHandle(h);
	}
	if(singleThread) {
		urlThreadUnlock();
	}
	dprintf("...read %d bytes from %s\n", (int)(*bytesRead), url);
	return ortn;
}
/*
 * pthread start routine for one image fetch.
 *
 * arg is a ThreadParams*. The thread calls fetchUrl() with the request
 * fields, discarding the data (cfData == NULL), and stores the status
 * in params->ortn and the byte count in params->bytesRead for the
 * joining thread to read. The thread's own exit value is always NULL.
 */
static void *imageThread(void *arg)
{
	ThreadParams *params = (ThreadParams *)arg;

	params->ortn = fetchUrl(params->host,
		params->path,
		params->isSsl,
		params->useCfNet,
		params->singleThread,
		&params->bytesRead,
		NULL);
	pthread_exit(NULL);
	/* not reached; keeps compilers that expect a return value quiet */
	return NULL;
}
/*
 * Scan the HTML in cfData for image references and fetch each one on its
 * own thread via imageThread(), then join all threads and report.
 *
 * cfData        - raw bytes of the already-fetched page.
 * host          - host the page came from; used for root-relative and
 *                 page-relative image paths.
 * origPath      - path of the page; its directory portion (everything up
 *                 to and including the last '/') prefixes page-relative
 *                 image names.
 * isSsl, useCfNet, singleThread - forwarded to fetchUrl() for each image.
 *                 An absolute image URL whose host is listed in
 *                 nonSslSites is fetched without SSL.
 * quiet         - nonzero suppresses per-image progress output; failures
 *                 are still printed.
 *
 * At most MAX_THREADS images are fetched. Images whose path would not fit
 * in MAX_PATH_LEN, or whose URL would not fit in MAX_URL_LEN, are skipped
 * with a message rather than overflowing the buffers.
 *
 * Returns the number of errors (failed fetches plus thread and allocation
 * failures); 0 when the page is empty.
 */
static int fetchImages(
	CFDataRef cfData,
	const char *host,
	const char *origPath,
	int isSsl,
	bool useCfNet,
	int singleThread,
	int quiet)
{
	char *mungedHtml;
	char *basePath;
	char *lastSlash;
	size_t basePathLen;
	Size mungedLen;
	char *cp;
	ThreadParams *params;
	ThreadParams *thisThread;
	unsigned threadDex;
	unsigned numThreads = 0;
	int prtn;
	int totalErrors = 0;
	bool joinFailed = false;

	mungedLen = CFDataGetLength(cfData);
	if(mungedLen == 0) {
		printf("***size() of main page is zero!\n");
		return 0;
	}

	basePath = strdup(origPath);
	mungedHtml = (char *)malloc(mungedLen + 1);
	params = (ThreadParams *)malloc(sizeof(ThreadParams) * MAX_THREADS);
	if((basePath == NULL) || (mungedHtml == NULL) || (params == NULL)) {
		printf("***Out of memory preparing image fetch\n");
		free(basePath);
		free(mungedHtml);
		free(params);
		return 1;
	}

	/*
	 * Reduce the page path to its directory: keep everything through the
	 * last '/'. A path with no '/' at all is left untouched.
	 */
	lastSlash = strrchr(basePath, '/');
	if(lastSlash != NULL) {
		lastSlash[1] = '\0';
	}
	basePathLen = strlen(basePath);

	/* Work on a private NUL-terminated copy so it can be edited and scanned */
	memcpy(mungedHtml, CFDataGetBytePtr(cfData), mungedLen);
	mungedHtml[mungedLen] = '\0';

	/* Normalize lower-case "img src" so only one spelling is searched for */
	cp = mungedHtml;
	while((cp = strstr(cp, "img src")) != NULL) {
		memcpy(cp, "IMG SRC", 7);
		cp += 7;
	}

	/* Convert backslashes to forward slashes */
	for(cp = strchr(mungedHtml, '\\'); cp != NULL; cp = strchr(cp + 1, '\\')) {
		*cp = '/';
	}

	cp = mungedHtml;
	for(;;) {
		char *imageNameStart;
		char *imageNameEnd;
		size_t imageNameLen;
		char *ownedHost = NULL;		/* malloc'd host of an absolute URL */

		cp = strstr(cp, "IMG SRC=");
		if(cp == NULL) {
			break;
		}
		cp += 8;
		if(*cp == '"') {
			/* quoted name: runs to the closing quote */
			imageNameStart = ++cp;
			imageNameEnd = strchr(imageNameStart, '"');
		}
		else {
			/* unquoted name: ends at the first space or '>' */
			char *nextSpace;
			imageNameStart = cp;
			imageNameEnd = strchr(imageNameStart, '>');
			nextSpace = strchr(imageNameStart, ' ');
			if((nextSpace != NULL) &&
			   ((imageNameEnd == NULL) || (nextSpace < imageNameEnd))) {
				imageNameEnd = nextSpace;
			}
		}
		if(imageNameEnd == NULL) {
			printf("***Bad HTML - missing quote/bracket after image file name\n");
			continue;
		}
		cp = imageNameEnd;
		imageNameLen = (size_t)(imageNameEnd - imageNameStart);

		thisThread = &params[numThreads];
		thisThread->host = host;
		thisThread->isSsl = isSsl;
		thisThread->useCfNet = useCfNet;
		thisThread->singleThread = singleThread;
		thisThread->threadNum = numThreads;
		thisThread->quiet = quiet;
		thisThread->ortn = -1;
		thisThread->bytesRead = 0;

		if(imageNameStart[0] == '/') {
			/* root-relative: use the name as the path on the page's host */
			if(imageNameLen >= MAX_PATH_LEN) {
				printf("***Image path too long; skipping\n");
				continue;
			}
			memcpy(thisThread->path, imageNameStart, imageNameLen);
			thisThread->path[imageNameLen] = '\0';
		}
		else if(strncmp(imageNameStart, "http", 4) == 0) {
			/* absolute URL: split into host and path */
			const char *hostStart;
			const char *hostEnd;
			size_t hostLen;
			size_t pathLen;

			hostStart = strstr(imageNameStart, "//");
			if((hostStart == NULL) || (hostStart > (imageNameEnd-2))) {
				continue;
			}
			hostStart += 2;
			hostEnd = strchr(hostStart, '/');
			if((hostEnd == NULL) || (hostEnd >= imageNameEnd)) {
				/* no path component inside the image name */
				continue;
			}
			hostLen = (size_t)(hostEnd - hostStart);
			pathLen = (size_t)(imageNameEnd - hostEnd);
			if(pathLen >= MAX_PATH_LEN) {
				printf("***Image path too long; skipping\n");
				continue;
			}
			ownedHost = (char *)malloc(hostLen + 1);
			if(ownedHost == NULL) {
				printf("***Out of memory copying image host\n");
				totalErrors++;
				break;
			}
			memcpy(ownedHost, hostStart, hostLen);
			ownedHost[hostLen] = '\0';
			thisThread->host = ownedHost;
			memcpy(thisThread->path, hostEnd, pathLen);
			thisThread->path[pathLen] = '\0';
			if(isSsl && isHostNonSsl(ownedHost)) {
				thisThread->isSsl = 0;
			}
		}
		else {
			/* page-relative: prefix with the page's directory */
			if((basePathLen + imageNameLen) >= MAX_PATH_LEN) {
				printf("***Image path too long; skipping\n");
				continue;
			}
			memcpy(thisThread->path, basePath, basePathLen);
			memcpy(thisThread->path + basePathLen, imageNameStart, imageNameLen);
			thisThread->path[basePathLen + imageNameLen] = '\0';
		}

#if SKIP_MULTI_QUERIES
		if(strchr(thisThread->path, '|')) {
			free(ownedHost);
			continue;
		}
#endif

		/* 8 == strlen("https://"); the URL must fit in fetchUrl()'s buffer */
		if((8 + strlen(thisThread->host) + strlen(thisThread->path)) >= MAX_URL_LEN) {
			printf("***Image URL too long; skipping\n");
			free(ownedHost);
			continue;
		}

		if(!quiet) {
			printf(" ");
			printUrl(thisThread->host, thisThread->path, thisThread->isSsl);
			printf(": thread %u : forking imageThread\n",
				thisThread->threadNum);
		}
		prtn = pthread_create(&thisThread->pthr,
			NULL,
			imageThread,
			thisThread);
		if(prtn) {
			printf("***Error creating pthread (%d)\n", prtn);
			totalErrors++;
			free(ownedHost);
			break;
		}
		numThreads++;
		if(numThreads == MAX_THREADS) {
			break;
		}
	}
	free(mungedHtml);

	if(!quiet) {
		printf(" waiting for image threads to complete...\n");
	}
	for(threadDex=0; threadDex<numThreads; threadDex++) {
		void *status;
		thisThread = &params[threadDex];
		prtn = pthread_join(thisThread->pthr, &status);
		if(prtn) {
			printf("***pthread_join returned %d, aborting\n", prtn);
			totalErrors++;
			joinFailed = true;
			break;
		}
		if(!quiet || thisThread->ortn) {
			printf(" ");
			printUrl(thisThread->host, thisThread->path, thisThread->isSsl);
			printf(": thread %u : fetch result %d, read %d bytes\n",
				thisThread->threadNum,
				(int)thisThread->ortn, (int)thisThread->bytesRead);
		}
		if(thisThread->ortn) {
			totalErrors++;
		}
		if(thisThread->host != host) {
			/* host was malloc'd above for an absolute image URL */
			free((void *)thisThread->host);
			thisThread->host = NULL;
		}
	}

	/*
	 * After a failed join, unjoined threads may still be reading their
	 * ThreadParams, so the array (and their host strings) is deliberately
	 * left allocated instead of being freed out from under them.
	 */
	if(!joinFailed) {
		free(params);
	}
	free(basePath);
	return totalErrors;
}
int main(int argc, char **argv)
{
bool isSsl = false;
bool useCfNet = true;
int singleThread = 0;
int quiet = 0;
OSStatus ortn;
int arg;
CFDataRef cfData;
char *host;
char *path;
int ourRtn = 0;
if(argc < 3) {
usage(argv);
}
host = argv[1];
path = argv[2];
for(arg=3; arg<argc; arg++) {
switch(argv[arg][0]) {
case 's':
isSsl = true;
break;
case 'u':
useCfNet = false;
break;
case 't':
singleThread = 1;
break;
case 'q':
quiet = 1;
break;
default:
usage(argv);
}
}
printf("...fetching page at ");
printUrl(host, path, isSsl);
printf("\n");
unsigned bytesRead;
ortn = fetchUrl(host, path, isSsl, useCfNet, singleThread, &bytesRead, &cfData);
if(ortn) {
printf("***Error %d fetching from host %s path %s\n", (int)ortn, host, path);
exit(1);
}
ourRtn = fetchImages(cfData, host, path, isSsl, useCfNet, singleThread, quiet);
CFRelease(cfData);
if(ourRtn) {
printf("===%s exiting with %d %s for host %s\n", argv[0], ourRtn,
(ourRtn > 1) ? "errors" : "error", host);
}
return ourRtn;
}