// dyldInitialization.cpp


/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
 *
 * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 */

#include <stddef.h>
#include <string.h>
#include <mach/mach.h>
#include <mach-o/loader.h>
#include <mach-o/ldsyms.h>
#include <mach-o/reloc.h>
#if __ppc__ || __ppc64__
	#include <mach-o/ppc/reloc.h>
#endif
#include "dyld.h"

// Pick the 32-bit or 64-bit flavor of the Mach-O structures once, so the code
// below can be written a single time for both pointer sizes.
// RELOC_SIZE is the r_length value the rebasing code accepts; r_length encodes
// the fixup width as a power of two (2 => 4 bytes, 3 => 8 bytes), see <mach-o/reloc.h>.
#if __LP64__
	#define macho_header			mach_header_64
	#define LC_SEGMENT_COMMAND		LC_SEGMENT_64
	#define macho_segment_command	segment_command_64
	#define macho_section			section_64
	#define RELOC_SIZE				3
#else
	#define macho_header			mach_header
	#define LC_SEGMENT_COMMAND		LC_SEGMENT
	#define macho_segment_command	segment_command
	#define macho_section			section
	#define RELOC_SIZE				2
#endif


//
//  Code to bootstrap dyld into a runnable state
//
//

namespace dyldbootstrap {


// Signature used to call each function pointer found in an S_MOD_INIT_FUNC_POINTERS
// section; runDyldInitializers() passes all four arguments to every entry.
typedef void (*Initializer)(int argc, const char* argv[], const char* envp[], const char* apple[]);

//
// For a regular executable, the crt code calls dyld to run the executables initializers.
// For a static executable, crt directly runs the initializers.
// dyld (should be static) but is a dynamic executable and needs this hack to run its own initializers.
// We pass argc, argv, etc in case libc.a uses those arguments
//
static void runDyldInitializers(const struct macho_header* mh, intptr_t slide, int argc, const char* argv[], const char* envp[], const char* apple[])
{
	const uint32_t cmd_count = mh->ncmds;
	const struct load_command* const cmds = (struct load_command*)(((char*)mh)+sizeof(macho_header));
	const struct load_command* cmd = cmds;
	for (uint32_t i = 0; i < cmd_count; ++i) {
		switch (cmd->cmd) {
			case LC_SEGMENT_COMMAND:
				{
					const struct macho_segment_command* seg = (struct macho_segment_command*)cmd;
					const struct macho_section* const sectionsStart = (struct macho_section*)((char*)seg + sizeof(struct macho_segment_command));
					const struct macho_section* const sectionsEnd = &sectionsStart[seg->nsects];
					for (const struct macho_section* sect=sectionsStart; sect < sectionsEnd; ++sect) {
						const uint8_t type = sect->flags & SECTION_TYPE;
						if ( type == S_MOD_INIT_FUNC_POINTERS ){
							Initializer* inits = (Initializer*)(sect->addr + slide);
							const uint32_t count = sect->size / sizeof(uintptr_t);
							for (uint32_t i=0; i < count; ++i) {
								Initializer func = inits[i];
								func(argc, argv, envp, apple);
							}
						}
					}
				}
				break;
		}
		cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
	}
}


//
// If the kernel does not load dyld at its preferred address, we need to apply 
// fixups to various initialized parts of the __DATA segment.
//
//   mh      dyld's mach header, at the address it was actually loaded
//   slide   amount added to every pointer that gets fixed up
//
// Two sets of pointers are adjusted:
//   1) every location named by a local relocation (LC_DYSYMTAB locreloff/nlocrel),
//      provided both the __LINKEDIT segment and the LC_DYSYMTAB command were found
//   2) every entry of the non-lazy symbol pointer section
//
// NOTE: if more than one section has type S_NON_LAZY_SYMBOL_POINTERS, only the
// last one encountered is rebased.
//
static void rebaseDyld(const struct macho_header* mh, intptr_t slide)
{
	// get interesting pointers into dyld
	const uint32_t cmd_count = mh->ncmds;
	const struct load_command* const cmds = (struct load_command*)(((char*)mh)+sizeof(macho_header));
	const struct load_command* cmd = cmds;
	const struct macho_segment_command* linkEditSeg = NULL;
	const struct dysymtab_command* dynamicSymbolTable = NULL;
	const struct macho_section* nonLazySection = NULL;
	for (uint32_t i = 0; i < cmd_count; ++i) {
		switch (cmd->cmd) {
			case LC_SEGMENT_COMMAND:
				{
					const struct macho_segment_command* seg = (struct macho_segment_command*)cmd;
					if ( strcmp(seg->segname, "__LINKEDIT") == 0 )
						linkEditSeg = seg;
					// section records directly follow the segment command
					const struct macho_section* const sectionsStart = (struct macho_section*)((char*)seg + sizeof(struct macho_segment_command));
					const struct macho_section* const sectionsEnd = &sectionsStart[seg->nsects];
					for (const struct macho_section* sect=sectionsStart; sect < sectionsEnd; ++sect) {
						const uint8_t type = sect->flags & SECTION_TYPE;
						if ( type == S_NON_LAZY_SYMBOL_POINTERS ) 
							nonLazySection = sect;
					}
				}
				break;
			case LC_DYSYMTAB:
				dynamicSymbolTable = (struct dysymtab_command *)cmd;
				break;
		}
		cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
	}
	
	// use reloc's to rebase all random data pointers
	// The relocation table is located through both the __LINKEDIT segment and the
	// LC_DYSYMTAB command.  If either one was not found there is no table to walk,
	// so skip this pass instead of dereferencing a NULL pointer.
	if ( (linkEditSeg != NULL) && (dynamicSymbolTable != NULL) ) {
		const uintptr_t relocBase = (uintptr_t)mh;
		// locreloff is a file offset; convert it to an address inside the mapped __LINKEDIT segment
		const relocation_info* const relocsStart = (struct relocation_info*)(linkEditSeg->vmaddr + slide + dynamicSymbolTable->locreloff - linkEditSeg->fileoff);
		const relocation_info* const relocsEnd = &relocsStart[dynamicSymbolTable->nlocrel];
		for (const relocation_info* reloc=relocsStart; reloc < relocsEnd; ++reloc) {
			if ( (reloc->r_address & R_SCATTERED) == 0 ) {
				// only pointer-sized vanilla relocations are applied; anything else is ignored
				if (reloc->r_length == RELOC_SIZE) {
					switch(reloc->r_type) {
						case GENERIC_RELOC_VANILLA:
							*((uintptr_t*)(reloc->r_address + relocBase)) += slide;
							break;
					}
				}
			}
			else {
				// R_SCATTERED bit set: the entry has the scattered_relocation_info layout
				const struct scattered_relocation_info* sreloc = (struct scattered_relocation_info*)reloc;
				if (sreloc->r_length == RELOC_SIZE) {
					uintptr_t* locationToFix = (uintptr_t*)(sreloc->r_address + relocBase);
					switch(sreloc->r_type) {
						case GENERIC_RELOC_VANILLA:
		#if __ppc__ || __ppc64__
						case PPC_RELOC_PB_LA_PTR:
		#elif __i386__
						case GENERIC_RELOC_PB_LA_PTR:
		#endif
						// Note the use of PB_LA_PTR is unique here.  Seems like ld should strip out all lazy pointers
						// but it does not.  But, since all lazy-pointers point within dyld, they can be slid too
							*locationToFix += slide;
							break;
					}
				}
			}
		}
	}
	
	// rebase non-lazy pointers (which all point internal to dyld, since dyld uses no shared libraries)
	if ( nonLazySection != NULL ) {
		const uint32_t pointerCount = nonLazySection->size / sizeof(uintptr_t);
		uintptr_t* const symbolPointers = (uintptr_t*)(nonLazySection->addr + slide);
		for (uint32_t j=0; j < pointerCount; ++j) {
			symbolPointers[j] += slide;
		}
	}
}

//
// For some reason the kernel loads dyld with __TEXT and __LINKEDIT writable
// rdar://problem/3702311 
//
static void segmentProtectDyld(const struct macho_header* mh, intptr_t slide)
{
	const uint32_t cmd_count = mh->ncmds;
	const struct load_command* const cmds = (struct load_command*)(((char*)mh)+sizeof(macho_header));
	const struct load_command* cmd = cmds;
	for (uint32_t i = 0; i < cmd_count; ++i) {
		switch (cmd->cmd) {
			case LC_SEGMENT_COMMAND:
				{
					const struct macho_segment_command* seg = (struct macho_segment_command*)cmd;
					vm_address_t addr = seg->vmaddr + slide;
					vm_size_t size = seg->vmsize;
					const bool setCurrentPermissions = false;
					vm_protect(mach_task_self(), addr, size, setCurrentPermissions, seg->initprot);
					//fprintf(stderr, "dyld: segment %s, 0x%08X -> 0x%08X, set to %d\n", seg->segname, addr, addr+size-1, seg->initprot);
				}
				break;
		}
		cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
	}
	
}

// Routines defined outside this file and called by start() during bootstrap.
// start() calls dyld_exceptions_init() to enable C++ exceptions inside dyld.
extern "C" void dyld_exceptions_init(const struct macho_header*, uintptr_t slide); // in dyldExceptions.cpp
// start() calls mach_init() so that dyld can use mach messaging; it must run before
// segmentProtectDyld().  NOTE(review): defined elsewhere, presumably in the C library -- confirm.
extern "C" void mach_init();

//
//  This is code to bootstrap dyld.  This work is normally done for a program by dyld and crt.
//  In dyld we have to do this manually.
//
//    appsMachHeader   handed through untouched to dyld::_main()
//    argc, argv       argument vector; the envp and apple vectors are located relative to it
//    slide            amount by which dyld was slid by the kernel (0 if not slid)
//
//  Returns whatever dyld::_main() returns.
//
//  The order of the steps below matters: rebasing has to happen before any global
//  variable is used, and segment protection has to be set after mach_init().
//
uintptr_t start(const struct mach_header* appsMachHeader, int argc, const char* argv[], intptr_t slide)
{
	// _mh_dylinker_header is magic symbol defined by static linker (ld), see <mach-o/ldsyms.h>
	const char* const linkedHeaderAddress = (const char*)&_mh_dylinker_header;
	const struct macho_header* const dyldHeader = (const struct macho_header*)(linkedHeaderAddress + slide);

	// if kernel had to slide dyld, we need to fix up load sensitive locations
	// we have to do this before using any global variables
	const bool wasSlid = (slide != 0);
	if ( wasSlid )
		rebaseDyld(dyldHeader, slide);

	// enable C++ exceptions to work inside dyld
	dyld_exceptions_init(dyldHeader, slide);

	// allow dyld to use mach messaging
	mach_init();

	// set protection on segments (has to be done after mach_init)
	segmentProtectDyld(dyldHeader, slide);

	// kernel sets up env pointer to be just past end of argv array
	// (argv holds argc entries followed by a NULL terminator)
	const char** const envp = &argv[argc+1];

	// kernel sets up apple pointer to be just past end of envp array:
	// scan to envp's NULL terminator, then step over it
	const char** cursor = envp;
	for (; *cursor != NULL; ++cursor) {
	}
	const char** const apple = cursor + 1;

	// run all C++ initializers inside dyld
	runDyldInitializers(dyldHeader, slide, argc, argv, envp, apple);

	// now that we are done bootstrapping dyld, call dyld's main
	return dyld::_main(appsMachHeader, argc, argv, envp, apple);
}




} // end of namespace