// MachOReaderDylib.hpp
/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
*
* Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#ifndef __OBJECT_FILE_DYLIB_MACH_O__
#define __OBJECT_FILE_DYLIB_MACH_O__
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <vector>
#include <set>
#include <algorithm>
#include <ext/hash_map>
#include "MachOFileAbstraction.hpp"
#include "ObjectFile.h"
//
//
// To implement architecture xxx, you must write template specializations for the following method:
// Reader<xxx>::validFile()
//
//
namespace mach_o {
namespace dylib {
// forward reference
template <typename A> class Reader;
class Segment : public ObjectFile::Segment
{
public:
    // Stores the given name pointer as-is; the string itself is not copied.
                            Segment(const char* name) : fName(name) {}

    // Name supplied at construction time.
    virtual const char*     getName() const
    {
        return fName;
    }

    // Access attributes reported for this segment: readable only.
    virtual bool            isContentReadable() const
    {
        return true;
    }
    virtual bool            isContentWritable() const
    {
        return false;
    }
    virtual bool            isContentExecutable() const
    {
        return false;
    }

private:
    const char*             fName;
};
//
// An ExportAtom has no content. It exists so that the linker can track which imported
// symbols came from which dynamic libraries.
//
template <typename A>
class ExportAtom : public ObjectFile::Atom
{
public:
    // Reader this atom was created by.
    virtual ObjectFile::Reader* getFile() const
    {
        return &fOwner;
    }

    // No source information is available; the out-parameters are not written.
    virtual bool getTranslationUnitSource(const char** dir, const char** name) const
    {
        return false;
    }

    // The symbol name doubles as the display name.
    virtual const char* getName() const
    {
        return fName;
    }
    virtual const char* getDisplayName() const
    {
        return fName;
    }

    // Always reported with global scope.
    virtual Scope getScope() const
    {
        return ObjectFile::Atom::scopeGlobal;
    }

    // External definition; the weak flavor is reported when the atom was created as weak.
    virtual DefinitionKind getDefinitionKind() const
    {
        if ( fWeakDefinition )
            return kExternalWeakDefinition;
        return kExternalDefinition;
    }

    virtual SymbolTableInclusion getSymbolTableInclusion() const
    {
        return ObjectFile::Atom::kSymbolTableIn;
    }

    virtual bool dontDeadStrip() const
    {
        return false;
    }

    virtual bool isZeroFill() const
    {
        return false;
    }

    // The atom has no content, so its size is zero.
    virtual uint64_t getSize() const
    {
        return 0;
    }

    // All export atoms share one (empty) reference list.
    virtual std::vector<ObjectFile::Reference*>& getReferences() const
    {
        return fgEmptyReferenceList;
    }

    virtual bool mustRemainInSection() const
    {
        return false;
    }

    virtual const char* getSectionName() const
    {
        return "._imports";
    }

    // All export atoms share one Segment object.
    virtual Segment& getSegment() const
    {
        return fgImportSegment;
    }

    virtual bool requiresFollowOnAtom() const
    {
        return false;
    }

    // NOTE(review): dereferences a NULL pointer; requiresFollowOnAtom() returns false,
    // so callers are presumably expected never to call this.
    virtual ObjectFile::Atom& getFollowOnAtom() const
    {
        return *((ObjectFile::Atom*)NULL);
    }

    virtual std::vector<ObjectFile::LineInfo>* getLineInfo() const
    {
        return NULL;
    }

    virtual uint8_t getAlignment() const
    {
        return 0;
    }

    // Nothing to copy: the buffer is left untouched.
    virtual void copyRawContent(uint8_t buffer[]) const
    {
    }

    // Requests to change the scope are ignored.
    virtual void setScope(Scope)
    {
    }

protected:
    // Only the dylib reader creates export atoms.
    friend class Reader<A>;

    typedef typename A::P P;

    // Stores a reference to the owning reader and the name pointer (the string is not copied).
    ExportAtom(ObjectFile::Reader& owner, const char* name, bool weak)
        : fOwner(owner),
          fName(name),
          fWeakDefinition(weak)
    {
    }

    virtual ~ExportAtom()
    {
    }

    ObjectFile::Reader&                         fOwner;
    const char*                                 fName;
    bool                                        fWeakDefinition;

    static std::vector<ObjectFile::Reference*>  fgEmptyReferenceList;
    static Segment                              fgImportSegment;
};
// Shared empty reference list handed out by every ExportAtom<A>::getReferences() call.
template <typename A>
std::vector<ObjectFile::Reference*>     ExportAtom<A>::fgEmptyReferenceList;

// Shared segment object handed out by every ExportAtom<A>::getSegment() call.
template <typename A>
Segment                                 ExportAtom<A>::fgImportSegment("__LINKEDIT");
//
// The reader for a dylib extracts all exported symbol names from the memory-mapped
// dylib, builds a hash table, then unmaps the file. This is an important memory
// saving for large dylibs.
//
template <typename A>
class Reader : public ObjectFile::Reader
{
public:
    // Architecture-specific check; each supported architecture provides a specialization.
    static bool validFile(const uint8_t* fileContent, bool executableOrDylib);

    // Factory: heap-allocates a reader for the given mapped file.
    static Reader<A>* make(const uint8_t* fileContent, uint64_t fileLength, const char* path,
                           bool executableOrDylib, const ObjectFile::ReaderOptions& options)
    {
        Reader<A>* const reader = new Reader<A>(fileContent, fileLength, path, executableOrDylib, options);
        return reader;
    }

    virtual ~Reader()
    {
    }

    virtual const char* getPath()
    {
        return fPath;
    }

    virtual time_t getModificationTime()
    {
        return 0;
    }

    virtual DebugInfoKind getDebugInfoKind()
    {
        return ObjectFile::Reader::kDebugInfoNone;
    }

    virtual std::vector<class ObjectFile::Atom*>& getAtoms();
    virtual std::vector<class ObjectFile::Atom*>* getJustInTimeAtomsFor(const char* name);

    virtual std::vector<Stab>* getStabs()
    {
        return NULL;
    }

    // Values recorded from the LC_ID_DYLIB load command (NULL / 0 when none was seen).
    virtual const char* getInstallPath()
    {
        return fDylibInstallPath;
    }
    virtual uint32_t getTimestamp()
    {
        return fDylibTimeStamp;
    }
    virtual uint32_t getCurrentVersion()
    {
        return fDylibtCurrentVersion;
    }
    virtual uint32_t getCompatibilityVersion()
    {
        return fDylibCompatibilityVersion;
    }

    virtual std::vector<const char*>* getDependentLibraryPaths();
    virtual bool reExports(ObjectFile::Reader*);
    virtual std::vector<const char*>* getAllowableClients();

protected:
    // Umbrella name recorded from LC_SUB_FRAMEWORK, or NULL when none was seen.
    const char* parentUmbrella()
    {
        return fParentUmbrella;
    }

private:
    typedef typename A::P       P;
    typedef typename A::P::E    E;

    // Equality predicate for the hash table: compares C strings by content.
    class CStringEquals
    {
    public:
        bool operator()(const char* left, const char* right) const
        {
            return (strcmp(left, right) == 0);
        }
    };

    // Hash table value: the lazily created atom (NULL until requested) and its weak-definition flag.
    struct AtomAndWeak
    {
        ObjectFile::Atom*   atom;
        bool                weak;
    };

    typedef __gnu_cxx::hash_map<const char*, AtomAndWeak, __gnu_cxx::hash<const char*>, CStringEquals> NameToAtomMap;
    typedef typename NameToAtomMap::iterator NameToAtomMapIterator;

    // One dependent library: its path and whether this dylib re-exports it.
    struct PathAndFlag
    {
        const char* path;
        bool        reExport;
    };

    Reader(const uint8_t* fileContent, uint64_t fileLength, const char* path,
           bool executableOrDylib, const ObjectFile::ReaderOptions& options);

    const char*                                 fPath;
    const char*                                 fParentUmbrella;
    std::vector<const char*>                    fAllowableClients;
    const char*                                 fDylibInstallPath;
    uint32_t                                    fDylibTimeStamp;
    uint32_t                                    fDylibtCurrentVersion;
    uint32_t                                    fDylibCompatibilityVersion;
    std::vector<PathAndFlag>                    fDependentLibraryPaths;
    NameToAtomMap                               fAtoms;

    static bool                                 fgLogHashtable;
    static std::vector<class ObjectFile::Atom*> fgEmptyAtomList;
};
// When true, hash table construction and lookups are traced to stderr.
template <typename A>
bool                                    Reader<A>::fgLogHashtable = false;

// Shared empty list returned by Reader<A>::getAtoms().
template <typename A>
std::vector<class ObjectFile::Atom*>    Reader<A>::fgEmptyAtomList;
//
// Builds a reader from the mapped content of a dylib (or main executable).
//
//   fileContent/fileLength  the mapped file; it is munmap()ed before a normal return
//   path                    path of the file; copied with strdup()
//   executableOrDylib       forwarded to validFile()
//   options                 only fFlatNamespace is consulted here (re-export processing
//                           is skipped when it is set)
//
// Throws a const char* message when the file fails validFile() or lacks a required
// load command. NOTE(review): on those throw paths the file is not unmapped here and
// the strings already strdup()ed are not released - confirm the caller copes with that.
//
template <typename A>
Reader<A>::Reader(const uint8_t* fileContent, uint64_t fileLength, const char* path, bool executableOrDylib, const ObjectFile::ReaderOptions& options)
 : fParentUmbrella(NULL), fDylibInstallPath(NULL), fDylibTimeStamp(0), fDylibtCurrentVersion(0), fDylibCompatibilityVersion(0)
{
    // sanity check
    if ( ! validFile(fileContent, executableOrDylib) )
        throw "not a valid mach-o object file";

    fPath = strdup(path);

    const macho_header<P>* header = (const macho_header<P>*)fileContent;
    const uint32_t cmd_count = header->ncmds();
    // load commands start immediately after the header
    const macho_load_command<P>* const cmds = (macho_load_command<P>*)((char*)header + sizeof(macho_header<P>));

    // a "blank" stub has zero load commands
    if ( (header->filetype() == MH_DYLIB_STUB) && (cmd_count == 0) ) {
        // no further processing needed
        munmap((caddr_t)fileContent, fileLength);
        return;
    }

    // Every case body below that declares a local is wrapped in braces so that no
    // later case label can jump past an initialization.

    // pass 1 builds list of all dependent libraries
    const macho_load_command<P>* cmd = cmds;
    for (uint32_t i = 0; i < cmd_count; ++i) {
        switch (cmd->cmd()) {
            case LC_LOAD_DYLIB:
            case LC_LOAD_WEAK_DYLIB:
                {
                    PathAndFlag entry;
                    entry.path = strdup(((macho_dylib_command<P>*)cmd)->name());
                    entry.reExport = false;     // may be switched on by pass 2
                    fDependentLibraryPaths.push_back(entry);
                }
                break;
        }
        cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
    }

    // pass 2 determines re-export info (and records symbol table / dylib ID info)
    const macho_dysymtab_command<P>* dynamicInfo = NULL;
    const macho_nlist<P>* symbolTable = NULL;
    const char* strings = NULL;
    cmd = cmds;
    for (uint32_t i = 0; i < cmd_count; ++i) {
        switch (cmd->cmd()) {
            case LC_SYMTAB:
                {
                    const macho_symtab_command<P>* symtab = (macho_symtab_command<P>*)cmd;
                    symbolTable = (const macho_nlist<P>*)((char*)header + symtab->symoff());
                    strings = (char*)header + symtab->stroff();
                }
                break;
            case LC_DYSYMTAB:
                dynamicInfo = (macho_dysymtab_command<P>*)cmd;
                break;
            case LC_ID_DYLIB:
                {
                    macho_dylib_command<P>* dylibID = (macho_dylib_command<P>*)cmd;
                    fDylibInstallPath = strdup(dylibID->name());
                    fDylibTimeStamp = dylibID->timestamp();
                    fDylibtCurrentVersion = dylibID->current_version();
                    fDylibCompatibilityVersion = dylibID->compatibility_version();
                }
                break;
            case LC_SUB_UMBRELLA:
                if ( !options.fFlatNamespace ) {
                    // a dependent whose leaf name equals the sub-umbrella name is re-exported
                    const char* frameworkLeafName = ((macho_sub_umbrella_command<P>*)cmd)->sub_umbrella();
                    for (typename std::vector<PathAndFlag>::iterator it = fDependentLibraryPaths.begin(); it != fDependentLibraryPaths.end(); ++it) {
                        const char* dylibName = it->path;
                        const char* lastSlash = strrchr(dylibName, '/');
                        if ( (lastSlash != NULL) && (strcmp(&lastSlash[1], frameworkLeafName) == 0) )
                            it->reExport = true;
                    }
                }
                break;
            case LC_SUB_LIBRARY:
                if ( !options.fFlatNamespace ) {
                    const char* dylibBaseName = ((macho_sub_library_command<P>*)cmd)->sub_library();
                    for (typename std::vector<PathAndFlag>::iterator it = fDependentLibraryPaths.begin(); it != fDependentLibraryPaths.end(); ++it) {
                        const char* dylibName = it->path;
                        const char* lastSlash = strrchr(dylibName, '/');
                        // leaf name = text after the last '/', or the whole path when there is none
                        // (never form a pointer from a NULL lastSlash)
                        const char* leafStart = (lastSlash != NULL) ? &lastSlash[1] : dylibName;
                        // compare only the part of the leaf name before its first '.'
                        const char* firstDot = strchr(leafStart, '.');
                        const size_t len = (firstDot != NULL) ? (size_t)(firstDot - leafStart) : strlen(leafStart);
                        // NOTE(review): this is a prefix comparison over len characters, so a
                        // sub-library name that merely starts with the leaf base name also matches.
                        if ( strncmp(leafStart, dylibBaseName, len) == 0 )
                            it->reExport = true;
                    }
                }
                break;
            case LC_SUB_FRAMEWORK:
                fParentUmbrella = strdup(((macho_sub_framework_command<P>*)cmd)->umbrella());
                break;
        }
        cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
    }

    // Process the rest of the commands here.
    cmd = cmds;
    for (uint32_t i = 0; i < cmd_count; ++i) {
        switch (cmd->cmd()) {
            case LC_SUB_CLIENT:
                {
                    const char* temp = strdup(((macho_sub_client_command<P>*)cmd)->client());
                    fAllowableClients.push_back(temp);
                }
                break;
        }
        cmd = (const macho_load_command<P>*)(((char*)cmd)+cmd->cmdsize());
    }

    // validate minimal load commands
    if ( (fDylibInstallPath == NULL) && (header->filetype() != MH_EXECUTE) )
        throw "dylib missing LC_ID_DYLIB load command";
    if ( symbolTable == NULL )
        throw "dylib missing LC_SYMTAB load command";
    if ( dynamicInfo == NULL )
        throw "dylib missing LC_DYSYMTAB load command";

    // build hash table
    // Names are strdup()ed because the mapped file (and its string pool) is released below.
    if ( dynamicInfo->tocoff() == 0 ) {
        // no table of contents: walk the externally defined symbols directly
        if ( fgLogHashtable ) fprintf(stderr, "ld64: building hashtable of %u toc entries for %s\n", dynamicInfo->nextdefsym(), path);
        const macho_nlist<P>* start = &symbolTable[dynamicInfo->iextdefsym()];
        const macho_nlist<P>* end = &start[dynamicInfo->nextdefsym()];
        fAtoms.resize(dynamicInfo->nextdefsym()); // set initial bucket count
        for (const macho_nlist<P>* sym=start; sym < end; ++sym) {
            AtomAndWeak bucket;
            bucket.atom = NULL;     // atom is created on demand by getJustInTimeAtomsFor()
            bucket.weak = ((sym->n_desc() & N_WEAK_DEF) != 0);
            const char* name = strdup(&strings[sym->n_strx()]);
            if ( fgLogHashtable ) fprintf(stderr, "  adding %s to hash table for %s\n", name, this->getPath());
            fAtoms[name] = bucket;
        }
    }
    else {
        // table of contents present: each entry gives an index into the symbol table
        const uint32_t count = dynamicInfo->ntoc();     // unsigned, matching the %u below
        fAtoms.resize(count); // set initial bucket count
        if ( fgLogHashtable ) fprintf(stderr, "ld64: building hashtable of %u entries for %s\n", count, path);
        const struct dylib_table_of_contents* toc = (dylib_table_of_contents*)((char*)header + dynamicInfo->tocoff());
        for (uint32_t i = 0; i < count; ++i) {
            const uint32_t index = E::get32(toc[i].symbol_index);
            const macho_nlist<P>* sym = &symbolTable[index];
            AtomAndWeak bucket;
            bucket.atom = NULL;     // atom is created on demand by getJustInTimeAtomsFor()
            bucket.weak = ((sym->n_desc() & N_WEAK_DEF) != 0);
            const char* name = strdup(&strings[sym->n_strx()]);
            if ( fgLogHashtable ) fprintf(stderr, "  adding %s to hash table for %s\n", name, this->getPath());
            fAtoms[name] = bucket;
        }
    }

    // unmap file
    munmap((caddr_t)fileContent, fileLength);
}
//
// A dylib reader has no atoms to hand out up front: the shared empty list is returned.
//
template <typename A>
std::vector<class ObjectFile::Atom*>& Reader<A>::getAtoms()
{
    // TO DO: when linking flat_namespace against a flat-namespace library,
    //        an atom referencing all of its undefines needs to be created
    std::vector<class ObjectFile::Atom*>& noAtoms = fgEmptyAtomList;
    return noAtoms;
}
//
// Looks up 'name' in the table of exported symbols.
//
// Returns NULL when the name is not in the table. Otherwise returns a newly allocated
// vector holding the single atom for that symbol; the atom itself is created on first
// request and cached in the table, so repeated lookups yield the same atom.
// NOTE(review): the returned vector is not freed here - presumably the caller owns it.
//
template <typename A>
std::vector<class ObjectFile::Atom*>* Reader<A>::getJustInTimeAtomsFor(const char* name)
{
    NameToAtomMapIterator pos = fAtoms.find(name);
    if ( pos == fAtoms.end() ) {
        if ( fgLogHashtable ) fprintf(stderr, "getJustInTimeAtomsFor: %s NOT found in %s\n", name, this->getPath());
        return NULL;
    }

    if ( pos->second.atom == NULL ) {
        // instantiate atom and update hash table
        // The atom is cached and outlives this call, so give it the table's own copy of
        // the name (pos->first) rather than the caller's pointer, which may be transient.
        pos->second.atom = new ExportAtom<A>(*this, pos->first, pos->second.weak);
        if ( fgLogHashtable ) fprintf(stderr, "getJustInTimeAtomsFor: %s found in %s\n", name, this->getPath());
    }

    // return a vector of one atom
    std::vector<class ObjectFile::Atom*>* atoms = new std::vector<class ObjectFile::Atom*>;
    atoms->push_back(pos->second.atom);
    return atoms;
}
//
// Returns a newly allocated vector with the path of every dependent library, in the
// order they were recorded. The path strings are shared with this reader, not copied.
//
template <typename A>
std::vector<const char*>* Reader<A>::getDependentLibraryPaths()
{
    std::vector<const char*>* paths = new std::vector<const char*>;
    const size_t total = fDependentLibraryPaths.size();
    for (size_t i = 0; i < total; ++i) {
        const PathAndFlag& dependent = fDependentLibraryPaths[i];
        paths->push_back(dependent.path);
    }
    return paths;
}
//
// Returns a newly allocated copy of the allowable-client list, or NULL when the list
// is empty. The client name strings are shared with this reader, not copied.
//
template <typename A>
std::vector<const char*>* Reader<A>::getAllowableClients()
{
    // Decide before allocating: returning NULL after building an empty vector
    // would leak that vector.
    if ( fAllowableClients.empty() )
        return NULL;
    return new std::vector<const char*>(fAllowableClients);
}
//
// Reports whether this dylib re-exports 'child'.
//
template <typename A>
bool Reader<A>::reExports(ObjectFile::Reader* child)
{
    // A dependent dylib is re-exported under two conditions:

    // 1) parent contains LC_SUB_UMBRELLA or LC_SUB_LIBRARY with child name
    //    (such dependents were flagged reExport; match them by path or install path)
    const char* childInstallPath = child->getInstallPath();
    for (typename std::vector<PathAndFlag>::iterator dep = fDependentLibraryPaths.begin(); dep != fDependentLibraryPaths.end(); dep++) {
        if ( !dep->reExport )
            continue;
        if ( strcmp(dep->path, child->getPath()) == 0 )
            return true;
        if ( (childInstallPath != NULL) && (strcmp(dep->path, childInstallPath) == 0) )
            return true;
    }

    // 2) child contains LC_SUB_FRAMEWORK with parent name
    // NOTE(review): child is cast to Reader<A>* without any check - assumes the caller
    // only passes dylib readers of the same architecture.
    const char* umbrellaOfChild = ((Reader<A>*)child)->parentUmbrella();
    if ( umbrellaOfChild == NULL )
        return false;

    // compare the leaf name of this reader's path against the child's umbrella name
    const char* ownPath = this->getPath();
    const char* finalSlash = strrchr(ownPath, '/');
    if ( finalSlash == NULL )
        return false;
    return ( strcmp(&finalSlash[1], umbrellaOfChild) == 0 );
}
//
// ppc: accepts a file whose header has magic MH_MAGIC and cputype CPU_TYPE_POWERPC and
// whose filetype is MH_DYLIB or MH_DYLIB_STUB, or MH_EXECUTE when executableOrDylib is set.
// Declared inline because this explicit specialization is defined in a header; without
// it, including the header from more than one translation unit yields duplicate symbols.
//
template <>
inline bool Reader<ppc>::validFile(const uint8_t* fileContent, bool executableOrDylib)
{
    const macho_header<P>* header = (const macho_header<P>*)fileContent;
    if ( header->magic() != MH_MAGIC )
        return false;
    if ( header->cputype() != CPU_TYPE_POWERPC )
        return false;
    switch ( header->filetype() ) {
        case MH_DYLIB:
        case MH_DYLIB_STUB:
            return true;
        case MH_EXECUTE:
            // main executables are only acceptable when the caller allows them
            return executableOrDylib;
        default:
            return false;
    }
}
//
// ppc64: accepts a file whose header has magic MH_MAGIC_64 and cputype CPU_TYPE_POWERPC64 and
// whose filetype is MH_DYLIB or MH_DYLIB_STUB, or MH_EXECUTE when executableOrDylib is set.
// Declared inline because this explicit specialization is defined in a header; without
// it, including the header from more than one translation unit yields duplicate symbols.
//
template <>
inline bool Reader<ppc64>::validFile(const uint8_t* fileContent, bool executableOrDylib)
{
    const macho_header<P>* header = (const macho_header<P>*)fileContent;
    if ( header->magic() != MH_MAGIC_64 )
        return false;
    if ( header->cputype() != CPU_TYPE_POWERPC64 )
        return false;
    switch ( header->filetype() ) {
        case MH_DYLIB:
        case MH_DYLIB_STUB:
            return true;
        case MH_EXECUTE:
            // main executables are only acceptable when the caller allows them
            return executableOrDylib;
        default:
            return false;
    }
}
//
// x86: accepts a file whose header has magic MH_MAGIC and cputype CPU_TYPE_I386 and
// whose filetype is MH_DYLIB or MH_DYLIB_STUB, or MH_EXECUTE when executableOrDylib is set.
// Declared inline because this explicit specialization is defined in a header; without
// it, including the header from more than one translation unit yields duplicate symbols.
//
template <>
inline bool Reader<x86>::validFile(const uint8_t* fileContent, bool executableOrDylib)
{
    const macho_header<P>* header = (const macho_header<P>*)fileContent;
    if ( header->magic() != MH_MAGIC )
        return false;
    if ( header->cputype() != CPU_TYPE_I386 )
        return false;
    switch ( header->filetype() ) {
        case MH_DYLIB:
        case MH_DYLIB_STUB:
            return true;
        case MH_EXECUTE:
            // main executables are only acceptable when the caller allows them
            return executableOrDylib;
        default:
            return false;
    }
}
//
// x86_64: accepts a file whose header has magic MH_MAGIC_64 and cputype CPU_TYPE_X86_64 and
// whose filetype is MH_DYLIB or MH_DYLIB_STUB, or MH_EXECUTE when executableOrDylib is set.
// Declared inline because this explicit specialization is defined in a header; without
// it, including the header from more than one translation unit yields duplicate symbols.
//
template <>
inline bool Reader<x86_64>::validFile(const uint8_t* fileContent, bool executableOrDylib)
{
    const macho_header<P>* header = (const macho_header<P>*)fileContent;
    if ( header->magic() != MH_MAGIC_64 )
        return false;
    if ( header->cputype() != CPU_TYPE_X86_64 )
        return false;
    switch ( header->filetype() ) {
        case MH_DYLIB:
        case MH_DYLIB_STUB:
            return true;
        case MH_EXECUTE:
            // main executables are only acceptable when the caller allows them
            return executableOrDylib;
        default:
            return false;
    }
}
}; // namespace dylib
}; // namespace mach_o
#endif // __OBJECT_FILE_DYLIB_MACH_O__