#include <stdint.h>
#include <math.h>
#include <unistd.h>
#include <sys/param.h>
#include <mach-o/ranlib.h>
#include <ar.h>
#include <vector>
#include <set>
#include <map>
#include <algorithm>
#include <ext/hash_map>
#include "MachOFileAbstraction.hpp"
#include "Architectures.hpp"
#include "macho_relocatable_file.h"
#include "lto_file.h"
#include "archive_file.h"
namespace archive {
typedef const struct ranlib* ConstRanLibPtr;
template <typename A> class File;
template <typename A>
class Parser
{
public:
typedef typename A::P P;
static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
const mach_o::relocatable::ParserOptions& opts) {
return File<A>::validFile(fileContent, fileLength, opts); }
static File<A>* parse(const uint8_t* fileContent, uint64_t fileLength,
const char* path, time_t mTime,
uint32_t ordinal, const ParserOptions& opts) {
return new File<A>(fileContent, fileLength, path, mTime,
ordinal, opts);
}
};
template <typename A>
class File : public ld::archive::File
{
public:
static bool validFile(const uint8_t* fileContent, uint64_t fileLength,
const mach_o::relocatable::ParserOptions& opts);
File(const uint8_t* fileContent, uint64_t fileLength,
const char* pth, time_t modTime,
uint32_t ord, const ParserOptions& opts);
virtual ~File() {}
virtual bool forEachAtom(ld::File::AtomHandler&) const;
virtual bool justInTimeforEachAtom(const char* name, ld::File::AtomHandler&) const;
virtual uint32_t subFileCount() const { return _archiveFilelength/sizeof(ar_hdr); }
virtual bool justInTimeDataOnlyforEachAtom(const char* name, ld::File::AtomHandler& handler) const;
private:
static bool validMachOFile(const uint8_t* fileContent, uint64_t fileLength,
const mach_o::relocatable::ParserOptions& opts);
static bool validLTOFile(const uint8_t* fileContent, uint64_t fileLength,
const mach_o::relocatable::ParserOptions& opts);
static cpu_type_t architecture();
class Entry : ar_hdr
{
public:
const char* name() const;
time_t modificationTime() const;
const uint8_t* content() const;
uint32_t contentSize() const;
const Entry* next() const;
private:
bool hasLongName() const;
unsigned int getLongNameSpace() const;
};
class CStringEquals
{
public:
bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); }
};
typedef __gnu_cxx::hash_map<const char*, const struct ranlib*, __gnu_cxx::hash<const char*>, CStringEquals> NameToEntryMap;
typedef typename A::P P;
typedef typename A::P::E E;
struct MemberState { ld::relocatable::File* file; bool logged; bool loaded; };
typedef std::map<const class Entry*, MemberState> MemberToStateMap;
const struct ranlib* ranlibHashSearch(const char* name) const;
MemberState& makeObjectFileForMember(const Entry* member) const;
bool memberHasObjCCategories(const Entry* member) const;
void dumpTableOfContents();
void buildHashTable();
const uint8_t* _archiveFileContent;
uint64_t _archiveFilelength;
const struct ranlib* _tableOfContents;
uint32_t _tableOfContentCount;
const char* _tableOfContentStrings;
mutable MemberToStateMap _instantiatedEntries;
NameToEntryMap _hashTable;
const bool _forceLoadAll;
const bool _forceLoadObjC;
const bool _forceLoadThis;
const bool _objc2ABI;
const bool _verboseLoad;
const bool _logAllFiles;
const mach_o::relocatable::ParserOptions _objOpts;
};
template <typename A>
bool File<A>::Entry::hasLongName() const
{
return ( strncmp(this->ar_name, AR_EFMT1, strlen(AR_EFMT1)) == 0 );
}
template <typename A>
unsigned int File<A>::Entry::getLongNameSpace() const
{
char* endptr;
long result = strtol(&this->ar_name[strlen(AR_EFMT1)], &endptr, 10);
return result;
}
template <typename A>
const char* File<A>::Entry::name() const
{
if ( this->hasLongName() ) {
int len = this->getLongNameSpace();
static char longName[256];
strncpy(longName, ((char*)this)+sizeof(ar_hdr), len);
longName[len] = '\0';
return longName;
}
else {
static char shortName[20];
strncpy(shortName, this->ar_name, 16);
shortName[16] = '\0';
char* space = strchr(shortName, ' ');
if ( space != NULL )
*space = '\0';
return shortName;
}
}
template <typename A>
time_t File<A>::Entry::modificationTime() const
{
char temp[14];
strncpy(temp, this->ar_date, 12);
temp[12] = '\0';
char* endptr;
return (time_t)strtol(temp, &endptr, 10);
}
template <typename A>
const uint8_t* File<A>::Entry::content() const
{
if ( this->hasLongName() )
return ((uint8_t*)this) + sizeof(ar_hdr) + this->getLongNameSpace();
else
return ((uint8_t*)this) + sizeof(ar_hdr);
}
template <typename A>
uint32_t File<A>::Entry::contentSize() const
{
char temp[12];
strncpy(temp, this->ar_size, 10);
temp[10] = '\0';
char* endptr;
long size = strtol(temp, &endptr, 10);
if ( this->hasLongName() )
size -= this->getLongNameSpace();
return size;
}
template <typename A>
const class File<A>::Entry* File<A>::Entry::next() const
{
const uint8_t* p = this->content() + contentSize();
p = (const uint8_t*)(((uintptr_t)p+3) & (-4)); return (class File<A>::Entry*)p;
}
template <> cpu_type_t File<x86>::architecture() { return CPU_TYPE_I386; }
template <> cpu_type_t File<x86_64>::architecture() { return CPU_TYPE_X86_64; }
template <> cpu_type_t File<arm>::architecture() { return CPU_TYPE_ARM; }
template <typename A>
bool File<A>::validMachOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
{
return mach_o::relocatable::isObjectFile(fileContent, fileLength, opts);
}
template <typename A>
bool File<A>::validLTOFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
{
return lto::isObjectFile(fileContent, fileLength, opts.architecture, opts.subType);
}
template <typename A>
bool File<A>::validFile(const uint8_t* fileContent, uint64_t fileLength, const mach_o::relocatable::ParserOptions& opts)
{
if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
return false;
const Entry* const start = (Entry*)&fileContent[8];
const Entry* const end = (Entry*)&fileContent[fileLength];
for (const Entry* p=start; p < end; p = p->next()) {
const char* memberName = p->name();
if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
continue;
return (validMachOFile(p->content(), p->contentSize(), opts) || validLTOFile(p->content(), p->contentSize(), opts));
}
return true;
}
template <typename A>
File<A>::File(const uint8_t fileContent[], uint64_t fileLength, const char* pth, time_t modTime,
uint32_t ord, const ParserOptions& opts)
: ld::archive::File(strdup(pth), modTime, ord),
_archiveFileContent(fileContent), _archiveFilelength(fileLength),
_tableOfContents(NULL), _tableOfContentCount(0), _tableOfContentStrings(NULL),
_forceLoadAll(opts.forceLoadAll), _forceLoadObjC(opts.forceLoadObjC),
_forceLoadThis(opts.forceLoadThisArchive), _objc2ABI(opts.objcABI2), _verboseLoad(opts.verboseLoad),
_logAllFiles(opts.logAllFiles), _objOpts(opts.objOpts)
{
if ( strncmp((const char*)fileContent, "!<arch>\n", 8) != 0 )
throw "not an archive";
if ( !_forceLoadAll ) {
const Entry* const firstMember = (Entry*)&_archiveFileContent[8];
if ( (strcmp(firstMember->name(), SYMDEF_SORTED) == 0) || (strcmp(firstMember->name(), SYMDEF) == 0) ) {
const uint8_t* contents = firstMember->content();
uint32_t ranlibArrayLen = E::get32(*((uint32_t*)contents));
_tableOfContents = (const struct ranlib*)&contents[4];
_tableOfContentCount = ranlibArrayLen / sizeof(struct ranlib);
_tableOfContentStrings = (const char*)&contents[ranlibArrayLen+8];
if ( ((uint8_t*)(&_tableOfContents[_tableOfContentCount]) > &fileContent[fileLength])
|| ((uint8_t*)_tableOfContentStrings > &fileContent[fileLength]) )
throw "malformed archive, perhaps wrong architecture";
this->buildHashTable();
}
else
throw "archive has no table of contents";
}
}
template <>
bool File<x86>::memberHasObjCCategories(const Entry* member) const
{
if ( _objc2ABI ) {
return mach_o::relocatable::hasObjC2Categories(member->content());
}
else {
return false;
}
}
template <typename A>
bool File<A>::memberHasObjCCategories(const Entry* member) const
{
return mach_o::relocatable::hasObjC2Categories(member->content());
}
template <typename A>
typename File<A>::MemberState& File<A>::makeObjectFileForMember(const Entry* member) const
{
typename MemberToStateMap::iterator pos = _instantiatedEntries.find(member);
if ( pos != _instantiatedEntries.end() )
return pos->second;
const char* memberName = member->name();
char memberPath[strlen(this->path()) + strlen(memberName)+4];
strcpy(memberPath, this->path());
strcat(memberPath, "(");
strcat(memberPath, memberName);
strcat(memberPath, ")");
try {
if ( member > (Entry*)(_archiveFileContent+_archiveFilelength) )
throwf("corrupt archive, member starts past end of file");
if ( (member->content() + member->contentSize()) > (_archiveFileContent+_archiveFilelength) )
throwf("corrupt archive, member contents extends past end of file");
const char* mPath = strdup(memberPath);
uint32_t memberIndex = ((uint8_t*)member - _archiveFileContent)/sizeof(ar_hdr);
ld::relocatable::File* result = mach_o::relocatable::parse(member->content(), member->contentSize(),
mPath, member->modificationTime(),
this->ordinal() + memberIndex, _objOpts);
if ( result != NULL ) {
MemberState state = {result, false, false};
_instantiatedEntries[member] = state;
return _instantiatedEntries[member];
}
result = lto::parse(member->content(), member->contentSize(),
mPath, member->modificationTime(), this->ordinal() + memberIndex,
_objOpts.architecture, _objOpts.subType, _logAllFiles);
if ( result != NULL ) {
MemberState state = {result, false, false};
_instantiatedEntries[member] = state;
return _instantiatedEntries[member];
}
throwf("archive member '%s' with length %d is not mach-o or llvm bitcode", memberName, member->contentSize());
}
catch (const char* msg) {
throwf("in %s, %s", memberPath, msg);
}
}
template <typename A>
bool File<A>::forEachAtom(ld::File::AtomHandler& handler) const
{
bool didSome = false;
if ( _forceLoadAll || _forceLoadThis ) {
const Entry* const start = (Entry*)&_archiveFileContent[8];
const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
for (const Entry* p=start; p < end; p = p->next()) {
const char* memberName = p->name();
if ( (p==start) && ((strcmp(memberName, SYMDEF_SORTED) == 0) || (strcmp(memberName, SYMDEF) == 0)) )
continue;
MemberState& state = this->makeObjectFileForMember(p);
if ( _verboseLoad ) {
if ( _forceLoadThis )
printf("-force_load forced load of %s(%s)\n", this->path(), memberName);
else
printf("-all_load forced load of %s(%s)\n", this->path(), memberName);
state.logged = true;
}
didSome |= state.file->forEachAtom(handler);
state.loaded = true;
}
}
else if ( _forceLoadObjC ) {
for(typename NameToEntryMap::const_iterator it = _hashTable.begin(); it != _hashTable.end(); ++it) {
if ( (strncmp(it->first, ".objc_c", 7) == 0) || (strncmp(it->first, "_OBJC_CLASS_$_", 14) == 0) ) {
const Entry* member = (Entry*)&_archiveFileContent[E::get32(it->second->ran_off)];
MemberState& state = this->makeObjectFileForMember(member);
if ( _verboseLoad && !state.logged ) {
printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
state.logged = true;
}
if ( ! state.loaded ) {
didSome |= state.file->forEachAtom(handler);
state.loaded = true;
}
}
}
const Entry* const start = (Entry*)&_archiveFileContent[8];
const Entry* const end = (Entry*)&_archiveFileContent[_archiveFilelength];
for (const Entry* member=start; member < end; member = member->next()) {
if ( _instantiatedEntries.count(member) == 0 ) {
if ( this->memberHasObjCCategories(member) ) {
MemberState& state = this->makeObjectFileForMember(member);
if ( _verboseLoad && !state.logged ) {
printf("-ObjC forced load of %s(%s)\n", this->path(), member->name());
state.logged = true;
}
if ( ! state.loaded ) {
didSome |= state.file->forEachAtom(handler);
state.loaded = true;
}
}
}
}
}
return didSome;
}
template <typename A>
bool File<A>::justInTimeforEachAtom(const char* name, ld::File::AtomHandler& handler) const
{
if ( _forceLoadAll || _forceLoadThis )
return false;
const struct ranlib* result = ranlibHashSearch(name);
if ( result != NULL ) {
const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
MemberState& state = this->makeObjectFileForMember(member);
if ( ! state.loaded && !state.logged ) {
if ( _verboseLoad ) {
printf("%s forced load of %s(%s)\n", name, this->path(), member->name());
state.logged = true;
}
state.loaded = true;
return state.file->forEachAtom(handler);
}
}
return false;
}
class CheckIsDataSymbolHandler : public ld::File::AtomHandler
{
public:
CheckIsDataSymbolHandler(const char* n) : _name(n), _isData(false) {}
virtual void doAtom(const class ld::Atom& atom) {
if ( strcmp(atom.name(), _name) == 0 ) {
if ( atom.section().type() != ld::Section::typeCode )
_isData = true;
}
}
virtual void doFile(const class ld::File&) {}
bool symbolIsDataDefinition() { return _isData; }
private:
const char* _name;
bool _isData;
};
template <typename A>
bool File<A>::justInTimeDataOnlyforEachAtom(const char* name, ld::File::AtomHandler& handler) const
{
if ( _forceLoadAll || _forceLoadThis )
return false;
const struct ranlib* result = ranlibHashSearch(name);
if ( result != NULL ) {
const Entry* member = (Entry*)&_archiveFileContent[E::get32(result->ran_off)];
MemberState& state = this->makeObjectFileForMember(member);
if ( ! state.loaded ) {
CheckIsDataSymbolHandler checker(name);
state.file->forEachAtom(checker);
if ( checker.symbolIsDataDefinition() ) {
if ( _verboseLoad && !state.logged ) {
printf("%s forced load of %s(%s)\n", name, this->path(), member->name());
state.logged = true;
}
state.loaded = true;
return state.file->forEachAtom(handler);
}
}
}
return false;
}
typedef const struct ranlib* ConstRanLibPtr;
template <typename A>
ConstRanLibPtr File<A>::ranlibHashSearch(const char* name) const
{
typename NameToEntryMap::const_iterator pos = _hashTable.find(name);
if ( pos != _hashTable.end() )
return pos->second;
else
return NULL;
}
template <typename A>
void File<A>::buildHashTable()
{
for (int i = _tableOfContentCount-1; i >= 0; --i) {
const struct ranlib* entry = &_tableOfContents[i];
const char* entryName = &_tableOfContentStrings[E::get32(entry->ran_un.ran_strx)];
if ( E::get32(entry->ran_off) > _archiveFilelength ) {
throwf("malformed archive TOC entry for %s, offset %d is beyond end of file %lld\n",
entryName, entry->ran_off, _archiveFilelength);
}
_hashTable[entryName] = entry;
}
}
template <typename A>
void File<A>::dumpTableOfContents()
{
for (unsigned int i=0; i < _tableOfContentCount; ++i) {
const struct ranlib* e = &_tableOfContents[i];
printf("%s in %s\n", &_tableOfContentStrings[E::get32(e->ran_un.ran_strx)], ((Entry*)&_archiveFileContent[E::get32(e->ran_off)])->name());
}
}
ld::archive::File* parse(const uint8_t* fileContent, uint64_t fileLength,
const char* path, time_t modTime, uint32_t ordinal, const ParserOptions& opts)
{
switch ( opts.objOpts.architecture ) {
case CPU_TYPE_X86_64:
if ( archive::Parser<x86_64>::validFile(fileContent, fileLength, opts.objOpts) )
return archive::Parser<x86_64>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
break;
case CPU_TYPE_I386:
if ( archive::Parser<x86>::validFile(fileContent, fileLength, opts.objOpts) )
return archive::Parser<x86>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
break;
case CPU_TYPE_ARM:
if ( archive::Parser<arm>::validFile(fileContent, fileLength, opts.objOpts) )
return archive::Parser<arm>::parse(fileContent, fileLength, path, modTime, ordinal, opts);
break;
}
return NULL;
}
};