GlobalModuleIndex.cpp [plain text]
#include "ASTReaderInternals.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OnDiskHashTable.h"
#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "clang/Serialization/Module.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Bitcode/BitstreamWriter.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LockFileManager.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include <cstdio>
using namespace clang;
using namespace serialization;
namespace {
  /// Block IDs used inside the global module index file.
  enum {
    /// The block that holds every record of the index.
    GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
  };

  /// Record codes emitted into (and read back from) the global index block.
  enum IndexRecordTypes {
    /// Carries the index format version.
    INDEX_METADATA = 0,
    /// Describes one module file: ID, size, modification time, file name and
    /// the IDs of the modules it depends on.
    MODULE = 1,
    /// Carries the bucket offset and a blob with the on-disk identifier
    /// hash table.
    IDENTIFIER_INDEX = 2
  };
}
/// Name of the index file; it is appended to the directory path handed to
/// readIndex() and writeIndex().
static const char IndexFileName[] = "modules.idx";

/// Format version written into the INDEX_METADATA record. The reader rejects
/// any index whose stored version differs from this value.
static const unsigned CurrentVersion = 1;
namespace {

/// Trait that tells OnDiskChainedHashTable how to decode the identifier
/// index: keys are identifier spellings, values are lists of module IDs.
class IdentifierIndexReaderTrait {
public:
  typedef StringRef external_key_type;
  typedef StringRef internal_key_type;
  typedef SmallVector<unsigned, 2> data_type;

  /// Two keys match when their spellings are equal.
  static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
    return a == b;
  }

  /// Hash of the identifier spelling; must match the writer trait's hash.
  static unsigned ComputeHash(const internal_key_type& a) {
    return llvm::HashString(a);
  }

  /// Reads the 16-bit key length followed by the 16-bit data length.
  ///
  /// \returns (key length, data length), both in bytes.
  static std::pair<unsigned, unsigned>
  ReadKeyDataLength(const unsigned char*& d) {
    using namespace clang::io;
    unsigned KeyLen = ReadUnalignedLE16(d);
    unsigned DataLen = ReadUnalignedLE16(d);
    return std::make_pair(KeyLen, DataLen);
  }

  static const internal_key_type&
  GetInternalKey(const external_key_type& x) { return x; }

  static const external_key_type&
  GetExternalKey(const internal_key_type& x) { return x; }

  /// Views the \p n key bytes at \p d as a string; no copy is made.
  static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
    return StringRef((const char *)d, n);
  }

  /// Decodes the list of 32-bit module IDs stored for a key.
  ///
  /// \param k       the key (unused).
  /// \param d       start of the value bytes.
  /// \param DataLen number of value bytes.
  /// \returns the module IDs, in stored order.
  static data_type ReadData(const internal_key_type& k,
                            const unsigned char* d,
                            unsigned DataLen) {
    using namespace clang::io;

    data_type Result;
    // Only consume whole 32-bit entries. DataLen is unsigned, so stepping by
    // four while merely testing "> 0" would wrap around on a length that is
    // not a multiple of four and read far past the end of the record.
    for (; DataLen >= 4; DataLen -= 4)
      Result.push_back(ReadUnalignedLE32(d));
    return Result;
  }
};

typedef OnDiskChainedHashTable<IdentifierIndexReaderTrait> IdentifierIndexTable;

}
// Builds the in-memory index by walking the bitstream behind Cursor.
//
// Buffer is stored in the Buffer member; Cursor is taken by value and is
// expected to be positioned just past the file signature (readIndex() consumes
// the four signature bytes before calling this constructor).
//
// On any malformed or unexpected entry the constructor simply returns,
// leaving whatever was parsed so far in place; no error is reported.
GlobalModuleIndex::GlobalModuleIndex(llvm::MemoryBuffer *Buffer,
llvm::BitstreamCursor Cursor)
: Buffer(Buffer), IdentifierIndex(),
NumIdentifierLookups(), NumIdentifierLookupHits()
{
// Read the global index.
bool InGlobalIndexBlock = false;
bool Done = false;
while (!Done) {
llvm::BitstreamEntry Entry = Cursor.advance();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
return;
case llvm::BitstreamEntry::EndBlock:
// Leaving the global index block ends parsing; an end-of-block seen
// anywhere else is treated as a malformed file.
if (InGlobalIndexBlock) {
InGlobalIndexBlock = false;
Done = true;
continue;
}
return;
case llvm::BitstreamEntry::Record:
// Records are only meaningful inside the global index block; break out of
// the switch so the record is decoded below.
if (InGlobalIndexBlock)
break;
return;
case llvm::BitstreamEntry::SubBlock:
// Enter the (first) global index block; skip every other sub-block.
if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
if (Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
return;
InGlobalIndexBlock = true;
} else if (Cursor.SkipBlock()) {
return;
}
continue;
}
// Only reached for a record inside the global index block.
SmallVector<uint64_t, 64> Record;
StringRef Blob;
switch ((IndexRecordTypes)Cursor.readRecord(Entry.ID, Record, &Blob)) {
case INDEX_METADATA:
// Reject indexes written with a different format version.
if (Record.size() < 1 || Record[0] != CurrentVersion)
return;
break;
case MODULE: {
// Layout: ID, size, mod time, name length, name chars..., dependency
// count, dependency IDs...
// NOTE(review): the fields are read without checking Record.size() first;
// only the trailing assert validates the total length.
unsigned Idx = 0;
unsigned ID = Record[Idx++];
// Make room for this module's information.
// NOTE(review): resize(ID + 1) would shrink Modules if ID is smaller than
// the current size -- presumably records always arrive in increasing ID
// order; confirm against the writer.
if (ID == Modules.size())
Modules.push_back(ModuleInfo());
else
Modules.resize(ID + 1);
Modules[ID].Size = Record[Idx++];
Modules[ID].ModTime = Record[Idx++];
unsigned NameLen = Record[Idx++];
Modules[ID].FileName.assign(Record.begin() + Idx,
Record.begin() + Idx + NameLen);
Idx += NameLen;
unsigned NumDeps = Record[Idx++];
Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
Record.begin() + Idx,
Record.begin() + Idx + NumDeps);
Idx += NumDeps;
assert(Idx == Record.size() && "More module info?");
// Remember the module by the stem of its file name until
// loadedModuleFile() resolves it to an actual ModuleFile.
UnresolvedModules[llvm::sys::path::stem(Modules[ID].FileName)] = ID;
break;
}
case IDENTIFIER_INDEX:
// Record[0] is the bucket offset into Blob; zero means there is no table.
if (Record[0]) {
IdentifierIndex = IdentifierIndexTable::Create(
(const unsigned char *)Blob.data() + Record[0],
(const unsigned char *)Blob.data(),
IdentifierIndexReaderTrait());
}
break;
}
}
}
/// Releases the identifier hash table created by the constructor.
///
/// IdentifierIndex is kept as an untyped pointer, so it has to be cast back
/// to the concrete table type before deletion; previously the table was
/// never freed. Deleting a null pointer (no table was loaded) is a no-op.
GlobalModuleIndex::~GlobalModuleIndex() {
  delete static_cast<IdentifierIndexTable *>(IdentifierIndex);
}
/// Loads the global module index stored in the directory \p Path.
///
/// \returns (index, EC_None) on success; (null, EC_NotFound) when the index
/// file cannot be opened; (null, EC_IOError) when the file does not start
/// with the "BCGI" signature.
std::pair<GlobalModuleIndex *, GlobalModuleIndex::ErrorCode>
GlobalModuleIndex::readIndex(StringRef Path) {
  GlobalModuleIndex *NoIndex = 0;

  // Form "<Path>/modules.idx".
  llvm::SmallString<128> IndexPath;
  IndexPath += Path;
  llvm::sys::path::append(IndexPath, IndexFileName);

  // Map the index file into memory.
  llvm::OwningPtr<llvm::MemoryBuffer> Buffer;
  if (llvm::MemoryBuffer::getFile(IndexPath.c_str(), Buffer) !=
      llvm::errc::success)
    return std::make_pair(NoIndex, EC_NotFound);

  const unsigned char *BufferBegin =
      (const unsigned char *)Buffer->getBufferStart();
  const unsigned char *BufferEnd =
      (const unsigned char *)Buffer->getBufferEnd();
  llvm::BitstreamReader Reader(BufferBegin, BufferEnd);
  llvm::BitstreamCursor Cursor(Reader);

  // Check the signature one byte at a time, stopping at the first mismatch.
  const bool HasSignature = Cursor.Read(8) == 'B' &&
                            Cursor.Read(8) == 'C' &&
                            Cursor.Read(8) == 'G' &&
                            Cursor.Read(8) == 'I';
  if (!HasSignature)
    return std::make_pair(NoIndex, EC_IOError);

  // The buffer is released from the owning pointer and handed to the index.
  GlobalModuleIndex *Index = new GlobalModuleIndex(Buffer.take(), Cursor);
  return std::make_pair(Index, EC_None);
}
/// Replaces the contents of \p ModuleFiles with every module in the index
/// that has been resolved to a loaded ModuleFile, in index order.
void
GlobalModuleIndex::getKnownModules(SmallVectorImpl<ModuleFile *> &ModuleFiles) {
  ModuleFiles.clear();

  const unsigned NumModules = Modules.size();
  for (unsigned Idx = 0; Idx < NumModules; ++Idx) {
    ModuleFile *Resolved = Modules[Idx].File;
    if (!Resolved)
      continue; // Not loaded (or not yet matched against the index).
    ModuleFiles.push_back(Resolved);
  }
}
/// Collects the resolved module files that \p File depends on.
///
/// \param File         the module file whose dependencies are requested.
/// \param Dependencies receives the dependencies that have been resolved to a
///        loaded ModuleFile. It is left untouched when \p File is not known
///        to the index, and cleared first otherwise.
void GlobalModuleIndex::getModuleDependencies(
       ModuleFile *File,
       SmallVectorImpl<ModuleFile *> &Dependencies) {
  // Look for information about this module file.
  llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
    = ModulesByFile.find(File);
  if (Known == ModulesByFile.end())
    return;

  // Record dependencies.
  Dependencies.clear();
  ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
  for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
    // StoredDependencies holds module IDs; the ID -- not the loop counter --
    // is the index into Modules. Indexing with I returned the first N modules
    // of the index instead of the actual dependencies.
    if (ModuleFile *MF = Modules[StoredDependencies[I]].File)
      Dependencies.push_back(MF);
  }
}
/// Finds the loaded module files recorded for the identifier \p Name.
///
/// \param Name the identifier spelling to look up.
/// \param Hits cleared, then filled with the resolved module files stored for
///        the identifier.
/// \returns false when no identifier table was loaded; true otherwise, in
///          which case \p Hits is authoritative even if empty.
bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
  Hits.clear();

  // Without an identifier table the index cannot answer the question.
  if (!IdentifierIndex)
    return false;

  ++NumIdentifierLookups;

  IdentifierIndexTable *Table =
      static_cast<IdentifierIndexTable *>(IdentifierIndex);
  IdentifierIndexTable::iterator Pos = Table->find(Name);
  if (Pos != Table->end()) {
    // Translate the stored module IDs into module files, ignoring modules
    // that have not been resolved.
    SmallVector<unsigned, 2> ModuleIDs = *Pos;
    for (SmallVector<unsigned, 2>::iterator ID = ModuleIDs.begin(),
                                            IDEnd = ModuleIDs.end();
         ID != IDEnd; ++ID) {
      ModuleFile *Resolved = Modules[*ID].File;
      if (Resolved)
        Hits.insert(Resolved);
    }
    ++NumIdentifierLookupHits;
  }

  return true;
}
/// Tells the index that \p File has been loaded and tries to match it with
/// an unresolved index entry of the same file-name stem.
///
/// \returns false when the file's size and modification time match the index
///          entry (the entry is then bound to \p File); true when there is no
///          such entry or the entry is out of date. Either way a found entry
///          is removed from the unresolved set.
bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) {
  StringRef Name = llvm::sys::path::stem(File->FileName);
  llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
  if (Known == UnresolvedModules.end())
    return true;

  const unsigned ModuleID = Known->second;
  ModuleInfo &Info = Modules[ModuleID];

  // The index entry is only usable if it describes exactly this file.
  const bool UpToDate =
      File->File->getSize() == Info.Size &&
      File->File->getModificationTime() == Info.ModTime;
  if (UpToDate) {
    Info.File = File;
    ModulesByFile[File] = ModuleID;
  }

  // One way or another, this entry no longer needs resolving.
  UnresolvedModules.erase(Known);
  return !UpToDate;
}
/// Prints identifier-lookup statistics to stderr. The hit-rate line is only
/// printed once at least one lookup has been performed.
void GlobalModuleIndex::printStats() {
  std::fprintf(stderr, "*** Global Module Index Statistics:\n");
  if (NumIdentifierLookups != 0) {
    const double HitPercent =
        (double)NumIdentifierLookupHits * 100.0 / NumIdentifierLookups;
    std::fprintf(stderr, " %u / %u identifier lookups succeeded (%f%%)\n",
                 NumIdentifierLookupHits, NumIdentifierLookups, HitPercent);
  }
  std::fprintf(stderr, "\n");
}
namespace {
struct ModuleFileInfo {
unsigned ID;
SmallVector<unsigned, 4> Dependencies;
};
class GlobalModuleIndexBuilder {
FileManager &FileMgr;
typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap;
ModuleFilesMap ModuleFiles;
typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
InterestingIdentifierMap InterestingIdentifiers;
void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
ModuleFileInfo &getModuleFileInfo(const FileEntry *File) {
llvm::MapVector<const FileEntry *, ModuleFileInfo>::iterator Known
= ModuleFiles.find(File);
if (Known != ModuleFiles.end())
return Known->second;
unsigned NewID = ModuleFiles.size();
ModuleFileInfo &Info = ModuleFiles[File];
Info.ID = NewID;
return Info;
}
public:
explicit GlobalModuleIndexBuilder(FileManager &FileMgr) : FileMgr(FileMgr){}
bool loadModuleFile(const FileEntry *File);
void writeIndex(llvm::BitstreamWriter &Stream);
};
}
/// Emits a SETBID record for block \p ID and, when \p Name is a non-empty
/// string, a BLOCKNAME record spelling it out. \p Record is scratch storage
/// and is overwritten.
static void emitBlockID(unsigned ID, const char *Name,
                        llvm::BitstreamWriter &Stream,
                        SmallVectorImpl<uint64_t> &Record) {
  Record.clear();
  Record.push_back(ID);
  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);

  // Nothing more to do for an unnamed block.
  if (!Name || !*Name)
    return;

  Record.clear();
  for (const char *C = Name; *C; ++C)
    Record.push_back(*C);
  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
}
/// Emits a SETRECORDNAME record that associates record code \p ID with the
/// characters of \p Name (which must not be null). \p Record is scratch
/// storage and is overwritten.
static void emitRecordID(unsigned ID, const char *Name,
                         llvm::BitstreamWriter &Stream,
                         SmallVectorImpl<uint64_t> &Record) {
  Record.clear();
  Record.push_back(ID);
  for (const char *C = Name; *C; ++C)
    Record.push_back(*C);
  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
}
/// Writes the block-info block that gives the global index block and each of
/// its record codes a human-readable name.
void
GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
  SmallVector<uint64_t, 64> Record;
  Stream.EnterSubblock(llvm::bitc::BLOCKINFO_BLOCK_ID, 3);

  // The block itself, followed by the records that live inside it.
  emitBlockID(GLOBAL_INDEX_BLOCK_ID, "GLOBAL_INDEX_BLOCK", Stream, Record);
  emitRecordID(INDEX_METADATA, "INDEX_METADATA", Stream, Record);
  emitRecordID(MODULE, "MODULE", Stream, Record);
  emitRecordID(IDENTIFIER_INDEX, "IDENTIFIER_INDEX", Stream, Record);

  Stream.ExitBlock();
}
namespace {
  /// Lookup trait for a module file's identifier table that extracts only
  /// the identifier and whether it is flagged as "interesting".
  class InterestingASTIdentifierLookupTrait
    : public serialization::reader::ASTIdentifierLookupTraitBase {

  public:
    /// The identifier and whether it is "interesting".
    typedef std::pair<StringRef, bool> data_type;

    /// Decodes one table entry.
    ///
    /// Only the leading 32-bit word of the entry is read; its lowest bit is
    /// the "interesting" flag. \p DataLen is not consulted.
    data_type ReadData(const internal_key_type& k,
                       const unsigned char* d,
                       unsigned DataLen) {
      using namespace clang::io;
      const unsigned FirstWord = ReadUnalignedLE32(d);
      const bool Interesting = (FirstWord & 0x01) != 0;
      return std::make_pair(k, Interesting);
    }
  };
}
// Reads the module file File and records, in the builder, the module files it
// imports and the identifiers found in its identifier table.
//
// Returns true on failure (unreadable file, wrong signature, malformed block,
// or an imported file that is missing or whose size/modification time differ
// from the stored values) and false on success.
bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
// Open the module file.
OwningPtr<llvm::MemoryBuffer> Buffer;
std::string ErrorStr;
Buffer.reset(FileMgr.getBufferForFile(File, &ErrorStr, true));
if (!Buffer) {
return true;
}
// Initialize the input stream
llvm::BitstreamReader InStreamFile;
llvm::BitstreamCursor InStream;
InStreamFile.init((const unsigned char *)Buffer->getBufferStart(),
(const unsigned char *)Buffer->getBufferEnd());
InStream.init(InStreamFile);
// Sniff for the signature.
if (InStream.Read(8) != 'C' ||
InStream.Read(8) != 'P' ||
InStream.Read(8) != 'C' ||
InStream.Read(8) != 'H') {
return true;
}
// Record this module file and assign it a unique ID (if it doesn't have
// one already).
unsigned ID = getModuleFileInfo(File).ID;
// Search for the blocks and records we care about.
enum { Other, ControlBlock, ASTBlock } State = Other;
bool Done = false;
while (!Done) {
llvm::BitstreamEntry Entry = InStream.advance();
switch (Entry.Kind) {
case llvm::BitstreamEntry::Error:
// An error entry ends the scan; the function still reports success.
Done = true;
continue;
case llvm::BitstreamEntry::Record:
// Ignore any records outside the control and AST blocks.
if (State == Other) {
InStream.skipRecord(Entry.ID);
continue;
}
// Handle potentially-interesting records below.
break;
case llvm::BitstreamEntry::SubBlock:
if (Entry.ID == CONTROL_BLOCK_ID) {
if (InStream.EnterSubBlock(CONTROL_BLOCK_ID))
return true;
// Found the control block.
State = ControlBlock;
continue;
}
if (Entry.ID == AST_BLOCK_ID) {
if (InStream.EnterSubBlock(AST_BLOCK_ID))
return true;
// Found the AST block.
State = ASTBlock;
continue;
}
// Any other sub-block is skipped wholesale.
if (InStream.SkipBlock())
return true;
continue;
case llvm::BitstreamEntry::EndBlock:
State = Other;
continue;
}
// Read the given record.
SmallVector<uint64_t, 64> Record;
StringRef Blob;
unsigned Code = InStream.readRecord(Entry.ID, Record, &Blob);
// Handle module dependencies.
if (State == ControlBlock && Code == IMPORTS) {
// Load each of the imported modules.
// NOTE(review): fields are consumed without checking against N; a
// truncated record would index past the end of Record.
unsigned Idx = 0, N = Record.size();
while (Idx < N) {
// Skip two leading fields that this function does not use.
++Idx;
++Idx;
// Stored size, then stored modification time, of the imported file.
off_t StoredSize = (off_t)Record[Idx++];
time_t StoredModTime = (time_t)Record[Idx++];
// Length-prefixed file name of the imported module.
unsigned Length = Record[Idx++];
SmallString<128> ImportedFile(Record.begin() + Idx,
Record.begin() + Idx + Length);
Idx += Length;
// Find the imported module file.
const FileEntry *DependsOnFile
= FileMgr.getFile(ImportedFile, false,
false);
// Fail if the import is missing or no longer matches what was stored.
if (!DependsOnFile ||
(StoredSize != DependsOnFile->getSize()) ||
(StoredModTime != DependsOnFile->getModificationTime()))
return true;
// Record the dependency.
unsigned DependsOnID = getModuleFileInfo(DependsOnFile).ID;
getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
}
continue;
}
// Handle the identifier table. Record[0] is the bucket offset into Blob;
// zero means the table is absent.
if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
typedef OnDiskChainedHashTable<InterestingASTIdentifierLookupTrait>
InterestingIdentifierTable;
llvm::OwningPtr<InterestingIdentifierTable>
Table(InterestingIdentifierTable::Create(
(const unsigned char *)Blob.data() + Record[0],
(const unsigned char *)Blob.data()));
for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
DEnd = Table->data_end();
D != DEnd; ++D) {
std::pair<StringRef, bool> Ident = *D;
// Interesting identifiers record this module's ID; uninteresting ones
// merely get an (empty) entry so the identifier is known to the index.
if (Ident.second)
InterestingIdentifiers[Ident.first].push_back(ID);
else
(void)InterestingIdentifiers[Ident.first];
}
}
// We don't care about this record.
}
return false;
}
namespace {
class IdentifierIndexWriterTrait {
public:
typedef StringRef key_type;
typedef StringRef key_type_ref;
typedef SmallVector<unsigned, 2> data_type;
typedef const SmallVector<unsigned, 2> &data_type_ref;
static unsigned ComputeHash(key_type_ref Key) {
return llvm::HashString(Key);
}
std::pair<unsigned,unsigned>
EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
unsigned KeyLen = Key.size();
unsigned DataLen = Data.size() * 4;
clang::io::Emit16(Out, KeyLen);
clang::io::Emit16(Out, DataLen);
return std::make_pair(KeyLen, DataLen);
}
void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
Out.write(Key.data(), KeyLen);
}
void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
unsigned DataLen) {
for (unsigned I = 0, N = Data.size(); I != N; ++I)
clang::io::Emit32(Out, Data[I]);
}
};
}
/// Serializes the index into \p Stream: the "BCGI" signature, the block-info
/// block, then the global index block holding the version record, one MODULE
/// record per module file, and the identifier hash table.
void GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
  using namespace llvm;

  // Emit the file header, one signature byte at a time.
  static const char Signature[] = { 'B', 'C', 'G', 'I' };
  for (unsigned I = 0; I != sizeof(Signature); ++I)
    Stream.Emit((unsigned)Signature[I], 8);

  // Write the block-info block, which describes the records in this file.
  emitBlockInfoBlock(Stream);

  Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);

  // Write the metadata.
  SmallVector<uint64_t, 2> Record;
  Record.push_back(CurrentVersion);
  Stream.EmitRecord(INDEX_METADATA, Record);

  // Write the set of known module files.
  for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
                                MEnd = ModuleFiles.end();
       M != MEnd; ++M) {
    const FileEntry *File = M->first;
    const ModuleFileInfo &Info = M->second;
    StringRef Name(File->getName());

    // Layout: ID, size, mod time, name length, name chars..., dependency
    // count, dependency IDs...
    Record.clear();
    Record.push_back(Info.ID);
    Record.push_back(File->getSize());
    Record.push_back(File->getModificationTime());
    Record.push_back(Name.size());
    Record.append(Name.begin(), Name.end());
    Record.push_back(Info.Dependencies.size());
    Record.append(Info.Dependencies.begin(), Info.Dependencies.end());
    Stream.EmitRecord(MODULE, Record);
  }

  // Write the identifier -> module file mapping.
  {
    OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
    IdentifierIndexWriterTrait Trait;

    // Populate the hash table.
    for (InterestingIdentifierMap::iterator Ident =
             InterestingIdentifiers.begin(),
           IdentEnd = InterestingIdentifiers.end();
         Ident != IdentEnd; ++Ident)
      Generator.insert(Ident->first(), Ident->second, Trait);

    // Create the on-disk hash table in a buffer. The stream lives in its own
    // scope so it is destroyed before the buffer contents are used.
    SmallString<4096> IdentifierTable;
    uint32_t BucketOffset;
    {
      llvm::raw_svector_ostream Out(IdentifierTable);
      // Make sure that no bucket is at offset 0.
      clang::io::Emit32(Out, 0);
      BucketOffset = Generator.Emit(Out, Trait);
    }

    // Create a blob abbreviation: record code, 32-bit offset, blob.
    BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
    Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
    unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);

    // Write the identifier table.
    Record.clear();
    Record.push_back(IDENTIFIER_INDEX);
    Record.push_back(BucketOffset);
    Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable.str());
  }

  Stream.ExitBlock();
}
/// Builds a global module index for the module files in directory \p Path and
/// writes it to "<Path>/modules.idx".
///
/// \param FileMgr file manager used to open the module files.
/// \param Path    directory that holds the ".pcm" module files.
/// \returns EC_None on success; EC_Building when another process holds the
///          index lock or a module is currently being built (a ".pcm.lock"
///          file is present); EC_IOError on any I/O or parse failure.
GlobalModuleIndex::ErrorCode
GlobalModuleIndex::writeIndex(FileManager &FileMgr, StringRef Path) {
  llvm::SmallString<128> IndexPath;
  IndexPath += Path;
  llvm::sys::path::append(IndexPath, IndexFileName);

  // Coordinate building the global index file with other processes that might
  // try to do the same.
  llvm::LockFileManager Locked(IndexPath);
  switch (Locked) {
  case llvm::LockFileManager::LFS_Error:
    return EC_IOError;

  case llvm::LockFileManager::LFS_Owned:
    // We're responsible for building the index ourselves. Do so below.
    break;

  case llvm::LockFileManager::LFS_Shared:
    // Someone else is responsible for building the index. We don't care
    // when they finish, so we're done.
    return EC_Building;
  }

  // The module index builder.
  GlobalModuleIndexBuilder Builder(FileMgr);

  // Load each of the module files.
  llvm::error_code EC;
  for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
       D != DEnd && !EC;
       D.increment(EC)) {
    StringRef EntryPath(D->path());

    // If this isn't a module file, we don't care...
    if (llvm::sys::path::extension(EntryPath) != ".pcm") {
      // ... unless it is a lock file for a module. extension() only yields
      // the last suffix (".lock"), so comparing it against ".pcm.lock" could
      // never succeed; test the end of the whole path instead.
      if (EntryPath.endswith(".pcm.lock"))
        return EC_Building;

      continue;
    }

    // If we can't find the module file, skip it.
    const FileEntry *ModuleFile = FileMgr.getFile(D->path());
    if (!ModuleFile)
      continue;

    // Load this module file.
    if (Builder.loadModuleFile(ModuleFile))
      return EC_IOError;
  }

  // The output buffer, into which the global index will be written.
  SmallVector<char, 16> OutputBuffer;
  {
    llvm::BitstreamWriter OutputStream(OutputBuffer);
    Builder.writeIndex(OutputStream);
  }

  // Write the global index file to a temporary file.
  llvm::SmallString<128> IndexTmpPath;
  int TmpFD;
  if (llvm::sys::fs::createUniqueFile(IndexPath + "-%%%%%%%%", TmpFD,
                                      IndexTmpPath))
    return EC_IOError;

  // Open the temporary global index file for output.
  llvm::raw_fd_ostream Out(TmpFD, true);
  if (!Out.has_error())
    Out.write(OutputBuffer.data(), OutputBuffer.size());
  Out.close();

  bool Existed;
  if (Out.has_error()) {
    // Clear the error before the stream is destroyed: per LLVM's
    // raw_fd_ostream documentation a still-pending error is reported as a
    // fatal error from the destructor. Also drop the partial temporary file
    // instead of leaving it behind in the module directory.
    Out.clear_error();
    llvm::sys::fs::remove(IndexTmpPath.str(), Existed);
    return EC_IOError;
  }

  // Remove the old index file. It isn't relevant any more.
  llvm::sys::fs::remove(IndexPath.str(), Existed);

  // Rename the newly-written index file to the proper name.
  if (llvm::sys::fs::rename(IndexTmpPath.str(), IndexPath.str())) {
    // Rename failed; just remove the temporary file.
    llvm::sys::fs::remove(IndexTmpPath.str(), Existed);
    return EC_IOError;
  }

  // We're done.
  return EC_None;
}
namespace {
  /// Iterates over the identifier spellings stored in the index's
  /// identifier table.
  class GlobalIndexIdentifierIterator : public IdentifierIterator {
    /// The position of the next identifier to hand out.
    IdentifierIndexTable::key_iterator Current;

    /// One past the last identifier.
    IdentifierIndexTable::key_iterator End;

  public:
    explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx)
      : Current(Idx.key_begin()), End(Idx.key_end()) { }

    /// Returns the next identifier, or an empty StringRef once every
    /// identifier has been returned.
    virtual StringRef Next() {
      if (Current != End) {
        StringRef Spelling = *Current;
        ++Current;
        return Spelling;
      }
      return StringRef();
    }
  };
}
/// Creates a heap-allocated iterator over every identifier in the index.
///
/// NOTE(review): IdentifierIndex is dereferenced without a null check, so
/// this presumably must only be called on an index whose identifier table
/// was loaded -- confirm against callers.
IdentifierIterator *GlobalModuleIndex::createIdentifierIterator() const {
  IdentifierIndexTable *Table =
      static_cast<IdentifierIndexTable *>(IdentifierIndex);
  return new GlobalIndexIdentifierIterator(*Table);
}