DebugIR.cpp   [plain text]


//===--- DebugIR.cpp - Transform debug metadata to allow debugging IR -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// A Module transform pass that emits a succinct version of the IR and replaces
// the source file metadata to allow debuggers to step through the IR.
//
// FIXME: instead of replacing debug metadata, this pass should allow for
// additional metadata to be used to point capable debuggers to the IR file
// without destroying the mapping to the original source file.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "debug-ir"

#include "llvm/ADT/ValueMap.h"
#include "DebugIR.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <string>

#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

using namespace llvm;

namespace {

/// Builds a map of Value* to line numbers on which the Value appears in a
/// textual representation of the IR by plugging into the AssemblyWriter by
/// masquerading as an AssemblyAnnotationWriter.
class ValueToLineMap : public AssemblyAnnotationWriter {
  ValueMap<const Value *, unsigned int> Lines;
  typedef ValueMap<const Value *, unsigned int>::const_iterator LineIter;

  void addEntry(const Value *V, formatted_raw_ostream &Out) {
    Out.flush();
    Lines.insert(std::make_pair(V, Out.getLine() + 1));
  }

public:

  /// Prints Module to a null buffer in order to build the map of Value pointers
  /// to line numbers.
  ValueToLineMap(const Module *M) {
    raw_null_ostream ThrowAway;
    M->print(ThrowAway, this);
  }

  // This function is called after an Instruction, GlobalValue, or GlobalAlias
  // is printed.
  void printInfoComment(const Value &V, formatted_raw_ostream &Out) {
    addEntry(&V, Out);
  }

  void emitFunctionAnnot(const Function *F, formatted_raw_ostream &Out) {
    addEntry(F, Out);
  }

  /// If V appears on a line in the textual IR representation, sets Line to the
  /// line number and returns true, otherwise returns false.
  bool getLine(const Value *V, unsigned int &Line) const {
    LineIter i = Lines.find(V);
    if (i != Lines.end()) {
      Line = i->second;
      return true;
    }
    return false;
  }
};

/// Removes debug intrinsics like llvm.dbg.declare and llvm.dbg.value.
class DebugIntrinsicsRemover : public InstVisitor<DebugIntrinsicsRemover> {
public:
  /// Visits every instruction in M and erases each debug intrinsic call.
  static void process(Module &M) {
    DebugIntrinsicsRemover Eraser;
    Eraser.visit(M);
  }

  // Each debug-intrinsic visitor hook unlinks the call from its parent block
  // and deletes it.
  void visitDbgDeclareInst(DbgDeclareInst &I) { I.eraseFromParent(); }
  void visitDbgValueInst(DbgValueInst &I) { I.eraseFromParent(); }
  void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { I.eraseFromParent(); }
};

/// Strips the !dbg attachment from every instruction of a Module and, when
/// RemoveNamedInfo is set, also erases the "llvm.dbg.cu" named metadata node.
class DebugMetadataRemover : public InstVisitor<DebugMetadataRemover> {
  /// Whether run() should also erase the "llvm.dbg.cu" named metadata.
  bool RemoveNamedInfo;

public:
  DebugMetadataRemover(bool RemoveNamedInfo)
      : RemoveNamedInfo(RemoveNamedInfo) {}

  /// Convenience entry point: constructs a remover and runs it over M.
  static void process(Module &M, bool RemoveNamedInfo = true) {
    DebugMetadataRemover(RemoveNamedInfo).run(&M);
  }

  /// Clears the !dbg attachment of I, if it has one.
  void visitInstruction(Instruction &I) {
    if (!I.getMetadata(LLVMContext::MD_dbg))
      return;
    I.setMetadata(LLVMContext::MD_dbg, 0);
  }

  /// Removes the per-instruction debug metadata first and then, if requested,
  /// the compile-unit named metadata.
  void run(Module *M) {
    // Per-instruction attachments.
    visit(M);

    if (!RemoveNamedInfo)
      return;

    // CU named metadata (and all children nodes).
    if (NamedMDNode *CUs = M->getNamedMetadata("llvm.dbg.cu"))
      M->eraseNamedMetadata(CUs);
  }
};

/// Updates debug metadata in a Module:
///   - changes Filename/Directory to values provided on construction
///   - adds/updates line number (DebugLoc) entries associated with each
///     instruction to reflect the instruction's location in an LLVM IR file
///
/// All of the work is triggered from the constructor, which visits the whole
/// Module; the destructor finalizes the DIBuilder.
class DIUpdater : public InstVisitor<DIUpdater> {
  /// Builder of debug information; used to create the compile unit, file,
  /// subprogram and type descriptors.
  DIBuilder Builder;

  /// Helper for type attributes/sizes/etc; queried for sizes and alignments
  /// when type descriptors are created.
  DataLayout Layout;

  /// Map of Value* to line numbers, built from the module that is displayed
  /// to the user (the clone if one was supplied, otherwise the module itself).
  const ValueToLineMap LineTable;

  /// Map of Value* (in original Module) to Value* (in optional cloned Module).
  /// Null when no clone is in use.
  const ValueToValueMapTy *VMap;

  /// Directory of debug metadata: the compile units and subprograms that were
  /// already present in the module when it was processed.
  DebugInfoFinder Finder;

  /// Source filename and directory written into the new compile unit and file
  /// descriptors. These are StringRefs, so the underlying storage is expected
  /// to be kept alive by the caller.
  StringRef Filename;
  StringRef Directory;

  // CU nodes needed when creating DI subprograms. All three are null until
  // createCompileUnit() has run.
  MDNode *FileNode;
  MDNode *LexicalBlockFileNode;
  const MDNode *CUNode;

  /// Subprogram descriptors created by this object, keyed by function.
  ValueMap<const Function *, MDNode *> SubprogramDescriptors;
  /// Type descriptors created by this object, keyed by LLVM type.
  DenseMap<const Type *, MDNode *> TypeDescriptors;
public:
  /// Rewrites the debug info of M as part of construction.
  ///
  /// \param M         module whose debug metadata is updated.
  /// \param Filename  file name recorded in the new descriptors.
  /// \param Directory directory recorded in the new descriptors.
  /// \param DisplayM  optional module from which line numbers are taken; when
  ///                  null, M itself is used.
  /// \param VMap      optional value map used to translate between M and
  ///                  DisplayM when looking up line numbers.
  DIUpdater(Module &M, StringRef Filename = StringRef(),
            StringRef Directory = StringRef(), const Module *DisplayM = 0,
            const ValueToValueMapTy *VMap = 0)
      : Builder(M),
        Layout(&M),
        LineTable(DisplayM ? DisplayM : &M),
        VMap(VMap),
        Finder(),
        Filename(Filename),
        Directory(Directory),
        FileNode(0),
        LexicalBlockFileNode(0),
        CUNode(0) {
    // Collect the pre-existing debug info before the visit starts, since the
    // visit callbacks consult Finder.
    Finder.processModule(M);
    visit(&M);
  }

  /// Finalizes the DIBuilder once this updater goes out of scope.
  ~DIUpdater() {
    Builder.finalize();
  }

  /// Module-level visitor hook: creates the compile unit used by everything
  /// that follows.
  ///
  /// If the module already contains exactly one compile unit, it is handed to
  /// createCompileUnit() to be replaced; if it contains none, a fresh one is
  /// created. More than one compile unit is not supported and is reported as
  /// a fatal error.
  void visitModule(Module &M) {
    const unsigned NumCUs = Finder.compile_unit_count();
    if (NumCUs > 1)
      report_fatal_error("DebugIR pass supports only a single compile unit per "
                         "Module.");
    createCompileUnit(NumCUs == 1 ? *Finder.compile_unit_begin() : 0);
  }

  /// Function-level visitor hook: creates a subprogram descriptor for every
  /// function definition that does not have one yet, using the line numbers
  /// of the function and of its first instruction in the textual IR.
  void visitFunction(Function &F) {
    // Nothing to describe for declarations; nothing to add when a descriptor
    // already exists.
    if (F.isDeclaration())
      return;
    if (findDISubprogram(&F))
      return;

    StringRef LinkageName = F.getName();
    // Note: the signature (and therefore its type descriptors) is created
    // before the line lookups below, even if one of them bails out.
    DICompositeType Signature = createFunctionSignature(&F);

    // Line on which the function itself is printed.
    unsigned DeclLine = 0;
    if (!findLine(&F, DeclLine)) {
      DEBUG(dbgs() << "WARNING: No line for Function " << F.getName().str()
                   << "\n");
      return;
    }

    // Line of the first instruction of the entry block.
    unsigned BodyLine = 0;
    Instruction *EntryInst = F.begin()->begin();
    if (!findLine(EntryInst, BodyLine)) {
      DEBUG(dbgs() << "WARNING: No line for 1st Instruction in Function "
                   << F.getName().str() << "\n");
      return;
    }

    const bool IsLocal = F.hasInternalLinkage();
    const bool IsDef = !F.isDeclaration();
    const bool Optimized = false;
    const int Flags = llvm::DIDescriptor::FlagPrototyped;

    assert(CUNode && FileNode);
    DISubprogram SP = Builder.createFunction(
        DICompileUnit(CUNode), F.getName(), LinkageName, DIFile(FileNode),
        DeclLine, Signature, IsLocal, IsDef, BodyLine, Flags, Optimized, &F);
    assert(SP.isSubprogram());
    DEBUG(dbgs() << "create subprogram mdnode " << *SP << ": "
                 << "\n");

    // Remember the descriptor so later scope lookups can find it.
    SubprogramDescriptors.insert(std::make_pair(&F, SP));
  }

  /// Instruction-level visitor hook: attaches a DebugLoc to I whose line is
  /// the line on which the instruction appears in the textual IR.
  ///
  /// An existing debug location keeps its scope and inlined-at information;
  /// otherwise the scope is obtained from findScope(). Instructions without a
  /// line table entry or without a usable scope are left unchanged.
  void visitInstruction(Instruction &I) {
    DebugLoc OldLoc(I.getDebugLoc());

    // When a value map is supplied, the line table was generated from a clone
    // of the module we operate on, so look the instruction up through the
    // map. Fall back to I itself if there is no map or no mapping.
    Value *RealInst = &I;
    if (VMap)
      if (Value *Mapped = VMap->lookup(&I))
        RealInst = Mapped;

    unsigned Line;
    if (!LineTable.getLine(RealInst, Line)) {
      // Instruction has no line, it may have been removed (in the module that
      // will be passed to the debugger) so there is nothing to do here.
      DEBUG(dbgs() << "WARNING: no LineTable entry for instruction " << RealInst
                   << "\n");
      DEBUG(RealInst->dump());
      return;
    }

    unsigned Col = 0; // FIXME: support columns

    DebugLoc NewLoc;
    if (OldLoc.isUnknown()) {
      // No previous location: a scope has to be found for the instruction.
      MDNode *Scope = findScope(&I);
      if (!Scope) {
        DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I
                     << ". no DebugLoc will be present."
                     << "\n");
        return;
      }
      NewLoc = DebugLoc::get(Line, Col, Scope, 0);
    } else {
      // I had a previous debug location: re-use its scope and inlined-at.
      NewLoc = DebugLoc::get(Line, Col, OldLoc.getScope(RealInst->getContext()),
                             OldLoc.getInlinedAt(RealInst->getContext()));
    }

    addDebugLocation(I, NewLoc);
  }

private:

  /// Creates the compile unit, file and lexical-block-file descriptors that
  /// describe the IR file (Filename/Directory), and stores them in CUNode,
  /// FileNode and LexicalBlockFileNode.
  ///
  /// \param CUToReplace existing compile unit to replace, or null. When given,
  ///        its producer, optimization flag, flags, runtime version and split
  ///        debug filename are carried over to the new compile unit, and all
  ///        uses of the old node are redirected to the new one. When null, a
  ///        producer string naming the LLVM version is used.
  void createCompileUnit(MDNode *CUToReplace) {
    std::string Flags;
    bool IsOptimized = false;
    StringRef Producer;
    unsigned RuntimeVersion(0);
    StringRef SplitName;

    if (CUToReplace) {
      // save fields from existing CU to re-use in the new CU
      DICompileUnit ExistingCU(CUToReplace);
      Producer = ExistingCU.getProducer();
      IsOptimized = ExistingCU.isOptimized();
      Flags = ExistingCU.getFlags();
      RuntimeVersion = ExistingCU.getRunTimeVersion();
      SplitName = ExistingCU.getSplitDebugFilename();
    } else {
      Producer =
          "LLVM Version " STR(LLVM_VERSION_MAJOR) "." STR(LLVM_VERSION_MINOR);
    }

    // The language is always recorded as C99, regardless of the original CU.
    // SplitName is forwarded so that the split debug filename saved above is
    // not silently dropped from the replacement CU.
    CUNode = Builder.createCompileUnit(dwarf::DW_LANG_C99, Filename, Directory,
                                       Producer, IsOptimized, Flags,
                                       RuntimeVersion, SplitName);

    if (CUToReplace)
      CUToReplace->replaceAllUsesWith(const_cast<MDNode *>(CUNode));

    DICompileUnit CU(CUNode);
    FileNode = Builder.createFile(Filename, Directory);
    LexicalBlockFileNode = Builder.createLexicalBlockFile(CU, DIFile(FileNode));
  }

  /// Returns the scope descriptor to use for I: the subprogram of the function
  /// containing I when one can be found, otherwise the fallback
  /// lexical-block-file node.
  MDNode *findScope(const Instruction *I) {
    MDNode *Subprogram = findDISubprogram(I->getParent()->getParent());
    if (!Subprogram) {
      DEBUG(dbgs() << "WARNING: Using fallback lexical block file scope "
                   << LexicalBlockFileNode << " as scope for instruction " << I
                   << "\n");
      return LexicalBlockFileNode;
    }
    return Subprogram;
  }

  /// Returns the subprogram descriptor for F, or 0 if none is known.
  ///
  /// Descriptors created by this object are consulted first; after that the
  /// subprograms that were found in the module's existing debug info are
  /// searched linearly.
  MDNode *findDISubprogram(const Function *F) {
    typedef ValueMap<const Function *, MDNode *>::const_iterator FuncNodeIter;
    FuncNodeIter Known = SubprogramDescriptors.find(F);
    if (Known != SubprogramDescriptors.end())
      return Known->second;

    DEBUG(dbgs() << "searching for DI scope node for Function " << F
                 << " in a list of " << Finder.subprogram_count()
                 << " subprogram nodes"
                 << "\n");

    DebugInfoFinder::iterator It = Finder.subprogram_begin();
    DebugInfoFinder::iterator End = Finder.subprogram_end();
    for (; It != End; ++It) {
      DISubprogram Candidate(*It);
      if (Candidate.getFunction() != F)
        continue;

      DEBUG(dbgs() << "Found DISubprogram " << *It << " for function "
                   << Candidate.getFunction() << "\n");
      return *It;
    }

    DEBUG(dbgs() << "unable to find DISubprogram node for function "
                 << F->getName().str() << "\n");
    return 0;
  }

  /// Sets Line to the line number on which V appears and returns true. If a
  /// line location for V is not found, returns false.
  bool findLine(const Value *V, unsigned &Line) {
    if (LineTable.getLine(V, Line))
      return true;

    if (VMap) {
      Value *mapped = VMap->lookup(V);
      if (mapped && LineTable.getLine(mapped, Line))
        return true;
    }
    return false;
  }

  /// Returns the textual form of T, as produced by Type::print.
  std::string getTypeName(Type *T) {
    std::string Buffer;
    raw_string_ostream OS(Buffer);
    T->print(OS);
    // str() flushes the stream into Buffer before handing it back.
    return OS.str();
  }

  /// Returns the descriptor previously recorded for T, or 0 when T has not
  /// been given one yet. Never creates a descriptor.
  MDNode *getType(const Type *T) {
    // lookup() yields a default-constructed (null) pointer for missing keys.
    return TypeDescriptors.lookup(T);
  }

  /// Returns a DebugInfo type from an LLVM type T, creating and caching it in
  /// TypeDescriptors on first use.
  ///
  ///   - void yields a null descriptor (and is not cached);
  ///   - struct types become struct descriptors whose elements are the
  ///     descriptors of the element types;
  ///   - pointer types become pointer descriptors to the pointee's descriptor;
  ///   - array types become array descriptors with a single subrange;
  ///   - everything else becomes a basic type named after its textual form.
  DIDerivedType getOrCreateType(Type *T) {
    MDNode *N = getType(T);
    if (N)
      return DIDerivedType(N);
    else if (T->isVoidTy())
      return DIDerivedType(0);
    else if (T->isStructTy()) {
      // NOTE(review): getStructName() presumably expects a named struct;
      // confirm the behaviour for literal struct types.
      N = Builder.createStructType(
          DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode),
          0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0,
          DIType(0), DIArray(0)); // filled in later

      // N is added to the map (early) so that element search below can find it,
      // so as to avoid infinite recursion for structs that contain pointers to
      // their own type.
      TypeDescriptors[T] = N;
      DICompositeType StructDescriptor(N);

      SmallVector<Value *, 4> Elements;
      for (unsigned i = 0; i < T->getStructNumElements(); ++i)
        Elements.push_back(getOrCreateType(T->getStructElementType(i)));

      // set struct elements
      StructDescriptor.setTypeArray(Builder.getOrCreateArray(Elements));
    } else if (T->isPointerTy()) {
      // Always build a distinct pointer descriptor around the pointee's
      // descriptor. Re-using the pointee's own descriptor for the pointer (as
      // was done when the pointee was already known) would describe T* as T.
      // Recursion through self-referential structs still terminates because
      // the struct is entered into the map before its elements are visited.
      Type *PointeeTy = T->getPointerElementType();
      N = Builder.createPointerType(
          getOrCreateType(PointeeTy), Layout.getPointerTypeSizeInBits(T),
          Layout.getPrefTypeAlignment(T), getTypeName(T));
    } else if (T->isArrayTy()) {
      // A single subrange built from 0 and (element count - 1).
      SmallVector<Value *, 1> Subrange;
      Subrange.push_back(
          Builder.getOrCreateSubrange(0, T->getArrayNumElements() - 1));

      N = Builder.createArrayType(Layout.getTypeSizeInBits(T),
                                  Layout.getPrefTypeAlignment(T),
                                  getOrCreateType(T->getArrayElementType()),
                                  Builder.getOrCreateArray(Subrange));
    } else {
      // Integers are described as unsigned, floating point types as float,
      // and anything else as signed.
      int encoding = llvm::dwarf::DW_ATE_signed;
      if (T->isIntegerTy())
        encoding = llvm::dwarf::DW_ATE_unsigned;
      else if (T->isFloatingPointTy())
        encoding = llvm::dwarf::DW_ATE_float;

      N = Builder.createBasicType(getTypeName(T), T->getPrimitiveSizeInBits(),
                                  0, encoding);
    }
    TypeDescriptors[T] = N;
    return DIDerivedType(N);
  }

  /// Builds the subroutine type descriptor for Func. The element list holds
  /// the return type first, followed by the type of each argument in order.
  DICompositeType createFunctionSignature(const Function *Func) {
    SmallVector<Value *, 4> SigTypes;

    // Slot 0: return type.
    SigTypes.push_back(getOrCreateType(Func->getReturnType()));

    // Remaining slots: one per formal argument.
    for (Function::const_arg_iterator Arg = Func->arg_begin(),
                                      ArgEnd = Func->arg_end();
         Arg != ArgEnd; ++Arg)
      SigTypes.push_back(getOrCreateType(Arg->getType()));

    DIArray SigArray = Builder.getOrCreateArray(SigTypes);
    return Builder.createSubroutineType(DIFile(FileNode), SigArray);
  }

  /// Attaches Loc to I by storing its metadata node form as the instruction's
  /// !dbg attachment.
  void addDebugLocation(Instruction &I, DebugLoc Loc) {
    I.setMetadata(LLVMContext::MD_dbg, Loc.getAsMDNode(I.getContext()));
  }
};

/// Sets Filename/Directory from the Module identifier and returns true, or
/// false if source information is not present.
bool getSourceInfoFromModule(const Module &M, std::string &Directory,
                             std::string &Filename) {
  std::string PathStr(M.getModuleIdentifier());
  if (PathStr.length() == 0 || PathStr == "<stdin>")
    return false;

  Filename = sys::path::filename(PathStr);
  SmallVector<char, 16> Path(PathStr.begin(), PathStr.end());
  sys::path::remove_filename(Path);
  Directory = StringRef(Path.data(), Path.size());
  return true;
}

// Sets Filename/Directory from debug information in M and returns true, or
// false if no debug information available, or cannot be parsed.
bool getSourceInfoFromDI(const Module &M, std::string &Directory,
                         std::string &Filename) {
  NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
  if (!CUNode || CUNode->getNumOperands() == 0)
    return false;

  DICompileUnit CU(CUNode->getOperand(0));
  if (!CU.Verify())
    return false;

  Filename = CU.getFilename();
  Directory = CU.getDirectory();
  return true;
}

} // anonymous namespace

namespace llvm {

/// Fills Directory/Filename from the module's debug info, falling back to the
/// module identifier when no usable debug info is present. The outcome is
/// remembered in ParsedPath and also returned.
bool DebugIR::getSourceInfo(const Module &M) {
  if (getSourceInfoFromDI(M, Directory, Filename))
    ParsedPath = true;
  else
    ParsedPath = getSourceInfoFromModule(M, Directory, Filename);
  return ParsedPath;
}

/// Replaces everything from the last '.' in Filename onwards with
/// NewExtension. Returns false and leaves Filename unchanged when Filename
/// contains no '.'.
bool DebugIR::updateExtension(StringRef NewExtension) {
  const std::string::size_type DotPos = Filename.rfind('.');
  if (DotPos != std::string::npos) {
    // Drop the old extension (including the dot), then add the new one.
    Filename.resize(DotPos);
    Filename.append(NewExtension.begin(), NewExtension.end());
    return true;
  }
  return false;
}

/// Creates a temporary "debug-ir" file with the "ll" suffix, stores its open
/// file descriptor in fd and splits its path into Filename/Directory. Sets
/// GeneratedPath to record that the path was generated rather than derived
/// from the input.
///
/// Failure to create the temporary file is reported as a fatal error, since
/// neither the descriptor nor the path would be usable afterwards.
void DebugIR::generateFilename(OwningPtr<int> &fd) {
  SmallVector<char, 16> PathVec;
  fd.reset(new int);
  if (sys::fs::createTemporaryFile("debug-ir", "ll", *fd, PathVec))
    report_fatal_error("DebugIR unable to create a temporary file for the "
                       "IR source.");

  StringRef Path(PathVec.data(), PathVec.size());
  // Filename is copied out before PathVec is truncated to its directory part.
  Filename = sys::path::filename(Path);
  sys::path::remove_filename(PathVec);
  Directory = StringRef(PathVec.data(), PathVec.size());

  GeneratedPath = true;
}

/// Returns Directory and Filename joined into a single path.
std::string DebugIR::getPath() {
  SmallVector<char, 16> Path;
  sys::path::append(Path, Directory, Filename);
  // Build the result from the buffer's actual length instead of guessing how
  // many separator characters append() inserted and NUL-terminating by hand.
  return std::string(Path.begin(), Path.end());
}

void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
  OwningPtr<raw_fd_ostream> Out;
  std::string error;

  if (!fd) {
    std::string Path = getPath();
    Out.reset(new raw_fd_ostream(Path.c_str(), error));
    DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to file "
                 << Path << "\n");
  } else {
    DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to fd "
                 << *fd << "\n");
    Out.reset(new raw_fd_ostream(*fd, true));
  }

  M->print(*Out, 0);
  Out->close();
}

/// Generates the IR-level debug info for M.
///
/// When the source is written to disk and debug intrinsics and/or debug
/// metadata are to be hidden, a clone of M is stored in DisplayM and stripped
/// accordingly; line numbers are then taken from that clone. Otherwise
/// DisplayM is left as it was and line numbers come from M itself.
void DebugIR::createDebugInfo(Module &M, OwningPtr<Module> &DisplayM) {
  // no functions -- no debug info needed
  if (M.getFunctionList().empty())
    return;

  OwningPtr<ValueToValueMapTy> VMap;

  const bool NeedsDisplayClone =
      WriteSourceToDisk && (HideDebugIntrinsics || HideDebugMetadata);
  if (NeedsDisplayClone) {
    VMap.reset(new ValueToValueMapTy);
    DisplayM.reset(CloneModule(&M, *VMap));

    if (HideDebugIntrinsics)
      DebugIntrinsicsRemover::process(*DisplayM);

    if (HideDebugMetadata)
      DebugMetadataRemover::process(*DisplayM);
  }

  // All of the work happens while this object is constructed and destroyed.
  DIUpdater Updater(M, Filename, Directory, DisplayM.get(), VMap.get());
}

/// Returns true unless both Filename and Directory are non-empty.
bool DebugIR::isMissingPath() {
  if (Filename.empty())
    return true;
  return Directory.empty();
}

/// Pass entry point: determines the output path, strips the old line
/// information from M, generates IR-level debug info and, if requested,
/// writes the displayed IR to disk. Always reports the module as modified.
bool DebugIR::runOnModule(Module &M) {
  OwningPtr<int> fd;

  if (isMissingPath() && !getSourceInfo(M)) {
    if (!WriteSourceToDisk)
      report_fatal_error("DebugIR unable to determine file name in input. "
                         "Ensure Module contains an identifier, a valid "
                         "DICompileUnit, or construct DebugIR with "
                         "non-empty Filename/Directory parameters.");
    else
      generateFilename(fd);
  }

  if (!GeneratedPath && WriteSourceToDisk) {
    // updateExtension() fails when Filename has no '.'. In that case append
    // the extension instead, so the IR dump is never written to the unchanged
    // input path.
    if (!updateExtension(".debug-ll"))
      Filename += ".debug-ll";
  }

  // Clear line numbers. Keep debug info (if any) if we were able to read the
  // file name from the DICompileUnit descriptor.
  DebugMetadataRemover::process(M, !ParsedPath);

  OwningPtr<Module> DisplayM;
  createDebugInfo(M, DisplayM);
  if (WriteSourceToDisk) {
    // Write the stripped clone when one was made, otherwise M itself.
    Module *OutputM = DisplayM.get() ? DisplayM.get() : &M;
    writeDebugBitcode(OutputM, fd.get());
  }

  DEBUG(M.dump());
  return true;
}

/// Runs the pass on M and additionally reports, through Path, the path made of
/// Directory and Filename after the run.
bool DebugIR::runOnModule(Module &M, std::string &Path) {
  const bool Changed = runOnModule(M);
  Path = getPath();
  return Changed;
}

} // llvm namespace

// Definition of the pass's static ID member, initialised to 0.
char DebugIR::ID = 0;
// Registers the pass under the name "debug-ir" with the description
// "Enable debugging IR".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm.
INITIALIZE_PASS(DebugIR, "debug-ir", "Enable debugging IR", false, false)

/// Creates a DebugIR pass configured with the given hiding options and output
/// Directory/Filename. The returned object is heap-allocated.
ModulePass *llvm::createDebugIRPass(bool HideDebugIntrinsics,
                                    bool HideDebugMetadata, StringRef Directory,
                                    StringRef Filename) {
  ModulePass *Pass =
      new DebugIR(HideDebugIntrinsics, HideDebugMetadata, Directory, Filename);
  return Pass;
}

/// Creates a default-constructed DebugIR pass. The returned object is
/// heap-allocated.
ModulePass *llvm::createDebugIRPass() {
  ModulePass *Pass = new DebugIR();
  return Pass;
}