InclusionRewriter.cpp   [plain text]


//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code rewrites include invocations into their expansions.  This gives you
// a file with all included files merged into it.
//
//===----------------------------------------------------------------------===//

#include "clang/Rewrite/Frontend/Rewriters.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/PreprocessorOutputOptions.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace llvm;

namespace {

class InclusionRewriter : public PPCallbacks {
  /// Information about which #includes were actually performed,
  /// created by preprocessor callbacks.
  struct FileChange {
    SourceLocation From;
    FileID Id;
    SrcMgr::CharacteristicKind FileType;
    FileChange(SourceLocation From) : From(From) {
    }
  };
  Preprocessor &PP; ///< Used to find inclusion directives.
  SourceManager &SM; ///< Used to read and manage source files.
  raw_ostream &OS; ///< The destination stream for rewritten contents.
  bool ShowLineMarkers; ///< Show #line markers.
  bool UseLineDirective; ///< Use of line directives or line markers.
  typedef std::map<unsigned, FileChange> FileChangeMap;
  FileChangeMap FileChanges; /// Tracks which files were included where.
  /// Used transitively for building up the FileChanges mapping over the
  /// various \c PPCallbacks callbacks.
  FileChangeMap::iterator LastInsertedFileChange;
public:
  InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
  bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
private:
  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                           SrcMgr::CharacteristicKind FileType,
                           FileID PrevFID);
  virtual void FileSkipped(const FileEntry &ParentFile,
                           const Token &FilenameTok,
                           SrcMgr::CharacteristicKind FileType);
  virtual void InclusionDirective(SourceLocation HashLoc,
                                  const Token &IncludeTok,
                                  StringRef FileName,
                                  bool IsAngled,
                                  CharSourceRange FilenameRange,
                                  const FileEntry *File,
                                  StringRef SearchPath,
                                  StringRef RelativePath,
                                  const Module *Imported);
  void WriteLineInfo(const char *Filename, int Line,
                     SrcMgr::CharacteristicKind FileType,
                     StringRef EOL, StringRef Extra = StringRef());
  void OutputContentUpTo(const MemoryBuffer &FromFile,
                         unsigned &WriteFrom, unsigned WriteTo,
                         StringRef EOL, int &lines,
                         bool EnsureNewline = false);
  void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
                           const MemoryBuffer &FromFile, StringRef EOL,
                           unsigned &NextToWrite, int &Lines);
  const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
  StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
};

}  // end anonymous namespace

/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
                                     bool ShowLineMarkers)
    : PP(PP), SM(PP.getSourceManager()), OS(OS),
    ShowLineMarkers(ShowLineMarkers),
    LastInsertedFileChange(FileChanges.end()) {
  // If we're in microsoft mode, use normal #line instead of line markers.
  UseLineDirective = PP.getLangOpts().MicrosoftExt;
}

/// Write appropriate line information as either #line directives or GNU line
/// markers depending on what mode we're in, including the \p Filename and
/// \p Line we are located at, using the specified \p EOL line separator, and
/// any \p Extra context specifiers in GNU line directives.
void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
                                      SrcMgr::CharacteristicKind FileType,
                                      StringRef EOL, StringRef Extra) {
  if (!ShowLineMarkers)
    return;
  if (UseLineDirective) {
    OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
  } else {
    // Use GNU linemarkers as described here:
    // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
    OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
    if (!Extra.empty())
      OS << Extra;
    if (FileType == SrcMgr::C_System)
      // "`3' This indicates that the following text comes from a system header
      // file, so certain warnings should be suppressed."
      OS << " 3";
    else if (FileType == SrcMgr::C_ExternCSystem)
      // as above for `3', plus "`4' This indicates that the following text
      // should be treated as being wrapped in an implicit extern "C" block."
      OS << " 3 4";
  }
  OS << EOL;
}

/// FileChanged - Whenever the preprocessor enters or exits a #include file
/// it invokes this handler.
void InclusionRewriter::FileChanged(SourceLocation Loc,
                                    FileChangeReason Reason,
                                    SrcMgr::CharacteristicKind NewFileType,
                                    FileID) {
  if (Reason != EnterFile)
    return;
  if (LastInsertedFileChange == FileChanges.end())
    // we didn't reach this file (eg: the main file) via an inclusion directive
    return;
  LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
  LastInsertedFileChange->second.FileType = NewFileType;
  LastInsertedFileChange = FileChanges.end();
}

/// Called whenever an inclusion is skipped due to canonical header protection
/// macros.
void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
                                    const Token &/*FilenameTok*/,
                                    SrcMgr::CharacteristicKind /*FileType*/) {
  assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
    "found via an inclusion directive, was skipped");
  FileChanges.erase(LastInsertedFileChange);
  LastInsertedFileChange = FileChanges.end();
}

/// This should be called whenever the preprocessor encounters include
/// directives. It does not say whether the file has been included, but it
/// provides more information about the directive (hash location instead
/// of location inside the included file). It is assumed that the matching
/// FileChanged() or FileSkipped() is called after this.
void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
                                           const Token &/*IncludeTok*/,
                                           StringRef /*FileName*/,
                                           bool /*IsAngled*/,
                                           CharSourceRange /*FilenameRange*/,
                                           const FileEntry * /*File*/,
                                           StringRef /*SearchPath*/,
                                           StringRef /*RelativePath*/,
                                           const Module * /*Imported*/) {
  assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
    "directive was found before the previous one was processed");
  std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
    std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc)));
  assert(p.second && "Unexpected revisitation of the same include directive");
  LastInsertedFileChange = p.first;
}

/// Simple lookup for a SourceLocation (specifically one denoting the hash in
/// an inclusion directive) in the map of inclusion information, FileChanges.
const InclusionRewriter::FileChange *
InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
  FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
  if (I != FileChanges.end())
    return &I->second;
  return NULL;
}

/// Detect the likely line ending style of \p FromFile by examining the first
/// newline found within it.
static StringRef DetectEOL(const MemoryBuffer &FromFile) {
  // detect what line endings the file uses, so that added content does not mix
  // the style
  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
  if (Pos == NULL)
    return "\n";
  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
    return "\n\r";
  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
    return "\r\n";
  return "\n";
}

/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
/// \p WriteTo - 1.
void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
                                          unsigned &WriteFrom, unsigned WriteTo,
                                          StringRef EOL, int &Line,
                                          bool EnsureNewline) {
  if (WriteTo <= WriteFrom)
    return;
  OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
  // count lines manually, it's faster than getPresumedLoc()
  Line += std::count(FromFile.getBufferStart() + WriteFrom,
                     FromFile.getBufferStart() + WriteTo, '\n');
  if (EnsureNewline) {
    char LastChar = FromFile.getBufferStart()[WriteTo - 1];
    if (LastChar != '\n' && LastChar != '\r')
      OS << EOL;
  }
  WriteFrom = WriteTo;
}

/// Print characters from \p FromFile starting at \p NextToWrite up until the
/// inclusion directive at \p StartToken, then print out the inclusion
/// inclusion directive disabled by a #if directive, updating \p NextToWrite
/// and \p Line to track the number of source lines visited and the progress
/// through the \p FromFile buffer.
void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
                                            const Token &StartToken,
                                            const MemoryBuffer &FromFile,
                                            StringRef EOL,
                                            unsigned &NextToWrite, int &Line) {
  OutputContentUpTo(FromFile, NextToWrite,
    SM.getFileOffset(StartToken.getLocation()), EOL, Line);
  Token DirectiveToken;
  do {
    DirectiveLex.LexFromRawLexer(DirectiveToken);
  } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
  OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
  OutputContentUpTo(FromFile, NextToWrite,
    SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
    EOL, Line);
  OS << "#endif /* expanded by -frewrite-includes */" << EOL;
}

/// Find the next identifier in the pragma directive specified by \p RawToken.
StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
                                                Token &RawToken) {
  RawLex.LexFromRawLexer(RawToken);
  if (RawToken.is(tok::raw_identifier))
    PP.LookUpIdentifierInfo(RawToken);
  if (RawToken.is(tok::identifier))
    return RawToken.getIdentifierInfo()->getName();
  return StringRef();
}

/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
/// and including content of included files recursively.
bool InclusionRewriter::Process(FileID FileId,
                                SrcMgr::CharacteristicKind FileType)
{
  bool Invalid;
  const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
  if (Invalid) // invalid inclusion
    return true;
  const char *FileName = FromFile.getBufferIdentifier();
  Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
  RawLex.SetCommentRetentionState(false);

  StringRef EOL = DetectEOL(FromFile);

  // Per the GNU docs: "1" indicates the start of a new file.
  WriteLineInfo(FileName, 1, FileType, EOL, " 1");

  if (SM.getFileIDSize(FileId) == 0)
    return true;

  // The next byte to be copied from the source file
  unsigned NextToWrite = 0;
  int Line = 1; // The current input file line number.

  Token RawToken;
  RawLex.LexFromRawLexer(RawToken);

  // TODO: Consider adding a switch that strips possibly unimportant content,
  // such as comments, to reduce the size of repro files.
  while (RawToken.isNot(tok::eof)) {
    if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
      RawLex.setParsingPreprocessorDirective(true);
      Token HashToken = RawToken;
      RawLex.LexFromRawLexer(RawToken);
      if (RawToken.is(tok::raw_identifier))
        PP.LookUpIdentifierInfo(RawToken);
      if (RawToken.is(tok::identifier)) {
        switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
          case tok::pp_include:
          case tok::pp_include_next:
          case tok::pp_import: {
            CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
              Line);
            if (const FileChange *Change = FindFileChangeLocation(
                HashToken.getLocation())) {
              // now include and recursively process the file
              if (Process(Change->Id, Change->FileType))
                // and set lineinfo back to this file, if the nested one was
                // actually included
                // `2' indicates returning to a file (after having included
                // another file.
                WriteLineInfo(FileName, Line, FileType, EOL, " 2");
            } else
              // fix up lineinfo (since commented out directive changed line
              // numbers) for inclusions that were skipped due to header guards
              WriteLineInfo(FileName, Line, FileType, EOL);
            break;
          }
          case tok::pp_pragma: {
            StringRef Identifier = NextIdentifierName(RawLex, RawToken);
            if (Identifier == "clang" || Identifier == "GCC") {
              if (NextIdentifierName(RawLex, RawToken) == "system_header") {
                // keep the directive in, commented out
                CommentOutDirective(RawLex, HashToken, FromFile, EOL,
                  NextToWrite, Line);
                // update our own type
                FileType = SM.getFileCharacteristic(RawToken.getLocation());
                WriteLineInfo(FileName, Line, FileType, EOL);
              }
            } else if (Identifier == "once") {
              // keep the directive in, commented out
              CommentOutDirective(RawLex, HashToken, FromFile, EOL,
                NextToWrite, Line);
              WriteLineInfo(FileName, Line, FileType, EOL);
            }
            break;
          }
          default:
            break;
        }
      }
      RawLex.setParsingPreprocessorDirective(false);
    }
    RawLex.LexFromRawLexer(RawToken);
  }
  OutputContentUpTo(FromFile, NextToWrite,
    SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
    /*EnsureNewline*/true);
  return true;
}

/// InclusionRewriterInInput - Implement -frewrite-includes mode.
void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
                                   const PreprocessorOutputOptions &Opts) {
  SourceManager &SM = PP.getSourceManager();
  InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
                                                     Opts.ShowLineMarkers);
  PP.addPPCallbacks(Rewrite);

  // First let the preprocessor process the entire file and call callbacks.
  // Callbacks will record which #include's were actually performed.
  PP.EnterMainSourceFile();
  Token Tok;
  // Only preprocessor directives matter here, so disable macro expansion
  // everywhere else as an optimization.
  // TODO: It would be even faster if the preprocessor could be switched
  // to a mode where it would parse only preprocessor directives and comments,
  // nothing else matters for parsing or processing.
  PP.SetMacroExpansionOnlyInDirectives();
  do {
    PP.Lex(Tok);
  } while (Tok.isNot(tok::eof));
  Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
  OS->flush();
}