llvm-mcmarkup.cpp   [plain text]


//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Example simple parser implementation for the MC assembly markup language.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>
using namespace llvm;

static cl::list<std::string>
       InputFilenames(cl::Positional, cl::desc("<input files>"),
                      cl::ZeroOrMore);
static cl::opt<bool>
DumpTags("dump-tags", cl::desc("List all tags encountered in input"));

static StringRef ToolName;

/// Trivial lexer for the markup parser. Input is always handled a character
/// at a time. The lexer just encapsulates EOF and lookahead handling.
class MarkupLexer {
  StringRef::const_iterator Start;
  StringRef::const_iterator CurPtr;
  StringRef::const_iterator End;
public:
  MarkupLexer(StringRef Source)
    : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {}
  // When processing non-markup, input is consumed a character at a time.
  bool isEOF() { return CurPtr == End; }
  int getNextChar() {
    if (CurPtr == End) return EOF;
    return *CurPtr++;
  }
  int peekNextChar() {
    if (CurPtr == End) return EOF;
    return *CurPtr;
  }
  StringRef::const_iterator getPosition() const { return CurPtr; }
};

/// A markup tag is a name and a (usually empty) list of modifiers.
class MarkupTag {
  StringRef Name;
  StringRef Modifiers;
  SMLoc StartLoc;
public:
  MarkupTag(StringRef n, StringRef m, SMLoc Loc)
    : Name(n), Modifiers(m), StartLoc(Loc) {}
  StringRef getName() const { return Name; }
  StringRef getModifiers() const { return Modifiers; }
  SMLoc getLoc() const { return StartLoc; }
};

/// A simple parser implementation for creating MarkupTags from input text.
class MarkupParser {
  MarkupLexer &Lex;
  SourceMgr &SM;
public:
  MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}
  /// Create a MarkupTag from the current position in the MarkupLexer.
  /// The parseTag() method should be called when the lexer has processed
  /// the opening '<' character. Input will be consumed up to and including
  /// the ':' which terminates the tag open.
  MarkupTag parseTag();
  /// Issue a diagnostic and terminate program execution.
  void FatalError(SMLoc Loc, StringRef Msg);
};

void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
  SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
  exit(1);
}

// Example handler for when a tag is recognized.
static void processStartTag(MarkupTag &Tag) {
  // If we're just printing the tags, do that, otherwise do some simple
  // colorization.
  if (DumpTags) {
    outs() << Tag.getName();
    if (Tag.getModifiers().size())
      outs() << " " << Tag.getModifiers();
    outs() << "\n";
    return;
  }

  if (!outs().has_colors())
    return;
  // Color registers as red and immediates as cyan. Those don't have nested
  // tags, so don't bother keeping a stack of colors to reset to.
  if (Tag.getName() == "reg")
    outs().changeColor(raw_ostream::RED);
  else if (Tag.getName() == "imm")
    outs().changeColor(raw_ostream::CYAN);
}

// Example handler for when the end of a tag is recognized.
static void processEndTag(MarkupTag &Tag) {
  // If we're printing the tags, there's nothing more to do here. Otherwise,
  // set the color back the normal.
  if (DumpTags)
    return;
  if (!outs().has_colors())
    return;
  // Just reset to basic white.
  outs().changeColor(raw_ostream::WHITE, false);
}

MarkupTag MarkupParser::parseTag() {
  // First off, extract the tag into it's own StringRef so we can look at it
  // outside of the context of consuming input.
  StringRef::const_iterator Start = Lex.getPosition();
  SMLoc Loc = SMLoc::getFromPointer(Start - 1);
  while(Lex.getNextChar() != ':') {
    // EOF is an error.
    if (Lex.isEOF())
      FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
  }
  StringRef RawTag(Start, Lex.getPosition() - Start - 1);
  std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
  return MarkupTag(SplitTag.first, SplitTag.second, Loc);
}

static void parseMCMarkup(StringRef Filename) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr =
      MemoryBuffer::getFileOrSTDIN(Filename);
  if (std::error_code EC = BufferPtr.getError()) {
    errs() << ToolName << ": " << EC.message() << '\n';
    return;
  }
  std::unique_ptr<MemoryBuffer> &Buffer = BufferPtr.get();

  SourceMgr SrcMgr;

  StringRef InputSource = Buffer->getBuffer();

  // Tell SrcMgr about this buffer, which is what the parser will pick up.
  SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());

  MarkupLexer Lex(InputSource);
  MarkupParser Parser(Lex, SrcMgr);

  SmallVector<MarkupTag, 4> TagStack;

  for (int CurChar = Lex.getNextChar();
       CurChar != EOF;
       CurChar = Lex.getNextChar()) {
    switch (CurChar) {
    case '<': {
      // A "<<" is output as a literal '<' and does not start a markup tag.
      if (Lex.peekNextChar() == '<') {
        (void)Lex.getNextChar();
        break;
      }
      // Parse the markup entry.
      TagStack.push_back(Parser.parseTag());

      // Do any special handling for the start of a tag.
      processStartTag(TagStack.back());
      continue;
    }
    case '>': {
      SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
      // A ">>" is output as a literal '>' and does not end a markup tag.
      if (Lex.peekNextChar() == '>') {
        (void)Lex.getNextChar();
        break;
      }
      // Close out the innermost tag.
      if (TagStack.empty())
        Parser.FatalError(Loc, "'>' without matching '<'");

      // Do any special handling for the end of a tag.
      processEndTag(TagStack.back());

      TagStack.pop_back();
      continue;
    }
    default:
      break;
    }
    // For anything else, just echo the character back out.
    if (!DumpTags && CurChar != EOF)
      outs() << (char)CurChar;
  }

  // If there are any unterminated markup tags, issue diagnostics for them.
  while (!TagStack.empty()) {
    MarkupTag &Tag = TagStack.back();
    SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
                        "unterminated markup tag");
    TagStack.pop_back();
  }
}

int main(int argc, char **argv) {
  // Print a stack trace if we signal out.
  sys::PrintStackTraceOnErrorSignal();
  PrettyStackTraceProgram X(argc, argv);

  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
  cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");

  ToolName = argv[0];

  // If no input files specified, read from stdin.
  if (InputFilenames.size() == 0)
    InputFilenames.push_back("-");

  std::for_each(InputFilenames.begin(), InputFilenames.end(),
                parseMCMarkup);
  return 0;
}