#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include <algorithm>
using namespace llvm;
// Command-line options controlling which files are read and how they are
// compared.

// Required positional argument: the file that contains the check directives.
static cl::opt<std::string>
CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
// -input-file=<filename>: the file to verify.  Initialized to "-", which the
// option description documents as meaning stdin.
static cl::opt<std::string>
InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
cl::init("-"), cl::value_desc("filename"));
// -check-prefix=<prefix>: the directive prefix searched for in the check file.
// Initialized to "CHECK"; the "-NEXT:" and "-NOT:" suffixes are appended to
// whatever prefix is chosen.
static cl::opt<std::string>
CheckPrefix("check-prefix", cl::init("CHECK"),
cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// -strict-whitespace: when set, neither the check file nor the input file is
// passed through CanonicalizeInputFile, so runs of spaces and tabs are compared
// exactly as written.
static cl::opt<bool>
NoCanonicalizeWhiteSpace("strict-whitespace",
cl::desc("Do not treat all horizontal whitespace as equivalent"));
/// Pattern - One parsed check line.  A pattern is either a plain fixed string
/// (FixedStr is non-empty) or a regular expression assembled from fixed text,
/// {{regex}} pieces and [[name]] / [[name:regex]] variable pieces (RegExStr is
/// used).
class Pattern {
// Location of the start of the pattern text, recorded by ParsePattern.
SMLoc PatternLoc;
// If non-empty, the pattern is this literal string and Match uses a plain
// substring search instead of a regex.
StringRef FixedStr;
// Regular expression text built by ParsePattern; only used when FixedStr is
// empty.
std::string RegExStr;
// Variable references: (variable name, offset into RegExStr at which the
// escaped value of the variable is inserted by Match).
std::vector<std::pair<StringRef, unsigned> > VariableUses;
// Variable definitions: (variable name, index of the parenthesized group whose
// match is stored into the variable table by Match).
std::vector<std::pair<StringRef, unsigned> > VariableDefs;
public:
Pattern() { }
// Parses PatternStr into this object.  Returns true (after reporting through
// SM) if the pattern is malformed, false on success.
bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
// Searches Buffer for this pattern.  Returns the offset of the match within
// Buffer, or StringRef::npos if there is none; MatchLen receives the length of
// the match.  Variables defined by the pattern are written to VariableTable.
size_t Match(StringRef Buffer, size_t &MatchLen,
StringMap<StringRef> &VariableTable) const;
// Emits "note" diagnostics that help explain a failed match: the value (or
// absence) of each referenced variable and a guess at the intended match
// location in Buffer.
void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
const StringMap<StringRef> &VariableTable) const;
private:
// Appends FixedStr to TheStr with regex metacharacters backslash-escaped.
static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
// Validates RegExStr as a regex and appends it to the member regex string,
// advancing CurParen by the number of groups it contains.  Returns true on
// error.
bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM);
// Edit distance between the pattern text and the first line of a same-length
// prefix of Buffer; used to rank candidate locations in PrintFailureInfo.
unsigned ComputeMatchDistance(StringRef Buffer,
const StringMap<StringRef> &VariableTable) const;
};
/// ParsePattern - Parse the text of one check line into this Pattern.
///
/// Trailing spaces and tabs are ignored.  A pattern that contains neither "{{"
/// nor "[[" is stored verbatim in FixedStr.  Otherwise the pattern is turned
/// into a single regular expression in RegExStr built from three kinds of
/// pieces:
///   - plain text, which is appended with regex metacharacters escaped;
///   - {{regex}}, which is appended as a parenthesized group;
///   - [[name]] (a use of a previously defined variable) and [[name:regex]]
///     (a definition, captured with a parenthesized group).
/// Variable names must consist of letters, digits and '_' and must not start
/// with a digit.
///
/// \param PatternStr  the pattern text; its data pointer is also used for
///                    diagnostic locations.
/// \param SM          source manager used to print diagnostics.
/// \returns true after printing an error if the pattern is malformed, false
///          on success.
bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  // Ignore trailing whitespace.
  while (!PatternStr.empty() &&
         (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
    PatternStr = PatternStr.substr(0, PatternStr.size()-1);

  // Check that there is something on the line.
  if (PatternStr.empty()) {
    SM.PrintMessage(PatternLoc, "found empty check string with prefix '" +
                    CheckPrefix+":'", "error");
    return true;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
  if (PatternStr.size() < 2 ||
      (PatternStr.find("{{") == StringRef::npos &&
       PatternStr.find("[[") == StringRef::npos)) {
    FixedStr = PatternStr;
    return false;
  }

  // Group #0 is the full match, so the first parenthesized group added to the
  // regex is #1.
  unsigned CurParen = 1;

  // Otherwise, there is at least one regex piece.  Build up the regex pattern
  // by escaping scary characters in fixed strings.
  while (!PatternStr.empty()) {
    // {{regex}} piece.
    if (PatternStr.size() >= 2 &&
        PatternStr[0] == '{' && PatternStr[1] == '{') {
      // Verify that it is terminated properly.
      size_t End = PatternStr.find("}}");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        "found start of regex string with no end '}}'", "error");
        return true;
      }

      // Enclose the regex in parentheses so that an alternation inside it
      // binds only to this piece: "abc{{x|z}}def" must become "abc(x|z)def",
      // not "abcx|zdef".  The extra group is counted in CurParen so that the
      // group indices recorded for later variable definitions stay correct.
      RegExStr += '(';
      ++CurParen;

      if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
        return true;

      RegExStr += ')';
      PatternStr = PatternStr.substr(End+2);
      continue;
    }

    // [[name]] or [[name:regex]] piece.
    if (PatternStr.size() >= 2 &&
        PatternStr[0] == '[' && PatternStr[1] == '[') {
      // Verify that it is terminated properly.
      size_t End = PatternStr.find("]]");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        "invalid named regex reference, no ]] found", "error");
        return true;
      }

      StringRef MatchStr = PatternStr.substr(2, End-2);
      PatternStr = PatternStr.substr(End+2);

      // Get the name (e.g. "foo"); everything after the first ':' is the
      // regex of a definition.
      size_t NameEnd = MatchStr.find(':');
      StringRef Name = MatchStr.substr(0, NameEnd);

      if (Name.empty()) {
        SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                        "invalid name in named regex: empty name", "error");
        return true;
      }

      // Verify that the name is well formed.
      for (unsigned i = 0, e = Name.size(); i != e; ++i)
        if (Name[i] != '_' &&
            (Name[i] < 'a' || Name[i] > 'z') &&
            (Name[i] < 'A' || Name[i] > 'Z') &&
            (Name[i] < '0' || Name[i] > '9')) {
          SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
                          "invalid name in named regex", "error");
          return true;
        }

      // Name can't start with a digit.
      if (isdigit(Name[0])) {
        SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                        "invalid name in named regex", "error");
        return true;
      }

      // Handle [[foo]]: remember where in the regex the value goes.
      if (NameEnd == StringRef::npos) {
        VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
        continue;
      }

      // Handle [[foo:.*]]: capture the regex in its own group.
      VariableDefs.push_back(std::make_pair(Name, CurParen));
      RegExStr += '(';
      ++CurParen;

      if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
        return true;

      RegExStr += ')';
      // Fall through: any fixed text that follows is handled below.
    }

    // Handle fixed string matches.  The fixed text runs up to the start of
    // the next regex or variable piece (or to the end of the pattern).
    size_t FixedMatchEnd = PatternStr.find("{{");
    FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
    PatternStr = PatternStr.substr(FixedMatchEnd);
    continue;
  }

  return false;
}
/// AddFixedStringToRegEx - Append the characters of FixedStr to TheStr,
/// placing a backslash in front of each character that is listed as a regex
/// metacharacter below.  All other characters are copied unchanged.
void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
  // Characters that need a preceding backslash.
  static const char MetaChars[] = {
    '(', ')', '^', '$', '|', '*', '+', '?', '.', '[', '\\', '{'
  };
  const char *const MetaEnd = MetaChars + sizeof(MetaChars);

  for (const char *Cur = FixedStr.begin(), *Last = FixedStr.end();
       Cur != Last; ++Cur) {
    if (std::find(MetaChars, MetaEnd, *Cur) != MetaEnd)
      TheStr += '\\';
    TheStr += *Cur;
  }
}
/// AddRegExToRegEx - Check that RegexStr compiles as a regular expression and,
/// if so, append it to the pattern's regex and advance CurParen by the number
/// of groups reported for it.
///
/// \returns true after printing an "invalid regex" error through SM if
///          RegexStr is not a valid regex, false otherwise.
bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
                              SourceMgr &SM) {
  Regex Compiled(RegexStr);
  std::string Reason;

  if (Compiled.isValid(Reason)) {
    // Note: this appends the parameter (RegexStr) to the member (RegExStr).
    RegExStr += RegexStr.str();
    CurParen += Compiled.getNumMatches();
    return false;
  }

  SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()),
                  "invalid regex: " + Reason, "error");
  return true;
}
/// Match - Look for this pattern in Buffer.
///
/// A fixed-string pattern is located with a plain substring search.  A regex
/// pattern first has the escaped value of every referenced variable inserted
/// into a copy of the regex, is then matched against Buffer, and finally
/// stores the text of each defining group into VariableTable.
///
/// \param MatchLen  receives the length of the match (for fixed strings it is
///                  set even when nothing is found).
/// \returns the offset of the match in Buffer, or StringRef::npos when there
///          is no match or a referenced variable is undefined.
size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
                      StringMap<StringRef> &VariableTable) const {
  typedef std::vector<std::pair<StringRef, unsigned> >::const_iterator
    VarIter;

  // Fixed strings need no regex machinery.
  if (!FixedStr.empty()) {
    MatchLen = FixedStr.size();
    return Buffer.find(FixedStr);
  }

  // Regex match.  If variables are referenced, substitute their values into
  // a private copy of the regex first.
  std::string Expanded;
  StringRef ActiveRegEx = RegExStr;
  if (!VariableUses.empty()) {
    Expanded = RegExStr;

    // Total length inserted so far; recorded offsets refer to the
    // unexpanded regex and must be shifted by this amount.
    unsigned Shift = 0;
    for (VarIter Use = VariableUses.begin(), UseEnd = VariableUses.end();
         Use != UseEnd; ++Use) {
      StringMap<StringRef>::iterator Entry = VariableTable.find(Use->first);
      // A reference to an undefined variable can never match.
      if (Entry == VariableTable.end())
        return StringRef::npos;

      // The value is matched literally, so escape it.
      std::string Escaped;
      AddFixedStringToRegEx(Entry->second, Escaped);

      Expanded.insert(Use->second + Shift, Escaped);
      Shift += Escaped.size();
    }

    ActiveRegEx = Expanded;
  }

  SmallVector<StringRef, 4> Groups;
  if (!Regex(ActiveRegEx, Regex::Newline).match(Buffer, &Groups))
    return StringRef::npos;

  // Successful regex match; group 0 is the whole match.
  assert(!Groups.empty() && "Didn't get any match");

  // Record the value of every variable this pattern defines.
  for (VarIter Def = VariableDefs.begin(), DefEnd = VariableDefs.end();
       Def != DefEnd; ++Def) {
    assert(Def->second < Groups.size() &&
           "Internal paren error");
    VariableTable[Def->first] = Groups[Def->second];
  }

  const StringRef Whole = Groups[0];
  MatchLen = Whole.size();
  return Whole.data()-Buffer.data();
}
/// ComputeMatchDistance - Return the edit distance between the pattern text
/// and the start of Buffer.  The pattern text is FixedStr, or the raw regex
/// string when there is no fixed string.  Only the first line of a Buffer
/// prefix as long as the pattern text takes part in the comparison.
/// VariableTable is currently not consulted.
unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
                              const StringMap<StringRef> &VariableTable) const {
  const StringRef Example =
    FixedStr.empty() ? StringRef(RegExStr) : FixedStr;

  // Compare against at most Example.size() characters, cut at the first
  // newline.
  const StringRef FirstLine =
    Buffer.substr(0, Example.size()).split('\n').first;

  return FirstLine.edit_distance(Example);
}
void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
const StringMap<StringRef> &VariableTable) const{
if (!VariableUses.empty()) {
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
StringRef Var = VariableUses[i].first;
StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
if (it == VariableTable.end()) {
OS << "uses undefined variable \"";
OS.write_escaped(Var) << "\"";;
} else {
OS << "with variable \"";
OS.write_escaped(Var) << "\" equal to \"";
OS.write_escaped(it->second) << "\"";
}
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), OS.str(), "note",
false);
}
}
size_t NumLinesForward = 0;
size_t Best = StringRef::npos;
double BestQuality = 0;
for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
if (Buffer[i] == '\n')
++NumLinesForward;
if (Buffer[i] == ' ' || Buffer[i] == '\t')
continue;
unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
double Quality = Distance + (NumLinesForward / 100.);
if (Quality < BestQuality || Best == StringRef::npos) {
Best = i;
BestQuality = Quality;
}
}
if (Best != StringRef::npos && BestQuality < 50) {
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
"possible intended match here", "note");
}
}
/// CheckString - One positive check directive read from the check file,
/// together with the "-NOT:" patterns that preceded it.
struct CheckString {
// The pattern to search for in the input.
Pattern Pat;
// Location of the pattern in the check file; used for diagnostics.
SMLoc Loc;
// True if this directive was written with the "-NEXT:" suffix, i.e. it must
// match on the line after the previous match.
bool IsCheckNext;
// "-NOT:" patterns (with their locations) that must not occur in the input
// between the previous match and the match of Pat.
std::vector<std::pair<SMLoc, Pattern> > NotStrings;
CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
: Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
};
/// CanonicalizeInputFile - Return a new buffer holding the contents of MB with
/// every run of spaces and tabs replaced by a single space.  All other
/// characters are copied unchanged.  MB is deleted; the returned buffer keeps
/// MB's identifier.
static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
  SmallString<128> Squeezed;
  Squeezed.reserve(MB->getBufferSize());

  const char *Cur = MB->getBufferStart();
  const char *const Stop = MB->getBufferEnd();
  while (Cur != Stop) {
    const char Ch = *Cur++;

    // Anything that is not horizontal whitespace is copied through.
    if (Ch != ' ' && Ch != '\t') {
      Squeezed.push_back(Ch);
      continue;
    }

    // Emit one space for the whole run and skip the rest of it.
    Squeezed.push_back(' ');
    while (Cur != Stop && (*Cur == ' ' || *Cur == '\t'))
      ++Cur;
  }

  MemoryBuffer *Result =
    MemoryBuffer::getMemBufferCopy(Squeezed.str(), MB->getBufferIdentifier());

  // The original buffer is no longer needed.
  delete MB;
  return Result;
}
/// ReadCheckFile - Read the check file named by CheckFilename and collect its
/// directives.
///
/// Every occurrence of "<prefix>:", "<prefix>-NEXT:" or "<prefix>-NOT:" (where
/// <prefix> is CheckPrefix) starts a directive whose pattern runs to the end
/// of the line.  Positive checks are appended to CheckStrings.  "-NOT:"
/// patterns are accumulated and attached to the next positive check.
///
/// \param SM            source manager; the check file buffer is registered
///                      with it and it is used to print diagnostics.
/// \param CheckStrings  receives the parsed directives in file order.
/// \returns true after printing an error if the file cannot be opened, a
///          pattern is malformed, a "-NEXT:" has no preceding check, no check
///          is found at all, or a "-NOT:" follows the last positive check;
///          false on success.
static bool ReadCheckFile(SourceMgr &SM,
                          std::vector<CheckString> &CheckStrings) {
  // Open the check file, and tell SourceMgr about it.
  std::string ErrorStr;
  MemoryBuffer *F =
    MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr);
  if (F == 0) {
    errs() << "Could not open check file '" << CheckFilename << "': "
           << ErrorStr << '\n';
    return true;
  }

  // Unless strict whitespace was requested, collapse horizontal whitespace so
  // the patterns are compared against equally canonicalized input.
  if (!NoCanonicalizeWhiteSpace)
    F = CanonicalizeInputFile(F);

  SM.AddNewSourceBuffer(F, SMLoc());

  // Find all instances of CheckPrefix followed by ':' (or a known suffix) in
  // the file.
  StringRef Buffer = F->getBuffer();

  // "-NOT:" patterns seen since the last positive check.
  std::vector<std::pair<SMLoc, Pattern> > NotMatches;

  while (1) {
    // See if the prefix occurs in the remaining text.
    Buffer = Buffer.substr(Buffer.find(CheckPrefix));

    // If we didn't find a match, we're done.
    if (Buffer.empty())
      break;

    const char *CheckPrefixStart = Buffer.data();

    // When we find a check prefix, keep track of whether we find CHECK: or
    // CHECK-NEXT: or CHECK-NOT:.
    bool IsCheckNext = false, IsCheckNot = false;

    // Verify that the prefix is followed by ':' or a known suffix.  The size
    // test guards the index when the prefix is the very last text in the file.
    if (Buffer.size() > CheckPrefix.size() &&
        Buffer[CheckPrefix.size()] == ':') {
      Buffer = Buffer.substr(CheckPrefix.size()+1);
    } else if (Buffer.size() > CheckPrefix.size()+6 &&
               memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
      Buffer = Buffer.substr(CheckPrefix.size()+7);
      IsCheckNext = true;
    } else if (Buffer.size() > CheckPrefix.size()+5 &&
               memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
      Buffer = Buffer.substr(CheckPrefix.size()+6);
      IsCheckNot = true;
    } else {
      // Not a directive; resume the search one character further on.
      Buffer = Buffer.substr(1);
      continue;
    }

    // Okay, we found the prefix, yay.  Remember the rest of the line, but
    // ignore leading whitespace.
    Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));

    // Scan ahead to the end of line.
    size_t EOL = Buffer.find_first_of("\n\r");

    // Remember the location of the start of the pattern, for diagnostics.
    SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());

    // Parse the pattern.
    Pattern P;
    if (P.ParsePattern(Buffer.substr(0, EOL), SM))
      return true;

    Buffer = Buffer.substr(EOL);

    // Verify that CHECK-NEXT lines have at least one CHECK line before them.
    if (IsCheckNext && CheckStrings.empty()) {
      SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
                      "found '"+CheckPrefix+"-NEXT:' without previous '"+
                      CheckPrefix+ ":' line", "error");
      return true;
    }

    // Handle CHECK-NOT: remember it (at the location of its pattern) until the
    // next positive check comes along.
    if (IsCheckNot) {
      NotMatches.push_back(std::make_pair(PatternLoc, P));
      continue;
    }

    // Okay, add the string we captured to the output vector and move on.  The
    // pending CHECK-NOT patterns move into the new entry, which leaves
    // NotMatches empty for the next round.
    CheckStrings.push_back(CheckString(P,
                                       PatternLoc,
                                       IsCheckNext));
    std::swap(NotMatches, CheckStrings.back().NotStrings);
  }

  if (CheckStrings.empty()) {
    errs() << "error: no check strings found with prefix '" << CheckPrefix
           << ":'\n";
    return true;
  }

  // A trailing CHECK-NOT has no following match to bound its search region.
  if (!NotMatches.empty()) {
    errs() << "error: '" << CheckPrefix
           << "-NOT:' not supported after last check line.\n";
    return true;
  }

  return false;
}
static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
StringRef Buffer,
StringMap<StringRef> &VariableTable) {
SM.PrintMessage(CheckStr.Loc, "expected string not found in input",
"error");
Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here",
"note");
CheckStr.Pat.PrintFailureInfo(SM, Buffer, VariableTable);
}
/// CountNumNewlinesBetween - Count the line breaks in Range.  A '\n' or '\r'
/// counts as one line break; a two-character sequence of the two different
/// characters ("\r\n" or "\n\r") also counts as a single line break.
static unsigned CountNumNewlinesBetween(StringRef Range) {
  unsigned Count = 0;

  for (size_t Idx = 0, Len = Range.size(); Idx < Len; ++Idx) {
    const char Ch = Range[Idx];
    if (Ch != '\n' && Ch != '\r')
      continue;

    ++Count;

    // Swallow the second half of a mixed two-character line ending.
    if (Idx + 1 < Len) {
      const char Next = Range[Idx + 1];
      if ((Next == '\n' || Next == '\r') && Next != Ch)
        ++Idx;
    }
  }

  return Count;
}
/// main - Read the check file, then verify that the input file contains a
/// match for every check directive, in order.
///
/// Exit status: 0 if every check passes, 1 if a check fails, 2 if the check
/// file or the input file cannot be read (or the check file is malformed).
int main(int argc, char **argv) {
  sys::PrintStackTraceOnErrorSignal();
  PrettyStackTraceProgram X(argc, argv);
  cl::ParseCommandLineOptions(argc, argv);

  SourceMgr SM;

  // Read the expected strings from the check file.
  std::vector<CheckString> CheckStrings;
  if (ReadCheckFile(SM, CheckStrings))
    return 2;

  // Open the file to check and add it to SourceMgr.
  std::string ErrorStr;
  MemoryBuffer *F =
    MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
  if (F == 0) {
    errs() << "Could not open input file '" << InputFilename << "': "
           << ErrorStr << '\n';
    // Same status as a check-file error, so that 1 only ever means "a check
    // failed".
    return 2;
  }

  // Canonicalize the input exactly like the check file was.
  if (!NoCanonicalizeWhiteSpace)
    F = CanonicalizeInputFile(F);

  SM.AddNewSourceBuffer(F, SMLoc());

  // Values of the [[name:...]] variables defined by earlier matches.
  StringMap<StringRef> VariableTable;

  // Check that we have all of the expected strings, in order, in the input
  // file.  Buffer is the part of the input that has not been matched yet.
  StringRef Buffer = F->getBuffer();

  // End of the previous match (start of the input before the first match).
  const char *LastMatch = Buffer.data();

  for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
    const CheckString &CheckStr = CheckStrings[StrNo];

    StringRef SearchFrom = Buffer;

    // Find the pattern in the remaining input.  Failure is signalled by npos
    // only, so a zero-length match at the very end of the input still counts
    // as a match.
    size_t MatchLen = 0;
    size_t MatchPos = CheckStr.Pat.Match(Buffer, MatchLen, VariableTable);
    if (MatchPos == StringRef::npos) {
      PrintCheckFailed(SM, CheckStr, SearchFrom, VariableTable);
      return 1;
    }
    Buffer = Buffer.substr(MatchPos);

    // Text between the end of the previous match and the start of this one.
    StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch);

    // If this check is a "CHECK-NEXT", verify that the previous match was on
    // the previous line (i.e. that there is exactly one newline between them).
    if (CheckStr.IsCheckNext) {
      assert(LastMatch != F->getBufferStart() &&
             "CHECK-NEXT can't be the first check in a file");

      unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
      if (NumNewLines == 0) {
        SM.PrintMessage(CheckStr.Loc,
                    CheckPrefix+"-NEXT: is on the same line as previous match",
                        "error");
        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
                        "'next' match was here", "note");
        SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
                        "previous match was here", "note");
        return 1;
      }

      if (NumNewLines != 1) {
        SM.PrintMessage(CheckStr.Loc,
                        CheckPrefix+
                        "-NEXT: is not on the line after the previous match",
                        "error");
        SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
                        "'next' match was here", "note");
        SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
                        "previous match was here", "note");
        return 1;
      }
    }

    // If this match had "not strings", verify that they don't exist in the
    // skipped region.
    for (unsigned ChunkNo = 0, NumNot = CheckStr.NotStrings.size();
         ChunkNo != NumNot; ++ChunkNo) {
      // The length of a forbidden match is not needed, only its position.
      size_t NotMatchLen = 0;
      size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion,
                                                             NotMatchLen,
                                                             VariableTable);
      if (Pos == StringRef::npos) continue;

      SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
                      CheckPrefix+"-NOT: string occurred!", "error");
      SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
                      CheckPrefix+"-NOT: pattern specified here", "note");
      return 1;
    }

    // Otherwise, everything is good.  Step over the matched text and remember
    // the position after the match as the end of the last match.
    Buffer = Buffer.substr(MatchLen);
    LastMatch = Buffer.data();
  }

  return 0;
}