#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <algorithm>
#include <map>
#include <string>
#include <vector>
using namespace llvm;
// Positional, required argument: the file that contains the check directives.
static cl::opt<std::string>
CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
// -input-file: the file whose contents are verified. It is initialised to
// "-", which the description below documents as meaning stdin.
static cl::opt<std::string>
InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
cl::init("-"), cl::value_desc("filename"));
// -check-prefix: the directive prefix that is searched for in the check file
// (initialised to "CHECK").
static cl::opt<std::string>
CheckPrefix("check-prefix", cl::init("CHECK"),
cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// -strict-whitespace: when set, the check file and the input file are used
// as read instead of being passed through CanonicalizeInputFile().
static cl::opt<bool>
NoCanonicalizeWhiteSpace("strict-whitespace",
cl::desc("Do not treat all horizontal whitespace as equivalent"));
// A single parsed check pattern. A pattern is either the special
// "match end of input" pattern, a fixed string, or a regular expression that
// was assembled from fixed text, {{...}} regex pieces and [[...]] variable
// definitions/uses.
class Pattern {
// Location of the start of the pattern text; set by ParsePattern().
SMLoc PatternLoc;
// When true, Match() succeeds immediately at the end of the buffer with a
// zero-length match.
bool MatchEOF;
// Non-empty when the pattern contains no "{{" or "[[": Match() then does a
// plain substring search for this text.
StringRef FixedStr;
// The regular expression built up by ParsePattern() for non-fixed patterns.
std::string RegExStr;
// Line number passed to ParsePattern(); used to evaluate @LINE expressions.
// It is not initialised by the constructor.
unsigned LineNumber;
// Variable/expression uses that must be substituted at match time: each
// entry is (name, offset into RegExStr where the value is inserted).
std::vector<std::pair<StringRef, unsigned> > VariableUses;
// Variables defined by this pattern, mapped to the number of the
// parenthesised regex group that captures their value.
std::map<StringRef, unsigned> VariableDefs;
public:
// Creates an empty pattern; pass true to create the end-of-input pattern.
Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { }
// Parses PatternStr into this object, reporting problems through SM.
// Returns true on error, false on success.
bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber);
// Searches Buffer for this pattern. Returns the offset of the match within
// Buffer (StringRef::npos if there is none) and stores the match length in
// MatchLen. On a successful regex match, values of the variables defined by
// the pattern are written into VariableTable.
size_t Match(StringRef Buffer, size_t &MatchLen,
StringMap<StringRef> &VariableTable) const;
// Emits notes that help explain a failed match: the values of the variables
// the pattern uses and, when one looks plausible, a likely intended match.
void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
const StringMap<StringRef> &VariableTable) const;
private:
// Appends FixedStr to TheStr, backslash-escaping regex metacharacters.
static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
// Validates the regex RS, appends it to RegExStr and advances CurParen by
// the number of groups it contains. Returns true on error.
bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
// Appends the back-reference "\N" (N in 1..9) to RegExStr.
void AddBackrefToRegEx(unsigned BackrefNum);
// Returns the edit distance between the start of Buffer and the pattern
// text; used by PrintFailureInfo() to rank candidate positions.
unsigned ComputeMatchDistance(StringRef Buffer,
const StringMap<StringRef> &VariableTable) const;
// Evaluates an "@LINE"-style expression into Value. Returns false if Expr is
// not a supported expression.
bool EvaluateExpression(StringRef Expr, std::string &Value) const;
// Returns the offset in Str of the "]]" that closes a [[...]] block, or
// StringRef::npos if there is none.
size_t FindRegexVarEnd(StringRef Str);
};
/// Parse the text of one check directive into this pattern.
///
/// Trailing spaces and tabs are ignored. A pattern without "{{" or "[["
/// is kept as a fixed string. Otherwise one regular expression is built in
/// RegExStr from three kinds of pieces:
///   * {{regex}}     - the regex, wrapped in parentheses;
///   * [[name:regex]]- a variable definition, captured by a parenthesised
///                     group whose number is recorded in VariableDefs;
///   * [[name]]      - a variable use. If the variable was defined earlier in
///                     the same pattern a back-reference is emitted, otherwise
///                     the use is recorded in VariableUses for substitution at
///                     match time. Names starting with '@' are expressions.
///   * everything else is fixed text, escaped for use inside a regex.
///
/// \param PatternStr  the pattern text (one line, without the directive).
/// \param SM          source manager used to print diagnostics.
/// \param LineNumber  line of the directive; stored for @LINE expressions.
/// \returns true if an error was reported, false on success.
bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
                           unsigned LineNumber) {
  this->LineNumber = LineNumber;
  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  // Ignore trailing horizontal whitespace.
  while (!PatternStr.empty() &&
         (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
    PatternStr = PatternStr.substr(0, PatternStr.size()-1);

  // There must be something left to match.
  if (PatternStr.empty()) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" +
                    CheckPrefix+":'");
    return true;
  }

  // No regex or variable pieces: this is a plain fixed-string pattern.
  if (PatternStr.size() < 2 ||
      (PatternStr.find("{{") == StringRef::npos &&
       PatternStr.find("[[") == StringRef::npos)) {
    FixedStr = PatternStr;
    return false;
  }

  // Group #0 is the whole match, so the first group opened here is #1.
  unsigned CurParen = 1;

  while (!PatternStr.empty()) {
    // {{regex}} piece.
    if (PatternStr.startswith("{{")) {
      size_t End = PatternStr.find("}}");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "found start of regex string with no end '}}'");
        return true;
      }

      // Wrap the piece in parentheses so that an alternation inside it does
      // not spill over into the surrounding text.
      RegExStr += '(';
      ++CurParen;
      if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
        return true;
      RegExStr += ')';

      PatternStr = PatternStr.substr(End+2);
      continue;
    }

    // [[name]] or [[name:regex]] piece.
    if (PatternStr.startswith("[[")) {
      // End is relative to the text that follows the opening "[[".
      size_t End = FindRegexVarEnd(PatternStr.substr(2));
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "invalid named regex reference, no ]] found");
        return true;
      }

      StringRef MatchStr = PatternStr.substr(2, End);
      PatternStr = PatternStr.substr(End+4);

      // Everything before the first ':' (or the whole piece) is the name.
      size_t NameEnd = MatchStr.find(':');
      StringRef Name = MatchStr.substr(0, NameEnd);

      if (Name.empty()) {
        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                        "invalid name in named regex: empty name");
        return true;
      }

      // Validate the name. A leading '@' marks an expression, which may also
      // contain '+' and '-' but cannot be used as a definition.
      bool IsExpression = false;
      for (unsigned i = 0, e = Name.size(); i != e; ++i) {
        if (i == 0 && Name[i] == '@') {
          if (NameEnd != StringRef::npos) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "invalid name in named regex definition");
            return true;
          }
          IsExpression = true;
          continue;
        }
        // The <ctype.h> classifiers require a value representable as
        // unsigned char; a plain (possibly negative) char is undefined.
        if (Name[i] != '_' && !isalnum(static_cast<unsigned char>(Name[i])) &&
            (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
          SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
                          SourceMgr::DK_Error, "invalid name in named regex");
          return true;
        }
      }

      // A name may not start with a digit.
      if (isdigit(static_cast<unsigned char>(Name[0]))) {
        SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                        "invalid name in named regex");
        return true;
      }

      // [[name]]: a use.
      if (NameEnd == StringRef::npos) {
        std::map<StringRef, unsigned>::const_iterator Def =
          VariableDefs.find(Name);
        if (Def != VariableDefs.end()) {
          // Defined earlier in this same pattern: emit a back-reference.
          unsigned VarParenNum = Def->second;
          if (VarParenNum < 1 || VarParenNum > 9) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "Can't back-reference more than 9 variables");
            return true;
          }
          AddBackrefToRegEx(VarParenNum);
        } else {
          // Remember where the value has to be inserted at match time.
          VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
        }
        continue;
      }

      // [[name:regex]]: a definition captured by group number CurParen.
      VariableDefs[Name] = CurParen;
      RegExStr += '(';
      ++CurParen;
      if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
        return true;
      RegExStr += ')';
      // Intentionally no 'continue': the fixed-text handling below also
      // consumes whatever literal text follows the definition.
    }

    // Fixed text up to the start of the next regex/variable piece.
    size_t FixedMatchEnd = PatternStr.find("{{");
    FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr);
    PatternStr = PatternStr.substr(FixedMatchEnd);
  }

  return false;
}
/// Append \p FixedStr to \p TheStr so that it matches literally when the
/// result is used as a regular expression: each of the characters
/// ( ) ^ $ | * + ? . [ \ { is preceded by a backslash, every other character
/// is copied unchanged.
void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
  // The set of characters that receive an escaping backslash.
  const StringRef NeedsEscape("()^$|*+?.[\\{");

  for (size_t Idx = 0, Len = FixedStr.size(); Idx < Len; ++Idx) {
    const char Ch = FixedStr[Idx];
    if (NeedsEscape.find(Ch) != StringRef::npos)
      TheStr += '\\';
    TheStr += Ch;
  }
}
/// Validate the regular expression \p RS and append it to RegExStr.
///
/// \param RS        regex source text.
/// \param CurParen  running group counter; advanced by the number of
///                  matches the regex reports.
/// \param SM        source manager used to report an invalid regex.
/// \returns true if \p RS is invalid (nothing is appended), false otherwise.
bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
                              SourceMgr &SM) {
  Regex Compiled(RS);
  std::string Why;
  const bool IsValid = Compiled.isValid(Why);

  if (IsValid) {
    RegExStr.append(RS.begin(), RS.end());
    CurParen += Compiled.getNumMatches();
    return false;
  }

  SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
                  "invalid regex: " + Why);
  return true;
}
/// Append a regex back-reference to group \p BackrefNum (which must be in the
/// range 1..9) to RegExStr, i.e. a backslash followed by the digit.
void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
  assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
  const char Digit = '0' + BackrefNum;
  RegExStr += '\\';
  RegExStr += Digit;
}
/// Evaluate an expression used in a [[...]] block.
///
/// The expression must start with "@LINE". It may be followed by '+' or '-'
/// and a decimal integer, which is added to the line number of this pattern.
///
/// \param Expr   expression text.
/// \param Value  receives the decimal rendering of the result on success.
/// \returns true on success, false if \p Expr is not a supported expression.
bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
  const StringRef Keyword("@LINE");
  if (!Expr.startswith(Keyword))
    return false;

  StringRef Tail = Expr.substr(Keyword.size());
  int Offset = 0;
  if (!Tail.empty()) {
    switch (Tail[0]) {
    case '+':
      // Drop the plus sign; the remainder is parsed as the offset.
      Tail = Tail.substr(1);
      break;
    case '-':
      // Keep the minus sign so the offset is parsed as a negative number.
      break;
    default:
      return false;
    }
    if (Tail.getAsInteger(10, Offset))
      return false;
  }

  Value = llvm::itostr(LineNumber + Offset);
  return true;
}
/// Search \p Buffer for this pattern.
///
/// \param Buffer         text to search.
/// \param MatchLen       receives the length of the match.
/// \param VariableTable  current variable values; read to expand variable
///                       uses and updated with the variables this pattern
///                       defines when a regex match succeeds.
/// \returns the offset of the match within \p Buffer, or StringRef::npos if
///          the pattern does not match, uses an undefined variable, or uses
///          an expression that cannot be evaluated.
size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
                      StringMap<StringRef> &VariableTable) const {
  // The end-of-input pattern matches the empty string at the buffer's end.
  if (MatchEOF) {
    MatchLen = 0;
    return Buffer.size();
  }

  // Fixed-string patterns are a plain substring search.
  if (!FixedStr.empty()) {
    MatchLen = FixedStr.size();
    return Buffer.find(FixedStr);
  }

  // Regex pattern. If it uses variables or expressions, build a temporary
  // regex with their values spliced in at the recorded offsets.
  std::string Expanded;
  StringRef RegExToMatch = RegExStr;
  if (!VariableUses.empty()) {
    Expanded = RegExStr;

    // Number of characters inserted so far; later offsets shift by this much.
    unsigned Shift = 0;
    typedef std::vector<std::pair<StringRef, unsigned> >::const_iterator
      UseIterator;
    for (UseIterator Use = VariableUses.begin(), UseEnd = VariableUses.end();
         Use != UseEnd; ++Use) {
      const StringRef Name = Use->first;
      std::string Replacement;

      if (Name[0] == '@') {
        if (!EvaluateExpression(Name, Replacement))
          return StringRef::npos;
      } else {
        StringMap<StringRef>::iterator Entry = VariableTable.find(Name);
        if (Entry == VariableTable.end())
          return StringRef::npos;
        // The value is matched literally, so escape it for the regex.
        AddFixedStringToRegEx(Entry->second, Replacement);
      }

      Expanded.insert(Use->second + Shift, Replacement);
      Shift += Replacement.size();
    }

    RegExToMatch = Expanded;
  }

  SmallVector<StringRef, 4> Groups;
  if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &Groups))
    return StringRef::npos;

  assert(!Groups.empty() && "Didn't get any match");
  const StringRef Whole = Groups[0];

  // Record the values captured for the variables this pattern defines.
  std::map<StringRef, unsigned>::const_iterator Def = VariableDefs.begin();
  const std::map<StringRef, unsigned>::const_iterator DefEnd =
    VariableDefs.end();
  for (; Def != DefEnd; ++Def) {
    assert(Def->second < Groups.size() && "Internal paren error");
    VariableTable[Def->first] = Groups[Def->second];
  }

  MatchLen = Whole.size();
  return Whole.data() - Buffer.data();
}
/// Estimate how far the start of \p Buffer is from matching this pattern.
///
/// The pattern text (the fixed string, or the regex source itself when there
/// is no fixed string) is compared by edit distance against the beginning of
/// \p Buffer, limited to the pattern's length and to the first line.
/// \p VariableTable is not consulted.
unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
                              const StringMap<StringRef> &VariableTable) const {
  const StringRef Reference =
    FixedStr.empty() ? StringRef(RegExStr) : FixedStr;

  const StringRef Candidate =
    Buffer.substr(0, Reference.size()).split('\n').first;
  return Candidate.edit_distance(Reference);
}
void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
const StringMap<StringRef> &VariableTable) const{
if (!VariableUses.empty()) {
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
StringRef Var = VariableUses[i].first;
if (Var[0] == '@') {
std::string Value;
if (EvaluateExpression(Var, Value)) {
OS << "with expression \"";
OS.write_escaped(Var) << "\" equal to \"";
OS.write_escaped(Value) << "\"";
} else {
OS << "uses incorrect expression \"";
OS.write_escaped(Var) << "\"";
}
} else {
StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
if (it == VariableTable.end()) {
OS << "uses undefined variable \"";
OS.write_escaped(Var) << "\"";
} else {
OS << "with variable \"";
OS.write_escaped(Var) << "\" equal to \"";
OS.write_escaped(it->second) << "\"";
}
}
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
OS.str());
}
}
size_t NumLinesForward = 0;
size_t Best = StringRef::npos;
double BestQuality = 0;
for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
if (Buffer[i] == '\n')
++NumLinesForward;
if (Buffer[i] == ' ' || Buffer[i] == '\t')
continue;
unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
double Quality = Distance + (NumLinesForward / 100.);
if (Quality < BestQuality || Best == StringRef::npos) {
Best = i;
BestQuality = Quality;
}
}
if (Best && Best != StringRef::npos && BestQuality < 50) {
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
SourceMgr::DK_Note, "possible intended match here");
}
}
/// Find the "]]" that terminates a [[...]] block.
///
/// \param Str  the text that follows the opening "[[".
/// \returns the offset within \p Str of the closing "]]" that occurs outside
///          any '[' ... ']' nesting, or StringRef::npos if there is none or
///          if a ']' is encountered that has no matching '['.
///
/// A backslash skips the character that follows it, so escaped brackets do
/// not take part in the nesting count.
size_t Pattern::FindRegexVarEnd(StringRef Str) {
  // Offset of the character currently examined, relative to the input.
  size_t Offset = 0;
  // Number of '[' seen that are still waiting for their ']'.
  size_t BracketDepth = 0;

  while (!Str.empty()) {
    if (Str.startswith("]]") && BracketDepth == 0)
      return Offset;

    if (Str[0] == '\\') {
      // Skip the backslash together with the character it escapes.
      Str = Str.substr(2);
      Offset += 2;
      continue;
    }

    switch (Str[0]) {
    default:
      break;
    case '[':
      ++BracketDepth;
      break;
    case ']':
      // The pattern text is user input, so an unbalanced ']' must not abort
      // the tool or wrap the unsigned depth counter. Report "not found" and
      // let the caller emit its diagnostic.
      if (BracketDepth == 0)
        return StringRef::npos;
      --BracketDepth;
      break;
    }
    Str = Str.substr(1);
    ++Offset;
  }

  return StringRef::npos;
}
// One positive check directive together with the negative ("-NOT:") checks
// that were written before it.
struct CheckString {
// The pattern that has to be found in the input.
Pattern Pat;
// Location in the check file used when reporting diagnostics for this check.
SMLoc Loc;
// True if the directive was written with the "-NEXT:" suffix; main() then
// requires exactly one newline between the previous match and this one.
bool IsCheckNext;
// Patterns that must NOT match in the input region skipped before Pat
// matched; each is paired with the location reported in diagnostics.
std::vector<std::pair<SMLoc, Pattern> > NotStrings;
// Initialises the pattern, location and "-NEXT:" flag; NotStrings starts
// out empty.
CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
: Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
};
/// Build a whitespace-canonicalised copy of \p MB.
///
/// In the copy, every run of spaces and tabs is replaced by a single space
/// and the '\r' of each "\r\n" pair is dropped. The copy keeps the buffer
/// identifier of \p MB.
///
/// \param MB  buffer to canonicalise; it is deleted by this function.
/// \returns the newly allocated buffer.
static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
  SmallString<128> NewFile;
  NewFile.reserve(MB->getBufferSize());

  for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
       Ptr != End; ++Ptr) {
    // Drop the '\r' of a "\r\n" pair. The remaining length is compared
    // instead of forming 'End - 2', which would point before the start of
    // the buffer when it holds fewer than two bytes.
    if (End - Ptr >= 2 && Ptr[0] == '\r' && Ptr[1] == '\n')
      continue;

    // Anything that is not horizontal whitespace is copied unchanged.
    if (*Ptr != ' ' && *Ptr != '\t') {
      NewFile.push_back(*Ptr);
      continue;
    }

    // Emit one space for the whole run of spaces/tabs and skip the rest.
    NewFile.push_back(' ');
    while (Ptr+1 != End &&
           (Ptr[1] == ' ' || Ptr[1] == '\t'))
      ++Ptr;
  }

  // Create the replacement before releasing the original, because the
  // identifier is read from the original buffer.
  MemoryBuffer *MB2 =
    MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier());
  delete MB;
  return MB2;
}
/// Read the check file named by CheckFilename and collect its directives.
///
/// Every occurrence of CheckPrefix followed by ":", "-NEXT:" or "-NOT:" is
/// parsed as a pattern. Positive checks are appended to \p CheckStrings;
/// "-NOT:" patterns are attached to the next positive check. If "-NOT:"
/// patterns remain at the end of the file, an end-of-input check is appended
/// to carry them.
///
/// \param SM            source manager; the check file buffer is added to it
///                      and it is used to print diagnostics.
/// \param CheckStrings  receives the parsed checks.
/// \returns true if an error was reported (file could not be opened, a
///          pattern failed to parse, "-NEXT:" came first, or no checks were
///          found), false on success.
static bool ReadCheckFile(SourceMgr &SM,
                          std::vector<CheckString> &CheckStrings) {
  OwningPtr<MemoryBuffer> File;
  if (error_code ec =
        MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), File)) {
    errs() << "Could not open check file '" << CheckFilename << "': "
           << ec.message() << '\n';
    return true;
  }
  MemoryBuffer *F = File.take();

  // Unless strict whitespace was requested, work on a canonicalised copy.
  if (!NoCanonicalizeWhiteSpace)
    F = CanonicalizeInputFile(F);

  SM.AddNewSourceBuffer(F, SMLoc());

  StringRef Buffer = F->getBuffer();

  // "-NOT:" patterns seen since the last positive check.
  std::vector<std::pair<SMLoc, Pattern> > NotMatches;

  // Line on which the directive currently being processed was found.
  unsigned LineNumber = 1;

  while (1) {
    size_t PrefixLoc = Buffer.find(CheckPrefix);
    if (PrefixLoc == StringRef::npos)
      break;

    LineNumber += Buffer.substr(0, PrefixLoc).count('\n');
    Buffer = Buffer.substr(PrefixLoc);

    const char *CheckPrefixStart = Buffer.data();

    bool IsCheckNext = false, IsCheckNot = false;

    // The prefix must be followed by one of the directive suffixes. The
    // size is checked first: the prefix may be the very last text in the
    // buffer, in which case there is no character to index.
    if (Buffer.size() > CheckPrefix.size() &&
        Buffer[CheckPrefix.size()] == ':') {
      Buffer = Buffer.substr(CheckPrefix.size()+1);
    } else if (Buffer.size() > CheckPrefix.size()+6 &&
               memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
      Buffer = Buffer.substr(CheckPrefix.size()+6);
      IsCheckNext = true;
    } else if (Buffer.size() > CheckPrefix.size()+5 &&
               memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
      Buffer = Buffer.substr(CheckPrefix.size()+5);
      IsCheckNot = true;
    } else {
      // Not a directive; resume the search one character further on.
      Buffer = Buffer.substr(1);
      continue;
    }

    // Skip leading horizontal whitespace; the pattern runs to end of line.
    Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
    size_t EOL = Buffer.find_first_of("\n\r");

    // Start of the pattern text, kept for diagnostics.
    SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());

    Pattern P;
    if (P.ParsePattern(Buffer.substr(0, EOL), SM, LineNumber))
      return true;

    Buffer = Buffer.substr(EOL);

    // A "-NEXT:" directive needs a preceding positive check.
    if (IsCheckNext && CheckStrings.empty()) {
      // NOTE(review): the quote opened before the second prefix is never
      // closed in this message; the text is left unchanged in case tests
      // match it.
      SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
                      SourceMgr::DK_Error,
                      "found '"+CheckPrefix+"-NEXT:' without previous '"+
                      CheckPrefix+ ": line");
      return true;
    }

    // "-NOT:" patterns are held back until the next positive check.
    if (IsCheckNot) {
      NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()),
                                          P));
      continue;
    }

    // Record the positive check and hand it the pending "-NOT:" patterns.
    CheckStrings.push_back(CheckString(P,
                                       PatternLoc,
                                       IsCheckNext));
    std::swap(NotMatches, CheckStrings.back().NotStrings);
  }

  // Trailing "-NOT:" patterns are attached to an end-of-input check.
  if (!NotMatches.empty()) {
    CheckStrings.push_back(CheckString(Pattern(true),
                                       SMLoc::getFromPointer(Buffer.data()),
                                       false));
    std::swap(NotMatches, CheckStrings.back().NotStrings);
  }

  if (CheckStrings.empty()) {
    errs() << "error: no check strings found with prefix '" << CheckPrefix
           << ":'\n";
    return true;
  }

  return false;
}
static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
StringRef Buffer,
StringMap<StringRef> &VariableTable) {
SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error,
"expected string not found in input");
Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
"scanning from here");
CheckStr.Pat.PrintFailureInfo(SM, Buffer, VariableTable);
}
/// Count the line breaks contained in \p Range.
///
/// Each '\n' or '\r' counts as one line break, except that a two-character
/// sequence of both ("\r\n" or "\n\r") counts as a single break.
static unsigned CountNumNewlinesBetween(StringRef Range) {
  unsigned Breaks = 0;

  size_t Pos = Range.find_first_of("\n\r");
  while (Pos != StringRef::npos) {
    ++Breaks;

    // Step over the break character, and over its partner when the two
    // differ and together form one line ending.
    size_t Resume = Pos + 1;
    if (Resume < Range.size() &&
        (Range[Resume] == '\n' || Range[Resume] == '\r') &&
        Range[Resume] != Range[Pos])
      ++Resume;

    Pos = Range.find_first_of("\n\r", Resume);
  }

  return Breaks;
}
int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
cl::ParseCommandLineOptions(argc, argv);
SourceMgr SM;
std::vector<CheckString> CheckStrings;
if (ReadCheckFile(SM, CheckStrings))
return 2;
OwningPtr<MemoryBuffer> File;
if (error_code ec =
MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) {
errs() << "Could not open input file '" << InputFilename << "': "
<< ec.message() << '\n';
return 2;
}
MemoryBuffer *F = File.take();
if (F->getBufferSize() == 0) {
errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
return 2;
}
if (!NoCanonicalizeWhiteSpace)
F = CanonicalizeInputFile(F);
SM.AddNewSourceBuffer(F, SMLoc());
StringMap<StringRef> VariableTable;
StringRef Buffer = F->getBuffer();
const char *LastMatch = Buffer.data();
for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
const CheckString &CheckStr = CheckStrings[StrNo];
StringRef SearchFrom = Buffer;
size_t MatchLen = 0;
size_t MatchPos = CheckStr.Pat.Match(Buffer, MatchLen, VariableTable);
Buffer = Buffer.substr(MatchPos);
if (MatchPos == StringRef::npos) {
PrintCheckFailed(SM, CheckStr, SearchFrom, VariableTable);
return 1;
}
StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch);
if (CheckStr.IsCheckNext) {
assert(LastMatch != F->getBufferStart() &&
"CHECK-NEXT can't be the first check in a file");
unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
if (NumNewLines == 0) {
SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error,
CheckPrefix+"-NEXT: is on the same line as previous match");
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
SourceMgr::DK_Note, "'next' match was here");
SM.PrintMessage(SMLoc::getFromPointer(LastMatch), SourceMgr::DK_Note,
"previous match was here");
return 1;
}
if (NumNewLines != 1) {
SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error, CheckPrefix+
"-NEXT: is not on the line after the previous match");
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
SourceMgr::DK_Note, "'next' match was here");
SM.PrintMessage(SMLoc::getFromPointer(LastMatch), SourceMgr::DK_Note,
"previous match was here");
return 1;
}
}
for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size();
ChunkNo != e; ++ChunkNo) {
size_t MatchLen = 0;
size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion,
MatchLen,
VariableTable);
if (Pos == StringRef::npos) continue;
SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), SourceMgr::DK_Error,
CheckPrefix+"-NOT: string occurred!");
SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, SourceMgr::DK_Note,
CheckPrefix+"-NOT: pattern specified here");
return 1;
}
Buffer = Buffer.substr(MatchLen);
LastMatch = Buffer.data();
}
return 0;
}