// GenericTaintChecker.cpp
#include "ClangSACheckers.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include <climits>
using namespace clang;
using namespace ento;
namespace {
/// Checker that is invoked on every call expression, both before (PreStmt)
/// and after (PostStmt) the call. Before the call it looks for tainted data
/// reaching sensitive arguments and records which arguments / return value
/// should become tainted; after the call it applies that taint and marks the
/// results of known taint-source functions.
class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
check::PreStmt<CallExpr> > {
public:
/// Returns the address of a function-local static, used as this checker's tag.
static void *getTag() { static int Tag; return &Tag; }
/// Post-call hook: applies taint scheduled by the pre-call visit; if nothing
/// was applied, marks values produced by known taint sources.
void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
/// Pre-call hook: runs the sink checks first; if none of them reported,
/// records the taint this call will introduce or propagate.
void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
private:
/// Sentinel argument index. As a rule source it means "any argument that is
/// not a destination"; as a rule destination it means "every argument whose
/// type is a pointer/reference to non-const" (see TaintPropagationRule::process).
static const unsigned InvalidArgIndex = UINT_MAX;
/// Pseudo argument index that denotes the return value of the call.
static const unsigned ReturnValueIndex = UINT_MAX - 1;
/// Bug type shared by all reports of this checker; created on first use.
mutable std::unique_ptr<BugType> BT;
/// Creates BT if it has not been created yet.
inline void initBugType() const {
if (!BT)
BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
}
/// Runs all sink checks (format string, system call, buffer size) on the
/// call. Returns true as soon as one of them emitted a report.
bool checkPre(const CallExpr *CE, CheckerContext &C) const;
/// Pre-call: schedules taint propagation for the call by adding entries to
/// the TaintArgsOnPostVisit state set (via a propagation rule or preFscanf).
void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
/// Post-call: taints everything listed in TaintArgsOnPostVisit and clears
/// the set. Returns true if a new state transition was added.
bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
/// Post-call: taints the results of functions that are taint sources by
/// name (scanf, getenv, socket, ...).
void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
/// Returns true if E evaluates to the initial value of an extern "C"
/// variable whose name contains "stdin" and whose type is pointer to FILE.
static bool isStdin(const Expr *E, CheckerContext &C);
/// Returns the symbol stored at the location that Arg evaluates to, or null
/// if Arg's value is unknown/undefined, is not a location, or the stored
/// value has no symbol.
static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
/// Signature shared by the per-function handlers below. A handler returns
/// the state to transition to; a null state means "no transition".
typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
CheckerContext &C) const;
/// Post-call handler: taints what every argument after the first points to.
ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
/// Post-call handler: taints the return value unless the first argument is
/// spelled as one of a fixed list of local address-family names.
ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
/// Post-call handler: unconditionally taints the return value.
ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
/// Pre-call handler: if the first argument is tainted or is stdin, schedules
/// arguments from index 2 onwards to be tainted after the call.
ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
static const char MsgUncontrolledFormatString[];
/// Reports if the printf-style format argument of the call is tainted.
bool checkUncontrolledFormatString(const CallExpr *CE,
CheckerContext &C) const;
static const char MsgSanitizeSystemArgs[];
/// Reports if the first argument of a known command/exec-style function
/// (matched by Name) is tainted.
bool checkSystemCall(const CallExpr *CE, StringRef Name,
CheckerContext &C) const;
static const char MsgTaintedBufferSize[];
/// Reports if the size argument of a known memory/allocation function is
/// tainted.
bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
CheckerContext &C) const;
/// Emits a report with message Msg if E, or the symbol E points to, is
/// tainted. Returns true only if a report was emitted.
bool generateReportIfTainted(const Expr *E, const char Msg[],
CheckerContext &C) const;
typedef SmallVector<unsigned, 2> ArgVector;
/// Describes how taint flows through one function: if any source argument is
/// tainted, every destination becomes tainted after the call.
struct TaintPropagationRule {
/// Argument indices whose taint triggers the rule.
ArgVector SrcArgs;
/// Argument indices (or ReturnValueIndex) that become tainted.
ArgVector DstArgs;
/// Constructs the empty ("null") rule.
TaintPropagationRule() {}
/// One source, one destination; optionally the return value as well.
TaintPropagationRule(unsigned SArg,
unsigned DArg, bool TaintRet = false) {
SrcArgs.push_back(SArg);
DstArgs.push_back(DArg);
if (TaintRet)
DstArgs.push_back(ReturnValueIndex);
}
/// Two sources, one destination; optionally the return value as well.
TaintPropagationRule(unsigned SArg1, unsigned SArg2,
unsigned DArg, bool TaintRet = false) {
SrcArgs.push_back(SArg1);
SrcArgs.push_back(SArg2);
DstArgs.push_back(DArg);
if (TaintRet)
DstArgs.push_back(ReturnValueIndex);
}
/// Returns the rule for the given callee, or a null rule if none is known.
static TaintPropagationRule
getTaintPropagationRule(const FunctionDecl *FDecl,
StringRef Name,
CheckerContext &C);
inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
/// A rule without any source argument is treated as "no rule".
inline bool isNull() const { return SrcArgs.empty(); }
/// Returns true if ArgNum is listed among the destinations.
inline bool isDestinationArgument(unsigned ArgNum) const {
return (std::find(DstArgs.begin(),
DstArgs.end(), ArgNum) != DstArgs.end());
}
/// Returns true if E itself is tainted, E is stdin, or E has pointer type
/// and the symbol it points to is tainted.
static inline bool isTaintedOrPointsToTainted(const Expr *E,
ProgramStateRef State,
CheckerContext &C) {
return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
(E->getType().getTypePtr()->isPointerType() &&
State->isTainted(getPointedToSymbol(C, E))));
}
/// Pre-call: if a source is tainted, returns a state in which the
/// destinations are recorded in TaintArgsOnPostVisit; otherwise returns the
/// current state unchanged.
ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
};
};
// Out-of-class definitions for the static constants that are initialized
// inside the class body.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;
// Report message used by checkUncontrolledFormatString().
const char GenericTaintChecker::MsgUncontrolledFormatString[] =
"Untrusted data is used as a format string "
"(CWE-134: Uncontrolled Format String)";
// Report message used by checkSystemCall().
const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
"Untrusted data is passed to a system call "
"(CERT/STR02-C. Sanitize data passed to complex subsystems)";
// Report message used by checkTaintedBufferSize().
const char GenericTaintChecker::MsgTaintedBufferSize[] =
"Untrusted data is used to specify the buffer size "
"(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
"character data and the null terminator)";
}
// Program-state set of unsigned indices. The pre-call visit fills it with the
// argument indices (or ReturnValueIndex) that must be tainted once the call
// has been evaluated; propagateFromPre() consumes and clears it.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
/// Selects the taint propagation rule for a callee.
///
/// The lookup happens in three stages: an exact match on the callee name, a
/// match on the memory-function builtin kind of the declaration, and finally
/// a match against a list of C library functions. The first stage that
/// produces a rule wins.
///
/// \param FDecl Declaration of the callee; dereferenced unconditionally.
/// \param Name  Name of the callee.
/// \param C     Context used for the C-library-function queries.
/// \returns The matching rule, or a null rule (isNull() is true) if the
///          callee is not recognized.
GenericTaintChecker::TaintPropagationRule
GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
    const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
  // Stage 1: functions identified purely by their name.
  TaintPropagationRule NamedRule =
      llvm::StringSwitch<TaintPropagationRule>(Name)
          .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
          .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
          .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
          .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
          .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
          .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
          .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
          .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
          .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
          .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
          .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
          .Case("read", TaintPropagationRule(0, 2, 1, true))
          .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
          .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
          .Case("fgets", TaintPropagationRule(2, 0, true))
          .Case("getline", TaintPropagationRule(2, 0))
          .Case("getdelim", TaintPropagationRule(3, 0))
          .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
          .Default(TaintPropagationRule());
  if (!NamedRule.isNull())
    return NamedRule;

  // Stage 2: memory/string functions identified by builtin kind. A kind of
  // zero matches no case label and falls through to the default.
  switch (FDecl->getMemoryFunctionKind()) {
  case Builtin::BImemcpy:
  case Builtin::BImemmove:
  case Builtin::BIstrncpy:
  case Builtin::BIstrncat:
    return TaintPropagationRule(1, 2, 0, true);
  case Builtin::BIstrlcpy:
  case Builtin::BIstrlcat:
    return TaintPropagationRule(1, 2, 0, false);
  case Builtin::BIstrndup:
    return TaintPropagationRule(0, 1, ReturnValueIndex);
  default:
    break;
  }

  // Stage 3: remaining C library functions.
  if (C.isCLibraryFunction(FDecl, "snprintf") ||
      C.isCLibraryFunction(FDecl, "sprintf"))
    return TaintPropagationRule(InvalidArgIndex, 0, true);

  if (C.isCLibraryFunction(FDecl, "strcpy") ||
      C.isCLibraryFunction(FDecl, "stpcpy") ||
      C.isCLibraryFunction(FDecl, "strcat"))
    return TaintPropagationRule(1, 0, true);

  if (C.isCLibraryFunction(FDecl, "bcopy"))
    return TaintPropagationRule(0, 2, 1, false);

  if (C.isCLibraryFunction(FDecl, "strdup") ||
      C.isCLibraryFunction(FDecl, "strdupa") ||
      C.isCLibraryFunction(FDecl, "wcsdup"))
    return TaintPropagationRule(0, ReturnValueIndex);

  // Unknown callee: no propagation.
  return TaintPropagationRule();
}
/// Pre-call visit. Sink checks run first; the taint bookkeeping for the call
/// is only performed when none of them produced a report.
void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
                                       CheckerContext &C) const {
  const bool Reported = checkPre(CE, C);
  if (!Reported)
    addSourcesPre(CE, C);
}
/// Post-call visit. Taint scheduled during the pre-call visit takes
/// precedence; the by-name taint sources are only consulted when no scheduled
/// taint was applied.
void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
                                        CheckerContext &C) const {
  const bool Propagated = propagateFromPre(CE, C);
  if (!Propagated)
    addSourcesPost(CE, C);
}
void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
CheckerContext &C) const {
ProgramStateRef State = nullptr;
const FunctionDecl *FDecl = C.getCalleeDecl(CE);
if (!FDecl || FDecl->getKind() != Decl::Function)
return;
StringRef Name = C.getCalleeName(FDecl);
if (Name.empty())
return;
TaintPropagationRule Rule =
TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
if (!Rule.isNull()) {
State = Rule.process(CE, C);
if (!State)
return;
C.addTransition(State);
return;
}
FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
.Case("fscanf", &GenericTaintChecker::preFscanf)
.Default(nullptr);
if (evalFunction)
State = (this->*evalFunction)(CE, C);
if (!State)
return;
C.addTransition(State);
}
bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
if (TaintArgs.isEmpty())
return false;
for (llvm::ImmutableSet<unsigned>::iterator
I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
unsigned ArgNum = *I;
if (ArgNum == ReturnValueIndex) {
State = State->addTaint(CE, C.getLocationContext());
continue;
}
if (CE->getNumArgs() < (ArgNum + 1))
return false;
const Expr* Arg = CE->getArg(ArgNum);
SymbolRef Sym = getPointedToSymbol(C, Arg);
if (Sym)
State = State->addTaint(Sym);
}
State = State->remove<TaintArgsOnPostVisit>();
if (State != C.getState()) {
C.addTransition(State);
return true;
}
return false;
}
void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
CheckerContext &C) const {
const FunctionDecl *FDecl = C.getCalleeDecl(CE);
if (!FDecl || FDecl->getKind() != Decl::Function)
return;
StringRef Name = C.getCalleeName(FDecl);
if (Name.empty())
return;
FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
.Case("scanf", &GenericTaintChecker::postScanf)
.Case("getchar", &GenericTaintChecker::postRetTaint)
.Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
.Case("getenv", &GenericTaintChecker::postRetTaint)
.Case("fopen", &GenericTaintChecker::postRetTaint)
.Case("fdopen", &GenericTaintChecker::postRetTaint)
.Case("freopen", &GenericTaintChecker::postRetTaint)
.Case("getch", &GenericTaintChecker::postRetTaint)
.Case("wgetch", &GenericTaintChecker::postRetTaint)
.Case("socket", &GenericTaintChecker::postSocket)
.Default(nullptr);
ProgramStateRef State = nullptr;
if (evalFunction)
State = (this->*evalFunction)(CE, C);
if (!State)
return;
C.addTransition(State);
}
bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
if (checkUncontrolledFormatString(CE, C))
return true;
const FunctionDecl *FDecl = C.getCalleeDecl(CE);
if (!FDecl || FDecl->getKind() != Decl::Function)
return false;
StringRef Name = C.getCalleeName(FDecl);
if (Name.empty())
return false;
if (checkSystemCall(CE, Name, C))
return true;
if (checkTaintedBufferSize(CE, FDecl, C))
return true;
return false;
}
/// Returns the symbol of the value stored at the location \p Arg evaluates
/// to in the current state.
///
/// \returns nullptr if the value of \p Arg is unknown or undefined, if it is
///          not a location, or if the stored value is not backed by a symbol.
SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
                                                  const Expr *Arg) {
  ProgramStateRef State = C.getState();

  SVal ArgVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
  if (ArgVal.isUnknownOrUndef())
    return nullptr;

  Optional<Loc> Pointer = ArgVal.getAs<Loc>();
  if (!Pointer)
    return nullptr;

  // Load with the pointee type when the argument's canonical type is a
  // pointer; otherwise load with a null type.
  QualType LoadTy;
  if (const PointerType *PtrTy = dyn_cast<PointerType>(
          Arg->getType().getCanonicalType().getTypePtr()))
    LoadTy = PtrTy->getPointeeType();

  SVal Stored = State->getSVal(*Pointer, LoadTy);
  return Stored.getAsSymbol();
}
/// Applies this rule to a call during the pre-call visit.
///
/// Step 1 decides whether the rule fires: the source arguments are examined
/// in order and the search stops at the first tainted one. The source
/// InvalidArgIndex stands for "any argument that is not a destination" and
/// ends the search regardless of the outcome. If a regular source index is
/// out of range for the call, the current state is returned unchanged.
///
/// Step 2 records every destination in TaintArgsOnPostVisit. The destination
/// InvalidArgIndex stands for every argument whose type is a pointer to
/// non-const, or a non-const-qualified reference type.
///
/// \returns The state with the destinations recorded, or the current state
///          when no source is tainted.
ProgramStateRef
GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
                                                   CheckerContext &C) const {
  ProgramStateRef State = C.getState();
  const unsigned NumArgs = CE->getNumArgs();

  // Step 1: is any source tainted?
  bool SourceTainted = false;
  for (unsigned SrcIdx : SrcArgs) {
    if (SrcIdx == InvalidArgIndex) {
      for (unsigned Idx = 0; Idx != NumArgs; ++Idx) {
        if (isDestinationArgument(Idx))
          continue;
        if (isTaintedOrPointsToTainted(CE->getArg(Idx), State, C)) {
          SourceTainted = true;
          break;
        }
      }
      // All candidate arguments were considered; no further sources matter.
      break;
    }

    if (SrcIdx >= NumArgs)
      return State;

    if (isTaintedOrPointsToTainted(CE->getArg(SrcIdx), State, C)) {
      SourceTainted = true;
      break;
    }
  }

  if (!SourceTainted)
    return State;

  // Step 2: schedule the destinations for tainting after the call.
  for (unsigned DstIdx : DstArgs) {
    if (DstIdx == InvalidArgIndex) {
      for (unsigned Idx = 0; Idx != NumArgs; ++Idx) {
        const Expr *Arg = CE->getArg(Idx);
        const Type *ArgTy = Arg->getType().getTypePtr();
        QualType Pointee = ArgTy->getPointeeType();
        const bool PointsToMutable =
            !Pointee.isNull() && !Pointee.isConstQualified();
        const bool IsMutableRef =
            ArgTy->isReferenceType() && !Arg->getType().isConstQualified();
        if (PointsToMutable || IsMutableRef)
          State = State->add<TaintArgsOnPostVisit>(Idx);
      }
    } else if (DstIdx == ReturnValueIndex) {
      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
    } else {
      assert(DstIdx < NumArgs);
      State = State->add<TaintArgsOnPostVisit>(DstIdx);
    }
  }

  return State;
}
/// Pre-call handler for fscanf.
///
/// If the stream (argument 0) is tainted or is stdin, every argument from
/// index 2 onwards is scheduled to be tainted after the call.
///
/// \returns The updated state, or null when the stream is not a taint source.
ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
                                               CheckerContext &C) const {
  assert(CE->getNumArgs() >= 2);
  ProgramStateRef State = C.getState();

  const Expr *Stream = CE->getArg(0);
  const bool StreamIsSource =
      State->isTainted(Stream, C.getLocationContext()) || isStdin(Stream, C);
  if (!StreamIsSource)
    return nullptr;

  // Arguments 0 and 1 are the stream and the format; the rest are outputs.
  for (unsigned Idx = 2, End = CE->getNumArgs(); Idx < End; ++Idx)
    State = State->add<TaintArgsOnPostVisit>(Idx);
  return State;
}
/// Post-call handler for socket.
///
/// The return value is tainted unless the call has fewer than three
/// arguments or the first argument is spelled as one of the address-family
/// names listed below; in those cases the current state is returned as is.
ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
                                                CheckerContext &C) const {
  ProgramStateRef State = C.getState();
  if (CE->getNumArgs() < 3)
    return State;

  // Look at how the domain argument is written in the source.
  SourceLocation DomainLoc = CE->getArg(0)->getExprLoc();
  StringRef Domain = C.getMacroNameOrSpelling(DomainLoc);

  const bool IsExemptDomain = Domain == "AF_SYSTEM" || Domain == "AF_LOCAL" ||
                              Domain == "AF_UNIX" ||
                              Domain == "AF_RESERVED_36";
  if (IsExemptDomain)
    return State;

  return State->addTaint(CE, C.getLocationContext());
}
/// Post-call handler for scanf.
///
/// For every argument after the format string, the symbol the argument
/// points to (if any) is tainted. With fewer than two arguments the current
/// state is returned unchanged.
ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
                                               CheckerContext &C) const {
  ProgramStateRef State = C.getState();

  // Starting at 1 skips the format string; the loop body never runs when
  // the call has fewer than two arguments.
  for (unsigned Idx = 1, End = CE->getNumArgs(); Idx < End; ++Idx) {
    if (SymbolRef Pointee = getPointedToSymbol(C, CE->getArg(Idx)))
      State = State->addTaint(Pointee);
  }
  return State;
}
/// Post-call handler that unconditionally taints the value of the call
/// expression in the current location context.
ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
                                                  CheckerContext &C) const {
  ProgramStateRef Current = C.getState();
  return Current->addTaint(CE, C.getLocationContext());
}
bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
ProgramStateRef State = C.getState();
SVal Val = State->getSVal(E, C.getLocationContext());
const MemRegion *MemReg = Val.getAsRegion();
const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
if (!SymReg)
return false;
const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
if (!Sm)
return false;
const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
if (!DeclReg)
return false;
if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
D = D->getCanonicalDecl();
if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
if (const PointerType * PtrTy =
dyn_cast<PointerType>(D->getType().getTypePtr()))
if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
return true;
}
return false;
}
/// Finds the index of the printf-style format argument of a call.
///
/// Two sources of information are used, in order:
///  1. `format` attributes on the callee whose type is "printf". The
///     attribute's index is one-based, hence the subtraction.
///  2. A heuristic: any callee whose name contains "setproctitle" is assumed
///     to take the format string as its first argument.
///
/// \param CE     The call being examined.
/// \param C      Context used to resolve the callee.
/// \param ArgNum Receives the zero-based index of the format argument. Only
///               meaningful when the function returns true; it may have been
///               overwritten even when the function returns false.
/// \returns true if a format argument was identified AND the call actually
///          has an argument at that index, so that the caller may safely use
///          CE->getArg(ArgNum).
static bool getPrintfFormatArgumentNum(const CallExpr *CE,
                                       const CheckerContext &C,
                                       unsigned int &ArgNum) {
  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  if (!FDecl)
    return false;

  for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
    ArgNum = Format->getFormatIdx() - 1;
    if ((Format->getType()->getName() == "printf") &&
        CE->getNumArgs() > ArgNum)
      return true;
  }

  // The name match is a substring heuristic, so it can hit functions that
  // take no arguments at all. Require the first argument to exist, mirroring
  // the bounds check done for the attribute case above; otherwise the caller
  // would index past the end of the argument list.
  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos &&
      CE->getNumArgs() > 0) {
    ArgNum = 0;
    return true;
  }

  return false;
}
bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
const char Msg[],
CheckerContext &C) const {
assert(E);
ProgramStateRef State = C.getState();
if (!State->isTainted(getPointedToSymbol(C, E)) &&
!State->isTainted(E, C.getLocationContext()))
return false;
if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
initBugType();
auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
report->addRange(E->getSourceRange());
C.emitReport(std::move(report));
return true;
}
return false;
}
/// Sink check: reports when the printf-style format argument of the call is
/// tainted.
///
/// \returns true if a report was emitted; false if the call has no
///          identifiable format argument or that argument is not tainted.
bool GenericTaintChecker::checkUncontrolledFormatString(
    const CallExpr *CE, CheckerContext &C) const {
  unsigned int FormatIdx = 0;
  if (getPrintfFormatArgumentNum(CE, C, FormatIdx))
    return generateReportIfTainted(CE->getArg(FormatIdx),
                                   MsgUncontrolledFormatString, C);
  return false;
}
/// Sink check: reports when tainted data is passed as the first argument of
/// one of the command-execution / library-loading functions listed below.
///
/// \param Name Name of the callee, used for the table lookup.
/// \returns true if a report was emitted; false if the callee is not in the
///          table, the call lacks the argument, or the argument is clean.
bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
                                          StringRef Name,
                                          CheckerContext &C) const {
  // Maps a function name to the index of its sensitive argument.
  const unsigned SensitiveIdx = llvm::StringSwitch<unsigned>(Name)
                                    .Case("system", 0)
                                    .Case("popen", 0)
                                    .Case("execl", 0)
                                    .Case("execle", 0)
                                    .Case("execlp", 0)
                                    .Case("execv", 0)
                                    .Case("execvp", 0)
                                    .Case("execvP", 0)
                                    .Case("execve", 0)
                                    .Case("dlopen", 0)
                                    .Default(UINT_MAX);

  if (SensitiveIdx == UINT_MAX)
    return false;
  if (CE->getNumArgs() <= SensitiveIdx)
    return false;

  return generateReportIfTainted(CE->getArg(SensitiveIdx),
                                 MsgSanitizeSystemArgs, C);
}
/// Sink check: reports when the size argument of a known memory or
/// allocation function is tainted.
///
/// The index of the size argument is determined first from the callee's
/// memory-function builtin kind and, failing that, from a list of C library
/// functions.
///
/// \param FDecl Declaration of the callee; dereferenced unconditionally.
/// \returns true if a report was emitted; false if the callee is not
///          recognized, the call lacks the argument, or the argument is clean.
bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
                                                 const FunctionDecl *FDecl,
                                                 CheckerContext &C) const {
  unsigned SizeIdx = InvalidArgIndex;

  // Builtin memory functions. A kind of zero matches no case label.
  switch (FDecl->getMemoryFunctionKind()) {
  case Builtin::BImemcpy:
  case Builtin::BImemmove:
  case Builtin::BIstrncpy:
    SizeIdx = 2;
    break;
  case Builtin::BIstrndup:
    SizeIdx = 1;
    break;
  default:
    break;
  }

  // Other C library functions, consulted only when no builtin matched.
  if (SizeIdx == InvalidArgIndex) {
    if (C.isCLibraryFunction(FDecl, "malloc") ||
        C.isCLibraryFunction(FDecl, "calloc") ||
        C.isCLibraryFunction(FDecl, "alloca")) {
      SizeIdx = 0;
    } else if (C.isCLibraryFunction(FDecl, "memccpy")) {
      SizeIdx = 3;
    } else if (C.isCLibraryFunction(FDecl, "realloc")) {
      SizeIdx = 1;
    } else if (C.isCLibraryFunction(FDecl, "bcopy")) {
      SizeIdx = 2;
    }
  }

  if (SizeIdx == InvalidArgIndex)
    return false;
  if (CE->getNumArgs() <= SizeIdx)
    return false;

  return generateReportIfTainted(CE->getArg(SizeIdx), MsgTaintedBufferSize, C);
}
/// Registration entry point: registers GenericTaintChecker with the given
/// checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
mgr.registerChecker<GenericTaintChecker>();
}