RangeConstraintManager.cpp   [plain text]


//== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file defines RangeConstraintManager, a class that tracks simple
//  equality and inequality constraints on symbolic values of ProgramState.
//
//===----------------------------------------------------------------------===//

#include "SimpleConstraintManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace ento;

namespace { class ConstraintRange {}; }
static int ConstraintRangeIndex = 0;

/// A Range represents the closed range [from, to].  The caller must
/// guarantee that from <= to.  Note that Range is immutable, so as not
/// to subvert RangeSet's immutability.
namespace {
class Range : public std::pair<const llvm::APSInt*,
                                                const llvm::APSInt*> {
public:
  Range(const llvm::APSInt &from, const llvm::APSInt &to)
    : std::pair<const llvm::APSInt*, const llvm::APSInt*>(&from, &to) {
    assert(from <= to);
  }
  bool Includes(const llvm::APSInt &v) const {
    return *first <= v && v <= *second;
  }
  const llvm::APSInt &From() const {
    return *first;
  }
  const llvm::APSInt &To() const {
    return *second;
  }
  const llvm::APSInt *getConcreteValue() const {
    return &From() == &To() ? &From() : NULL;
  }

  void Profile(llvm::FoldingSetNodeID &ID) const {
    ID.AddPointer(&From());
    ID.AddPointer(&To());
  }
};


class RangeTrait : public llvm::ImutContainerInfo<Range> {
public:
  // When comparing if one Range is less than another, we should compare
  // the actual APSInt values instead of their pointers.  This keeps the order
  // consistent (instead of comparing by pointer values) and can potentially
  // be used to speed up some of the operations in RangeSet.
  static inline bool isLess(key_type_ref lhs, key_type_ref rhs) {
    return *lhs.first < *rhs.first || (!(*rhs.first < *lhs.first) &&
                                       *lhs.second < *rhs.second);
  }
};

/// RangeSet contains a set of ranges. If the set is empty, then
///  there the value of a symbol is overly constrained and there are no
///  possible values for that symbol.
class RangeSet {
  typedef llvm::ImmutableSet<Range, RangeTrait> PrimRangeSet;
  PrimRangeSet ranges; // no need to make const, since it is an
                       // ImmutableSet - this allows default operator=
                       // to work.
public:
  typedef PrimRangeSet::Factory Factory;
  typedef PrimRangeSet::iterator iterator;

  RangeSet(PrimRangeSet RS) : ranges(RS) {}

  iterator begin() const { return ranges.begin(); }
  iterator end() const { return ranges.end(); }

  bool isEmpty() const { return ranges.isEmpty(); }

  /// Construct a new RangeSet representing '{ [from, to] }'.
  RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to)
    : ranges(F.add(F.getEmptySet(), Range(from, to))) {}

  /// Profile - Generates a hash profile of this RangeSet for use
  ///  by FoldingSet.
  void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); }

  /// getConcreteValue - If a symbol is contrained to equal a specific integer
  ///  constant then this method returns that value.  Otherwise, it returns
  ///  NULL.
  const llvm::APSInt* getConcreteValue() const {
    return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0;
  }

private:
  void IntersectInRange(BasicValueFactory &BV, Factory &F,
                        const llvm::APSInt &Lower,
                        const llvm::APSInt &Upper,
                        PrimRangeSet &newRanges,
                        PrimRangeSet::iterator &i,
                        PrimRangeSet::iterator &e) const {
    // There are six cases for each range R in the set:
    //   1. R is entirely before the intersection range.
    //   2. R is entirely after the intersection range.
    //   3. R contains the entire intersection range.
    //   4. R starts before the intersection range and ends in the middle.
    //   5. R starts in the middle of the intersection range and ends after it.
    //   6. R is entirely contained in the intersection range.
    // These correspond to each of the conditions below.
    for (/* i = begin(), e = end() */; i != e; ++i) {
      if (i->To() < Lower) {
        continue;
      }
      if (i->From() > Upper) {
        break;
      }

      if (i->Includes(Lower)) {
        if (i->Includes(Upper)) {
          newRanges = F.add(newRanges, Range(BV.getValue(Lower),
                                             BV.getValue(Upper)));
          break;
        } else
          newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To()));
      } else {
        if (i->Includes(Upper)) {
          newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper)));
          break;
        } else
          newRanges = F.add(newRanges, *i);
      }
    }
  }

  const llvm::APSInt &getMinValue() const {
    assert(!isEmpty());
    return ranges.begin()->From();
  }

  bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
    // This function has nine cases, the cartesian product of range-testing
    // both the upper and lower bounds against the symbol's type.
    // Each case requires a different pinning operation.
    // The function returns false if the described range is entirely outside
    // the range of values for the associated symbol.
    APSIntType Type(getMinValue());
    APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower);
    APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper);

    switch (LowerTest) {
    case APSIntType::RTR_Below:
      switch (UpperTest) {
      case APSIntType::RTR_Below:
        // The entire range is outside the symbol's set of possible values.
        // If this is a conventionally-ordered range, the state is infeasible.
        if (Lower < Upper)
          return false;

        // However, if the range wraps around, it spans all possible values.
        Lower = Type.getMinValue();
        Upper = Type.getMaxValue();
        break;
      case APSIntType::RTR_Within:
        // The range starts below what's possible but ends within it. Pin.
        Lower = Type.getMinValue();
        Type.apply(Upper);
        break;
      case APSIntType::RTR_Above:
        // The range spans all possible values for the symbol. Pin.
        Lower = Type.getMinValue();
        Upper = Type.getMaxValue();
        break;
      }
      break;
    case APSIntType::RTR_Within:
      switch (UpperTest) {
      case APSIntType::RTR_Below:
        // The range wraps around, but all lower values are not possible.
        Type.apply(Lower);
        Upper = Type.getMaxValue();
        break;
      case APSIntType::RTR_Within:
        // The range may or may not wrap around, but both limits are valid.
        Type.apply(Lower);
        Type.apply(Upper);
        break;
      case APSIntType::RTR_Above:
        // The range starts within what's possible but ends above it. Pin.
        Type.apply(Lower);
        Upper = Type.getMaxValue();
        break;
      }
      break;
    case APSIntType::RTR_Above:
      switch (UpperTest) {
      case APSIntType::RTR_Below:
        // The range wraps but is outside the symbol's set of possible values.
        return false;
      case APSIntType::RTR_Within:
        // The range starts above what's possible but ends within it (wrap).
        Lower = Type.getMinValue();
        Type.apply(Upper);
        break;
      case APSIntType::RTR_Above:
        // The entire range is outside the symbol's set of possible values.
        // If this is a conventionally-ordered range, the state is infeasible.
        if (Lower < Upper)
          return false;

        // However, if the range wraps around, it spans all possible values.
        Lower = Type.getMinValue();
        Upper = Type.getMaxValue();
        break;
      }
      break;
    }

    return true;
  }

public:
  // Returns a set containing the values in the receiving set, intersected with
  // the closed range [Lower, Upper]. Unlike the Range type, this range uses
  // modular arithmetic, corresponding to the common treatment of C integer
  // overflow. Thus, if the Lower bound is greater than the Upper bound, the
  // range is taken to wrap around. This is equivalent to taking the
  // intersection with the two ranges [Min, Upper] and [Lower, Max],
  // or, alternatively, /removing/ all integers between Upper and Lower.
  RangeSet Intersect(BasicValueFactory &BV, Factory &F,
                     llvm::APSInt Lower, llvm::APSInt Upper) const {
    if (!pin(Lower, Upper))
      return F.getEmptySet();

    PrimRangeSet newRanges = F.getEmptySet();

    PrimRangeSet::iterator i = begin(), e = end();
    if (Lower <= Upper)
      IntersectInRange(BV, F, Lower, Upper, newRanges, i, e);
    else {
      // The order of the next two statements is important!
      // IntersectInRange() does not reset the iteration state for i and e.
      // Therefore, the lower range most be handled first.
      IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e);
      IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e);
    }

    return newRanges;
  }

  void print(raw_ostream &os) const {
    bool isFirst = true;
    os << "{ ";
    for (iterator i = begin(), e = end(); i != e; ++i) {
      if (isFirst)
        isFirst = false;
      else
        os << ", ";

      os << '[' << i->From().toString(10) << ", " << i->To().toString(10)
         << ']';
    }
    os << " }";
  }

  bool operator==(const RangeSet &other) const {
    return ranges == other.ranges;
  }
};
} // end anonymous namespace

typedef llvm::ImmutableMap<SymbolRef,RangeSet> ConstraintRangeTy;

namespace clang {
namespace ento {
template<>
struct ProgramStateTrait<ConstraintRange>
  : public ProgramStatePartialTrait<ConstraintRangeTy> {
  static inline void *GDMIndex() { return &ConstraintRangeIndex; }
};
}
}

namespace {
class RangeConstraintManager : public SimpleConstraintManager{
  RangeSet GetRange(ProgramStateRef state, SymbolRef sym);
public:
  RangeConstraintManager(SubEngine *subengine, BasicValueFactory &BVF)
    : SimpleConstraintManager(subengine, BVF) {}

  ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym,
                             const llvm::APSInt& Int,
                             const llvm::APSInt& Adjustment);

  ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym,
                             const llvm::APSInt& Int,
                             const llvm::APSInt& Adjustment);

  ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym,
                             const llvm::APSInt& Int,
                             const llvm::APSInt& Adjustment);

  ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym,
                             const llvm::APSInt& Int,
                             const llvm::APSInt& Adjustment);

  ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym,
                             const llvm::APSInt& Int,
                             const llvm::APSInt& Adjustment);

  ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym,
                             const llvm::APSInt& Int,
                             const llvm::APSInt& Adjustment);

  const llvm::APSInt* getSymVal(ProgramStateRef St, SymbolRef sym) const;
  ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym);

  ProgramStateRef removeDeadBindings(ProgramStateRef St, SymbolReaper& SymReaper);

  void print(ProgramStateRef St, raw_ostream &Out,
             const char* nl, const char *sep);

private:
  RangeSet::Factory F;
};

} // end anonymous namespace

ConstraintManager *
ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, SubEngine *Eng) {
  return new RangeConstraintManager(Eng, StMgr.getBasicVals());
}

const llvm::APSInt* RangeConstraintManager::getSymVal(ProgramStateRef St,
                                                      SymbolRef sym) const {
  const ConstraintRangeTy::data_type *T = St->get<ConstraintRange>(sym);
  return T ? T->getConcreteValue() : NULL;
}

ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
                                                    SymbolRef Sym) {
  const RangeSet *Ranges = State->get<ConstraintRange>(Sym);

  // If we don't have any information about this symbol, it's underconstrained.
  if (!Ranges)
    return ConditionTruthVal();

  // If we have a concrete value, see if it's zero.
  if (const llvm::APSInt *Value = Ranges->getConcreteValue())
    return *Value == 0;

  BasicValueFactory &BV = getBasicVals();
  APSIntType IntType = BV.getAPSIntType(Sym->getType());
  llvm::APSInt Zero = IntType.getZeroValue();

  // Check if zero is in the set of possible values.
  if (Ranges->Intersect(BV, F, Zero, Zero).isEmpty())
    return false;

  // Zero is a possible value, but it is not the /only/ possible value.
  return ConditionTruthVal();
}

/// Scan all symbols referenced by the constraints. If the symbol is not alive
/// as marked in LSymbols, mark it as dead in DSymbols.
ProgramStateRef 
RangeConstraintManager::removeDeadBindings(ProgramStateRef state,
                                           SymbolReaper& SymReaper) {

  ConstraintRangeTy CR = state->get<ConstraintRange>();
  ConstraintRangeTy::Factory& CRFactory = state->get_context<ConstraintRange>();

  for (ConstraintRangeTy::iterator I = CR.begin(), E = CR.end(); I != E; ++I) {
    SymbolRef sym = I.getKey();
    if (SymReaper.maybeDead(sym))
      CR = CRFactory.remove(CR, sym);
  }

  return state->set<ConstraintRange>(CR);
}

RangeSet
RangeConstraintManager::GetRange(ProgramStateRef state, SymbolRef sym) {
  if (ConstraintRangeTy::data_type* V = state->get<ConstraintRange>(sym))
    return *V;

  // Lazily generate a new RangeSet representing all possible values for the
  // given symbol type.
  BasicValueFactory &BV = getBasicVals();
  QualType T = sym->getType();

  RangeSet Result(F, BV.getMinValue(T), BV.getMaxValue(T));

  // Special case: references are known to be non-zero.
  if (T->isReferenceType()) {
    APSIntType IntType = BV.getAPSIntType(T);
    Result = Result.Intersect(BV, F, ++IntType.getZeroValue(),
                                     --IntType.getZeroValue());
  }

  return Result;
}

//===------------------------------------------------------------------------===
// assumeSymX methods: public interface for RangeConstraintManager.
//===------------------------------------------------------------------------===/

// The syntax for ranges below is mathematical, using [x, y] for closed ranges
// and (x, y) for open ranges. These ranges are modular, corresponding with
// a common treatment of C integer overflow. This means that these methods
// do not have to worry about overflow; RangeSet::Intersect can handle such a
// "wraparound" range.
// As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
// UINT_MAX, 0, 1, and 2.

ProgramStateRef 
RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
                                    const llvm::APSInt &Int,
                                    const llvm::APSInt &Adjustment) {
  // Before we do any real work, see if the value can even show up.
  APSIntType AdjustmentType(Adjustment);
  if (AdjustmentType.testInRange(Int) != APSIntType::RTR_Within)
    return St;

  llvm::APSInt Lower = AdjustmentType.convert(Int) - Adjustment;
  llvm::APSInt Upper = Lower;
  --Lower;
  ++Upper;

  // [Int-Adjustment+1, Int-Adjustment-1]
  // Notice that the lower bound is greater than the upper bound.
  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Upper, Lower);
  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
}

ProgramStateRef 
RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
                                    const llvm::APSInt &Int,
                                    const llvm::APSInt &Adjustment) {
  // Before we do any real work, see if the value can even show up.
  APSIntType AdjustmentType(Adjustment);
  if (AdjustmentType.testInRange(Int) != APSIntType::RTR_Within)
    return NULL;

  // [Int-Adjustment, Int-Adjustment]
  llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, AdjInt, AdjInt);
  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
}

ProgramStateRef 
RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
                                    const llvm::APSInt &Int,
                                    const llvm::APSInt &Adjustment) {
  // Before we do any real work, see if the value can even show up.
  APSIntType AdjustmentType(Adjustment);
  switch (AdjustmentType.testInRange(Int)) {
  case APSIntType::RTR_Below:
    return NULL;
  case APSIntType::RTR_Within:
    break;
  case APSIntType::RTR_Above:
    return St;
  }

  // Special case for Int == Min. This is always false.
  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  llvm::APSInt Min = AdjustmentType.getMinValue();
  if (ComparisonVal == Min)
    return NULL;

  llvm::APSInt Lower = Min-Adjustment;
  llvm::APSInt Upper = ComparisonVal-Adjustment;
  --Upper;

  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
}

ProgramStateRef 
RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
                                    const llvm::APSInt &Int,
                                    const llvm::APSInt &Adjustment) {
  // Before we do any real work, see if the value can even show up.
  APSIntType AdjustmentType(Adjustment);
  switch (AdjustmentType.testInRange(Int)) {
  case APSIntType::RTR_Below:
    return St;
  case APSIntType::RTR_Within:
    break;
  case APSIntType::RTR_Above:
    return NULL;
  }

  // Special case for Int == Max. This is always false.
  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  llvm::APSInt Max = AdjustmentType.getMaxValue();
  if (ComparisonVal == Max)
    return NULL;

  llvm::APSInt Lower = ComparisonVal-Adjustment;
  llvm::APSInt Upper = Max-Adjustment;
  ++Lower;

  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
}

ProgramStateRef 
RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
                                    const llvm::APSInt &Int,
                                    const llvm::APSInt &Adjustment) {
  // Before we do any real work, see if the value can even show up.
  APSIntType AdjustmentType(Adjustment);
  switch (AdjustmentType.testInRange(Int)) {
  case APSIntType::RTR_Below:
    return St;
  case APSIntType::RTR_Within:
    break;
  case APSIntType::RTR_Above:
    return NULL;
  }

  // Special case for Int == Min. This is always feasible.
  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  llvm::APSInt Min = AdjustmentType.getMinValue();
  if (ComparisonVal == Min)
    return St;

  llvm::APSInt Max = AdjustmentType.getMaxValue();
  llvm::APSInt Lower = ComparisonVal-Adjustment;
  llvm::APSInt Upper = Max-Adjustment;

  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
}

ProgramStateRef 
RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
                                    const llvm::APSInt &Int,
                                    const llvm::APSInt &Adjustment) {
  // Before we do any real work, see if the value can even show up.
  APSIntType AdjustmentType(Adjustment);
  switch (AdjustmentType.testInRange(Int)) {
  case APSIntType::RTR_Below:
    return NULL;
  case APSIntType::RTR_Within:
    break;
  case APSIntType::RTR_Above:
    return St;
  }

  // Special case for Int == Max. This is always feasible.
  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
  llvm::APSInt Max = AdjustmentType.getMaxValue();
  if (ComparisonVal == Max)
    return St;

  llvm::APSInt Min = AdjustmentType.getMinValue();
  llvm::APSInt Lower = Min-Adjustment;
  llvm::APSInt Upper = ComparisonVal-Adjustment;

  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
}

//===------------------------------------------------------------------------===
// Pretty-printing.
//===------------------------------------------------------------------------===/

void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out,
                                   const char* nl, const char *sep) {

  ConstraintRangeTy Ranges = St->get<ConstraintRange>();

  if (Ranges.isEmpty()) {
    Out << nl << sep << "Ranges are empty." << nl;
    return;
  }

  Out << nl << sep << "Ranges of symbol values:";
  for (ConstraintRangeTy::iterator I=Ranges.begin(), E=Ranges.end(); I!=E; ++I){
    Out << nl << ' ' << I.getKey() << " : ";
    I.getData().print(Out);
  }
  Out << nl;
}