//
//  IMPCaches.hpp
//  dyld_shared_cache_builder
//
//  Created by Thomas Deniau on 18/12/2019.
//

#ifndef IMPCaches_hpp
#define IMPCaches_hpp

#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <set>
#include <map>
#include <string>
#include <string_view>
#include <random>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <optional>
#include <ostream>
#include <tuple>

#define DEBUG_SLOTS 1

template <typename P> class objc_method_t;

namespace IMPCaches {
class Selector;
class Constraint;

class ClassData {
private:
#if DEBUG_SLOTS
    std::vector<const Selector*> slots;
#else
    std::vector<bool> slots;
#endif

    // Scratchpad for constraintForMethod
    std::vector<int> allowedValues;

    void resetSlots();

public:
    bool isMetaclass;

    // True most of the time, but can also be false in 2 cases:
    // * We have entries for classes for which we don't want to generate IMP caches,
    //   when they are superclasses of "interesting" classes (= classes for which we do
    //   want an IMP cache), so that we can properly attach non-cross-image categories
    //   to them and reference the right IMP for child classes which are actually interesting.
    // * We may also have failed to generate a cache for this class, or for a class
    //   in its flattening superclass hierarchy.
    bool shouldGenerateImpCache;

    // Class has duplicates or is a child of a class with duplicates.
    bool isPartOfDuplicateSet = false;

    // This is set when we drop a class because a class in its flattening superclass
    // hierarchy was dropped. In that case, we won't try to flip its shouldGenerateImpCache
    // value back to true when restoring a snapshot. (We could keep track of all the
    // dependencies, but it would be very messy and the reward is only a few classes here and there.)
    bool droppedBecauseFlatteningSuperclassWasDropped = false;

    uint8_t backtracks = 0;

    // For debug purposes
    const char* name;

    struct Method {
        const char* installName = nullptr;
        const char* className = nullptr;
        const char* categoryName = nullptr;
        Selector* selector = nullptr;
        bool wasInlined = false;
        bool fromFlattening = false;
    };

    std::vector<Method> methods;

    std::string description() const;

    // log2 of the size of the hash table: the table has (1 << neededBits) slots.
    int neededBits;

    void didFinishAddingMethods();

    // Not const because this uses the slots as a scratchpad
    Constraint constraintForMethod(const Selector* m);

    bool operator<(const ClassData& r) const {
        if (isMetaclass != r.isMetaclass) return isMetaclass;
        return strcmp(name, r.name) < 0;
    }

    struct PlacementAttempt {
        int numberOfBitsToSet;
        int shift;
        int neededBits;

        PlacementAttempt(int _numberOfBitsToSet, int _shift, int _neededBits)
            : numberOfBitsToSet(_numberOfBitsToSet), shift(_shift), neededBits(_neededBits) {}

        std::string description() const;

        friend bool operator<(const PlacementAttempt& l, const PlacementAttempt& r)
        {
            return  std::tie(l.neededBits, l.numberOfBitsToSet, l.shift)
            < std::tie(r.neededBits, r.numberOfBitsToSet, r.shift);
        }

        int mask() const {
            return (1 << neededBits) - 1;
        }

        struct PreviousMethodAddress {
            int address;
            int fixedBitsMask;
        };

        struct PreviousState {
            int neededBits;
            int shift;
            int mask;
            std::unordered_map<Selector*, PreviousMethodAddress> methods;
        };

        struct Result {
            bool success;
            PreviousState previousState;
        };
    };

    // Possibilities we go through in the greedy backtracking algorithm, findShiftsAndMask().
    // Each class has an ordered set of shift/mask possibilities (attempts()), and we go
    // through them sequentially.
    PlacementAttempt attemptForShift(int shift, int neededBits) const;
    std::vector<PlacementAttempt> attempts() const;
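    // Illustrative sketch (an assumption about how a caller drives this, not the actual
    // builder loop), where `cls` is a ClassData*: try each attempt in order, and keep the
    // successful Result around so that a later conflict can be undone with backtrack():
    //
    //     std::minstd_rand rng(42);
    //     for (const auto& attempt : cls->attempts()) {
    //         auto result = cls->applyAttempt(attempt, rng);
    //         if (result.success) {
    //             // remember `result`: backtrack(result) restores the previous addresses
    //             break;
    //         }
    //     }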

    int shift;

    inline int modulo() const {
        return 1 << neededBits;
    }

    inline int mask() const {
        return modulo() - 1;
    }
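    // For example: with neededBits == 3 the hash table has modulo() == 8 slots and
    // mask() == 0b111. (Sketch of the intent, assuming the runtime-side lookup hashes a
    // selector into slot (selectorOffset >> shift) & mask(); the placement algorithm tries
    // to make that mapping collision-free for each class.)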

    // Attempt to place the class with the shift/mask in the attempt argument.
    typename PlacementAttempt::Result applyAttempt(const PlacementAttempt& attempt, std::minstd_rand & randomNumberGenerator);

    // Reassign the addresses as they were before we produced resultToBacktrackFrom
    void backtrack(typename PlacementAttempt::Result& resultToBacktrackFrom);

    // Did we have to grow the size of the hash table to one more bit when attempting to place it?
    bool hadToIncreaseSize() const;

    // Not const because this stomps over the slots array for checking
    bool checkConsistency();

    // Size in bytes needed for this hash table in the shared cache.
    size_t sizeInSharedCache() const;

    // Used to store the location of a flattening root's superclass, so that
    // we can compute its final vmAddr once we are in the ObjC optimizer.
    struct ClassLocator {
        const char *installName;
        unsigned segmentIndex;
        unsigned segmentOffset;
        bool operator==(const ClassLocator & other);
    };

    std::string_view flatteningRootName;
    std::set<std::string_view> flattenedSuperclasses;
    std::optional<ClassLocator> flatteningRootSuperclass;

    // For debug purposes
    bool operator==(const ClassData &other);
};

/// A unique selector. Has a name, a list of classes it's used in, and an address (which is actually an offset from
/// the beginning of the selectors section). Due to how the placement algorithm works, it also has a current
/// partial address and the corresponding bitmask of fixed bits. The algorithm adds bits to the partial address
/// as it makes progress, and updates the mask accordingly.
class Selector {
public:
    // For debug purposes
    const char* name;
    
    /// Classes the selector is used in
    std::vector<IMPCaches::ClassData*> classes;
    
    /// Which bits of address are already set
    int fixedBitsMask;
    
    /// Current 128-byte bucket index for this selector. Only the bits in fixedBitsMask are actually frozen.
    int inProgressBucketIndex;
    
    /// Full offset of the selector, including its low 7 bits. This is not the bucket's index: once all the
    /// bits are set, offset == (inProgressBucketIndex << 7) + low bits assigned by the AddressSpace.
    int offset;
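    // For example (illustrative numbers): inProgressBucketIndex == 5 means this selector lives
    // in the bucket starting at byte 5 << 7 == 640 of the selectors section; if it is placed
    // right after a 10-byte selector inside that bucket, its final offset would be 640 + 10 == 650.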

    /// Number of bits that you would need to freeze if you were to use this selector with this shift and mask.
    int numberOfBitsToSet(int shift, int mask) const {
        int fixedBits = (fixedBitsMask >> shift) & mask;
        return __builtin_popcount(mask) - __builtin_popcount(fixedBits);
    }
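    // Worked example: with shift == 2 and mask == 0b111, a selector whose fixedBitsMask is
    // 0b00100 already has one of the three masked bits frozen ((0b00100 >> 2) & 0b111 == 0b001),
    // so numberOfBitsToSet(2, 0b111) == 3 - 1 == 2.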

    int numberOfSetBits() const {
        return __builtin_popcount(fixedBitsMask);
    }

    unsigned int size() const {
        return (unsigned int)strlen(name) + 1;
    }

    // For debug purposes
    bool operator==(const Selector &other) const {
        return (strcmp(name, other.name) == 0)
            && (inProgressBucketIndex == other.inProgressBucketIndex)
            && (fixedBitsMask == other.fixedBitsMask);
    }

    bool operator!=(const Selector &other) const {
        return !(*this == other);
    }

};

class AddressSpace;
std::ostream& operator<<(std::ostream& o, const AddressSpace& c);

struct Hole {
    // Half-open interval: [startAddress, endAddress)
    int startAddress;
    int endAddress;
    int size() const {
        return endAddress - startAddress;
    }

    // All our intervals are non-overlapping
    bool operator<(const Hole& other) const {
        auto a = std::make_tuple(size(), startAddress);
        auto b = std::make_tuple(other.size(), other.startAddress);
        return a < b;
    }
};

/// Represents the holes left by the selector placement algorithm, to be filled later with other selectors we did not target.
class HoleMap {
public:
    
    /// Returns the position at which we should place a string of size `size`.
    int addStringOfSize(unsigned size);
    
    /// Total size of all the holes
    unsigned long totalHoleSize() const;
    
    // Empty the hole map.
    void clear();

    HoleMap();

private:
    friend class AddressSpace;
    friend std::ostream& operator<<  (std::ostream& o, const HoleMap& m);

    int endAddress = 0;
    std::set<IMPCaches::Hole> holes;
};
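// Usage sketch (hypothetical variables): once the map has been populated by
// AddressSpace::computeLowBits() below, selectors that did not end up in any IMP cache
// can be packed into the leftover gaps:
//
//     int offset = holeMap.addStringOfSize((unsigned)strlen("someUntargetedSelector") + 1);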

// A selector (the 🤯 emoji) that is known to be at offset 0, to let objc easily compute
// the offset of a selector given the SEL.
constexpr std::string_view magicSelector = "\xf0\x9f\xa4\xaf";

/// This is used to place the selectors in 128-byte buckets.
/// The "indices" below are the indices of the 128-byte bucket. To get an actual selector offset from this,
/// we shift the index to the left by 7 bits, and assign low bits depending on the length of each selector (this happens
/// in @see computeLowBits()).
/// The goal of this class is to validate that selectors can actually be placed in the buckets without overflowing
/// the 128-byte total length limit (based on the length of each individual selector).
class AddressSpace {
public:
    int sizeAtIndex(int idx) const;
    int sizeAvailableAfterIndex(int idx) const;
    bool canPlaceMethodAtIndex(const Selector* method, int idx) const;
    void placeMethodAtIndex(Selector* method, int idx);
    
    // If we decided to drop any classes, remove the selectors that were only present in them
    void removeUninterestingSelectors();
    
    // Once all selectors are placed in their 128-byte buckets,
    // actually assign the low 7 bits for each, and make a map of the
    // holes so that we can fill them with other selectors later.
    void computeLowBits(HoleMap& selectorsHoleMap) const;

    std::string description() const;

    static const int maximumIndex = (1 << 17) - 1;
    static constexpr int bagSizeShift = 7;

    friend std::ostream& operator<<  (std::ostream& o, const AddressSpace& c);
private:
    // Bucket 0 also holds the magic selector (and its NUL terminator), so it has slightly
    // less room than the other buckets.
    inline int bagSizeAtIndex(int idx) const {
        static constexpr int bagSize = 1 << bagSizeShift;
        static constexpr int bag0Size = bagSize - (magicSelector.length() + 1);
        return idx ? bagSize : bag0Size;
    }
    bool canPlaceWithoutFillingOverflowCellAtIndex(int idx) const;
    std::unordered_map<int, std::vector<Selector*>> methodsByIndex;
    std::unordered_map<int, int> sizes;
};
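// Usage sketch (hypothetical variables, not the actual builder code): buckets are filled
// only after checking capacity, and the low 7 bits are assigned in one pass at the end:
//
//     IMPCaches::AddressSpace addressSpace;
//     if (addressSpace.canPlaceMethodAtIndex(sel, bucketIndex)) {
//         addressSpace.placeMethodAtIndex(sel, bucketIndex);
//     }
//     // ... once every targeted selector has a bucket:
//     IMPCaches::HoleMap selectorsHoleMap;
//     addressSpace.computeLowBits(selectorsHoleMap);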

/// Represents a constraint on some of the bits of an address
/// It stores a set of allowed values for a given range of bits (identified by a shift and a mask).
class Constraint {
public:
    int mask;
    int shift;
    std::unordered_set<int> allowedValues;

    Constraint intersecting(const Constraint& other) const;
    friend std::ostream& operator << (std::ostream& o, const Constraint& c);

    bool operator==(const Constraint& other) const {
        return mask == other.mask &&
                shift == other.shift &&
                allowedValues == other.allowedValues;
    }

    struct Hasher {
        size_t operator()(const IMPCaches::Constraint& c) const {
            return c.shift << 24 | c.mask << 16 | c.allowedValues.size() << 8 | *c.allowedValues.begin();
        }
    };
};
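// For example (illustrative, two constraints over the same shift and mask): if one constraint
// allows {0b01, 0b10} for the masked bits and another allows {0b10, 0b11}, then intersecting()
// them yields a constraint whose only allowed value is 0b10.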

/// Merges several Constraints together to generate a simplified constraint equivalent to the original set of constraints
class ConstraintSet {
    std::unordered_set<Constraint, Constraint::Hasher> constraints;

public:
    std::optional<Constraint> mergedConstraint;

    bool add(const Constraint& c);
    void clear();
};
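// Usage sketch (hypothetical variables): the constraints gathered from a class's methods can
// be folded into a single merged constraint on the selector address bits:
//
//     IMPCaches::ConstraintSet cs;
//     cs.add(classData->constraintForMethod(selectorA));
//     cs.add(classData->constraintForMethod(selectorB));
//     if (cs.mergedConstraint) {
//         // *cs.mergedConstraint is equivalent to the intersection of all added constraints
//     }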

class SelectorMap {
public:
    using UnderlyingMap = std::map<std::string_view, std::unique_ptr<IMPCaches::Selector>>;
    UnderlyingMap map;
    SelectorMap();
};

// Implemented in OptimizerObjC
size_t sizeForImpCacheWithCount(int entries);

struct ClassKey {
    std::string_view name;
    bool metaclass;

    size_t hash() const {
        std::size_t seed = 0;
        seed ^= std::hash<std::string_view>()(name) + 0x9e3779b9 + (seed<<6) + (seed>>2);
        seed ^= std::hash<bool>()(metaclass) + 0x9e3779b9 + (seed<<6) + (seed>>2);
        return seed;
    }

    bool operator==(const ClassKey &other) const {
        return (name == other.name) && (metaclass == other.metaclass);
    }
};

struct ClassKeyHasher {
    size_t operator()(const ClassKey& k) const {
        return k.hash();
    }
};
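// Usage sketch (hypothetical map): ClassKey and ClassKeyHasher are meant to key per-class
// data by (name, metaclass-ness), e.g.:
//
//     std::unordered_map<ClassKey, ClassData*, ClassKeyHasher> classMap;
//     classMap[ClassKey { "NSObject", /* metaclass */ false }] = someClassData; // someClassData is hypothetical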

}


#endif /* IMPCaches_hpp */