IMPCachesBuilder.hpp   [plain text]


//
//  IMPCachesBuilder.hpp
//  dyld
//
//  Created by Thomas Deniau on 20/01/2020.
//

#pragma once

#include "CacheBuilder.h"
#include "IMPCaches.hpp"
#include <random>

namespace IMPCaches {

class IMPCachesBuilder {
public:
    SelectorMap selectors;

    /// Parses source dylibs to figure out which methods end up in which caches
    bool parseDylibs(Diagnostics& diag);

    /// Builds a map of the class hierarchy across all dylibs. This is especially used to resolve
    /// cross-dylib dependencies for superclasses and categories.
    void buildClassesMap(Diagnostics& diag);

    /// The entry point of the algorithm
    void buildPerfectHashes(HoleMap& holeMap, Diagnostics& diag);

    /// Regenerate the hole map if we needed to evict dylibs.
    void computeLowBits(HoleMap& holeMap);
    
    // Total size the hash tables will need.
    size_t totalIMPCachesSize() const;

    void clear() {
        // FIXME: Implement this
    }

    /** Classes for which we want to generate IMP caches according to the input JSON config laid down by OrderFiles
     * The value is the index of the class in the json file (they are ordered by decreasing order of importance)
     * This isn't just a vector because we also need to test for membership quickly */
    std::unordered_map<std::string_view, int> neededClasses;
    std::unordered_map<std::string_view, int> neededMetaclasses;

    // Classes for which we don't generate IMP caches, but which we need to track
    // to attach categories to them and find the right implementation for
    // inlined selectors
    std::unordered_set<std::string_view> trackedClasses;
    std::unordered_set<std::string_view> trackedMetaclasses;

    // List of classes with the same name that appear in different images.
    // We should not try to play with fire and try to support duplicated
    // classes in IMP caches.

    using ClassSet = std::unordered_set<ClassKey, ClassKeyHasher>;
    ClassSet duplicateClasses;

    /// Selectors which we want to inline into child classes' caches.
    std::unordered_set<std::string_view> selectorsToInline;
    std::vector<const Selector*> inlinedSelectors;

    // Class hierarchies to flatten:
    // In every class, include every selector including
    // the ones from superclasses up to the flattening root.
    // This lets us enable constant caches for some of the classes which are not leaves.
    // We avoid the pyramid of doom by making sure selectors from superclasses are
    // included in child caches, up until some flattening root, and msgSend will
    // fallback to the superclass of the flattening root if it can't find the selector
    // it expects.
    std::unordered_set<std::string_view> metaclassHierarchiesToFlatten;
    std::unordered_set<std::string_view> classHierarchiesToFlatten;

    /// All the dylibs the algorith, works on.
    std::vector<CacheBuilder::DylibInfo> & dylibs;

    IMPCachesBuilder(std::vector<CacheBuilder::DylibInfo>& allDylibs, const dyld3::json::Node& optimizerConfiguration, Diagnostics& diag, TimeRecorder& timeRecorder, const dyld3::closure::FileSystem& fileSystem);

    struct ObjCClass {

        const dyld3::MachOAnalyzer* superclassMA = nullptr;
        const uint8_t*      metaClass           = nullptr;
        const uint8_t*      superclass          = nullptr;
        uint64_t            methodListVMaddr    = 0;
        const char*         className           = nullptr;
        bool                isRootClass         = false;
        bool                isMetaClass         = false;

        const IMPCaches::ClassData::ClassLocator superclassLocator() const {
            uint64_t loadAddress = superclassMA->preferredLoadAddress();
            __block std::optional<unsigned> segmentIndex;
            __block std::optional<unsigned> segmentOffset;
            uint64_t superclassVMAddr = (superclass - (const uint8_t*)superclassMA) + loadAddress;
            superclassMA->forEachSegment(^(const dyld3::MachOAnalyzer::SegmentInfo &info, bool &stop) {
                if (info.vmAddr <= superclassVMAddr && (info.vmAddr + info.vmSize) > superclassVMAddr) {
                    segmentIndex = info.segIndex;
                    segmentOffset = (unsigned)(superclassVMAddr - info.vmAddr);
                    stop = true;
                    return;
                }
            });
            assert(segmentIndex.has_value() && segmentOffset.has_value());
            return IMPCaches::ClassData::ClassLocator {
                .installName = superclassMA->installName(),
                .segmentIndex = *segmentIndex,
                .segmentOffset = *segmentOffset,
            };
        }
    };
    struct ObjCCategory {
        const dyld3::MachOAnalyzer* classMA = nullptr;
        const uint8_t* cls = nullptr;
    };

private:

    /// Is this a class for which we want to generate an IMP cache?
    bool isClassInteresting(const ObjCClass& theClass) const;

    /// Is this a class for which we want to generate an IMP cache, or on which we want to track method attachment by categories?
    bool isClassInterestingOrTracked(const ObjCClass& theClass) const;

    /// Adds a method to a given class's cache.
    void addMethod(IMPCaches::ClassData* classDataPtr, const char* methodName, const char* installName, const char* className, const char* catName, bool inlined, bool fromFlattening);

    /// Inline a method from a parent's cache to a child's cache.
    void inlineMethodIfNeeded(IMPCaches::ClassData* classToInlineIn, const char* classToInlineFrom, const char* catToInlineFrom, const char* installNameToInlineFrom, const char* name, std::set<Selector*> & seenSelectors, bool isFlattening);

    /// Map from location (address in the mmapped source dylib) to class info built by buildClassesMap()
    std::unordered_map<const uint8_t*, ObjCClass> objcClasses;

    /// Map from location (address in the mmapped source dylib) to category info built by buildClassesMap()
    std::unordered_map<const uint8_t*, ObjCCategory> objcCategories;

    /// Address space where the selectors are going to live. Used to determine at which address we'll actually layout each selector.
    IMPCaches::AddressSpace addressSpace;

    /// Find a shift and a mask for each class. Returns the number of classes that we could not place.
    int findShiftsAndMasks();
    
    /// Shuffles selectors around to satisfy size constraints.  Returns the number of classes that we could not place.
    int solveGivenShiftsAndMasks();

    /// Determine classes we need to track category attachment on (for later inlining)
    void buildTrackedClasses(CacheBuilder::DylibInfo& dylib, const dyld3::MachOAnalyzer* ma);

    /// Parses the method lists in the source dylibs to determine what will end up in which IMP cache.
    void populateMethodLists(CacheBuilder::DylibInfo& dylib, const dyld3::MachOAnalyzer* ma, int* duplicateClassCount);

    /// Go through categories and add the methods from the category to the corresponding class's IMP cache
    void attachCategories(CacheBuilder::DylibInfo& dylib, const dyld3::MachOAnalyzer* ma);

    // Inline some selectors (driven by the OrderFiles) from parent caches into child caches.
    void inlineSelectors(CacheBuilder::DylibInfo& dylib, std::unordered_map<std::string_view, CacheBuilder::DylibInfo*> & dylibsByInstallName, const dyld3::MachOAnalyzer* ma);

    void fillAllClasses(std::vector<IMPCaches::ClassData*> & allClasses);
    void fillAllMethods(std::vector<IMPCaches::Selector*> & allMethods);
    void removeUninterestingClasses();

    struct TargetClassFindingResult {
        bool success;
        const dyld3::MachOLoaded* foundInDylib;
        const uint8_t* location;
    };

    struct BindTarget {
        std::string                 symbolName      = "";
        const dyld3::MachOAnalyzer* targetDylib     = nullptr;
        bool                        isWeakImport    = false;
    };

    struct DylibAndDeps {
        const dyld3::MachOAnalyzer*         ma = nullptr;
        __block std::vector<std::string>    dependentLibraries;
    };

    TargetClassFindingResult findTargetClass(const dyld3::MachOAnalyzer* ma, uint64_t targetClassVMAddr, uint64_t targetClassPointerVMAddr, const char* logContext, const std::unordered_map<uint64_t, uint64_t> & bindLocations, std::vector<BindTarget> & bindTargets, std::unordered_map<std::string, DylibAndDeps> &dylibMap);

    const std::string * nameAndIsMetaclassPairFromNode(const dyld3::json::Node & node, bool* metaclass);

    Diagnostics& _diagnostics;
    TimeRecorder& _timeRecorder;
    const dyld3::closure::FileSystem& _fileSystem;
};

}