pgtrace.c   [plain text]


/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#if CONFIG_PGTRACE
#include <mach/mach_types.h>
#include <IOKit/IOLib.h>
#include <sys/msgbuf.h>
#include <sys/errno.h>
#include <arm64/pgtrace.h>
#include <libkern/OSDebug.h>

typedef struct {
    queue_chain_t chain;

    pmap_t      pmap;
    vm_offset_t start;
    vm_offset_t end;
} probe_t;

#if CONFIG_PGTRACE_NONKEXT
#include "pgtrace_decoder.h"

//--------------------------------------------
// Macros
//
#define RBUF_DEFAULT_SIZE   1024
#define RBUF_IDX(idx, mask) ((idx) & (mask))
#define MSG_MAX             130

//--------------------------------------------
// Types
//
typedef uint8_t RWLOCK;

typedef struct {
    uint64_t                id;
    pgtrace_run_result_t    res;
    void                    *stack[PGTRACE_STACK_DEPTH];
} log_t;

//--------------------------------------------
// Statics
//
static struct {
    log_t           *logs;              // Protect
    uint32_t        size;               // Protect
    uint64_t        rdidx, wridx;       // Protect
    decl_simple_lock_data(, loglock);

    uint64_t id;
    uint32_t option;
    uint32_t enabled;
    uint32_t bytes;

    queue_head_t    probes;             // Protect

    lck_grp_t       *lock_grp;
    lck_grp_attr_t  *lock_grp_attr;
    lck_attr_t      *lock_attr;
    lck_mtx_t       probelock;
} pgtrace = {};

//--------------------------------------------
// Globals
//
void pgtrace_init(void)
{
    simple_lock_init(&pgtrace.loglock, 0);

    pgtrace.lock_attr = lck_attr_alloc_init();
    pgtrace.lock_grp_attr = lck_grp_attr_alloc_init();
    pgtrace.lock_grp = lck_grp_alloc_init("pgtrace_lock", pgtrace.lock_grp_attr);

    lck_mtx_init(&pgtrace.probelock, pgtrace.lock_grp, pgtrace.lock_attr);

    queue_init(&pgtrace.probes);

    pgtrace.size = RBUF_DEFAULT_SIZE;
    pgtrace.logs = kalloc(RBUF_DEFAULT_SIZE * sizeof(log_t));
}
    
void pgtrace_clear_probe(void)
{
    probe_t *p, *next;
    queue_head_t *q = &pgtrace.probes;

    lck_mtx_lock(&pgtrace.probelock);

    p = (probe_t *)queue_first(q);
    while (!queue_end(q, (queue_entry_t)p)) {
        next = (probe_t *)queue_next(&(p->chain));

        queue_remove(q, p, probe_t *, chain);
        kfree(p, sizeof(probe_t));

        p = next;
    }

    lck_mtx_unlock(&pgtrace.probelock);

    return;
}

int pgtrace_add_probe(thread_t thread, vm_offset_t start, vm_offset_t end)
{
    probe_t *p;
    queue_head_t *q = &pgtrace.probes;

    if (start > end) {
        kprintf("%s Invalid start=%lx end=%lx\n", __func__, start, end);
        return -1;
    }

    p = kalloc(sizeof(probe_t));
    p->start = start;
    p->end = end;
    if (thread == NULL) {
        p->pmap = NULL;
    } else {
        p->pmap = vm_map_pmap(thread->map);
    }

    lck_mtx_lock(&pgtrace.probelock);
    queue_enter(q, p, probe_t *, chain);
    lck_mtx_unlock(&pgtrace.probelock);

    return 0;
}

void pgtrace_start(void)
{
    probe_t *p;
    queue_head_t *q = &pgtrace.probes;

    kprintf("%s\n", __func__);

    if (pgtrace.enabled) {
        return;
    }

    pgtrace.enabled = 1;

    lck_mtx_lock(&pgtrace.probelock);

    queue_iterate(q, p, probe_t *, chain) {
        pmap_pgtrace_add_page(p->pmap, p->start, p->end);
    }

    lck_mtx_unlock(&pgtrace.probelock);

    return;
}

void pgtrace_stop(void)
{
    probe_t *p;
    queue_head_t *q = &pgtrace.probes;

    kprintf("%s\n", __func__);

    lck_mtx_lock(&pgtrace.probelock);

    queue_iterate(q, p, probe_t *, chain) { 
        pmap_pgtrace_delete_page(p->pmap, p->start, p->end);
    }

    lck_mtx_unlock(&pgtrace.probelock);

    pgtrace.enabled = 0;
}

uint32_t pgtrace_get_size(void)
{
    return pgtrace.size;
}

bool pgtrace_set_size(uint32_t size)
{
    log_t *old_buf, *new_buf;
    uint32_t old_size, new_size = 1;

    // round up to next power of 2
    while (size > new_size) {
        new_size <<= 1;
        if (new_size > 0x100000) {
            // over million entries
            kprintf("%s: size=%x new_size=%x is too big\n", __func__, size, new_size);
            return false;
        }
    }

    new_buf = kalloc(new_size * sizeof(log_t));
    if (new_buf == NULL) {
        kprintf("%s: can't allocate new_size=%x\n entries", __func__, new_size);
        return false;
    }

    pgtrace_stop();

    simple_lock(&pgtrace.loglock);
    old_buf = pgtrace.logs;
    old_size = pgtrace.size;
    pgtrace.logs = new_buf;
    pgtrace.size = new_size;
    pgtrace.rdidx = pgtrace.wridx = 0;
    simple_unlock(&pgtrace.loglock);

    if (old_buf) {
        kfree(old_buf, old_size * sizeof(log_t));
    }

    return true;
}

void pgtrace_clear_trace(void)
{
    simple_lock(&pgtrace.loglock);
    pgtrace.rdidx = pgtrace.wridx = 0;
    simple_unlock(&pgtrace.loglock);
}

boolean_t pgtrace_active(void)
{
    return (pgtrace.enabled > 0);
}

uint32_t pgtrace_get_option(void)
{
    return pgtrace.option;
}

void pgtrace_set_option(uint32_t option)
{
    pgtrace.option = option;
}

// pgtrace_write_log() is in interrupt disabled context
void pgtrace_write_log(pgtrace_run_result_t res)
{
    uint8_t i;
    log_t log = {};
    const char *rwmap[] = { "R", "W", "PREFETCH" };

    log.id = pgtrace.id++;
    log.res = res;

    if (pgtrace.option & PGTRACE_OPTION_KPRINTF) {
        char msg[MSG_MAX];
        char *p;

        p = msg;

        snprintf(p, MSG_MAX, "%llu %s ", res.rr_time, rwmap[res.rr_rw]);
        p += strlen(p);

        for (i = 0; i < res.rr_num; i++) {
            snprintf(p, MSG_MAX-(p-msg), "%lx=%llx ", res.rr_addrdata[i].ad_addr, res.rr_addrdata[i].ad_data);
            p += strlen(p);
        }

        kprintf("%s %s\n", __func__, msg);
    }
    
    if (pgtrace.option & PGTRACE_OPTION_STACK) {
        OSBacktrace(log.stack, PGTRACE_STACK_DEPTH);
    }

    pgtrace.bytes += sizeof(log);

    simple_lock(&pgtrace.loglock);

    pgtrace.logs[RBUF_IDX(pgtrace.wridx, pgtrace.size-1)] = log;

    // Advance rdidx if ring is full
    if (RBUF_IDX(pgtrace.wridx, pgtrace.size-1) == RBUF_IDX(pgtrace.rdidx, pgtrace.size-1) &&
        (pgtrace.wridx != pgtrace.rdidx)) {
        pgtrace.rdidx++;
    }
    pgtrace.wridx++;

    // Signal if ring was empty
    if (pgtrace.wridx == (pgtrace.rdidx + 1)) {
        thread_wakeup(pgtrace.logs);
    }

    simple_unlock(&pgtrace.loglock);

    return;
}

// pgtrace_read_log() is in user thread
int64_t pgtrace_read_log(uint8_t *buf, uint32_t size)
{
    int total, front, back;
    boolean_t ints;
    wait_result_t wr;

    if (pgtrace.enabled == FALSE) {
        return -EINVAL;
    }

    total = size / sizeof(log_t);

    // Check if buf is too small
    if (buf && total == 0) {
        return -EINVAL;
    }

    ints = ml_set_interrupts_enabled(FALSE);
    simple_lock(&pgtrace.loglock);

    // Wait if ring is empty
    if (pgtrace.rdidx == pgtrace.wridx) {
        assert_wait(pgtrace.logs, THREAD_ABORTSAFE);

        simple_unlock(&pgtrace.loglock);
        ml_set_interrupts_enabled(ints);

        wr = thread_block(NULL);
        if (wr != THREAD_AWAKENED) {
            return -EINTR;
        }

        ints = ml_set_interrupts_enabled(FALSE);
        simple_lock(&pgtrace.loglock);
    }

    // Trim the size
    if ((pgtrace.rdidx + total) > pgtrace.wridx) {
        total = (int)(pgtrace.wridx - pgtrace.rdidx);
    }

    // Copy front
    if ((RBUF_IDX(pgtrace.rdidx, pgtrace.size-1) + total) >= pgtrace.size) {
        front = pgtrace.size - RBUF_IDX(pgtrace.rdidx, pgtrace.size-1);
    } else {
        front = total;
    }

    memcpy(buf, &(pgtrace.logs[RBUF_IDX(pgtrace.rdidx, pgtrace.size-1)]), front*sizeof(log_t));

    // Copy back if any
    back = total-front;
    if (back) {
        buf += front * sizeof(log_t);
        memcpy(buf, pgtrace.logs, back*sizeof(log_t));
    }

    pgtrace.rdidx += total;

    simple_unlock(&pgtrace.loglock);
    ml_set_interrupts_enabled(ints);

    return total*sizeof(log_t);
}

int pgtrace_get_stats(pgtrace_stats_t *stats)
{
    if (!stats) {
        return -1;
    }

    stats->stat_logger.sl_bytes = pgtrace.bytes;
    pgtrace_decoder_get_stats(stats);

    return 0;
}

#else // CONFIG_PGTRACE_NONKEXT

static struct {
    bool            active;
    decoder_t       *decoder;
    logger_t        *logger;
    queue_head_t    probes;

    lck_grp_t       *lock_grp;
    lck_grp_attr_t  *lock_grp_attr;
    lck_attr_t      *lock_attr;
    lck_mtx_t       probelock;
} pgtrace = {};

//------------------------------------
// functions for pmap fault handler
// - pgtrace_decode_and_run
// - pgtrace_write_log
//------------------------------------
int pgtrace_decode_and_run(uint32_t inst, vm_offset_t fva, vm_map_offset_t *cva_page, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    vm_offset_t pa, cva;
    pgtrace_instruction_info_t info;
    vm_offset_t cva_front_page = cva_page[0];
    vm_offset_t cva_cur_page = cva_page[1];

    pgtrace.decoder->decode(inst, ss, &info);

    if (info.addr == fva) {
        cva = cva_cur_page + (fva & ARM_PGMASK);
    } else {
        // which means a front page is not a tracing page
        cva = cva_front_page + (fva & ARM_PGMASK);
    }
    
    pa = mmu_kvtop(cva);
    if (!pa) {
        panic("%s: invalid address cva=%lx fva=%lx info.addr=%lx inst=%x", __func__, cva, fva, info.addr, inst);
    }

    absolutetime_to_nanoseconds(mach_absolute_time(), &res->rr_time);

    pgtrace.decoder->run(inst, pa, cva, ss, res);

    return 0;
}

int pgtrace_write_log(pgtrace_run_result_t res)
{
    pgtrace.logger->write(res);
    return 0;
}

//------------------------------------
// functions for kext
//  - pgtrace_init
//  - pgtrace_add_probe
//  - pgtrace_clear_probe
//  - pgtrace_start
//  - pgtrace_stop
//  - pgtrace_active
//------------------------------------
int pgtrace_init(decoder_t *decoder, logger_t *logger)
{
    kprintf("%s decoder=%p logger=%p\n", __func__, decoder, logger);

    assert(decoder && logger);

    if (decoder->magic != 0xfeedface || logger->magic != 0xfeedface ||
        strcmp(decoder->arch, "arm64") != 0 || strcmp(logger->arch, "arm64") != 0) {
        kprintf("%s:wrong decoder/logger magic=%llx/%llx arch=%s/%s", __func__, decoder->magic, logger->magic, decoder->arch, logger->arch);
        return EINVAL;
    }

    pgtrace.lock_attr = lck_attr_alloc_init();
    pgtrace.lock_grp_attr = lck_grp_attr_alloc_init();
    pgtrace.lock_grp = lck_grp_alloc_init("pgtrace_lock", pgtrace.lock_grp_attr);

    lck_mtx_init(&pgtrace.probelock, pgtrace.lock_grp, pgtrace.lock_attr);

    queue_init(&pgtrace.probes);
    pgtrace.decoder = decoder;
    pgtrace.logger = logger;

    return 0;
}
    
int pgtrace_add_probe(thread_t thread, vm_offset_t start, vm_offset_t end)
{
    probe_t *p;
    queue_head_t *q = &pgtrace.probes;

    kprintf("%s start=%lx end=%lx\n", __func__, start, end);

    if (start > end) {
        kprintf("%s Invalid start=%lx end=%lx\n", __func__, start, end);
        return -1;
    }

    p = kalloc(sizeof(probe_t));
    p->start = start;
    p->end = end;
    if (thread == NULL) {
        p->pmap = NULL;
    } else {
        p->pmap = vm_map_pmap(thread->map);
    }

    lck_mtx_lock(&pgtrace.probelock);
    queue_enter(q, p, probe_t *, chain);
    lck_mtx_unlock(&pgtrace.probelock);

    return 0;
}

void pgtrace_clear_probe(void)
{
    probe_t *p, *next;
    queue_head_t *q = &pgtrace.probes;

    kprintf("%s\n", __func__);

    lck_mtx_lock(&pgtrace.probelock);

    p = (probe_t *)queue_first(q);
    while (!queue_end(q, (queue_entry_t)p)) {
        next = (probe_t *)queue_next(&(p->chain));

        queue_remove(q, p, probe_t *, chain);
        kfree(p, sizeof(probe_t));

        p = next;
    }

    lck_mtx_unlock(&pgtrace.probelock);

    return;
}

void pgtrace_start(void)
{
    probe_t *p;
    queue_head_t *q = &pgtrace.probes;

    kprintf("%s\n", __func__);

    if (pgtrace.active == true) {
        return;
    }

    pgtrace.active = true;

    lck_mtx_lock(&pgtrace.probelock);

    queue_iterate(q, p, probe_t *, chain) {
        pmap_pgtrace_add_page(p->pmap, p->start, p->end);
    }

    lck_mtx_unlock(&pgtrace.probelock);

    return;
}

void pgtrace_stop(void)
{
    probe_t *p;
    queue_head_t *q = &pgtrace.probes;

    kprintf("%s\n", __func__);

    lck_mtx_lock(&pgtrace.probelock);

    queue_iterate(q, p, probe_t *, chain) {
        pmap_pgtrace_delete_page(p->pmap, p->start, p->end);
    }

    lck_mtx_unlock(&pgtrace.probelock);

    pgtrace.active = false;
}

bool pgtrace_active(void)
{
    return pgtrace.active;
}
#endif // CONFIG_PGTRACE_NONKEXT
#else
// empty funcs for release kernel
extern void pgtrace_stop(void);
extern void pgtrace_start(void);
extern void pgtrace_clear_probe(void);
extern void pgtrace_add_probe(void);
extern void pgtrace_init(void);
extern void pgtrace_active(void);
void pgtrace_stop(void) {}
void pgtrace_start(void) {}
void pgtrace_clear_probe(void) {}
void pgtrace_add_probe(void) {}
void pgtrace_init(void) {}
void pgtrace_active(void) {}
#endif