#include <arm/cpu_data_internal.h>
#include <arm/machine_routines.h>
#include <arm64/monotonic.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/kpc.h>
#include <kern/monotonic.h>
#include <machine/atomic.h>
#include <machine/limits.h>
#include <os/overflow.h>
#include <pexpert/arm64/board_config.h>
#include <pexpert/device_tree.h>
#include <pexpert/pexpert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <string.h>
#include <sys/errno.h>
#include <sys/monotonic.h>
#if MACH_ASSERT
/*
 * Write a 64-bit control register and, on assertion-enabled kernels, read it
 * back to verify the hardware accepted the value; panic if the read-back
 * differs from what was written.
 */
#define CTRL_REG_SET(reg, val) do { \
	__builtin_arm_wsr64((reg), (val)); \
	uint64_t __check_reg = __builtin_arm_rsr64((reg)); \
	if (__check_reg != (val)) { \
		panic("value written to %s was not read back (wrote %llx, read %llx)", \
		    #reg, (val), __check_reg); \
	} \
} while (0)
#else
/* Release kernels skip the read-back verification. */
#define CTRL_REG_SET(reg, val) __builtin_arm_wsr64((reg), (val))
#endif
#pragma mark core counters

bool mt_core_supported = true;

/* Cached CPU/cluster topology, set once by mt_early_init(). */
static const ml_topology_info_t *topology_info;

/*
 * PMC0 through PMC7 are the implementation-defined core performance counter
 * registers, encoded as s3_2_c15_cN_0 for use with the rsr64/wsr64 builtins.
 */
#define PMC0 "s3_2_c15_c0_0"
#define PMC1 "s3_2_c15_c1_0"
#define PMC2 "s3_2_c15_c2_0"
#define PMC3 "s3_2_c15_c3_0"
#define PMC4 "s3_2_c15_c4_0"
#define PMC5 "s3_2_c15_c5_0"
#define PMC6 "s3_2_c15_c6_0"
#define PMC7 "s3_2_c15_c7_0"

/* Apply X(counter-number, argument) to counters 0-7. */
#define PMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
    X(6, A); X(7, A)

#if CORE_NCTRS > 8
/* Note the register encoding skips c8: PMC8/9 live at c9/c10. */
#define PMC8 "s3_2_c15_c9_0"
#define PMC9 "s3_2_c15_c10_0"
#define PMC_8_9(X, A) X(8, A); X(9, A)
#else // CORE_NCTRS > 8
#define PMC_8_9(X, A)
#endif // CORE_NCTRS > 8

/* Apply X(counter-number, argument) to every core counter present. */
#define PMC_ALL(X, A) PMC_0_7(X, A); PMC_8_9(X, A)
/* Core counters are 47 bits wide. */
#define CTR_MAX ((UINT64_C(1) << 47) - 1)

/* Fixed counter indices: cycles in PMC0, instructions in PMC1. */
#define CYCLES 0
#define INSTRS 1

#define PIO_PMC0_OFFSET (0x200)

/*
 * PMCR0 is the core PMU control register.  Its name macro was referenced
 * throughout this file (core_set_enabled, core_idle, mt_cpu_pmi, ...) but
 * never defined in this translation unit; define it here unless a header
 * already provides it.
 */
#ifndef PMCR0
#define PMCR0 "s3_1_c15_c0_0"
#endif

#define CTR_POS(CTR) (CTR)
/* Per-counter enable bits occupy the low bits of PMCR0. */
#define PMCR0_CTR_EN(CTR) (UINT64_C(1) << CTR_POS(CTR))
#define PMCR0_FIXED_EN (PMCR0_CTR_EN(CYCLES) | PMCR0_CTR_EN(INSTRS))
/* PMCR0 interrupt-generation modes (bits [10:8]). */
enum {
	PMCR0_INTGEN_OFF = 0,
	PMCR0_INTGEN_PMI = 1,
	PMCR0_INTGEN_AIC = 2,
	PMCR0_INTGEN_HALT = 3,
	PMCR0_INTGEN_FIQ = 4,
};
#define PMCR0_INTGEN_SET(X) ((uint64_t)(X) << 8)

#if CPMU_AIC_PMI
/* Deliver PMIs through the AIC as IRQs rather than FIQs. */
#define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_AIC)
#else
#define PMCR0_INTGEN_INIT PMCR0_INTGEN_SET(PMCR0_INTGEN_FIQ)
#endif

/* Per-counter PMI-enable bits start at bit 12 (bit 44 for counters >= 8). */
#define PMCR0_PMI_SHIFT (12)
#define PMCR0_CTR_GE8_PMI_SHIFT (44)
#define PMCR0_PMI_EN(CTR) (UINT64_C(1) << (PMCR0_PMI_SHIFT + CTR_POS(CTR)))
#define PMCR0_PMI_INIT (PMCR0_PMI_EN(CYCLES) | PMCR0_PMI_EN(INSTRS))

#define PMCR0_DISCNT_EN (UINT64_C(1) << 20)
#define PMCR0_WFRFE_EN (UINT64_C(1) << 22)
#define PMCR0_L2CGLOBAL_EN (UINT64_C(1) << 23)
#define PMCR0_USEREN_EN (UINT64_C(1) << 30)
/* Enable bits for counters >= 8 start at bit 32. */
#define PMCR0_CTR_GE8_EN_SHIFT (32)

/* Baseline PMCR0 value: interrupt mode plus fixed-counter PMIs. */
#define PMCR0_INIT (PMCR0_INTGEN_INIT | PMCR0_PMI_INIT)

/*
 * PMCR1 selects the execution modes in which each counter counts:
 * EL0 AArch32 (bits 0+), EL0 AArch64 (8+), EL1 AArch64 (16+),
 * and EL3 AArch64 (24+) where available.
 */
#define PMCR1 "s3_1_c15_c1_0"
#define PMCR1_EL0A32_EN(CTR) (UINT64_C(1) << (0 + CTR_POS(CTR)))
#define PMCR1_EL0A64_EN(CTR) (UINT64_C(1) << (8 + CTR_POS(CTR)))
#define PMCR1_EL1A64_EN(CTR) (UINT64_C(1) << (16 + CTR_POS(CTR)))
#if defined(APPLEHURRICANE)
/*
 * Hurricane erroneously counts EL3 events in EL1 when the EL3 bits are
 * set -- presumably why they are forced to zero here; confirm against
 * the errata notes for this core.
 */
#define PMCR1_EL3A64_EN(CTR) UINT64_C(0)
#else
#define PMCR1_EL3A64_EN(CTR) (UINT64_C(1) << (24 + CTR_POS(CTR)))
#endif
#define PMCR1_ALL_EN(CTR) (PMCR1_EL0A32_EN(CTR) | PMCR1_EL0A64_EN(CTR) | \
    PMCR1_EL1A64_EN(CTR) | PMCR1_EL3A64_EN(CTR))

/* Count the fixed counters in every execution mode. */
#define PMCR1_INIT (PMCR1_ALL_EN(CYCLES) | PMCR1_ALL_EN(INSTRS))
/*
 * Enable counting of the fixed counters across all execution modes by
 * OR-ing the PMCR1 mode-enable bits into its current value, preserving
 * any bits already configured.
 */
static inline void
core_init_execution_modes(void)
{
	const uint64_t modes = __builtin_arm_rsr64(PMCR1) | PMCR1_INIT;
	__builtin_arm_wsr64(PMCR1, modes);
}
/* Additional core PMU control registers (watchpoint/config). */
#define PMCR2 "s3_1_c15_c2_0"
#define PMCR3 "s3_1_c15_c3_0"
#define PMCR4 "s3_1_c15_c4_0"

/* PMSR holds the per-counter overflow status bits. */
#define PMSR "s3_1_c15_c13_0"
#define PMSR_OVF(CTR) (1ULL << (CTR))

/* Event-selection registers for the configurable counters. */
#define PMESR0 "S3_1_c15_c5_0"
#define PMESR1 "S3_1_c15_c6_0"
/*
 * Device-init hook for the "core" monotonic device.  The fixed core
 * counters need no per-device setup through this interface, so report
 * that the operation is unsupported.
 */
static int
core_init(__unused mt_device_t dev)
{
	return ENOTSUP;
}
/*
 * Return the current CPU's monotonic counter state.  Caller must prevent
 * migration (e.g. interrupts disabled) for the result to stay meaningful.
 */
struct mt_cpu *
mt_cur_cpu(void)
{
	return &getCpuDatap()->cpu_monotonic;
}
/*
 * Read the current hardware value of core counter `ctr`.  The switch cases
 * are generated by PMC_ALL so every counter present on this configuration
 * gets a direct register read; any other index is a fatal caller bug.
 */
uint64_t
mt_core_snap(unsigned int ctr)
{
	switch (ctr) {
#define PMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(PMC ## CTR)
	PMC_ALL(PMC_RD, 0);
#undef PMC_RD
	default:
		panic("monotonic: invalid core counter read: %u", ctr);
		__builtin_unreachable();
	}
}
void
mt_core_set_snap(unsigned int ctr, uint64_t count)
{
switch (ctr) {
case 0:
__builtin_arm_wsr64(PMC0, count);
break;
case 1:
__builtin_arm_wsr64(PMC1, count);
break;
default:
panic("monotonic: invalid core counter %u write %llu", ctr, count);
__builtin_unreachable();
}
}
/*
 * Enable the fixed counters -- and, if KPC is running configurable counters,
 * those as well -- by building up and writing PMCR0.
 */
static void
core_set_enabled(void)
{
	uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
	pmcr0 |= PMCR0_INIT | PMCR0_FIXED_EN;
	if (kpc_get_running() & KPC_CLASS_CONFIGURABLE_MASK) {
		/* shift KPC's counter mask up past the fixed counters */
		uint64_t kpc_ctrs = kpc_get_configurable_pmc_mask(
			KPC_CLASS_CONFIGURABLE_MASK) << MT_CORE_NFIXED;
#if KPC_ARM64_CONFIGURABLE_COUNT > 6
		/* counters >= 8 have separate enable and PMI fields in PMCR0 */
		uint64_t ctrs_ge8 = kpc_ctrs >> 8;
		pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_EN_SHIFT;
		pmcr0 |= ctrs_ge8 << PMCR0_CTR_GE8_PMI_SHIFT;
		kpc_ctrs &= (1ULL << 8) - 1;
#endif
		/* request PMIs for counters 0-7 alongside their enables */
		kpc_ctrs |= kpc_ctrs << PMCR0_PMI_SHIFT;
		pmcr0 |= kpc_ctrs;
	}
	__builtin_arm_wsr64(PMCR0, pmcr0);
#if MACH_ASSERT
	/* verify the enable bits actually stuck in the hardware */
	uint64_t pmcr0_check = __builtin_arm_rsr64(PMCR0);
	if ((pmcr0_check & (PMCR0_INIT | PMCR0_FIXED_EN)) != (PMCR0_INIT | PMCR0_FIXED_EN)) {
		panic("monotonic: hardware ignored enable (read %llx, wrote %llx)",
		    pmcr0_check, pmcr0);
	}
#endif
}
/*
 * Prepare the core counters for idle: snapshot the current counts, then
 * drop PMCR0 back to its baseline (counters disabled).  Must be called
 * with interrupts disabled.
 */
static void
core_idle(__unused cpu_data_t *cpu)
{
	assert(cpu != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

#if DEBUG
	/* counters should still be enabled/configured at this point */
	uint64_t pmcr0 = __builtin_arm_rsr64(PMCR0);
	if ((pmcr0 & PMCR0_FIXED_EN) == 0) {
		panic("monotonic: counters disabled before idling, pmcr0 = 0x%llx\n", pmcr0);
	}
	uint64_t pmcr1 = __builtin_arm_rsr64(PMCR1);
	if ((pmcr1 & PMCR1_INIT) == 0) {
		panic("monotonic: counter modes disabled before idling, pmcr1 = 0x%llx\n", pmcr1);
	}
#endif

	/* disable counters before updating so counts don't move underneath us */
	__builtin_arm_wsr64(PMCR0, PMCR0_INIT);
	mt_update_fixed_counts();
}
#pragma mark uncore performance monitor
#if HAS_UNCORE_CTRS
/* Set once uncore_init() has run; gates wake-time restore. */
static bool mt_uncore_initted = false;

#if UNCORE_PER_CLUSTER
/* One uncore monitor per cluster; CPM MMIO base per monitor. */
#define MAX_NMONITORS MAX_CPU_CLUSTERS
static uintptr_t cpm_impl[MAX_NMONITORS] = {};
#else
/* A single chip-wide uncore monitor. */
#define MAX_NMONITORS (1)
#endif
#if UNCORE_VERSION >= 2
#define UPMSR_OVF_POS 2
#else
#define UPMSR_OVF_POS 1
#endif
/*
 * UPMSR is the uncore PMU status register carrying the per-counter overflow
 * bits.  Its register-name macro is used below (uncmon_clear_int_locked_l)
 * but was never defined in this translation unit; define it here unless a
 * header already provides it.
 */
#ifndef UPMSR
#define UPMSR "s3_7_c15_c6_4"
#endif
/* Extract/test the overflow bit for counter CTR from a UPMSR value R. */
#define UPMSR_OVF(R, CTR) ((R) >> ((CTR) + UPMSR_OVF_POS) & 0x1)
#define UPMSR_OVF_MASK (((UINT64_C(1) << UNCORE_NCTRS) - 1) << UPMSR_OVF_POS)

/* UPMPCM selects which cores' PMIs this monitor routes. */
#define UPMPCM "s3_7_c15_c5_4"
#define UPMPCM_CORE(ID) (UINT64_C(1) << (ID))
/* Bitmask of CPU IDs that should receive uncore PMIs. */
static int32_t uncore_pmi_mask = 0;

/* Bitmask of uncore counters currently allocated via uncore_add(). */
static uint16_t uncore_active_ctrs = 0;
static_assert(sizeof(uncore_active_ctrs) * CHAR_BIT >= UNCORE_NCTRS,
    "counter mask should fit the full range of counters");

/* True while the uncore counters are enabled and counting. */
bool mt_uncore_enabled = false;

/*
 * Software copy of the uncore configuration.  The unions let the per-counter
 * bytes/shorts be written to the hardware as packed 64-bit registers
 * (uce_regs / uccm_regs).
 */
static struct uncore_config {
	union {
		uint8_t uce_ctrs[UNCORE_NCTRS];      /* event ID per counter */
		uint64_t uce_regs[UNCORE_NCTRS / 8];
	} uc_events;
	union {
		uint16_t uccm_masks[UNCORE_NCTRS];   /* CPU mask per counter */
		uint64_t uccm_regs[UNCORE_NCTRS / 4];
	} uc_cpu_masks[MAX_NMONITORS];
} uncore_config;

/*
 * Per-monitor accumulation state, protected by um_lock (taken with
 * interrupts disabled).
 */
static struct uncore_monitor {
	uint64_t um_snaps[UNCORE_NCTRS];   /* last hardware value seen */
	uint64_t um_counts[UNCORE_NCTRS];  /* accumulated deltas */
	lck_spin_t um_lock;
	bool um_sleeping;                  /* saved for sleep, awaiting restore */
} uncore_monitors[MAX_NMONITORS];
/* Number of uncore monitors present: one per cluster, or one chip-wide. */
static unsigned int
uncore_nmonitors(void)
{
#if UNCORE_PER_CLUSTER
	return topology_info->num_clusters;
#else
	return 1;
#endif
}
/* ID of the monitor whose registers are locally accessible from this CPU. */
static unsigned int
uncmon_get_curid(void)
{
#if UNCORE_PER_CLUSTER
	return cpu_cluster_id();
#else
	return 0;
#endif
}
/*
 * Acquire a monitor's spin lock with interrupts disabled (the lock is also
 * taken from the PMI/FIQ path).  Returns the previous interrupt state for
 * uncmon_unlock().
 */
static int
uncmon_lock(struct uncore_monitor *mon)
{
	int intrs_en = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock(&mon->um_lock);
	return intrs_en;
}
/* Release a monitor's lock and restore the saved interrupt state. */
static void
uncmon_unlock(struct uncore_monitor *mon, int intrs_en)
{
	lck_spin_unlock(&mon->um_lock);
	(void)ml_set_interrupts_enabled(intrs_en);
}
/* UPMCR0 field positions shifted between uncore PMU versions. */
#if UNCORE_VERSION >= 2
#define UPMCR0_INTEN_POS 20
#define UPMCR0_INTGEN_POS 16
#else
#define UPMCR0_INTEN_POS 12
#define UPMCR0_INTGEN_POS 8
#endif

/* Uncore interrupt-generation modes. */
enum {
	UPMCR0_INTGEN_OFF = 0,
	UPMCR0_INTGEN_AIC = 2,
	UPMCR0_INTGEN_HALT = 3,
	UPMCR0_INTGEN_FIQ = 4,
};

/* Enable PMIs for all uncore counters; deliver them as FIQs. */
#define UPMCR0_INTEN (((1ULL << UNCORE_NCTRS) - 1) << UPMCR0_INTEN_POS)
#define UPMCR0_INIT (UPMCR0_INTEN | (UPMCR0_INTGEN_FIQ << UPMCR0_INTGEN_POS))
/*
 * Enable or disable counting on the local monitor per `enctrmask`.
 * The caller must hold the monitor's lock.
 */
static inline void
uncmon_set_counting_locked_l(__unused unsigned int monid, uint64_t enctrmask)
{
#define UPMCR0 "s3_7_c15_c0_4"
	__builtin_arm_wsr64(UPMCR0, UPMCR0_INIT | enctrmask);
}
#if UNCORE_PER_CLUSTER
/*
 * Enable or disable counting on a remote monitor via its CPM MMIO window.
 * The caller must hold the monitor's lock.
 */
static inline void
uncmon_set_counting_locked_r(unsigned int monid, uint64_t enctrmask)
{
	const uintptr_t upmcr0_offset = 0x4180;
	*(uint64_t *)(cpm_impl[monid] + upmcr0_offset) = UPMCR0_INIT | enctrmask;
}
#endif
/* Uncore counters are 48 bits wide. */
#define UPMC_MAX ((UINT64_C(1) << 48) - 1)

/* Uncore counter register encodings (s3_7_c15_cN_4 for 0-7, _5 for 8-15). */
#define UPMC0 "s3_7_c15_c7_4"
#define UPMC1 "s3_7_c15_c8_4"
#define UPMC2 "s3_7_c15_c9_4"
#define UPMC3 "s3_7_c15_c10_4"
#define UPMC4 "s3_7_c15_c11_4"
#define UPMC5 "s3_7_c15_c12_4"
#define UPMC6 "s3_7_c15_c13_4"
#define UPMC7 "s3_7_c15_c14_4"
#if UNCORE_NCTRS > 8
#define UPMC8  "s3_7_c15_c0_5"
#define UPMC9  "s3_7_c15_c1_5"
#define UPMC10 "s3_7_c15_c2_5"
#define UPMC11 "s3_7_c15_c3_5"
#define UPMC12 "s3_7_c15_c4_5"
#define UPMC13 "s3_7_c15_c5_5"
#define UPMC14 "s3_7_c15_c6_5"
#define UPMC15 "s3_7_c15_c7_5"
#endif

/* Apply X(counter-number, argument) across the available uncore counters. */
#define UPMC_0_7(X, A) X(0, A); X(1, A); X(2, A); X(3, A); X(4, A); X(5, A); \
    X(6, A); X(7, A)
#if UNCORE_NCTRS <= 8
#define UPMC_ALL(X, A) UPMC_0_7(X, A)
#else
#define UPMC_8_15(X, A) X(8, A); X(9, A); X(10, A); X(11, A); X(12, A); \
    X(13, A); X(14, A); X(15, A)
#define UPMC_ALL(X, A) UPMC_0_7(X, A); UPMC_8_15(X, A)
#endif
/*
 * Read uncore counter `ctr` on the local monitor via system registers.
 * The caller must hold the monitor's lock.
 */
static inline uint64_t
uncmon_read_counter_locked_l(__unused unsigned int monid, unsigned int ctr)
{
	assert(ctr < UNCORE_NCTRS);
	switch (ctr) {
#define UPMC_RD(CTR, UNUSED) case (CTR): return __builtin_arm_rsr64(UPMC ## CTR)
	UPMC_ALL(UPMC_RD, 0);
#undef UPMC_RD
	default:
		panic("monotonic: invalid counter read %u", ctr);
		__builtin_unreachable();
	}
}
/*
 * Write `count` into uncore counter `ctr` on the local monitor.  `count`
 * must fit in the 48-bit counter.  The caller must hold the monitor's lock.
 */
static inline void
uncmon_write_counter_locked_l(__unused unsigned int monid, unsigned int ctr,
    uint64_t count)
{
	assert(count < UPMC_MAX);
	assert(ctr < UNCORE_NCTRS);
	switch (ctr) {
#define UPMC_WR(CTR, COUNT) case (CTR): \
		return __builtin_arm_wsr64(UPMC ## CTR, (COUNT))
	UPMC_ALL(UPMC_WR, count);
#undef UPMC_WR
	default:
		panic("monotonic: invalid counter write %u", ctr);
	}
}
#if UNCORE_PER_CLUSTER
/*
 * MMIO offsets of each uncore counter inside a remote cluster's CPM window.
 * NOTE(review): this table has external linkage -- if nothing outside this
 * file references it, it could be `static`; verify before changing.
 */
uintptr_t upmc_offs[UNCORE_NCTRS] = {
	[0] = 0x4100, [1] = 0x4248, [2] = 0x4110, [3] = 0x4250, [4] = 0x4120,
	[5] = 0x4258, [6] = 0x4130, [7] = 0x4260, [8] = 0x4140, [9] = 0x4268,
	[10] = 0x4150, [11] = 0x4270, [12] = 0x4160, [13] = 0x4278,
	[14] = 0x4170, [15] = 0x4280,
};
/*
 * Read uncore counter `ctr` on a (possibly remote) monitor through its CPM
 * MMIO window.  The caller must hold the monitor's lock.
 */
static inline uint64_t
uncmon_read_counter_locked_r(unsigned int mon_id, unsigned int ctr)
{
	assert(mon_id < uncore_nmonitors());
	assert(ctr < UNCORE_NCTRS);
	return *(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]);
}
/*
 * Write `count` into uncore counter `ctr` on a (possibly remote) monitor
 * through its CPM MMIO window.  The caller must hold the monitor's lock.
 */
static inline void
uncmon_write_counter_locked_r(unsigned int mon_id, unsigned int ctr,
    uint64_t count)
{
	assert(count < UPMC_MAX);
	assert(ctr < UNCORE_NCTRS);
	assert(mon_id < uncore_nmonitors());
	*(uint64_t *)(cpm_impl[mon_id] + upmc_offs[ctr]) = count;
}
#endif
/*
 * Fold the delta since the last snapshot of counter `ctr` on monitor
 * `monid` into its accumulated count.  `curid` chooses the local register
 * path when it matches `monid`, otherwise the remote MMIO path (only
 * available per-cluster; in the single-monitor configuration curid always
 * equals monid).  The caller must hold the monitor's lock.
 */
static inline void
uncmon_update_locked(unsigned int monid, unsigned int curid, unsigned int ctr)
{
	struct uncore_monitor *mon = &uncore_monitors[monid];
	uint64_t snap = 0;
	if (curid == monid) {
		snap = uncmon_read_counter_locked_l(monid, ctr);
	} else {
#if UNCORE_PER_CLUSTER
		snap = uncmon_read_counter_locked_r(monid, ctr);
#endif
	}
	/* counters only increase between snapshots (reset on overflow/PMI) */
	assert(snap >= mon->um_snaps[ctr]);
	mon->um_counts[ctr] += snap - mon->um_snaps[ctr];
	mon->um_snaps[ctr] = snap;
}
/*
 * Program the local monitor's event-selection (UPMESR) and per-counter CPU
 * mask (UPMECM) registers from the software configuration.  The caller must
 * hold the monitor's lock.
 */
static inline void
uncmon_program_events_locked_l(unsigned int monid)
{
#define UPMESR0 "s3_7_c15_c1_4"
	CTRL_REG_SET(UPMESR0, uncore_config.uc_events.uce_regs[0]);
#if UNCORE_NCTRS > 8
#define UPMESR1 "s3_7_c15_c11_5"
	CTRL_REG_SET(UPMESR1, uncore_config.uc_events.uce_regs[1]);
#endif

#define UPMECM0 "s3_7_c15_c3_4"
#define UPMECM1 "s3_7_c15_c4_4"
	CTRL_REG_SET(UPMECM0,
	    uncore_config.uc_cpu_masks[monid].uccm_regs[0]);
	CTRL_REG_SET(UPMECM1,
	    uncore_config.uc_cpu_masks[monid].uccm_regs[1]);
#if UNCORE_NCTRS > 8
#define UPMECM2 "s3_7_c15_c8_5"
#define UPMECM3 "s3_7_c15_c9_5"
	CTRL_REG_SET(UPMECM2,
	    uncore_config.uc_cpu_masks[monid].uccm_regs[2]);
	CTRL_REG_SET(UPMECM3,
	    uncore_config.uc_cpu_masks[monid].uccm_regs[3]);
#endif
}
#if UNCORE_PER_CLUSTER
/*
 * Program a remote monitor's event-selection and CPU-mask registers through
 * its CPM MMIO window.  The caller must hold the monitor's lock.
 */
static inline void
uncmon_program_events_locked_r(unsigned int monid)
{
	/* UPMESR0/1 MMIO offsets */
	const uintptr_t upmesr_offs[2] = {[0] = 0x41b0, [1] = 0x41b8, };
	for (unsigned int i = 0; i < sizeof(upmesr_offs) / sizeof(upmesr_offs[0]);
	    i++) {
		*(uint64_t *)(cpm_impl[monid] + upmesr_offs[i]) =
		    uncore_config.uc_events.uce_regs[i];
	}

	/* UPMECM0-3 MMIO offsets */
	const uintptr_t upmecm_offs[4] = {
		[0] = 0x4190, [1] = 0x4198, [2] = 0x41a0, [3] = 0x41a8,
	};
	for (unsigned int i = 0; i < sizeof(upmecm_offs) / sizeof(upmecm_offs[0]);
	    i++) {
		*(uint64_t *)(cpm_impl[monid] + upmecm_offs[i]) =
		    uncore_config.uc_cpu_masks[monid].uccm_regs[i];
	}
}
#endif
/* Clear the local monitor's overflow/interrupt status bits. */
static void
uncmon_clear_int_locked_l(__unused unsigned int monid)
{
	__builtin_arm_wsr64(UPMSR, 0);
}
#if UNCORE_PER_CLUSTER
/* Clear a remote monitor's overflow/interrupt status via CPM MMIO. */
static void
uncmon_clear_int_locked_r(unsigned int monid)
{
	const uintptr_t upmsr_off = 0x41c0;
	*(uint64_t *)(cpm_impl[monid] + upmsr_off) = 0;
}
#endif
/*
 * The CPU mask that monitor `monid` should deliver PMIs to: the global
 * uncore PMI mask, restricted to the monitor's own cluster when monitors
 * are per-cluster.
 */
static uint64_t
uncmon_get_pmi_mask(unsigned int monid)
{
	uint64_t pmi_mask = uncore_pmi_mask;
#if UNCORE_PER_CLUSTER
	pmi_mask &= topology_info->clusters[monid].cpu_mask;
#else
#pragma unused(monid)
#endif
	return pmi_mask;
}
/*
 * Initialize the local monitor: route PMIs to the chosen CPUs and start (or
 * keep stopped) counting based on the global enable state.  The caller must
 * hold the monitor's lock.
 */
static void
uncmon_init_locked_l(unsigned int monid)
{
	CTRL_REG_SET(UPMPCM, uncmon_get_pmi_mask(monid));
	uncmon_set_counting_locked_l(monid,
	    mt_uncore_enabled ? uncore_active_ctrs : 0);
}
#if UNCORE_PER_CLUSTER
static uintptr_t acc_impl[MAX_NMONITORS] = {};
/*
 * Initialize a remote monitor through its ACC MMIO window: route PMIs and
 * apply the global enable state.  The caller must hold the monitor's lock.
 */
static void
uncmon_init_locked_r(unsigned int monid)
{
	const uintptr_t upmpcm_off = 0x1010;
	*(uint64_t *)(acc_impl[monid] + upmpcm_off) = uncmon_get_pmi_mask(monid);
	uncmon_set_counting_locked_r(monid,
	    mt_uncore_enabled ? uncore_active_ctrs : 0);
}
#endif
/*
 * One-time setup of the uncore monitors: determine the PMI routing mask
 * (boot-arg override on development kernels, else the first CPU of each
 * cluster), capture MMIO bases from the topology, and initialize each
 * monitor under its lock.
 */
static int
uncore_init(__unused mt_device_t dev)
{
#if HAS_UNCORE_CTRS
	assert(MT_NDEVS > 0);
	/* the uncore device is the last entry in mt_devices */
	mt_devices[MT_NDEVS - 1].mtd_nmonitors = (uint8_t)uncore_nmonitors();
#endif

#if DEVELOPMENT || DEBUG
	/* allow a boot-arg to override which CPUs receive uncore PMIs */
	bool parsed_arg = PE_parse_boot_argn("uncore_pmi_mask", &uncore_pmi_mask,
	    sizeof(uncore_pmi_mask));
	if (parsed_arg) {
#if UNCORE_PER_CLUSTER
		/* exactly one PMI CPU per cluster is required */
		if (__builtin_popcount(uncore_pmi_mask) != (int)uncore_nmonitors()) {
			panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
		}
		for (unsigned int i = 0; i < uncore_nmonitors(); i++) {
			if (__builtin_popcountll(uncmon_get_pmi_mask(i)) != 1) {
				panic("monotonic: invalid uncore PMI CPU for cluster %d in mask 0x%x",
				    i, uncore_pmi_mask);
			}
		}
#else
		if (__builtin_popcount(uncore_pmi_mask) != 1) {
			panic("monotonic: invalid uncore PMI mask 0x%x", uncore_pmi_mask);
		}
#endif
	} else
#endif
	{
		/* default: route PMIs to the first CPU of each cluster */
#if UNCORE_PER_CLUSTER
		for (unsigned int i = 0; i < topology_info->num_clusters; i++) {
			uncore_pmi_mask |= 1ULL << topology_info->clusters[i].first_cpu_id;
		}
#else
		uncore_pmi_mask |= 1;
#endif
	}
	assert(uncore_pmi_mask != 0);

	unsigned int curmonid = uncmon_get_curid();
	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
#if UNCORE_PER_CLUSTER
		/* capture the per-cluster MMIO windows for remote access */
		ml_topology_cluster_t *cluster = &topology_info->clusters[monid];
		cpm_impl[monid] = (uintptr_t)cluster->cpm_IMPL_regs;
		acc_impl[monid] = (uintptr_t)cluster->acc_IMPL_regs;
		assert(cpm_impl[monid] != 0 && acc_impl[monid] != 0);
#endif

		struct uncore_monitor *mon = &uncore_monitors[monid];
		lck_spin_init(&mon->um_lock, mt_lock_grp, NULL);

		int intrs_en = uncmon_lock(mon);
		if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
			uncmon_init_locked_r(monid);
#endif
		} else {
			uncmon_init_locked_l(monid);
		}
		uncmon_unlock(mon, intrs_en);
	}

	mt_uncore_initted = true;
	return 0;
}
/*
 * Snapshot the counters selected by `ctr_mask` on monitor `monid` into
 * `counts`, folding in any deltas since the last update.  `curmonid`
 * selects local register access versus remote MMIO access.
 */
static void
uncmon_read_all_counters(unsigned int monid, unsigned int curmonid,
    uint64_t ctr_mask, uint64_t *counts)
{
	struct uncore_monitor *mon = &uncore_monitors[monid];
	int intrs_en = uncmon_lock(mon);

	for (unsigned int i = 0; i < UNCORE_NCTRS; i++) {
		if ((ctr_mask & (1ULL << i)) == 0) {
			continue;
		}
		uncmon_update_locked(monid, curmonid, i);
		counts[i] = mon->um_counts[i];
	}

	uncmon_unlock(mon, intrs_en);
}
/*
 * Read the active uncore counters named by `ctr_mask` from every monitor
 * into `counts_out`, laid out as UNCORE_NCTRS entries per monitor.
 * Returns EPWROFF when no counters are active, EINVAL when the mask names
 * an inactive counter.
 */
static int
uncore_read(uint64_t ctr_mask, uint64_t *counts_out)
{
	assert(ctr_mask != 0);
	assert(counts_out != NULL);

	if (uncore_active_ctrs == 0) {
		return EPWROFF;
	}
	if ((ctr_mask & ~uncore_active_ctrs) != 0) {
		return EINVAL;
	}

	const unsigned int curmonid = uncmon_get_curid();
	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		uncmon_read_all_counters(monid, curmonid, ctr_mask,
		    &counts_out[monid * UNCORE_NCTRS]);
	}
	return 0;
}
/*
 * Allocate the lowest free uncore counter allowed by `config`, record its
 * event and per-monitor CPU masks, and return its index in `ctr_out`.
 * Fails with EBUSY while counting is enabled, ENOSPC when no allowed
 * counter is free, and E2BIG when the only free allowed counters are out
 * of range for this hardware.
 */
static int
uncore_add(struct monotonic_config *config, uint32_t *ctr_out)
{
	if (mt_uncore_enabled) {
		return EBUSY;
	}

	uint32_t free_ctrs = ~uncore_active_ctrs & config->allowed_ctr_mask;
	if (free_ctrs == 0) {
		return ENOSPC;
	}

	const uint32_t valid_ctrs = (UINT32_C(1) << UNCORE_NCTRS) - 1;
	if ((free_ctrs & valid_ctrs) == 0) {
		return E2BIG;
	}

	/* lowest free counter; guaranteed in-range by the check above */
	const uint32_t ctr = (uint32_t)(__builtin_ffsll(free_ctrs) - 1);
	uncore_active_ctrs |= UINT64_C(1) << ctr;
	uncore_config.uc_events.uce_ctrs[ctr] = (uint8_t)config->event;

	/* an empty CPU mask means "count on all CPUs" */
	const uint64_t cpu_mask = config->cpu_mask != 0 ? config->cpu_mask : UINT64_MAX;
	for (unsigned int i = 0; i < uncore_nmonitors(); i++) {
#if UNCORE_PER_CLUSTER
		const unsigned int shift = topology_info->clusters[i].first_cpu_id;
#else
		const unsigned int shift = 0;
#endif
		uncore_config.uc_cpu_masks[i].uccm_masks[ctr] = (uint16_t)(cpu_mask >> shift);
	}

	*ctr_out = ctr;
	return 0;
}
/*
 * Tear down all uncore counting: stop every monitor, zero the hardware
 * counters and software accumulators, clear interrupt status, then wipe
 * the configuration and reprogram the (now empty) event selections.
 */
static void
uncore_reset(void)
{
	mt_uncore_enabled = false;
	unsigned int curmonid = uncmon_get_curid();

	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		struct uncore_monitor *mon = &uncore_monitors[monid];
		bool remote = monid != curmonid;

		int intrs_en = uncmon_lock(mon);
		/* stop counting before touching the counters */
		if (remote) {
#if UNCORE_PER_CLUSTER
			uncmon_set_counting_locked_r(monid, 0);
#endif
		} else {
			uncmon_set_counting_locked_l(monid, 0);
		}
		/* zero every counter that was in use */
		for (int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
			if (uncore_active_ctrs & (1U << ctr)) {
				if (remote) {
#if UNCORE_PER_CLUSTER
					uncmon_write_counter_locked_r(monid, ctr, 0);
#endif
				} else {
					uncmon_write_counter_locked_l(monid, ctr, 0);
				}
			}
		}

		memset(&mon->um_snaps, 0, sizeof(mon->um_snaps));
		memset(&mon->um_counts, 0, sizeof(mon->um_counts));
		/* drop any pending overflow status */
		if (remote) {
#if UNCORE_PER_CLUSTER
			uncmon_clear_int_locked_r(monid);
#endif
		} else {
			uncmon_clear_int_locked_l(monid);
		}
		uncmon_unlock(mon, intrs_en);
	}

	uncore_active_ctrs = 0;
	memset(&uncore_config, 0, sizeof(uncore_config));

	/* push the cleared configuration out to the hardware */
	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		struct uncore_monitor *mon = &uncore_monitors[monid];
		bool remote = monid != curmonid;

		int intrs_en = uncmon_lock(mon);
		if (remote) {
#if UNCORE_PER_CLUSTER
			uncmon_program_events_locked_r(monid);
#endif
		} else {
			uncmon_program_events_locked_l(monid);
		}
		uncmon_unlock(mon, intrs_en);
	}
}
/*
 * Enable or disable the local monitor: when enabling, (re)program the
 * configured events first, then start counting the active counters.
 */
static void
uncmon_set_enabled_l(unsigned int monid, bool enable)
{
	struct uncore_monitor *mon = &uncore_monitors[monid];
	int intrs_en = uncmon_lock(mon);

	if (enable) {
		uncmon_program_events_locked_l(monid);
	}
	uncmon_set_counting_locked_l(monid, enable ? uncore_active_ctrs : 0);

	uncmon_unlock(mon, intrs_en);
}
#if UNCORE_PER_CLUSTER
/*
 * Enable or disable a remote monitor: when enabling, (re)program the
 * configured events first, then start counting the active counters.
 */
static void
uncmon_set_enabled_r(unsigned int monid, bool enable)
{
	struct uncore_monitor *mon = &uncore_monitors[monid];
	int intrs_en = uncmon_lock(mon);

	if (enable) {
		uncmon_program_events_locked_r(monid);
	}
	uncmon_set_counting_locked_r(monid, enable ? uncore_active_ctrs : 0);

	uncmon_unlock(mon, intrs_en);
}
#endif
/* Apply the global uncore enable state to every monitor. */
static void
uncore_set_enabled(bool enable)
{
	mt_uncore_enabled = enable;
	unsigned int curmonid = uncmon_get_curid();
	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
			uncmon_set_enabled_r(monid, enable);
#endif
		} else {
			uncmon_set_enabled_l(monid, enable);
		}
	}
}
/*
 * Uncore PMI (FIQ) handler: pause the overflowed counters, fold their
 * values into the software accumulators, reset them to zero, clear the
 * interrupt status, and resume counting.  Runs on the PMI-designated CPU
 * of the local monitor.
 */
static void
uncore_fiq(uint64_t upmsr)
{
	/* overflow bits map directly onto counter indices */
	uint64_t disable_ctr_mask = (upmsr & UPMSR_OVF_MASK) >> UPMSR_OVF_POS;
	/* should not overflow counters that aren't active */
	assert(!(disable_ctr_mask & ~uncore_active_ctrs));

	unsigned int monid = uncmon_get_curid();
	struct uncore_monitor *mon = &uncore_monitors[monid];
	int intrs_en = uncmon_lock(mon);

	/* stop the overflowed counters while we service them */
	uncmon_set_counting_locked_l(monid,
	    uncore_active_ctrs & ~disable_ctr_mask);

	for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
		if (UPMSR_OVF(upmsr, ctr)) {
			uncmon_update_locked(monid, monid, ctr);
			mon->um_snaps[ctr] = 0;
			uncmon_write_counter_locked_l(monid, ctr, 0);
		}
	}

	uncmon_clear_int_locked_l(monid);
	uncmon_set_counting_locked_l(monid, uncore_active_ctrs);

	uncmon_unlock(mon, intrs_en);
}
/*
 * Called before system sleep: stop counting on every monitor and fold the
 * final hardware values into the software accumulators, marking each
 * monitor as sleeping so uncore_restore() knows to reload it.
 */
static void
uncore_save(void)
{
	if (!uncore_active_ctrs) {
		return;
	}

	unsigned int curmonid = uncmon_get_curid();
	for (unsigned int monid = 0; monid < uncore_nmonitors(); monid++) {
		struct uncore_monitor *mon = &uncore_monitors[monid];
		int intrs_en = uncmon_lock(mon);

		if (mt_uncore_enabled) {
			if (monid != curmonid) {
#if UNCORE_PER_CLUSTER
				uncmon_set_counting_locked_r(monid, 0);
#endif
			} else {
				uncmon_set_counting_locked_l(monid, 0);
			}
		}

		for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
			if (uncore_active_ctrs & (1U << ctr)) {
				uncmon_update_locked(monid, curmonid, ctr);
			}
		}

		mon->um_sleeping = true;
		uncmon_unlock(mon, intrs_en);
	}
}
/*
 * Called on each monitor's PMI CPU after wake: reload the hardware counters
 * from the saved snapshots and re-initialize the local monitor's
 * configuration.  No-op if the monitor was not marked sleeping.
 */
static void
uncore_restore(void)
{
	if (!uncore_active_ctrs) {
		return;
	}

	unsigned int curmonid = uncmon_get_curid();
	struct uncore_monitor *mon = &uncore_monitors[curmonid];
	int intrs_en = uncmon_lock(mon);
	if (!mon->um_sleeping) {
		goto out;
	}

	for (unsigned int ctr = 0; ctr < UNCORE_NCTRS; ctr++) {
		if (uncore_active_ctrs & (1U << ctr)) {
			uncmon_write_counter_locked_l(curmonid, ctr, mon->um_snaps[ctr]);
		}
	}
	uncmon_program_events_locked_l(curmonid);
	uncmon_init_locked_l(curmonid);
	mon->um_sleeping = false;

out:
	uncmon_unlock(mon, intrs_en);
}
#endif
#pragma mark common hooks
/* Early boot hook: cache the CPU topology used throughout this file. */
void
mt_early_init(void)
{
	topology_info = ml_get_topology_info();
}
/* Per-CPU idle-entry hook: snapshot and quiesce the core counters. */
void
mt_cpu_idle(cpu_data_t *cpu)
{
	core_idle(cpu);
}
/*
 * Per-CPU run hook (idle exit / CPU start): restore the fixed counters from
 * their saved snapshots, then re-enable counting in all execution modes.
 * Must be called with interrupts disabled.
 */
void
mt_cpu_run(cpu_data_t *cpu)
{
	struct mt_cpu *mtc;

	assert(cpu != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	mtc = &cpu->cpu_monotonic;

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		mt_core_set_snap(i, mtc->mtc_snaps[i]);
	}

	/* the idle-exit or reset path may have cleared the mode bits */
	core_init_execution_modes();
	core_set_enabled();
}
/* CPU offline hook: same counter teardown as idle entry. */
void
mt_cpu_down(cpu_data_t *cpu)
{
	mt_cpu_idle(cpu);
}
/* CPU online hook: same counter restore as idle exit. */
void
mt_cpu_up(cpu_data_t *cpu)
{
	mt_cpu_run(cpu);
}
/* System-sleep hook: save the uncore counters (core counters are per-CPU). */
void
mt_sleep(void)
{
#if HAS_UNCORE_CTRS
	uncore_save();
#endif
}
/* Per-core wake hook: restore the uncore counters once initialized. */
void
mt_wake_per_core(void)
{
#if HAS_UNCORE_CTRS
	if (mt_uncore_initted) {
		uncore_restore();
	}
#endif
}
/*
 * Sum the PMI counts recorded by every CPU's monotonic state.  Requires
 * topology_info to have been set by mt_early_init().
 */
uint64_t
mt_count_pmis(void)
{
	uint64_t total = 0;
	for (unsigned int i = 0; i < topology_info->num_cpus; i++) {
		const unsigned int cpu_id = topology_info->cpus[i].cpu_id;
		cpu_data_t *datap = (cpu_data_t *)CpuDataEntries[cpu_id].cpu_data_vaddr;
		total += datap->cpu_monotonic.mtc_npmis;
	}
	return total;
}
/*
 * Core PMI handler: disable the counters, account each overflowed counter,
 * reset it to its programmed reset value, dispatch microstackshot or KPC
 * work as appropriate, then re-enable counting.  Must run with interrupts
 * disabled.
 */
static void
mt_cpu_pmi(cpu_data_t *cpu, uint64_t pmcr0)
{
	assert(cpu != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	/* stop counting while the overflow status is inspected and cleared */
	__builtin_arm_wsr64(PMCR0, PMCR0_INIT);
	/* ensure the write takes effect before reading status */
	__builtin_arm_isb(ISB_SY);

	cpu->cpu_monotonic.mtc_npmis += 1;
	cpu->cpu_stat.pmi_cnt_wake += 1;

#if MONOTONIC_DEBUG
	if (!PMCR0_PMI(pmcr0)) {
		kprintf("monotonic: mt_cpu_pmi but no PMI (PMCR0 = %#llx)\n",
		    pmcr0);
	}
#else
#pragma unused(pmcr0)
#endif

	uint64_t pmsr = __builtin_arm_rsr64(PMSR);

#if MONOTONIC_DEBUG
	printf("monotonic: cpu = %d, PMSR = 0x%llx, PMCR0 = 0x%llx\n",
	    cpu_number(), pmsr, pmcr0);
#endif

#if MACH_ASSERT
	uint64_t handled = 0;
#endif

	/* fixed counters: accumulate, reset, and maybe take a microstackshot */
	for (unsigned int i = 0; i < MT_CORE_NFIXED; i++) {
		if ((pmsr & PMSR_OVF(i)) == 0) {
			continue;
		}
#if MACH_ASSERT
		handled |= 1ULL << i;
#endif
		uint64_t count = mt_cpu_update_count(cpu, i);
		cpu->cpu_monotonic.mtc_counts[i] += count;
		/* restart the counter at its period-derived reset value */
		mt_core_set_snap(i, mt_core_reset_values[i]);
		cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i];

		if (mt_microstackshots && mt_microstackshot_ctr == i) {
			bool user_mode = false;
			arm_saved_state_t *state = get_user_regs(current_thread());
			if (state) {
				user_mode = PSR64_IS_USER(get_saved_state_cpsr(state));
			}
			KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 1),
			    mt_microstackshot_ctr, user_mode);
			mt_microstackshot_pmi_handler(user_mode, mt_microstackshot_ctx);
		} else if (mt_debug) {
			KDBG_RELEASE(KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_DEBUG, 2),
			    i, count);
		}
	}

	/* configurable counters belong to KPC -- hand their overflows over */
	for (unsigned int i = MT_CORE_NFIXED; i < CORE_NCTRS; i++) {
		if (pmsr & PMSR_OVF(i)) {
#if MACH_ASSERT
			handled |= 1ULL << i;
#endif
			extern void kpc_pmi_handler(unsigned int ctr);
			kpc_pmi_handler(i);
		}
	}

#if MACH_ASSERT
	/* every overflow bit should have been serviced above */
	uint64_t pmsr_after_handling = __builtin_arm_rsr64(PMSR);
	if (pmsr_after_handling != 0) {
		unsigned int first_ctr_ovf = __builtin_ffsll(pmsr_after_handling) - 1;
		uint64_t count = 0;
		const char *extra = "";
		if (first_ctr_ovf >= CORE_NCTRS) {
			extra = " (invalid counter)";
		} else {
			count = mt_core_snap(first_ctr_ovf);
		}
		panic("monotonic: PMI status not cleared on exit from handler, "
		    "PMSR = 0x%llx HANDLE -> -> 0x%llx, handled 0x%llx, "
		    "PMCR0 = 0x%llx, PMC%d = 0x%llx%s", pmsr, pmsr_after_handling,
		    handled, __builtin_arm_rsr64(PMCR0), first_ctr_ovf, count, extra);
	}
#endif

	core_set_enabled();
}
#if CPMU_AIC_PMI
/*
 * AIC-delivered PMI entry point; validates the interrupt arrived on the
 * expected nub before running the common PMI handler.
 */
void
mt_cpmu_aic_pmi(cpu_id_t source)
{
	struct cpu_data *curcpu = getCpuDatap();
	if (source != curcpu->interrupt_nub) {
		panic("monotonic: PMI from IOCPU %p delivered to %p", source,
		    curcpu->interrupt_nub);
	}
	mt_cpu_pmi(curcpu, __builtin_arm_rsr64(PMCR0));
}
#endif
/*
 * FIQ entry point for performance-monitor interrupts: dispatch to the core
 * PMI handler (unless core PMIs arrive via the AIC instead) and to the
 * uncore handler when uncore counters exist.
 */
void
mt_fiq(void *cpu, uint64_t pmcr0, uint64_t upmsr)
{
#if CPMU_AIC_PMI
#pragma unused(cpu, pmcr0)
#else
	mt_cpu_pmi(cpu, pmcr0);
#endif

#if HAS_UNCORE_CTRS
	uncore_fiq(upmsr);
#else
#pragma unused(upmsr)
#endif
}
/* Countdown used to rendezvous the cross-call below. */
static uint32_t mt_xc_sync;

/*
 * Cross-call target run on every CPU when microstackshots start: flush the
 * fixed counters, restart them from the new reset values, re-enable, and
 * signal completion through mt_xc_sync.
 */
static void
mt_microstackshot_start_remote(__unused void *arg)
{
	cpu_data_t *cpu = getCpuDatap();

	__builtin_arm_wsr64(PMCR0, PMCR0_INIT);

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		uint64_t count = mt_cpu_update_count(cpu, i);
		cpu->cpu_monotonic.mtc_counts[i] += count;
		mt_core_set_snap(i, mt_core_reset_values[i]);
		cpu->cpu_monotonic.mtc_snaps[i] = mt_core_reset_values[i];
	}

	core_set_enabled();

	/* last CPU out wakes the initiator */
	if (os_atomic_dec(&mt_xc_sync, relaxed) == 0) {
		thread_wakeup((event_t)&mt_xc_sync);
	}
}
/*
 * Arm microstackshot sampling with the given period: set the selected
 * counter's reset value to CTR_MAX - period so it overflows (and raises a
 * PMI) every `period` events, then broadcast the restart to all CPUs.
 * Returns ERANGE when the period exceeds the counter width.
 */
int
mt_microstackshot_start_arch(uint64_t period)
{
	uint64_t reset_value = 0;
	if (os_sub_overflow(CTR_MAX, period, &reset_value)) {
		return ERANGE;
	}

	mt_core_reset_values[mt_microstackshot_ctr] = reset_value;
	cpu_broadcast_xcall(&mt_xc_sync, TRUE, mt_microstackshot_start_remote,
	    mt_microstackshot_start_remote);
	return 0;
}
#pragma mark dev nodes
/*
 * Device table exposed through the monotonic interface: the core counters
 * always, plus the uncore PMU when present.  Order matters -- uncore_init()
 * assumes the uncore device is the last entry.
 */
struct mt_device mt_devices[] = {
	[0] = {
		.mtd_name = "core",
		.mtd_init = core_init,
	},
#if HAS_UNCORE_CTRS
	[1] = {
		.mtd_name = "uncore",
		.mtd_init = uncore_init,
		.mtd_add = uncore_add,
		.mtd_reset = uncore_reset,
		.mtd_enable = uncore_set_enabled,
		.mtd_read = uncore_read,
		.mtd_ncounters = UNCORE_NCTRS,
	}
#endif
};

static_assert(
	(sizeof(mt_devices) / sizeof(mt_devices[0])) == MT_NDEVS,
	"MT_NDEVS macro should be same as the length of mt_devices");