/*
 * Copyright (c) 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 */

#include <kern/kalloc.h>
#include <kern/kern_types.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <machine/machine_cpu.h>

#include <pmc/pmc.h>

#include <libkern/OSAtomic.h>

#if defined(__i386__) || defined(__x86_64__)
#include <i386/mp.h>
#endif

#if CONFIG_COUNTERS

/* various debug logging enable */
#undef DEBUG_COUNTERS

typedef uint8_t pmc_state_event_t;

#define PMC_STATE_EVENT_START				0
#define PMC_STATE_EVENT_STOP				1
#define PMC_STATE_EVENT_FREE				2
#define PMC_STATE_EVENT_INTERRUPT			3
#define PMC_STATE_EVENT_END_OF_INTERRUPT	4
#define PMC_STATE_EVENT_CONTEXT_IN			5
#define PMC_STATE_EVENT_CONTEXT_OUT			6
#define PMC_STATE_EVENT_LOAD_FINISHED		7
#define PMC_STATE_EVENT_STORE_FINISHED		8

/* PMC spin timeouts */
#define PMC_SPIN_THRESHOLD	10	/* Number of spins to allow before checking mach_absolute_time() */
#define PMC_SPIN_TIMEOUT_US	10	/* Time in microseconds before the spin causes an assert */

uint64_t pmc_spin_timeout_count = 0;	/* Number of times where a PMC spin loop causes a timeout */

#ifdef DEBUG_COUNTERS
#	include <pexpert/pexpert.h>
#	define COUNTER_DEBUG(...) \
	do { \
		kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \
		kprintf(__VA_ARGS__); \
	} while(0)

#	define PRINT_PERF_MON(x)	\
	do { \
		kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \
			x, x->object, x->useCount, \
			(x->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING) ? \
			1 : 0); \
	} while(0)

static const char * pmc_state_state_name(pmc_state_t state) {
	switch (PMC_STATE_STATE(state)) {
		case PMC_STATE_STATE_INVALID:
			return "INVALID";
		case PMC_STATE_STATE_STOP:
			return "STOP";
		case PMC_STATE_STATE_CAN_RUN:
			return "CAN_RUN";
		case PMC_STATE_STATE_LOAD:
			return "LOAD";
		case PMC_STATE_STATE_RUN:
			return "RUN";
		case PMC_STATE_STATE_STORE:
			return "STORE";
		case PMC_STATE_STATE_INTERRUPT:
			return "INTERRUPT";
		case PMC_STATE_STATE_DEALLOC:
			return "DEALLOC";
		default:
			return "UNKNOWN";
	}
}

static const char * pmc_state_event_name(pmc_state_event_t event) {
	switch (event) {
		case PMC_STATE_EVENT_START:
			return "START";
		case PMC_STATE_EVENT_STOP:
			return "STOP";
		case PMC_STATE_EVENT_FREE:
			return "FREE";
		case PMC_STATE_EVENT_INTERRUPT:
			return "INTERRUPT";
		case PMC_STATE_EVENT_END_OF_INTERRUPT:
			return "END OF INTERRUPT";
		case PMC_STATE_EVENT_CONTEXT_IN:
			return "CONTEXT IN";
		case PMC_STATE_EVENT_CONTEXT_OUT:
			return "CONTEXT OUT";
		case PMC_STATE_EVENT_LOAD_FINISHED:
			return "LOAD_FINISHED";
		case PMC_STATE_EVENT_STORE_FINISHED:
			return "STORE_FINISHED";
		default:
			return "UNKNOWN";
	}
}

#	define PMC_STATE_FORMAT	"<%s, %u, %s%s%s>"
#	define PMC_STATE_ARGS(x)	pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \
					((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "")
#else
#	define COUNTER_DEBUG(...)
#	define PRINT_PERF_MON(x)
#	define PMC_STATE_FORMAT
#	define PMC_STATE_ARGS(x)
#endif

/*!struct
 * pmc_config is the data behind a pmc_config_t.
 * @member object A pointer to an instance of IOPerformanceCounterConfiguration
 * @member method A pointer to a method to call to handle PMI.
 * @member interrupt_after_value Cause a PMI after the counter counts this many
 * events.
 * @member refCon Passed to the @method method as the refCon argument.
 */
struct pmc_config {
	pmc_config_object_t object;
	volatile pmc_interrupt_method_t method;
	uint64_t interrupt_after_value;
	void *refCon;
};

/*
 * Allocation Zones
 * 
 * Two allocation zones - Perf zone small and Perf zone big.
 * Each zone has associated maximums, defined below.
 * The small zone is the max of the smallest allocation objects (all sizes on
 * K64):
 *	perf_monitor_t - 48 bytes
 *		perf_monitor_methods_t - 28 bytes
 *	pmc_reservation_t - 48 bytes
 *  pmc_config_t - 32 bytes
 * perf_small_zone unit size is (on K64) 48 bytes
 * perf_small_zone max count must be max number of perf monitors, plus (max
 * number of reservations * 2). The "*2" is because each reservation has a
 * pmc_config_t within.
 *
 * Big zone is max of the larger allocation units
 *	pmc_t - 144 bytes
 *		pmc_methods_t - 116 bytes
 * perf_big_zone unit size is (on K64) 144 bytes
 * perf_big_zone max count is the max number of PMCs we support.
 */

static zone_t perf_small_zone = NULL;
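/* 256 perf monitors + 8196 reservations + 8196 pmc_configs, per the sizing note above */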
#define MAX_PERF_SMALLS		(256 + 8196 + 8196)
#define PERF_SMALL_UNIT_SZ	(MAX(MAX(sizeof(struct perf_monitor), \
	sizeof(struct pmc_reservation)), sizeof(struct pmc_config))) 

static zone_t perf_big_zone = NULL;
#define MAX_PERF_BIGS		(1024)
#define PERF_BIG_UNIT_SZ	(sizeof(struct pmc))

/*
 * Locks and Lock groups
 */
static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL;
static lck_grp_attr_t *pmc_lock_grp_attr;
static lck_attr_t *pmc_lock_attr;

/* PMC tracking queue locks */

static lck_mtx_t  cpu_monitor_queue_mutex;   /* protects per-cpu queues at initialisation time */
static lck_spin_t perf_monitor_queue_spin;   /* protects adding and removing from queue */
static lck_spin_t perf_counters_queue_spin;  /* protects adding and removing from queue */

/* Reservation tracking queues lock */
static lck_spin_t reservations_spin;

/*
 * Tracking queues
 *
 * Keeps track of registered perf monitors and perf counters
 */

static queue_head_t **cpu_monitor_queues = NULL;

static queue_head_t *perf_monitors_queue = NULL;
static volatile uint32_t perf_monitors_count = 0U;

static queue_head_t *perf_counters_queue = NULL;
static volatile uint32_t perf_counters_count = 0U;

/* 
 * Reservation queues
 *
 * Keeps track of all system, task, and thread-level reservations (both active and
 * inactive).
 *
 * We track them all here (rather than in their respective task or thread only)
 * so that we can inspect our tracking data directly (rather than peeking at
 * every task and thread) to determine if/when a new reservation would
 * constitute a conflict.
 */
 
static queue_head_t *system_reservations = NULL;
static volatile uint32_t system_reservation_count = 0U;

static queue_head_t *task_reservations = NULL;
static volatile uint32_t task_reservation_count = 0U;

static queue_head_t *thread_reservations = NULL;
static volatile uint32_t thread_reservation_count = 0U;

#if XNU_KERNEL_PRIVATE

/*
 * init_pmc_locks creates and initializes all the locks and lock groups and lock
 * attributes required for the pmc sub-system.
 */
static void init_pmc_locks(void) {
	pmc_lock_attr = lck_attr_alloc_init();
	assert(pmc_lock_attr);

	pmc_lock_grp_attr = lck_grp_attr_alloc_init();
	assert(pmc_lock_grp_attr);

	pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr);
	assert(pmc_lock_grp);

	lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr);
	lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr);

	lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr);

	lck_mtx_init(&cpu_monitor_queue_mutex, pmc_lock_grp, pmc_lock_attr);
}

/*
 * init_pmc_zones initializes the allocation zones used by the pmc subsystem
 */
static void init_pmc_zones(void) {
	perf_small_zone = zinit(PERF_SMALL_UNIT_SZ, 
		MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS, 
		"pmc.small zone");

	assert(perf_small_zone);

	perf_big_zone = zinit(PERF_BIG_UNIT_SZ,
		MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS, 
		"pmc.big zone");

	assert(perf_big_zone);
}

/*
 * init_pmc_queues allocates and initializes the tracking queues for
 * registering and reserving individual pmcs and perf monitors.
 */
static void init_pmc_queues(void) {
    
	perf_monitors_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(perf_monitors_queue);

	queue_init(perf_monitors_queue);

	perf_counters_queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(perf_counters_queue);

	queue_init(perf_counters_queue);

	system_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(system_reservations);

	queue_init(system_reservations);

	task_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(task_reservations);

	queue_init(task_reservations);

	thread_reservations = (queue_head_t*)kalloc(sizeof(queue_head_t));
	assert(thread_reservations);

	queue_init(thread_reservations);
}

/*
 * pmc_bootstrap brings up all the necessary infrastructure required to use the
 * pmc sub-system.
 */
__private_extern__
void pmc_bootstrap(void) {
	/* build our alloc zones */
	init_pmc_zones();

	/* build the locks */
	init_pmc_locks();

	/* build our tracking queues */
	init_pmc_queues();
}

#endif /* XNU_KERNEL_PRIVATE */

/*
 * Perf Monitor Internals
 */

static perf_monitor_t perf_monitor_alloc(void) {
	/* perf monitors come from the perf small zone */
	return (perf_monitor_t)zalloc(perf_small_zone);
}

static void perf_monitor_free(void *pm) {
	zfree(perf_small_zone, pm);
}

static void perf_monitor_init(perf_monitor_t pm, int cpu) {
	assert(pm);

	pm->object = NULL;

	bzero(&(pm->methods), sizeof(perf_monitor_methods_t));

	pm->useCount = 1;	/* initial retain count of 1, for caller */
	
	pm->reservedCounters = 0;
    
	pm->cpu = cpu;

	pm->link.next = pm->link.prev = (queue_entry_t)NULL;
	pm->cpu_link.next = pm->cpu_link.prev = (queue_entry_t)NULL;
}

/*
 * perf_monitor_dequeue removes the given perf_monitor_t from the
 * perf_monitor_queue, thereby unregistering it with the system.
 */
static void perf_monitor_dequeue(perf_monitor_t pm) {
	lck_spin_lock(&perf_monitor_queue_spin);
	
	if (pm->methods.flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
		/* If this flag is set, the monitor is already validated to be 
		 * accessible from a single cpu only.
		 */
		queue_remove(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link); 
	}
	
	/* 
	 * remove the @pm object from the @perf_monitor_queue queue (it is of type
	 * <perf_monitor_t> and has a field called @link that is the queue_link_t
	 */
	queue_remove(perf_monitors_queue, pm, perf_monitor_t, link);

	perf_monitors_count--;

	lck_spin_unlock(&perf_monitor_queue_spin);
}

/*
 * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue,
 * thereby registering it for use with the system.
 */
static void perf_monitor_enqueue(perf_monitor_t pm) {
    
	lck_mtx_lock(&cpu_monitor_queue_mutex);
	lck_spin_lock(&perf_monitor_queue_spin);

	if (pm->cpu >= 0) {
		/* Deferred initialisation; saves memory and permits ml_get_max_cpus()
		 * to block until cpu initialisation is complete.
		 */
		if (!cpu_monitor_queues) {
			uint32_t max_cpus;
			queue_head_t **queues;
			uint32_t i;

			lck_spin_unlock(&perf_monitor_queue_spin);

			max_cpus = ml_get_max_cpus();

			queues = (queue_head_t**)kalloc(sizeof(queue_head_t*) * max_cpus);
			assert(queues);
			for (i = 0; i < max_cpus; i++) {
				queue_head_t *queue = (queue_head_t*)kalloc(sizeof(queue_head_t));
				assert(queue);
				queue_init(queue);
				queues[i] = queue;
			}

			lck_spin_lock(&perf_monitor_queue_spin);

			cpu_monitor_queues = queues;
		}

		queue_enter(cpu_monitor_queues[pm->cpu], pm, perf_monitor_t, cpu_link);
	}
	
	queue_enter(perf_monitors_queue, pm, perf_monitor_t, link);
	perf_monitors_count++;
	
	lck_spin_unlock(&perf_monitor_queue_spin);
	lck_mtx_unlock(&cpu_monitor_queue_mutex);
}

/*
 * perf_monitor_reference increments the reference count for the given
 * perf_monitor_t.
 */
static void perf_monitor_reference(perf_monitor_t pm) {
	assert(pm);

	OSIncrementAtomic(&(pm->useCount));
}

/*
 * perf_monitor_deallocate decrements the reference count for the given
 * perf_monitor_t.  If the reference count hits 0, the object is released back
 * to the perf_small_zone via a call to perf_monitor_free().
 */
static void perf_monitor_deallocate(perf_monitor_t pm) {
	assert(pm);

	/* If we just removed the last reference count */
	if(1 == OSDecrementAtomic(&(pm->useCount))) {
		/* Free the object */
		perf_monitor_free(pm);
	}
}

/*
 * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the
 * given C++ object pointer that was used when registering with the subsystem.
 *
 * If found, the method returns the perf_monitor_t with an extra reference
 * placed on it; if not found, it returns NULL.
 *
 * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after
 * calling perf_monitor_find.
 */
static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) {
	assert(monitor);
	perf_monitor_t element = NULL;
	perf_monitor_t found = NULL;

	lck_spin_lock(&perf_monitor_queue_spin);
	
	queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) {
 		if(element->object == monitor) {
			perf_monitor_reference(element);
			found = element;
			break;
		}
	}

	lck_spin_unlock(&perf_monitor_queue_spin);

	return found;
}

/*
 * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is
 * associated with.
 */

static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
	assert(pm);
	assert(pmc);

	/* Today, we merely add a reference count now that a new pmc is attached */
	perf_monitor_reference(pm);
}

/*
 * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf
 * monitor it is associated with.
 */
static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) {
	assert(pm);
	assert(pmc);

	/* Today, we merely remove a reference count now that the pmc is detached */
	perf_monitor_deallocate(pm);
}

/*
 * Perf Counter internals
 */

static pmc_t pmc_alloc(void) {
	return (pmc_t)zalloc(perf_big_zone);
}

static void pmc_free(void *pmc) {
	zfree(perf_big_zone, pmc);
}

/*
 * pmc_init initializes a newly allocated pmc_t
 */
static void pmc_init(pmc_t pmc) {
	assert(pmc);

	pmc->object = NULL;
	pmc->monitor = NULL;

	bzero(&pmc->methods, sizeof(pmc_methods_t));

	/* One reference for the caller */
	pmc->useCount = 1;
}

/*
 * pmc_reference increments the reference count of the given pmc_t
 */
static void pmc_reference(pmc_t pmc) {
	assert(pmc);

	OSIncrementAtomic(&(pmc->useCount));
}

/*
 * pmc_deallocate decrements the reference count of the given pmc_t. If the
 * reference count hits zero, the given pmc_t is deallocated and released back
 * to the allocation zone.
 */
static void pmc_deallocate(pmc_t pmc) {
	assert(pmc);

	/* If we just removed the last reference count */
	if(1 == OSDecrementAtomic(&(pmc->useCount))) {
		/* Free the pmc */
		pmc_free(pmc);
	}
}

/*
 * pmc_dequeue removes the given, newly *un*registered pmc from the
 * perf_counters_queue.
 */
static void pmc_dequeue(pmc_t pmc) {
	lck_spin_lock(&perf_counters_queue_spin);

	queue_remove(perf_counters_queue, pmc, pmc_t, link);

	perf_counters_count--;

	lck_spin_unlock(&perf_counters_queue_spin);
}

/*
 * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue
 */
static void pmc_enqueue(pmc_t pmc) {
	lck_spin_lock(&perf_counters_queue_spin);

	queue_enter(perf_counters_queue, pmc, pmc_t, link);

	perf_counters_count++;

	lck_spin_unlock(&perf_counters_queue_spin);
}

/*
 * pmc_find attempts to locate a pmc_t that was registered with the given
 * pmc_object_t pointer.  If found, it returns the pmc_t with an extra reference
 * which must be dropped by the caller by calling pmc_deallocate().
 */
static pmc_t pmc_find(pmc_object_t object) {
	assert(object);

	lck_spin_lock(&perf_counters_queue_spin);
	
	pmc_t element = NULL;
	pmc_t found = NULL;

	queue_iterate(perf_counters_queue, element, pmc_t, link) {
		if(element->object == object) {
			pmc_reference(element);
			found = element;
			break;
		}
	}

	lck_spin_unlock(&perf_counters_queue_spin);

	return found;
}

/*
 * Config internals
 */

/* Allocate a pmc_config_t */
static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) {
	return (pmc_config_t)zalloc(perf_small_zone);
}

/* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */
static void pmc_config_free(pmc_t pmc, pmc_config_t config) {
	assert(pmc);
	assert(config);

	if(config->object) {
		pmc->methods.free_config(pmc->object, config->object);
		config->object = NULL;
	}

	zfree(perf_small_zone, config);
}

static kern_return_t pmc_open(pmc_t pmc) {
	assert(pmc);
	assert(pmc->object);
	assert(pmc->open_object);

	return pmc->methods.open(pmc->object, pmc->open_object);
}

static kern_return_t pmc_close(pmc_t pmc) {
	assert(pmc);
	assert(pmc->object);
	assert(pmc->open_object);

	return pmc->methods.close(pmc->object, pmc->open_object);
}

/*
 * Reservation Internals
 */

static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc);
static void pmc_internal_reservation_store(pmc_reservation_t reservation);
static void pmc_internal_reservation_load(pmc_reservation_t reservation);

static pmc_reservation_t reservation_alloc(void) {
	/* pmc reservations come from the perf small zone */
	return (pmc_reservation_t)zalloc(perf_small_zone);
}

/*
 * reservation_free deallocates and releases all resources associated with the
 * given pmc_reservation_t.  This includes freeing the config used to create the
 * reservation, decrementing the reference count for the pmc used to create the
 * reservation, and deallocating the reservation's memory.
 */
static void reservation_free(pmc_reservation_t resv) {
	/* Free config */
	if(resv->config) {
		assert(resv->pmc);

		pmc_free_config(resv->pmc, resv->config);

		resv->config = NULL;
	}

	/* release PMC */
	(void)pmc_internal_reservation_set_pmc(resv, NULL);

	/* Free reservation */
	zfree(perf_small_zone, resv);
}

/*
 * reservation_init initializes a newly created reservation.
 */
static void reservation_init(pmc_reservation_t resv) {
	assert(resv);

	resv->pmc = NULL;
	resv->config = NULL;
	resv->value = 0ULL;

	resv->flags = 0U;
	resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0);
	resv->active_last_context_in = 0U;

	/*
	 * Since this member is a union, we only need to set either the task 
	 * or thread to NULL.
	 */
	resv->task = TASK_NULL;
}

/*
 * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If
 * there was one set already, it is deallocated (reference is dropped) before
 * the new one is set.  This methods increases the reference count of the given
 * pmc_t.
 *
 * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of
 * dropping the reference on any previously set pmc, and setting the reservation
 * to having no pmc set.
 */
static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) {
	assert(resv);

	if(resv->pmc) {
		(void)pmc_close(resv->pmc);
		pmc_deallocate(resv->pmc);
		resv->pmc = NULL;
	}

	resv->pmc = pmc;

	if(resv->pmc) {
		pmc_reference(resv->pmc);
		if(KERN_SUCCESS != pmc_open(resv->pmc)) {
			pmc_deallocate(resv->pmc);
			resv->pmc = NULL;

			return KERN_FAILURE;
		}
	}

	return KERN_SUCCESS;
}

/* 
 * Used to place reservation into one of the system, task, and thread queues
 * Assumes the queue's spin lock is already held.
 */
static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) {
	assert(queue);
	assert(resv);

	queue_enter(queue, resv, pmc_reservation_t, link);
}

static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) {
	assert(queue);
	assert(resv);

	queue_remove(queue, resv, pmc_reservation_t, link);
}

/* Returns TRUE if the reservation applies to the current execution context */
static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) {
	boolean_t ret = FALSE;
	assert(resv);

	if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) {
		ret = TRUE;
	} else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) {
		if(current_task() == resv->task) {
			ret = TRUE;
		}
	} else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) {
		if(current_thread() == resv->thread) {
			ret = TRUE;
		}
	}

	return ret;
}

/*
 * pmc_accessible_core_count returns the number of logical cores that can access
 * a given @pmc.  0 means every core in the system.
 */
static uint32_t pmc_accessible_core_count(pmc_t pmc) {
	assert(pmc);

	uint32_t *cores = NULL;
	size_t coreCt = 0UL;

	if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object,
		&cores, &coreCt)) {
		coreCt = 0U;
	}

	return (uint32_t)coreCt;
}

/*
 * pmc_internal_reservation_queue_contains_pmc returns TRUE if the given queue
 * already holds a reservation that conflicts with @resv: the same pmc, with a
 * scope that overlaps the incoming reservation's task or thread.  The spin
 * lock for the queue must already be held.
 */
static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t resv) {
	assert(queue);
	assert(resv);

	boolean_t ret = FALSE;
	pmc_reservation_t tmp = NULL;

	queue_iterate(queue, tmp, pmc_reservation_t, link) {
		if(tmp->pmc == resv->pmc) {
			/* PMC matches - make sure scope matches first */
			switch(PMC_FLAG_SCOPE(tmp->flags)) {
				case PMC_FLAG_SCOPE_SYSTEM:
					/*
					 * Found a reservation in system queue with same pmc - always a
					 * conflict.
					 */
					ret = TRUE;
					break;
				case PMC_FLAG_SCOPE_THREAD:
					/*
					 * Found one in thread queue with the same PMC as the
					 * argument. Only a conflict if argument scope isn't
					 * thread or system, or the threads match.
					 */
					ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) || 
						(tmp->thread == resv->thread);

					if(!ret) {
						/*
						 * so far, no conflict - check that the pmc that is
						 * being reserved isn't accessible from more than
						 * one core, if it is, we need to say it's already
						 * taken.
						 */
						if(1 != pmc_accessible_core_count(tmp->pmc)) {
							ret = TRUE;
						}
					}
					break;
				case PMC_FLAG_SCOPE_TASK:
					/* 
					 * Follow similar semantics for task scope.
					 */

					ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) ||
						(tmp->task == resv->task);
					if(!ret) {
						/*
						 * so far, no conflict - check that the pmc that is
						 * being reserved isn't accessible from more than
						 * one core, if it is, we need to say it's already
						 * taken.
						 */
						if(1 != pmc_accessible_core_count(tmp->pmc)) {
							ret = TRUE;
						}
					}

					break;
			}

			if(ret) break;
		}
	}

	return ret;
}

/*
 * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be
 * added to its target queue without creating conflicts (the target queue is
 * determined by the reservation's scope flags).  Further, this method returns
 * FALSE if any level contains a reservation for a PMC that can be accessed from
 * more than one core and the given reservation wants the same PMC.
 */
static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) {
	assert(resv);
	boolean_t ret = TRUE;

	if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) ||
		pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) ||
		pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) {
		ret = FALSE;
	}

	return ret;
}

static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) {
	assert(thread);

	/* See if this thread needs its PMC flag set */
	pmc_reservation_t tmp = NULL;

	if(!newFlag) {
		/*
		 * If the parent task just dropped its reservation, iterate the thread
		 * reservations to see if we need to keep the pmc flag set for the given
		 * thread or not.
		 */
		lck_spin_lock(&reservations_spin);
	
		queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) {
			if(tmp->thread == thread) {
				newFlag = TRUE;
				break;
			}
		}

		lck_spin_unlock(&reservations_spin);
	}

	if(newFlag) {
		OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud);
	} else {
		OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud);
	}
}

/* 
 * This operation is (worst case) O(N*M) where N is number of threads in the
 * given task, and M is the number of thread reservations in our system.
 */
static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) {
	assert(task);
	thread_t thread = NULL;

	if(newFlag) {
		OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud);
	} else {
		OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud);
	}

	task_lock(task);

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/* propagate the task's mask down to each thread  */
		pmc_internal_update_thread_flag(thread, newFlag);
	}

	task_unlock(task);
}

/*
 * pmc_internal_reservation_add adds a reservation to the global tracking queues after
 * ensuring there are no reservation conflicts.  To do this, it takes the
 * reservations spin lock (which covers all three queues), so that no other
 * core can add a reservation for the same pmc to a queue that has already
 * been checked.
 */
static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
	assert(resv);

	boolean_t ret = FALSE;

	/* one lock (reservations_spin) protects all three queues */
	lck_spin_lock(&reservations_spin);

	/* Check if the reservation can be added without conflicts */
	if(pmc_internal_reservation_validate_for_pmc(resv)) {
	    
		/* add reservation to appropriate scope */
		switch(PMC_FLAG_SCOPE(resv->flags)) {
		case PMC_FLAG_SCOPE_SYSTEM:
			/* Simply add it to the system queue */
			pmc_internal_reservation_enqueue(system_reservations, resv);
			system_reservation_count++;
			
			lck_spin_unlock(&reservations_spin);

			break;

		case PMC_FLAG_SCOPE_TASK:
			assert(resv->task);

			/* Not only do we enqueue it in our local queue for tracking */
			pmc_internal_reservation_enqueue(task_reservations, resv);
			task_reservation_count++;

			lck_spin_unlock(&reservations_spin);

			/* update the task mask, and propagate it to existing threads */
			pmc_internal_update_task_flag(resv->task, TRUE);
			break;

		/* Thread-switched counter */
		case PMC_FLAG_SCOPE_THREAD:
			assert(resv->thread);

			/*
			 * Works the same as a task-switched counter, only at
			 * thread-scope
			 */

			pmc_internal_reservation_enqueue(thread_reservations, resv);
			thread_reservation_count++;

			lck_spin_unlock(&reservations_spin);
			
			pmc_internal_update_thread_flag(resv->thread, TRUE);
			break;
		}
		
		ret = TRUE;
	} else {
		lck_spin_unlock(&reservations_spin);
	}			
	
	return ret;
}

static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) {
	uint32_t * cores;
	size_t core_cnt;
	
	/* Get the list of accessible cores */
	if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) {
		boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE);

		/* Fast case: the PMC is only accessible from one core and we happen to be on it */
		if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) {
			action_func(reservation);
		} else {
			/* Call action_func on every accessible core */
#if defined(__i386__) || defined(__x86_64__)
			size_t ii;
			cpumask_t mask = 0;
			
			/* Build a mask for the accessible cores */
			if (core_cnt > 0) {
				for (ii = 0; ii < core_cnt; ii++) {
					mask |= cpu_to_cpumask(cores[ii]);
				}
			} else {
				/* core_cnt = 0 really means all cpus */
				mask = CPUMASK_ALL;
			}
			mp_cpus_call(mask, ASYNC, action_func, reservation);
#else
#error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture
#endif
		}

		ml_set_interrupts_enabled(intrs_enabled);
	}
	
}

/*
 * pmc_internal_reservation_remove removes the given reservation from the appropriate
 * reservation queue according to its scope. 
 *
 * NOTE: The scope flag must have been set for this method to function.
 */
static void pmc_internal_reservation_remove(pmc_reservation_t resv) {
	assert(resv);

	/*
	 * Due to the way the macros are written, we can't just blindly queue-remove
	 * the reservation without knowing which queue it's in. We figure this out
	 * using the reservation's scope flags.
	 */

	/* Lock the global spin lock */
	lck_spin_lock(&reservations_spin);

	switch(PMC_FLAG_SCOPE(resv->flags)) {

		case PMC_FLAG_SCOPE_SYSTEM:
			pmc_internal_reservation_dequeue(system_reservations, resv);
			system_reservation_count--;
			
			lck_spin_unlock(&reservations_spin);
			
			break;

		case PMC_FLAG_SCOPE_TASK:
			/* remove from the global queue */
			pmc_internal_reservation_dequeue(task_reservations, resv);
			task_reservation_count--;

			/* unlock the global */
			lck_spin_unlock(&reservations_spin);

			/* Recalculate task's counter mask */
			pmc_internal_update_task_flag(resv->task, FALSE);
			
			break;

		case PMC_FLAG_SCOPE_THREAD:
			pmc_internal_reservation_dequeue(thread_reservations, resv);
			thread_reservation_count--;

			lck_spin_unlock(&reservations_spin);

			/* recalculate the thread's counter mask */
			pmc_internal_update_thread_flag(resv->thread, FALSE);

			break;
	}
}

/* Reservation State Machine
 *
 * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a 
 * set of 9 events to provide MP-safe bookkeeping and control flow.  The 3-tuple is comprised 
 * of a state, a count of active contexts, and a set of modifier flags.  A state machine defines
 * the possible transitions at each event point given the current 3-tuple.  Atomicity is handled
 * by reading the current 3-tuple, applying the transformations indicated by the state machine
 * and then attempting to OSCompareAndSwap the transformed value.  If the OSCompareAndSwap fails,
 * the process is repeated until either the OSCompareAndSwap succeeds or no valid transitions are
 * available.
 *
 * The state machine is described using tuple notation for the current state and a related notation
 * for describing the transformations.  For conciseness, the flag and state names are abbreviated as
 * follows:
 * 
 * states:
 * S = STOP
 * CR = CAN_RUN
 * L = LOAD
 * R = RUN
 * ST = STORE
 * I = INTERRUPT
 * D = DEALLOC
 *
 * flags:
 *
 * S = STOPPING
 * D = DEALLOCING
 * I = INTERRUPTING
 *
 * The tuple notation is formed from the following pattern:
 *
 * tuple = < state, active-context-count, flags >
 * state = S | CR | L | R | ST | I | D
 * active-context-count = 0 | >0 | 1 | >1
 * flags = flags flag | blank
 * flag = S | D | I
 *
 * The transform notation is similar, but only describes the modifications made to the current state.
 * The notation is formed from the following pattern:
 * 
 * transform = < state, active-context-count, flags >
 * state = S | CR | L | R | ST | I | D
 * active-context-count = + | - | blank
 * flags = flags flag | flags !flag | blank
 * flag = S | D | I
 *
 * And now for the state machine:
 * State		Start		Stop		Free		Interrupt		End Interrupt		Context In		Context Out	Load Finished		Store Finished
 * <CR, 0, >				<S, , >		<D, , >			<L, +, >
 * <D, 0, >
 * <D, 1, D>									< , -, !D>
 * <D, >1, D>									< , -, >
 * <I, 0, D>									<D, , !D>
 * <I, 0, S>	< , , !S>				< , , !SD>		<S, , !S>
 * <I, 0, >					< , , S>	< , , D>	<CR, , >
 * <L, 1, D>									<ST, -, >
 * <L, 1, ID>									<ST, -, >
 * <L, 1, IS>							< , , !SD>	<ST, -, >
 * <L, 1, S>	< , , !S>				< , , !SD>		<ST, -, >
 * <L, 1, >					< , , S>	< , , D>	< , , IS>							< , +, >	<R, , >
 * <L, >1, D>									< , -, >		<R, -, >
 * <L, >1, ID>									< , -, >		<R, -, >
 * <L, >1, IS>							< , , !SD>	< , -, >		<R, -, >
 * <L, >1, S>	< , , !S>				< , , !SD>		< , -, >		<R, -, >
 * <L, >1, >				< , , S>	< , , D>	< , , IS>							< , +, >		< , -, >		<R, , >
 * <R, 1, D>									<ST, -, >
 * <R, 1, ID>									<ST, -, >
 * <R, 1, IS>							< , , !SD>	<ST, -, >
 * <R, 1, S>	< , , !S>				< , , !SD>		<ST, -, >
 * <R, 1, >					< , , S>	< , , D>	< , , IS>							< , +, >	<ST, -, >
 * <R, >1, D>									< , -, >
 * <R, >1, ID>									< , -, >
 * <R, >1, IS>							< , , !SD>	< , -, >
 * <R, >1, S>	< , , !S>				< , , !SD>		< , -, >
 * <R, >1, >				< , , S>	< , , D>	< , , IS>							< , +, >		< , -, >
 * <S, 0, >		<CR, , >				<D, , >
 * <S, 1, ID>									<I, -, !I>
 * <S, 1, IS>							< , , !SD>	<I, -, !I>
 * <S, 1, S>	< , , !S>				<D, , !SD>		< , -, !S>
 * <S, 1, >					< , , S>	<D, , D>	<L, +, >		<CR, -, >
 * <S, >1, ID>									< , -, >
 * <S, >1, IS>							< , , !SD>	< , -, >
 * <S, >1, S>	< , , !S>				<D, , !SD>		< , -, >
 * <S, >1, >				< , , S>	<D, , D>		<L, +, >		< , -, >
 * <ST, 0, D>									<D, , !D>
 * <ST, 0, ID>									<I, , !I>
 * <ST, 0, IS>							< , , !SD>	<I, , !I>
 * <ST, 0, S>	< , , !S>				< , , !SD>		<S, , !S>
 * <ST, 0, >				< , , S>	< , , D>	< , , IS>							< , +, >		<CR, , >
 * <ST, >0, D>									< , -, >							<D, , >
 * <ST, >0, ID>								< , -, >							<S, , >
 * <ST, >0, IS>							< , , !SD>										< , -, >			<S, , >
 * <ST, >0, S>	< , , !S>				< , , !SD>		< , -, >							<S, , >
 * <ST, >0, >				< , , S>	< , , D>	< , , IS>							< , +, >		< , -, >			<L, , >
 */
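
/*
 * Worked example of the notation above (illustrative only): the row <R, 1, S>
 * describes a reservation in RUN with one active context and the STOPPING flag
 * set; packed, that is PMC_STATE(PMC_STATE_STATE_RUN, 1, PMC_STATE_FLAGS_STOPPING).
 * Its Context Out transform, <ST, -, >, means: move to STORE, drop one active
 * context, leave the flags alone.  In the code below that is
 *
 *	new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
 *
 * Note that the switch statements mask off the context count
 * (current_state & ~PMC_STATE_CONTEXT_COUNT_MASK) before matching, which is
 * why each case tests PMC_STATE_CONTEXT_COUNT() explicitly when the count
 * matters.
 */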

static uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) {
	uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0);
	
	switch (event) {
		case PMC_STATE_EVENT_START:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
					new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING);
					break;
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
					}
					break;
			}
			break;
		case PMC_STATE_EVENT_STOP:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
					new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
					break;
				case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
					new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
					break;
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
						new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0);
					}
					break;
			}
			break;
		case PMC_STATE_EVENT_FREE:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
					new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
					break;
				case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
					new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
					break;
				case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
					new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
					break;
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
					new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING);
					break;
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0);
					} else {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
					}
					break;
			}
			break;
		case PMC_STATE_EVENT_INTERRUPT:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
					new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0);
					break;
			}
			break;
		case PMC_STATE_EVENT_END_OF_INTERRUPT:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING):
					new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
					break;
				case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING):
					new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
					break;
				case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0):
					new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
					break;
			}
			break;
		case PMC_STATE_EVENT_CONTEXT_IN:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0):
					new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
					break;
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
					new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0);
					break;
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0);
					}
					break;
			}
			break;
		case PMC_STATE_EVENT_CONTEXT_OUT:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING):
					if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
						new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING);
					} else {
						new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
					}					
					break;
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
						new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
					} else {
						new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
					if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, PMC_STATE_FLAGS_INTERRUPTING);
					} else {
						new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING):
					if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
						new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING);
					} else {
						new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
						if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) {
							new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0);
						} else {
							new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
						}
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) {
						new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0);
					}
					break;
			}
			break;
		case PMC_STATE_EVENT_LOAD_FINISHED:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING):
					if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0);
					} else {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0);
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0):
					new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, 0, 0, 0);
					break;
			}
			break;
		case PMC_STATE_EVENT_STORE_FINISHED:
			switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) {
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING):
					if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING);
					} else {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0);
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING):
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING):
					if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING);
					} else {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING):
					if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING);
					} else {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0);
					}
					break;
				case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0):
					if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0);
					} else {
						new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0);
					}
					break;
			}
			break;
	}
	
	return new_state;
}

static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) {
	pmc_state_t oldState;
	pmc_state_t newState;

	assert(reservation);
	
	/* Determine what state change, if any, we need to do.  Keep trying until either we succeed in
	 * doing a transition or there is no valid move.
	 */
	do {
		oldState = reservation->state;
		newState = pmc_internal_reservation_next_state(oldState, event);
	} while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state)));
	
	if (newState != PMC_STATE_INVALID) {
		COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event));
	} else {
		COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event));
	}
	
	if (old_state_out != NULL) {
		*old_state_out = oldState;
	}
	
	return newState;
}
					
static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) {
	assert(reservation);
	pmc_state_t newState;
	pmc_state_t oldState;

	/* Clear that this reservation was active when this cpu did its last context in */
	OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in));
	
	/* Move the state machine */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) {
		return;
	}
	
	/* Do any actions required based on the state change */
	if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) {
		/* Just moved into STORE, so store the reservation. */
		pmc_internal_reservation_store(reservation);
	} else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
		/* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
		thread_wakeup((event_t)reservation);
	}
	
}

static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) {
	assert(reservation);
	pmc_state_t oldState;
	pmc_state_t newState;
	
	/* Move the state machine */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) {
		return;
	}

	/* Mark that the reservation was active when this cpu did its last context in */
	OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
		
	/* Do any actions required based on the state change */
	if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) {
		/* Just moved into LOAD, so load the reservation. */
		pmc_internal_reservation_load(reservation);
	}
	
}

static void pmc_internal_reservation_store(pmc_reservation_t reservation) {
	assert(reservation);
	assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE);
	
	assert(reservation->pmc);
	assert(reservation->config);

	pmc_state_t newState;
	kern_return_t ret = KERN_SUCCESS;
	
	pmc_t store_pmc = reservation->pmc;
	pmc_object_t store_pmc_obj = store_pmc->object;
	perf_monitor_t store_pm = store_pmc->monitor;

	/* 
	 * Instruct the Perf Monitor that contains this counter to turn 
	 * off the global disable for this counter.
	 */
	ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1);
	if(KERN_SUCCESS != ret) {
		COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret);
		return;
	}

	/* Instruct the counter to disable itself */
	ret = store_pmc->methods.disable(store_pmc_obj);
	if(KERN_SUCCESS != ret) {
		COUNTER_DEBUG("  [error] disable: 0x%x\n", ret);
	}

	/* store the counter value into the reservation's stored count */
	ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value);
	if(KERN_SUCCESS != ret) {
		COUNTER_DEBUG("  [error] get_count: 0x%x\n", ret);
		return;
	}
		
	/* Advance the state machine now that the STORE is finished */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) {
		return;
	}

	/* Do any actions required based on the state change */
	if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) {
		/* Just moved into LOAD, so load the reservation. */
		pmc_internal_reservation_load(reservation);
	} else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
		/* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
		thread_wakeup((event_t)reservation);
	}
	
}

static void pmc_internal_reservation_load(pmc_reservation_t reservation) {
	assert(reservation);
	assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD);

	pmc_state_t newState;
	kern_return_t ret = KERN_SUCCESS;

	assert(reservation->pmc);
	assert(reservation->config);
	
	pmc_t load_pmc = reservation->pmc;
	pmc_object_t load_pmc_obj = load_pmc->object;
	perf_monitor_t load_pm = load_pmc->monitor;

	/* Set the control register up with the stored configuration */
	ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object);
	if(KERN_SUCCESS != ret) {
		COUNTER_DEBUG("  [error] set_config: 0x%x\n", ret);
		return;
	}

	/* load the counter value */
	ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value);
	if(KERN_SUCCESS != ret) {
		COUNTER_DEBUG("  [error] set_count: 0x%x\n", ret);
		return;
	}

	/* Locally enable the counter */
	ret = load_pmc->methods.enable(load_pmc_obj);
	if(KERN_SUCCESS != ret) {
		COUNTER_DEBUG("  [error] enable: 0x%x\n", ret);
		return;
	}

	/*
	 * Instruct the Perf Monitor containing the pmc to enable the
	 * counter.
	 */
	ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1);
	if(KERN_SUCCESS != ret) {
		COUNTER_DEBUG("  [error] enable_counters: 0x%x\n", ret);
		/* not on the hardware. */
		return;
	}
	
	/* Advance the state machine now that the LOAD is finished */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) {
		return;
	}

	/* Do any actions required based on the state change */
	if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) {
		/* Just moved into STORE, so store the reservation. */
		pmc_internal_reservation_store(reservation);
	}
	
}

/*
 * pmc_accessible_from_core will return TRUE if the given @pmc is directly
 * (e.g., hardware) readable from the given logical core.
 *
 * NOTE: This method is interrupt safe.
 */
static inline boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) {
	boolean_t ret = FALSE;

	assert(pmc);

	ret = pmc->methods.accessible_from_core(pmc->object, logicalCore);

	return ret;
}

static void pmc_internal_reservation_start_cpu(void * arg) {
	pmc_reservation_t reservation = (pmc_reservation_t)arg;
	
	assert(reservation);
	

	if (pmc_internal_reservation_matches_context(reservation)) {
		/* We are in context, but the reservation may have already had the context_in method run.  Attempt
		 * to set this cpu's bit in the active_last_context_in mask.  If we set it, call context_in.
		 */
		uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in));
		
		if ((oldMask & (1U << cpu_number())) == 0) {
			COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number());
			
			pmc_internal_reservation_context_in(reservation);
		}
	}
}

static void pmc_internal_reservation_stop_cpu(void * arg) {
	pmc_reservation_t reservation = (pmc_reservation_t)arg;
	
	assert(reservation);
	
	
	if (pmc_internal_reservation_matches_context(reservation)) {
		COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number());

		pmc_internal_reservation_context_out(reservation);
	}
}	

/*!fn
 * pmc_reservation_interrupt is called when a PMC reservation which was setup
 * with an interrupt threshold counts the requested number of events. When the
 * underlying counter hits the threshold, an interrupt is generated, and this
 * method is called. This method marks the reservation as stopped, and passes
 * control off to the user-registered callback method, along with the
 * reservation (so that the user can, for example, write a 0 to the counter, and
 * restart the reservation).
 * This method assumes the reservation has a valid pmc_config_t within.
 *
 * @param target The pmc_reservation_t that caused the interrupt.
 * @param refCon User specified reference constant.
 */
static void pmc_reservation_interrupt(void *target, void *refCon) {
	pmc_reservation_t reservation = (pmc_reservation_t)target;
	pmc_state_t newState;
	uint64_t timeout;
	uint32_t spins;

	assert(reservation);

	/* Move the state machine */
	if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) {
		return;
	}

	/* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
	 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
	 * on every cpu that can access the PMC.
	 */
	pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
			
	/* Spin waiting for the state to turn to INTERRUPT */
	nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
	timeout += mach_absolute_time();
	spins = 0;
	while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) {
		/* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
		if (++spins > PMC_SPIN_THRESHOLD) {
			if (mach_absolute_time() > timeout) {
				pmc_spin_timeout_count++;
				assert(0);
			}
		}

		cpu_pause();
	}
			
	assert(reservation->config);
	assert(reservation->config->method);			
		
	/* Call the registered callback handler */
#ifdef DEBUG_COUNTERS
	uint64_t start = mach_absolute_time();
#endif /* DEBUG_COUNTERS */
	
	(void)reservation->config->method(reservation, refCon);
	
#ifdef DEBUG_COUNTERS
	uint64_t end = mach_absolute_time();
	if((end - start) > 5000ULL) {
		kprintf("%s - user method %p took %llu ns\n", __FUNCTION__, 
				reservation->config->method, (end - start));
	}
#endif
	
	/* Move the state machine */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) {
		return;
	}
	
	/* Do any post-move actions necessary */
	if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) {
		pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
	} else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) {
		/* Wakeup any thread blocking for this reservation to hit <DEALLOC, 0, > */
		thread_wakeup((event_t)reservation);
	}
}	
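
/*
 * Illustrative sketch of a client PMI handler (hypothetical code, not part of
 * this file).  The handler signature mirrors how config->method is invoked
 * above; pmc_reservation_write() and pmc_reservation_start() are assumed to
 * be the public reservation KPI (see pmc.h for the authoritative
 * declarations):
 *
 *	static kern_return_t my_pmi_handler(void *target, void *refCon)
 *	{
 *		pmc_reservation_t resv = (pmc_reservation_t)target;
 *		(void)refCon;
 *
 *		// Re-arm: zero the count so the counter runs back up to the
 *		// interrupt threshold, then restart the stopped reservation.
 *		(void)pmc_reservation_write(resv, 0ULL);
 *		(void)pmc_reservation_start(resv);
 *
 *		return KERN_SUCCESS;
 *	}
 *
 * This follows the contract documented above pmc_reservation_interrupt: the
 * reservation arrives stopped, and the handler may reset the count and
 * restart it.
 */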

/*
 * Apple-private KPI for Apple kext's (IOProfileFamily) only
 */

#if 0
#pragma mark -
#pragma mark IOProfileFamily private KPI
#endif

/*
 * perf_monitor_register registers a new Performance Monitor, and its associated
 * callback methods.  The given perf_monitor_object_t is the first argument to
 * each callback when they are called.
 */
kern_return_t perf_monitor_register(perf_monitor_object_t monitor,
	perf_monitor_methods_t *methods) {
	int cpu = -1;

	COUNTER_DEBUG("registering perf monitor %p\n", monitor);

	if(!monitor || !methods) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Protect against out-of-date driver kexts */
	if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) {
		return KERN_INVALID_ARGUMENT;
	}

	/* All methods are required - validate them before any are invoked below */
	if(!methods->accessible_cores ||
	   !methods->enable_counters || !methods->disable_counters ||
	   !methods->on_idle || !methods->on_idle_exit) {
		return KERN_INVALID_ARGUMENT;
	}

	/* If the monitor requires idle notifications, ensure that it is
	 * accessible from a single core only.
	 */
	if (methods->flags & PERFMON_FLAG_REQUIRES_IDLE_NOTIFICATIONS) {
		uint32_t *cores;
		size_t core_cnt;

		if (KERN_SUCCESS == methods->accessible_cores(monitor, &cores, &core_cnt)) {
			/*
			 * Guard against disabled cores - monitors will always match and
			 * attempt registration, irrespective of 'cpus=x' boot-arg.
			 */
			if ((core_cnt == 1) && (cores[0] < (uint32_t)ml_get_max_cpus())) {
				cpu = cores[0];
			} else {
				return KERN_INVALID_ARGUMENT;
			}
		}
	}

	/* prevent dupes. */
	perf_monitor_t dupe = perf_monitor_find(monitor);
	if(dupe) {
		COUNTER_DEBUG("Duplicate registration for %p\n", monitor);
		perf_monitor_deallocate(dupe);
		return KERN_FAILURE;
	}

	perf_monitor_t pm = perf_monitor_alloc();
	if(!pm) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/* initialize the object */
	perf_monitor_init(pm, cpu);

	/* copy in the registration info */
	pm->object = monitor;
	memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t));

	/* place it in the tracking queues */
	perf_monitor_enqueue(pm);

	/* debug it */
	PRINT_PERF_MON(pm);

	return KERN_SUCCESS;
}
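
/*
 * Illustrative sketch (not part of this file): a driver kext registering a
 * monitor.  The version field and callbacks follow the checks above;
 * my_monitor and the my_* callbacks are hypothetical.
 *
 *	perf_monitor_methods_t methods = {
 *		.perf_monitor_methods_version = MACH_PERFMON_METHODS_VERSION,
 *		.flags = 0,
 *		.accessible_cores = my_accessible_cores,
 *		.enable_counters = my_enable,
 *		.disable_counters = my_disable,
 *		.on_idle = my_on_idle,
 *		.on_idle_exit = my_on_idle_exit,
 *	};
 *	kern_return_t kr = perf_monitor_register(my_monitor, &methods);
 *	// ... on teardown:
 *	kr = perf_monitor_unregister(my_monitor);
 */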

/*
 * perf_monitor_unregister unregisters a previously registered Perf Monitor,
 * looking it up by reference pointer (the same that was used in
 * perf_monitor_register()).
 */
kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) {
	kern_return_t ret = KERN_FAILURE;

	COUNTER_DEBUG("unregistering perf monitor %p\n", monitor);

	if(!monitor) {
		return KERN_INVALID_ARGUMENT;
	}

	perf_monitor_t pm = perf_monitor_find(monitor);
	if(pm) {
		/* Remove it from the queues. */
		perf_monitor_dequeue(pm);

		/* drop extra retain from find */
		perf_monitor_deallocate(pm);

		/* and release the object */
		perf_monitor_deallocate(pm);

		ret = KERN_SUCCESS;
	} else {
		COUNTER_DEBUG("could not find a registered pm that matches!\n");
	}

	return ret;
}

/*
 * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is
 * associated with a Perf Monitor.  Perf Monitors are looked up by the reference
 * pointer that was used to previously register them. 
 *
 * PMCs are registered with a reference pointer (@pmc_object), and a set of
 * callback methods.  When the given callback methods are called from xnu, the
 * first argument will always be the reference pointer used to register the PMC.
 *
 * NOTE: @monitor must have been successfully registered via
 * perf_monitor_register before this method will succeed.
 */
kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object,
	pmc_methods_t *methods, void *object) {

	COUNTER_DEBUG("%p %p\n", monitor, pmc_object);

	if(!monitor || !pmc_object || !methods || !object) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Prevent version mismatches */
	if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) {
		COUNTER_DEBUG("version mismatch\n");
		return KERN_INVALID_ARGUMENT;
	}

	/* All methods are required. */
	if(!methods->create_config || 
		!methods->free_config ||
		!methods->config_set_value || 
		!methods->config_set_threshold || 
		!methods->config_set_handler ||
		!methods->set_config || 
		!methods->get_monitor || 
		!methods->get_name ||
		!methods->accessible_from_core || 
		!methods->accessible_cores ||
		!methods->get_count || 
		!methods->set_count ||
		!methods->disable ||
		!methods->enable ||
		!methods->open || 
		!methods->close) {
		return KERN_INVALID_ARGUMENT;
	}

	/* make sure this perf monitor object is already registered */
	/*
	 * NOTE: this adds a reference to the parent, so we'll have to drop it in
	 * any failure code paths from here on out.
	 */
	perf_monitor_t pm = perf_monitor_find(monitor);
	if(!pm) {
		COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor);
		return KERN_INVALID_ARGUMENT;
	}

	/* make a new pmc */
	pmc_t pmc = pmc_alloc();
	if(!pmc) {
		/* drop the extra reference from perf_monitor_find() */
		perf_monitor_deallocate(pm);
		return KERN_RESOURCE_SHORTAGE;
	}

	/* init it */
	pmc_init(pmc);

	pmc->object = pmc_object;
	pmc->open_object = object;

	/* copy the callbacks in */
	memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t));

	pmc->monitor = pm;

	perf_monitor_add_pmc(pmc->monitor, pmc);

	/* enqueue it in our tracking queue */
	pmc_enqueue(pmc);

	/* drop extra reference from perf_monitor_find() */
	perf_monitor_deallocate(pm);

	return KERN_SUCCESS;
}
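
/*
 * Illustrative sketch (not part of this file): after a successful
 * perf_monitor_register(), a driver registers each counter it exposes.  All
 * of the pmc_methods_t callbacks checked above must be non-NULL; my_monitor,
 * my_pmc and my_pmc_methods are hypothetical.
 *
 *	pmc_methods_t my_pmc_methods = {
 *		.pmc_methods_version = MACH_PMC_METHODS_VERSION,
 *		// ... all sixteen callbacks checked above ...
 *	};
 *	kern_return_t kr = pmc_register(my_monitor, my_pmc, &my_pmc_methods, my_pmc);
 */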

/*
 * pmc_unregister unregisters a previously registered PMC, looking it up by
 * reference pointer to *both* the Perf Monitor it was created with, and the
 * PMC's own reference pointer.
 */
kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) {
	COUNTER_DEBUG("%p %p\n", monitor, pmc_object);

	if(!monitor || !pmc_object) {
		return KERN_INVALID_ARGUMENT;
	}

	pmc_t pmc = pmc_find(pmc_object);
	if(!pmc) {
		COUNTER_DEBUG("Could not find a matching pmc.\n");
		return KERN_FAILURE;
	}

	/* remove it from the global queue */
	pmc_dequeue(pmc);

	perf_monitor_remove_pmc(pmc->monitor, pmc);

	/* remove extra reference count from pmc_find() */
	pmc_deallocate(pmc);

	/* dealloc the pmc */
	pmc_deallocate(pmc);

	return KERN_SUCCESS;
}

static void perf_monitor_reservation_add(perf_monitor_t monitor) {
    assert(monitor);
    OSIncrementAtomic(&(monitor->reservedCounters));
}

static void perf_monitor_reservation_remove(perf_monitor_t monitor) {
    assert(monitor);
    OSDecrementAtomic(&(monitor->reservedCounters));    
}

#if 0
#pragma mark -
#pragma mark KPI
#endif

/*
 * Begin in-kernel and in-kext KPI methods
 */

/*
 * pmc_create_config creates a new configuration area from a given @pmc.
 *
 * NOTE: This method is not interrupt safe.
 */
kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) {
	pmc_config_t tmp = NULL;

	if(!pmc || !config) {
		return KERN_INVALID_ARGUMENT;
	}

	pmc_reference(pmc);

	tmp = pmc_config_alloc(pmc);
	if(tmp) {
		tmp->object = pmc->methods.create_config(pmc->object);

		if(!tmp->object) {
			pmc_config_free(pmc, tmp);
			tmp = NULL;
		} else {
			tmp->interrupt_after_value = 0ULL;
			tmp->method = NULL;
			tmp->refCon = NULL;
		}
	}

	pmc_deallocate(pmc);

	if(!tmp) {
		return KERN_RESOURCE_SHORTAGE;
	}

	*config = tmp;

	return KERN_SUCCESS;
}

/*
 * pmc_free_config frees a configuration area created from a given @pmc
 *
 * NOTE: This method is not interrupt safe.
 */
void pmc_free_config(pmc_t pmc, pmc_config_t config) {
	assert(pmc);
	assert(config);

	pmc_reference(pmc);

	pmc_config_free(pmc, config);

	pmc_deallocate(pmc);
}

/*
 * pmc_config_set_value sets up configuration area key-value pairs.  These pairs
 * are to be either pre-known, or looked up via CoreProfile.framework.
 *
 * NOTE: This method is not interrupt safe.
 */
kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config,
	uint8_t id, uint64_t value) {

	kern_return_t ret = KERN_INVALID_ARGUMENT;
	
	if(!pmc || !config) {
		return ret;
	}

	pmc_reference(pmc);

	ret = pmc->methods.config_set_value(config->object, id, value);

	pmc_deallocate(pmc);

	return ret;
}

/*
 * pmc_config_set_interrupt_threshold modifies a config object, instructing
 * the pmc that it should generate a call to the given pmc_interrupt_method_t
 * after the counter counts @threshold events.
 *
 * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt
 * as the first argument when the interrupt handler is invoked, and the given
 * @refCon (which may be NULL) as the second.
 *
 * See pmc_interrupt_method_t.
 *
 * NOTE: This method is not interrupt safe.
 */
kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, 
	uint64_t threshold, pmc_interrupt_method_t method, void *refCon) {
	kern_return_t ret = KERN_INVALID_ARGUMENT;

	if(!config || !pmc) {
		return ret;
	}
	
	pmc_reference(pmc);

	do {
		/*
		 * We have a minor annoyance to side-step here. The driver layer expects
		 * the config to never change once a reservation has been taken out with
		 * it.  However, in order to have the PMI method have the reservation as
		 * the first argument (in order to allow the user-method to, for
		 * example, write a 0 to it, and restart it), we need to create the
		 * pmc_reservation_t before setting it up in the config object.
		 * We overcome this by caching the method in the pmc_config_t stand-in,
		 * and mutating the pmc_config_object_t just before returning a
		 * reservation (in pmc_reserve() and friends, below).
		 */

		/* might as well stash this away too. */
		config->interrupt_after_value = threshold;
		config->method = method;
		config->refCon = refCon;

		ret = KERN_SUCCESS;

	}while(0);

	pmc_deallocate(pmc);

	return ret;
}
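
/*
 * Illustrative sketch (not part of this file): a typical config lifecycle
 * using the calls above.  kMyEventSelectId, myEventCode and my_pmi_handler
 * are hypothetical placeholders; real key ids come from the driver or
 * CoreProfile.framework.
 *
 *	pmc_config_t config = NULL;
 *	if (KERN_SUCCESS == pmc_create_config(pmc, &config)) {
 *		(void)pmc_config_set_value(pmc, config, kMyEventSelectId, myEventCode);
 *		(void)pmc_config_set_interrupt_threshold(pmc, config, 100000ULL,
 *			my_pmi_handler, NULL);
 *		// A successful pmc_reserve*() call consumes config; on any failure
 *		// the caller must pmc_free_config(pmc, config).
 *	}
 */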

/*
 * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number
 * of pmc_t's returned. Callers should free this list with a call to
 * pmc_free_pmc_list().
 *
 * NOTE: This method is not interrupt safe.
 */
kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) {
	pmc_t *array = NULL;
	pmc_t pmc = NULL;
	size_t count = 0UL;
	
	do {
		/* Copy down (to the stack) the count of perf counters */
		vm_size_t size = perf_counters_count;

		/* Allocate that sized chunk */
		array = (pmc_t *)kalloc(sizeof(pmc_t) * size);
		if(!array) {
			return KERN_RESOURCE_SHORTAGE;
		}

		/* Take the spin lock */
		lck_spin_lock(&perf_counters_queue_spin);

		/* verify the size didn't change while we were allocating */
		if(size != perf_counters_count) {
			/*
			 * queue size has changed between alloc and now - go back and
			 * make another pass.
			 */

			/* drop the lock */
			lck_spin_unlock(&perf_counters_queue_spin);

			/* free the block */
			kfree(array, sizeof(pmc_t) * size);
			array = NULL;
		}

		/* if we get here, and array is NULL, we try again. */
	}while(!array);

	/* copy the bits out */
	queue_iterate(perf_counters_queue, pmc, pmc_t, link) {
		/* copy out the pointer */
		array[count++] = pmc;
	}

	lck_spin_unlock(&perf_counters_queue_spin);

	/* return the list and the size */
	*pmcs = array;
	*pmcCount = count;

	return KERN_SUCCESS;
}
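
/*
 * Illustrative sketch (not part of this file): enumerating the registered
 * PMCs via the call above.
 *
 *	pmc_t *pmcs = NULL;
 *	size_t count = 0UL;
 *	if (KERN_SUCCESS == pmc_get_pmc_list(&pmcs, &count)) {
 *		size_t ii;
 *		for (ii = 0UL; ii < count; ii++) {
 *			kprintf("pmc: %s\n", pmc_get_name(pmcs[ii]));
 *		}
 *		pmc_free_pmc_list(pmcs, count);
 *	}
 */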

/*
 * pmc_free_pmc_list frees an array of pmc_t that has been returned from
 * pmc_get_pmc_list.
 * 
 * NOTE: This method is not interrupt safe.
 */
void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) {
	if(pmcs && pmcCount) {
		COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount);

		kfree(pmcs, pmcCount * sizeof(pmc_t));
	}
}

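/*
 * pmc_find_by_name returns an allocated list of all pmc_t's whose names have
 * the given @name as a prefix, along with the number of matches.  Callers
 * should free the returned list with pmc_free_pmc_list().  If no pmc matches,
 * KERN_SUCCESS is returned with a NULL list and a count of 0.
 *
 * NOTE: This method is not interrupt safe.
 */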
kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) {
	kern_return_t ret = KERN_INVALID_ARGUMENT;

	if(!name || !pmcs || !pmcCount) {
		return ret;
	}

	pmc_t *list = NULL;
	size_t count = 0UL;

	if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) {
		size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL;
		size_t len = strlen(name);

		for(ii = 0UL; ii < count; ii++) {
			const char *pmcName = pmc_get_name(list[ii]);

			if(strlen(pmcName) < len) {
				/*
				 * If the pmc name is shorter than the requested match, it's no 
				 * match, as we're looking for the most specific match(es).
				 */
				continue;
			}

			if(0 == strncmp(name, pmcName, len)) {
				pmc_t temp = list[ii];
				
				// move matches to the head of the array.
				list[ii] = list[swapPtr];
				list[swapPtr] = temp;
				swapPtr++;

				// keep a count of the matches
				matchCount++;
			}
		}

		if(matchCount) {
			/*
			 * If we have matches, they are all at the head of the array, so
			 * just allocate enough space for @matchCount pmc_t's, and copy the
			 * head of the array to the new allocation.  Then free the old
			 * allocation.
			 */

			pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount);
			if(result) {
				// copy the matches
				memcpy(result, list, sizeof(pmc_t) * matchCount);

				ret = KERN_SUCCESS;
			}

			pmc_free_pmc_list(list, count);

			if(!result) {
				*pmcs = NULL;
				*pmcCount = 0UL;
				return KERN_RESOURCE_SHORTAGE;
			}

			*pmcs = result;
			*pmcCount = matchCount;
		} else {
			/* No matches - free the interim list before returning */
			pmc_free_pmc_list(list, count);

			*pmcs = NULL;
			*pmcCount = 0UL;
		}
	}

	return ret;
}

/*
 * pmc_get_name returns a pointer (not copied) to the human-readable name of the
 * given pmc.
 *
 * NOTE: Driver authors must take care to not allocate during this method, as
 * this method *IS* interrupt safe.
 */
const char *pmc_get_name(pmc_t pmc) {
	assert(pmc);

	const char *name = pmc->methods.get_name(pmc->object);

	return name;
}

/*
 * pmc_get_accessible_core_list returns a pointer to an array of logical core
 * numbers (as well as the size of that array) that represent the local cores
 * (hardware threads) from which the given @pmc can be accessed directly.
 *
 * NOTE: This method is interrupt safe.
 */
kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores,
	size_t *logicalCoreCt) {

	kern_return_t ret = KERN_INVALID_ARGUMENT;

	if(!pmc || !logicalCores || !logicalCoreCt) {
		return ret;
	}

	ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt);

	return ret;
}

static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) {
	assert(resv);
	assert(resv->pmc);
	assert(config);
	assert(config->object);

	/* Set up the PMI if one was configured; otherwise there is nothing to do */
	if(config->interrupt_after_value && config->method) {

		/* set the threshold */
		kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object,
			config->interrupt_after_value);

		if(KERN_SUCCESS != ret) {
			/*
			 * This is the most useful error message here, as this only happens
			 * as a result of pmc_reserve*()
			 */
			COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc);
			return FALSE;
		}

		if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object, 
			(void *)resv, &pmc_reservation_interrupt, config->refCon)) {

			COUNTER_DEBUG("Failed to set handler for pmc %p\n", resv->pmc);
			return FALSE;
		}
	}

	return TRUE;
}

/*
 * pmc_reserve will attempt to reserve the given @pmc, with a given
 * configuration object, for counting system-wide. This method will fail with
 * KERN_FAILURE if the given pmc is already reserved at any scope.
 *
 * This method consumes the given configuration object if it returns
 * KERN_SUCCESS. Any other return value indicates the caller
 * must free the config object via pmc_free_config().
 *
 * NOTE: This method is NOT interrupt safe.
 */
kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config,
	pmc_reservation_t *reservation) {

	if(!pmc || !config || !reservation) {
		return KERN_INVALID_ARGUMENT;
	}

	pmc_reservation_t resv = reservation_alloc();
	if(!resv) {
		return KERN_RESOURCE_SHORTAGE;
	}

	reservation_init(resv);

	resv->flags |= PMC_FLAG_SCOPE_SYSTEM;
	resv->config = config;

	if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
		/* Prevent free of the caller's config object, then release the reservation */
		resv->config = NULL;
		reservation_free(resv);
		return KERN_FAILURE;
	}
	
	/* enqueue reservation in proper place */
	if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
		/* Prevent free of config object */
		resv->config = NULL;
		
		reservation_free(resv);
		return KERN_FAILURE;
	}

	perf_monitor_reservation_add(pmc->monitor);
	
	*reservation = resv;

	return KERN_SUCCESS;
}
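
/*
 * Illustrative sketch (not part of this file): a full system-wide reservation
 * lifecycle built on the KPI above; error handling is elided.
 *
 *	pmc_reservation_t resv = NULL;
 *	uint64_t count = 0ULL;
 *	if (KERN_SUCCESS == pmc_reserve(pmc, config, &resv)) {	// consumes config
 *		(void)pmc_reservation_start(resv);
 *		// ... workload of interest runs ...
 *		(void)pmc_reservation_stop(resv);
 *		(void)pmc_reservation_read(resv, &count);
 *		(void)pmc_reservation_free(resv);
 *	}
 */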

/*
 * pmc_reserve_task will attempt to reserve the given @pmc with a given
 * configuration object, for counting when the given @task is running on any
 * logical core that can directly access the given @pmc.  This method will fail
 * with KERN_FAILURE if the given pmc is already reserved at either system or
 * thread scope.  
 *
 * This method consumes the given configuration object if it returns
 * KERN_SUCCESS. Any other return value indicates the caller
 * must free the config object via pmc_free_config().
 *
 * NOTE: You can reserve the same pmc for N different tasks concurrently.
 * NOTE: This method is NOT interrupt safe.
 */
kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, 
	task_t task, pmc_reservation_t *reservation) {

	if(!pmc || !config || !reservation || !task) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) {
		COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
		return KERN_INVALID_ARGUMENT;
	}

	pmc_reservation_t resv = reservation_alloc();
	if(!resv) {
		return KERN_RESOURCE_SHORTAGE;
	}

	reservation_init(resv);

	resv->flags |= PMC_FLAG_SCOPE_TASK;
	resv->task = task;

	resv->config = config;

	if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
		/* Prevent free of the caller's config object, then release the reservation */
		resv->config = NULL;
		reservation_free(resv);
		return KERN_FAILURE;
	}
	
	/* enqueue reservation in proper place */
	if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
		/* Prevent free of config object */
		resv->config = NULL;

		reservation_free(resv);
		return KERN_FAILURE;
	}

	perf_monitor_reservation_add(pmc->monitor);

	*reservation = resv;

	return KERN_SUCCESS;
}

/*
 * pmc_reserve_thread will attempt to reserve the given @pmc with a given
 * configuration object, for counting when the given @thread is running on any
 * logical core that can directly access the given @pmc.  This method will fail
 * with KERN_FAILURE if the given pmc is already reserved at either system or
 * task scope.  
 *
 * This method consumes the given configuration object if it returns
 * KERN_SUCCESS. Any other return value indicates the caller
 * must free the config object via pmc_free_config().
 *
 * NOTE: You can reserve the same pmc for N different threads concurrently.
 * NOTE: This method is NOT interrupt safe.
 */
kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, 
	thread_t thread, pmc_reservation_t *reservation) {
	if(!pmc || !config || !reservation || !thread) {
		return KERN_INVALID_ARGUMENT;
	}

	if (!(pmc->monitor->methods.flags & PERFMON_FLAG_SUPPORTS_CONTEXT_SWITCHING)) {
		COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc);
		return KERN_INVALID_ARGUMENT;
	}

	pmc_reservation_t resv = reservation_alloc();
	if(!resv) {
		return KERN_RESOURCE_SHORTAGE;
	}

	reservation_init(resv);

	resv->flags |= PMC_FLAG_SCOPE_THREAD;
	resv->thread = thread;

	resv->config = config;

	if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) {
		/* Prevent free of the caller's config object, then release the reservation */
		resv->config = NULL;
		reservation_free(resv);
		return KERN_FAILURE;
	}
	
	/* enqueue reservation in proper place */
	if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) {
		/* Prevent free of config object */
		resv->config = NULL;

		reservation_free(resv);
		return KERN_FAILURE;
	}

	perf_monitor_reservation_add(pmc->monitor);

	*reservation = resv;

	return KERN_SUCCESS;
}
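
/*
 * Illustrative sketch (not part of this file): a thread-scoped reservation
 * counts only while the given thread runs on a core that can access the pmc;
 * pmc_context_switch() below keeps the hardware and stored counts in sync.
 *
 *	pmc_reservation_t resv = NULL;
 *	if (KERN_SUCCESS == pmc_reserve_thread(pmc, config, current_thread(), &resv)) {
 *		(void)pmc_reservation_start(resv);
 *	}
 */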

/*
 * pmc_reservation_start instructs the given reservation to start counting as
 * soon as possible. 
 *
 * NOTE: This method is interrupt safe.
 */
kern_return_t pmc_reservation_start(pmc_reservation_t reservation) {
	pmc_state_t newState;

	if(!reservation) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Move the state machine */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) {
		return KERN_FAILURE;
	}
	
	/* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
	 * broadcast right before it leaves.
	 */
	if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) {	
		/* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
		 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu
		 * on every cpu that can access the PMC.
		 */
		pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu);
	}
	
	return KERN_SUCCESS;			 
}

/*
 * pmc_reservation_stop instructs the given reservation to stop counting as
 * soon as possible.  When this method returns, the pmc will be marked as
 * stopping and subsequent calls to pmc_reservation_start will succeed.  This
 * does not mean that the pmc hardware has _actually_ stopped running;
 * assuming no other changes to the reservation state, the pmc hardware
 * _will_ stop shortly.
 */
kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) {
	pmc_state_t newState;

	if(!reservation) {
		return KERN_INVALID_ARGUMENT;
	}
	
	/* Move the state machine */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) {
		return KERN_FAILURE;
	}
	
	/* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will
	 * broadcast right before it leaves.  Similarly, if we just moved directly to STOP, don't bother broadcasting.
	 */
	if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) {	
		/* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
		 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
		 * on every cpu that can access the PMC.
		 */
		pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
	}
	
	return KERN_SUCCESS;
}

/*
 * pmc_reservation_read will read the event count associated with a reservation.
 * If the caller is currently executing in a context that both a) matches the
 * reservation's context, and b) can access the reservation's pmc directly, the
 * value will be read from hardware.  Otherwise, this returns the reservation's
 * stored value.
 *
 * NOTE: This method is interrupt safe.
 * NOTE: When not on the interrupt stack, this method may block.
 */
kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) {
	kern_return_t ret = KERN_FAILURE;
	uint64_t timeout;
	uint32_t spins;

	if(!reservation || !value) {
		return KERN_INVALID_ARGUMENT;
	}

	nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
	timeout += mach_absolute_time();
	spins = 0;
	do {
		uint32_t state = reservation->state;
		
		if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
			/* Attempt read from hardware via drivers. */

			assert(reservation->pmc);

			ret = reservation->pmc->methods.get_count(reservation->pmc->object, value);
			
			break;
		} else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
				   (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
			/* Spin */
			/* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
			if (++spins > PMC_SPIN_THRESHOLD) {
				if (mach_absolute_time() > timeout) {
					pmc_spin_timeout_count++;
					assert(0);
				}
			}

			cpu_pause();
		} else {
			break;
		}
	} while (1);

	/* If the direct hardware read failed (for whatever reason) */
	if(KERN_SUCCESS != ret) {
		/* Read stored value */
		*value = reservation->value;
	}

	return KERN_SUCCESS;
}

/*
 * pmc_reservation_write will write the event count associated with a reservation.
 * If the caller is currently executing in a context that both a) matches the
 * reservation's context, and b) can access the reservation's pmc directly, the
 * value will be written to hardware.  Otherwise, this writes the reservation's
 * stored value.
 *
 * NOTE: This method is interrupt safe.
 * NOTE: When not on the interrupt stack, this method may block.
 */
kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) {
	kern_return_t ret = KERN_FAILURE;
	uint64_t timeout;
	uint32_t spins;

	if(!reservation) {
		return KERN_INVALID_ARGUMENT;
	}

	nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout);
	timeout += mach_absolute_time();
	spins = 0;
	do {
		uint32_t state = reservation->state;
		
		if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) {
			/* Write to hardware via drivers. */
			assert(reservation->pmc);

			ret = reservation->pmc->methods.set_count(reservation->pmc->object, value);
			break;
		} else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) ||
				   (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) {
			/* Spin */
			/* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */
			if (++spins > PMC_SPIN_THRESHOLD) {
				if (mach_absolute_time() > timeout) {
					pmc_spin_timeout_count++;
					assert(0);
				}
			}

			cpu_pause();
		} else {
			break;
		}
	} while (1);
	
	if(KERN_SUCCESS != ret) {
		/* Write stored value */
		reservation->value = value;
	}

	return KERN_SUCCESS;
}

/* 
 * pmc_reservation_free releases a reservation and all associated resources.
 *
 * NOTE: This method is NOT interrupt safe.
 */
kern_return_t pmc_reservation_free(pmc_reservation_t reservation) {
	pmc_state_t newState;
	
	if(!reservation) {
		return KERN_INVALID_ARGUMENT;
	}
	
	perf_monitor_reservation_remove(reservation->pmc->monitor);
	
	/* Move the state machine */
	if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) {
		return KERN_FAILURE;
	}

	/* If we didn't move directly to DEALLOC, help things along */	
	if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) {	
		/* A valid state move has been made, but won't be picked up until a context switch occurs.  To cause matching
		 * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu
		 * on every cpu that can access the PMC.
		 */
		pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu);
	}

	/* Block until the reservation hits the <DEALLOC, 0, > state */
	while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) {
		assert_wait((event_t)reservation, THREAD_UNINT);
		thread_block(THREAD_CONTINUE_NULL);
	}

	/* remove from queues */
	pmc_internal_reservation_remove(reservation);
		
	/* free reservation */
	reservation_free(reservation);

	return KERN_SUCCESS;
}

/*
 * pmc_idle notifies eligible monitors of impending per-CPU idle, and can be used to save state.
 */
boolean_t pmc_idle(void) {
	perf_monitor_t monitor = NULL;
	queue_head_t *cpu_queue;

	lck_spin_lock(&perf_monitor_queue_spin);
	
	if (cpu_monitor_queues) {
		cpu_queue = cpu_monitor_queues[cpu_number()];
	
		queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) {
			perf_monitor_methods_t *methods = &(monitor->methods);
			if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) {		    
				methods->on_idle(monitor->object);
			}
		}
	}

	lck_spin_unlock(&perf_monitor_queue_spin);

	return TRUE;
}

/*
 * pmc_idle_exit notifies eligible monitors of wake from idle; it can be used to restore state.
 */
boolean_t pmc_idle_exit(void) {
	perf_monitor_t monitor = NULL;
	queue_head_t *cpu_queue;

	lck_spin_lock(&perf_monitor_queue_spin);
	
	if (cpu_monitor_queues) {
		cpu_queue = cpu_monitor_queues[cpu_number()];
	
		queue_iterate(cpu_queue, monitor, perf_monitor_t, cpu_link) {
			perf_monitor_methods_t *methods = &(monitor->methods);
			if ((methods->flags & PERFMON_FLAG_ALWAYS_ACTIVE) || (monitor->reservedCounters)) {		    
				methods->on_idle_exit(monitor->object);
			}
		}
	}

	lck_spin_unlock(&perf_monitor_queue_spin);

	return TRUE;
}

/*
 * pmc_context_switch performs all context switching necessary to save all pmc
 * state associated with @oldThread (and the task to which @oldThread belongs),
 * as well as to restore all pmc state associated with @newThread (and the task
 * to which @newThread belongs).
 *
 * NOTE: This method IS interrupt safe.
 */
boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) {
	pmc_reservation_t resv = NULL;
	uint32_t cpuNum = cpu_number();

	lck_spin_lock(&reservations_spin);

	/* Save pmc states */
	if (thread_reservation_count) {
		queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
			if ((oldThread == resv->thread) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
				(void)pmc_internal_reservation_context_out(resv);
			}
		}
	}

	if (task_reservation_count) {
		queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
			if ((resv->task == oldThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
				(void)pmc_internal_reservation_context_out(resv);
			}
		}
	}
	
	/* Restore */
	if (thread_reservation_count) {
		queue_iterate(thread_reservations, resv, pmc_reservation_t, link) {
			if ((resv->thread == newThread) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
				(void)pmc_internal_reservation_context_in(resv);
			}
		}
	}

	if (task_reservation_count) {
		queue_iterate(task_reservations, resv, pmc_reservation_t, link) {
			if ((resv->task == newThread->task) && pmc_accessible_from_core(resv->pmc, cpuNum)) {
				(void)pmc_internal_reservation_context_in(resv);
			}
		}
	}
	
	lck_spin_unlock(&reservations_spin);

	return TRUE;
}

#else /* !CONFIG_COUNTERS */

#if 0
#pragma mark -
#pragma mark Dummy functions
#endif

/*
 * In the case that someone has chosen not to include the PMC KPI in some
 * configuration, we still have exports for kexts, so we'll need to define stub
 * methods that return failures.
 */
kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused,
	perf_monitor_methods_t *methods __unused) {
	return KERN_FAILURE;
}

kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_register(perf_monitor_object_t monitor __unused, 
	pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused,
	pmc_object_t pmc __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_create_config(pmc_t pmc __unused, 
	pmc_config_t *config __unused) {
	return KERN_FAILURE;
}

void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) {
}

kern_return_t pmc_config_set_value(pmc_t pmc __unused, 
	pmc_config_t config __unused, uint8_t id __unused, 
	uint64_t value __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused, 
	pmc_config_t config __unused, uint64_t threshold __unused, 
	pmc_interrupt_method_t method __unused, void *refCon __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) {
	return KERN_FAILURE;
}

void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) {
}

kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused, 
	size_t *pmcCount __unused) {
	return KERN_FAILURE;
}

const char *pmc_get_name(pmc_t pmc __unused) {
	return "";
}

kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused, 
	uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_reserve(pmc_t pmc __unused, 
	pmc_config_t config __unused, pmc_reservation_t *reservation __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_reserve_task(pmc_t pmc __unused, 
	pmc_config_t config __unused, task_t task __unused, 
	pmc_reservation_t *reservation __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_reserve_thread(pmc_t pmc __unused, 
	pmc_config_t config __unused, thread_t thread __unused, 
	pmc_reservation_t *reservation __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused, 
	uint64_t *value __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused, 
	uint64_t value __unused) {
	return KERN_FAILURE;
}

kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) {
	return KERN_FAILURE;
}


#endif /* !CONFIG_COUNTERS */