/*
 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
 * Mellon University All Rights Reserved.
 * 
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright notice
 * and this permission notice appear in all copies of the software,
 * derivative works or modified versions, and any portions thereof, and that
 * both notices appear in supporting documentation.
 * 
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 * 
 * Carnegie Mellon requests users of this software to return to
 * 
 * Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 * School of Computer Science Carnegie Mellon University Pittsburgh PA
 * 15213-3890
 * 
 * any improvements or extensions that they make and grant Carnegie Mellon the
 * rights to redistribute these changes.
 */
/*
 *	File:	kern/lock.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Locking primitives implementation
 */

#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <kern/kalloc.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/xpr.h>
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <string.h>

#include <arm/cpu_data_internal.h>
#include <arm/proc_reg.h>
#include <arm/smp.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <sys/kdebug.h>

/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe().  Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#if	CONFIG_DTRACE
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>

#define DTRACE_RW_SHARED	0x0	//reader
#define DTRACE_RW_EXCL		0x1	//writer
#define DTRACE_NO_FLAG		0x0	//not applicable

#endif	/* CONFIG_DTRACE */

#define	LCK_RW_LCK_EXCLUSIVE_CODE	0x100
#define	LCK_RW_LCK_EXCLUSIVE1_CODE	0x101
#define	LCK_RW_LCK_SHARED_CODE		0x102
#define	LCK_RW_LCK_SH_TO_EX_CODE	0x103
#define	LCK_RW_LCK_SH_TO_EX1_CODE	0x104
#define	LCK_RW_LCK_EX_TO_SH_CODE	0x105


#define	ANY_LOCK_DEBUG	(USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)

// Panic in tests that check lock usage correctness
// These are undesirable when in a panic or when a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

unsigned int    LcksOpts = 0;

#if CONFIG_DTRACE && __SMP__
extern uint64_t dtrace_spin_threshold;
#endif

/* Forwards */


#if	USLOCK_DEBUG
/*
 *	Perform simple lock checks.
 */
int             uslock_check = 1;
int             max_lock_loops = 100000000;
decl_simple_lock_data(extern, printf_lock)
decl_simple_lock_data(extern, panic_lock)
#endif				/* USLOCK_DEBUG */

extern unsigned int not_in_kdp;

/*
 *	We often want to know the addresses of the callers
 *	of the various lock routines.  However, this information
 *	is only used for debugging and statistics.
 */
typedef void   *pc_t;
#define	INVALID_PC	((void *) VM_MAX_KERNEL_ADDRESS)
#define	INVALID_THREAD	((void *) VM_MAX_KERNEL_ADDRESS)

#ifdef	lint
/*
 *	Eliminate lint complaints about unused local pc variables.
 */
#define	OBTAIN_PC(pc,l)	++pc
#else				/* lint */
#define	OBTAIN_PC(pc,l)
#endif				/* lint */


/*
 *	Portable lock package implementation of usimple_locks.
 */

#if	USLOCK_DEBUG
#define	USLDBG(stmt)	stmt
	void            usld_lock_init(usimple_lock_t, unsigned short);
	void            usld_lock_pre(usimple_lock_t, pc_t);
	void            usld_lock_post(usimple_lock_t, pc_t);
	void            usld_unlock(usimple_lock_t, pc_t);
	void            usld_lock_try_pre(usimple_lock_t, pc_t);
	void            usld_lock_try_post(usimple_lock_t, pc_t);
	int             usld_lock_common_checks(usimple_lock_t, const char *);
#else				/* USLOCK_DEBUG */
#define	USLDBG(stmt)
#endif				/* USLOCK_DEBUG */

/*
 * Owner thread pointer when lock held in spin mode
 */
#define LCK_MTX_SPIN_TAG  0xfffffff0


#define interlock_lock(lock)	hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define interlock_try(lock)		hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define interlock_unlock(lock)	hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define lck_rw_ilk_lock(lock)	hw_lock_bit  ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
#define lck_rw_ilk_unlock(lock)	hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)

#define memory_barrier()	__c11_atomic_thread_fence(memory_order_acq_rel_smp)
#define load_memory_barrier()	__c11_atomic_thread_fence(memory_order_acquire_smp)
#define store_memory_barrier()	__c11_atomic_thread_fence(memory_order_release_smp)

// Enforce program order of loads and stores.
#define ordered_load(target, type) \
		__c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
#define ordered_store(target, type, value) \
		__c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)

#define ordered_load_mtx(lock)			ordered_load(&(lock)->lck_mtx_data, uintptr_t)
#define ordered_store_mtx(lock, value)	ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value))
#define ordered_load_rw(lock)			ordered_load(&(lock)->lck_rw_data, uint32_t)
#define ordered_store_rw(lock, value)	ordered_store(&(lock)->lck_rw_data, uint32_t, (value))
#define ordered_load_rw_owner(lock)		ordered_load(&(lock)->lck_rw_owner, thread_t)
#define ordered_store_rw_owner(lock, value)	ordered_store(&(lock)->lck_rw_owner, thread_t, (value))
#define ordered_load_hw(lock)			ordered_load(&(lock)->lock_data, uintptr_t)
#define ordered_store_hw(lock, value)	ordered_store(&(lock)->lock_data, uintptr_t, (value))
#define ordered_load_bit(lock)			ordered_load((lock), uint32_t)
#define ordered_store_bit(lock, value)	ordered_store((lock), uint32_t, (value))


// Prevent the compiler from reordering memory operations around this
#define compiler_memory_fence()	__asm__ volatile ("" ::: "memory")

#define LOCK_PANIC_TIMEOUT	0xc00000
#define NOINLINE		__attribute__((noinline))


#if __arm__
#define interrupts_disabled(mask) (mask & PSR_INTMASK)
#else
#define interrupts_disabled(mask) (mask & DAIF_IRQF)
#endif


#if __arm__
#define enable_fiq()		__asm__ volatile ("cpsie  f" ::: "memory");
#define enable_interrupts()	__asm__ volatile ("cpsie if" ::: "memory");
#endif

/*
 * Forward declarations
 */

static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
void lck_rw_clear_promotions_x86(thread_t thread);
static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);

/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer.  This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
	uint32_t	val;

	val = load_exclusive32(target, ord);
	*previous = val;
	return val;
}

static boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
	(void)previous;		// Previous not needed, monitor is held
	return store_exclusive32(target, newval, ord);
}

static void
atomic_exchange_abort(void)
{
	clear_exclusive();
}

static boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
	uint32_t		value, prev;

	for ( ; ; ) {
		value = atomic_exchange_begin32(target, &prev, ord);
		if (value & test_mask) {
			if (wait)
				wait_for_event();	// Wait with monitor held
			else
				atomic_exchange_abort();	// Clear exclusive monitor
			return FALSE;
		}
		value |= set_mask;
		if (atomic_exchange_complete32(target, prev, value, ord))
			return TRUE;
	}
}
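
/*
 * Illustrative sketch only (not used by the lock package): the
 * begin/complete/abort idiom that atomic_test_and_set32() above follows,
 * applied to an unconditional read-modify-write.  The loop retries until
 * the exclusive store succeeds; any early exit must abort the exchange
 * so the exclusive monitor is not left held.
 */
#if 0	/* example only */
static uint32_t
example_atomic_or32(uint32_t *target, uint32_t bits, enum memory_order ord)
{
	uint32_t	value, prev;

	for ( ; ; ) {
		value = atomic_exchange_begin32(target, &prev, ord);
		value |= bits;
		if (atomic_exchange_complete32(target, prev, value, ord))
			return value;		// New value is now visible
		cpu_pause();
	}
}
#endif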

void _disable_preemption(void)
{
	thread_t	thread = current_thread();
	unsigned int	count;

	count = thread->machine.preemption_count + 1;
	ordered_store(&thread->machine.preemption_count, unsigned int, count);
}

void _enable_preemption(void)
{
	thread_t	thread = current_thread();
	long		state;
	unsigned int	count;
#if __arm__
#define INTERRUPT_MASK PSR_IRQF
#else	// __arm__
#define INTERRUPT_MASK DAIF_IRQF
#endif	// __arm__

	count = thread->machine.preemption_count;
	if (count == 0)
		panic("Preemption count negative");	// Count will go negative when released
	count--;
	if (count > 0)
		goto update_count;			// Preemption is still disabled, just update
	state = get_interrupts();			// Get interrupt state
	if (state & INTERRUPT_MASK)
		goto update_count;			// Interrupts are already masked, can't take AST here

	disable_interrupts_noread();			// Disable interrupts
	ordered_store(&thread->machine.preemption_count, unsigned int, count);
	if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
#if __arm__
#if __ARM_USER_PROTECT__
        uintptr_t up = arm_user_protect_begin(thread);
#endif	// __ARM_USER_PROTECT__
		enable_fiq();
#endif	// __arm__
		ast_taken_kernel();                     // Handle urgent AST
#if __arm__
#if __ARM_USER_PROTECT__
		arm_user_protect_end(thread, up, TRUE);
#endif	// __ARM_USER_PROTECT__
		enable_interrupts();
		return;					// Return early on arm only due to FIQ enabling
#endif	// __arm__
	}
	restore_interrupts(state);			// Enable interrupts
	return;

update_count:
	ordered_store(&thread->machine.preemption_count, unsigned int, count);
	return;
}

int get_preemption_level(void)
{
	return current_thread()->machine.preemption_count;
}
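
/*
 * Pairing sketch (example only): preemption disables nest, so every
 * _disable_preemption() must be matched by exactly one _enable_preemption(),
 * and the count reaches zero only when the thread may legitimately be
 * preempted again.
 */
#if 0	/* example only */
static void
example_preemption_critical_section(void)
{
	_disable_preemption();
	/* ... safely touch per-cpu state here ... */
	assert(get_preemption_level() > 0);
	_enable_preemption();
}
#endif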

/* Forward declarations for unexported functions that are used externally */
void hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit);
void hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit);

#if	__SMP__
static unsigned int
hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout);
#endif

unsigned int
hw_lock_bit_to(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout)
{
	unsigned int success = 0;
	uint32_t	mask = (1 << bit);
#if	!__SMP__
	uint32_t	state;
#endif

	_disable_preemption();
#if	__SMP__
	if (__improbable(!atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE)))
		success = hw_lock_bit_to_contended(lock, mask, timeout);
	else
		success = 1;
#else	// __SMP__
	(void)timeout;
	state = ordered_load_bit(lock);
	if (!(mask & state)) {
		ordered_store_bit(lock, state | mask);
		success = 1;
	}
#endif	// __SMP__

#if CONFIG_DTRACE
	if (success)
		LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
#endif

	return success;
}

#if	__SMP__
static unsigned int NOINLINE
hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout)
{
	uint64_t	end = 0;
	int		i;
#if CONFIG_DTRACE
	uint64_t begin;
	boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
	if (__improbable(dtrace_enabled))
		begin = mach_absolute_time();
#endif
	for ( ; ; ) {	
		for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
			// Always load-exclusive before wfe
			// This grabs the monitor and wakes up on a release event
			if (atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
				goto end;
			}
		}
		if (end == 0)
			end = ml_get_timebase() + timeout;
		else if (ml_get_timebase() >= end)
			break;
	}
	return 0;
end:
#if CONFIG_DTRACE
	if (__improbable(dtrace_enabled)) {
		uint64_t spintime = mach_absolute_time() - begin;
		if (spintime > dtrace_spin_threshold)
			LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, mask);
	}
#endif
	return 1;
}
#endif	// __SMP__

void
hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit)
{
	if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT))
		return;
#if	__SMP__
	panic("hw_lock_bit(): timed out (%p)", lock);
#else
	panic("hw_lock_bit(): interlock held (%p)", lock);
#endif
}

unsigned int
hw_lock_bit_try(hw_lock_bit_t *lock, unsigned int bit)
{
	long		intmask;
	uint32_t	mask = (1 << bit);
#if	!__SMP__
	uint32_t	state;
#endif
	boolean_t	success = FALSE;

	intmask = disable_interrupts();
#if	__SMP__
	// TODO: consider weak (non-looping) atomic test-and-set
	success = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
#else
	state = ordered_load_bit(lock);
	if (!(mask & state)) {
		ordered_store_bit(lock, state | mask);
		success = TRUE;
	}
#endif	// __SMP__
	if (success)
		disable_preemption();
	restore_interrupts(intmask);

#if CONFIG_DTRACE
	if (success)
		LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
#endif

	return success;
}

/*
 *	Routine:	hw_unlock_bit
 *
 *		Release spin-lock. The second parameter is the bit number to clear.
 *		Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit)
{
	uint32_t	mask = (1 << bit);
#if	!__SMP__
	uint32_t	state;
#endif

#if	__SMP__
	__c11_atomic_fetch_and((_Atomic uint32_t *)lock, ~mask, memory_order_release);
	set_event();
#else	// __SMP__
	state = ordered_load_bit(lock);
	ordered_store_bit(lock, state & ~mask);
#endif	// __SMP__
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
	enable_preemption();
}
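
/*
 * Usage sketch (example only): a bit lock guards state with a single bit
 * of a word that may also carry other flags.  The bit number, not a mask,
 * is passed to hw_lock_bit()/hw_unlock_bit(), and preemption remains
 * disabled for the duration of the hold.
 */
#if 0	/* example only */
static hw_lock_bit_t example_word;	/* bit 0 used as the lock */

static void
example_bit_locked_update(void)
{
	hw_lock_bit(&example_word, 0);
	/* ... update the state guarded by bit 0 ... */
	hw_unlock_bit(&example_word, 0);
}
#endif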


/*
 *      Routine:        lck_spin_alloc_init
 */
lck_spin_t     *
lck_spin_alloc_init(
		lck_grp_t * grp,
		lck_attr_t * attr)
{
	lck_spin_t     *lck;

	if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0)
		lck_spin_init(lck, grp, attr);

	return (lck);
}

/*
 *      Routine:        lck_spin_free
 */
void
lck_spin_free(
	      lck_spin_t * lck,
	      lck_grp_t * grp)
{
	lck_spin_destroy(lck, grp);
	kfree((void *) lck, sizeof(lck_spin_t));
}

/*
 *      Routine:        lck_spin_init
 */
void
lck_spin_init(
	      lck_spin_t * lck,
	      lck_grp_t * grp,
	      __unused lck_attr_t * attr)
{
	hw_lock_init(&lck->hwlock);
	lck->type = LCK_SPIN_TYPE;
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
	store_memory_barrier();
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
	lck->type = LCK_SPIN_TYPE;
	hw_lock_init(&lck->hwlock);
	store_memory_barrier();
}


/*
 *      Routine:        lck_spin_lock
 */
void
lck_spin_lock(lck_spin_t *lock)
{
#if	DEVELOPMENT || DEBUG
	if (lock->type != LCK_SPIN_TYPE)
		panic("Invalid spinlock %p", lock);
#endif	// DEVELOPMENT || DEBUG
	hw_lock_lock(&lock->hwlock);
}

/*
 *      Routine:        lck_spin_try_lock
 */
int
lck_spin_try_lock(lck_spin_t *lock)
{
	return hw_lock_try(&lock->hwlock);
}

/*
 *      Routine:        lck_spin_unlock
 */
void
lck_spin_unlock(lck_spin_t *lock)
{
#if	DEVELOPMENT || DEBUG
	if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC())
		panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
	if (lock->type != LCK_SPIN_TYPE)
		panic("Invalid spinlock type %p", lock);
#endif	// DEVELOPMENT || DEBUG
	hw_lock_unlock(&lock->hwlock);
}

/*
 *      Routine:        lck_spin_destroy
 */
void
lck_spin_destroy(
		 lck_spin_t * lck,
		 lck_grp_t * grp)
{
	if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED)
		return;
	lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
	lck_grp_deallocate(grp);
}
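
/*
 * Lifecycle sketch (example only) for a dynamically allocated spin lock.
 * lck_grp_alloc_init()/lck_grp_free() come from the generic lock-group
 * code and are assumed here purely for illustration.
 */
#if 0	/* example only */
static void
example_spinlock_lifecycle(void)
{
	lck_grp_t	*grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	lck_spin_t	*lock = lck_spin_alloc_init(grp, LCK_ATTR_NULL);

	lck_spin_lock(lock);
	/* ... short, non-blocking critical section ... */
	lck_spin_unlock(lock);

	lck_spin_free(lock, grp);	/* destroys the lock and frees its memory */
	lck_grp_free(grp);
}
#endif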

/*
 * Routine: kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck) {
	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
}

/*
 *	Initialize a usimple_lock.
 *
 *	No change in preemption state.
 */
void
usimple_lock_init(
		  usimple_lock_t l,
		  unsigned short tag)
{
#ifndef	MACHINE_SIMPLE_LOCK
	USLDBG(usld_lock_init(l, tag));
	hw_lock_init(&l->lck_spin_data);
#else
	simple_lock_init((simple_lock_t) l, tag);
#endif
}


/*
 *	Acquire a usimple_lock.
 *
 *	Returns with preemption disabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
usimple_lock(
	     usimple_lock_t l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	pc_t            pc;

	OBTAIN_PC(pc, l);
	USLDBG(usld_lock_pre(l, pc));

	if (!hw_lock_to(&l->lck_spin_data, LockTimeOut))	/* Try to get the lock
							 * with a timeout */
		panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", &l, cpu_number(), pc);

	USLDBG(usld_lock_post(l, pc));
#else
	simple_lock((simple_lock_t) l);
#endif
}


extern void     sync(void);

/*
 *	Release a usimple_lock.
 *
 *	Returns with preemption enabled.  Note
 *	that the hw_lock routines are responsible for
 *	maintaining preemption state.
 */
void
usimple_unlock(
	       usimple_lock_t l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	pc_t            pc;

	OBTAIN_PC(pc, l);
	USLDBG(usld_unlock(l, pc));
	sync();
	hw_lock_unlock(&l->lck_spin_data);
#else
	simple_unlock((simple_lock_t) l);
#endif
}


/*
 *	Conditionally acquire a usimple_lock.
 *
 *	On success, returns with preemption disabled.
 *	On failure, returns with preemption in the same state
 *	as when first invoked.  Note that the hw_lock routines
 *	are responsible for maintaining preemption state.
 *
 *	XXX No stats are gathered on a miss; I preserved this
 *	behavior from the original assembly-language code, but
 *	doesn't it make sense to log misses?  XXX
 */
unsigned int
usimple_lock_try(
		 usimple_lock_t l)
{
#ifndef	MACHINE_SIMPLE_LOCK
	pc_t            pc;
	unsigned int    success;

	OBTAIN_PC(pc, l);
	USLDBG(usld_lock_try_pre(l, pc));
	if ((success = hw_lock_try(&l->lck_spin_data))) {
		USLDBG(usld_lock_try_post(l, pc));
	}
	return success;
#else
	return (simple_lock_try((simple_lock_t) l));
#endif
}

#if	USLOCK_DEBUG
/*
 *	States of a usimple_lock.  The default when initializing
 *	a usimple_lock is setting it up for debug checking.
 */
#define	USLOCK_CHECKED		0x0001	/* lock is being checked */
#define	USLOCK_TAKEN		0x0002	/* lock has been taken */
#define	USLOCK_INIT		0xBAA0	/* lock has been initialized */
#define	USLOCK_INITIALIZED	(USLOCK_INIT|USLOCK_CHECKED)
#define	USLOCK_CHECKING(l)	(uslock_check &&			\
				 ((l)->debug.state & USLOCK_CHECKED))

/*
 *	Trace activities of a particularly interesting lock.
 */
void            usl_trace(usimple_lock_t, int, pc_t, const char *);


/*
 *	Initialize the debugging information contained
 *	in a usimple_lock.
 */
void
usld_lock_init(
	       usimple_lock_t l,
	       __unused unsigned short tag)
{
	if (l == USIMPLE_LOCK_NULL)
		panic("lock initialization:  null lock pointer");
	l->lock_type = USLOCK_TAG;
	l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
	l->debug.lock_cpu = l->debug.unlock_cpu = 0;
	l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
	l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
	l->debug.duration[0] = l->debug.duration[1] = 0;
	l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
	l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
	l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
}


/*
 *	These checks apply to all usimple_locks, not just
 *	those with USLOCK_CHECKED turned on.
 */
int
usld_lock_common_checks(
			usimple_lock_t l,
			const char *caller)
{
	if (l == USIMPLE_LOCK_NULL)
		panic("%s:  null lock pointer", caller);
	if (l->lock_type != USLOCK_TAG)
		panic("%s:  0x%x is not a usimple lock", caller, (integer_t) l);
	if (!(l->debug.state & USLOCK_INIT))
		panic("%s:  0x%x is not an initialized lock",
		      caller, (integer_t) l);
	return USLOCK_CHECKING(l);
}


/*
 *	Debug checks on a usimple_lock just before attempting
 *	to acquire it.
 */
/* ARGSUSED */
void
usld_lock_pre(
	      usimple_lock_t l,
	      pc_t pc)
{
	const char     *caller = "usimple_lock";


	if (!usld_lock_common_checks(l, caller))
		return;

	/*
	 *	Note that we have a weird case where we are getting a lock when we are
	 *	in the process of putting the system to sleep. We are running with no
	 *	current threads, therefore we can't tell if we are trying to retake a lock
	 *	we have or someone on the other processor has it.  Therefore we just
	 *	ignore this test if the locking thread is 0.
	 */

	if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
	    l->debug.lock_thread == (void *) current_thread()) {
		printf("%s:  lock 0x%x already locked (at %p) by",
		       caller, (integer_t) l, l->debug.lock_pc);
		printf(" current thread %p (new attempt at pc %p)\n",
		       l->debug.lock_thread, pc);
		panic("%s", caller);
	}
	mp_disable_preemption();
	usl_trace(l, cpu_number(), pc, caller);
	mp_enable_preemption();
}


/*
 *	Debug checks on a usimple_lock just after acquiring it.
 *
 *	Pre-emption has been disabled at this point,
 *	so we are safe in using cpu_number.
 */
void
usld_lock_post(
	       usimple_lock_t l,
	       pc_t pc)
{
	int             mycpu;
	const char     *caller = "successful usimple_lock";


	if (!usld_lock_common_checks(l, caller))
		return;

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
		panic("%s:  lock 0x%x became uninitialized",
		      caller, (integer_t) l);
	if ((l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%x became TAKEN by someone else",
		      caller, (integer_t) l);

	mycpu = cpu_number();
	l->debug.lock_thread = (void *) current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = mycpu;

	usl_trace(l, mycpu, pc, caller);
}


/*
 *	Debug checks on a usimple_lock just before
 *	releasing it.  Note that the caller has not
 *	yet released the hardware lock.
 *
 *	Preemption is still disabled, so there's
 *	no problem using cpu_number.
 */
void
usld_unlock(
	    usimple_lock_t l,
	    pc_t pc)
{
	int             mycpu;
	const char     *caller = "usimple_unlock";


	if (!usld_lock_common_checks(l, caller))
		return;

	mycpu = cpu_number();

	if (!(l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%x hasn't been taken",
		      caller, (integer_t) l);
	if (l->debug.lock_thread != (void *) current_thread())
		panic("%s:  unlocking lock 0x%x, owned by thread %p",
		      caller, (integer_t) l, l->debug.lock_thread);
	if (l->debug.lock_cpu != mycpu) {
		printf("%s:  unlocking lock 0x%x on cpu 0x%x",
		       caller, (integer_t) l, mycpu);
		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
		panic("%s", caller);
	}
	usl_trace(l, mycpu, pc, caller);

	l->debug.unlock_thread = l->debug.lock_thread;
	l->debug.lock_thread = INVALID_THREAD;
	l->debug.state &= ~USLOCK_TAKEN;
	l->debug.unlock_pc = pc;
	l->debug.unlock_cpu = mycpu;
}


/*
 *	Debug checks on a usimple_lock just before
 *	attempting to acquire it.
 *
 *	Preemption isn't guaranteed to be disabled.
 */
void
usld_lock_try_pre(
		  usimple_lock_t l,
		  pc_t pc)
{
	const char     *caller = "usimple_lock_try";

	if (!usld_lock_common_checks(l, caller))
		return;
	mp_disable_preemption();
	usl_trace(l, cpu_number(), pc, caller);
	mp_enable_preemption();
}


/*
 *	Debug checks on a usimple_lock just after
 *	successfully attempting to acquire it.
 *
 *	Preemption has been disabled by the
 *	lock acquisition attempt, so it's safe
 *	to use cpu_number.
 */
void
usld_lock_try_post(
		   usimple_lock_t l,
		   pc_t pc)
{
	int             mycpu;
	const char     *caller = "successful usimple_lock_try";

	if (!usld_lock_common_checks(l, caller))
		return;

	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
		panic("%s:  lock 0x%x became uninitialized",
		      caller, (integer_t) l);
	if ((l->debug.state & USLOCK_TAKEN))
		panic("%s:  lock 0x%x became TAKEN by someone else",
		      caller, (integer_t) l);

	mycpu = cpu_number();
	l->debug.lock_thread = (void *) current_thread();
	l->debug.state |= USLOCK_TAKEN;
	l->debug.lock_pc = pc;
	l->debug.lock_cpu = mycpu;

	usl_trace(l, mycpu, pc, caller);
}


/*
 *	For very special cases, set traced_lock to point to a
 *	specific lock of interest.  The result is a series of
 *	XPRs showing lock operations on that lock.  The lock_seq
 *	value is used to show the order of those operations.
 */
usimple_lock_t  traced_lock;
unsigned int    lock_seq;

void
usl_trace(
	  usimple_lock_t l,
	  int mycpu,
	  pc_t pc,
	  const char *op_name)
{
	if (traced_lock == l) {
		XPR(XPR_SLOCK,
		    "seq %d, cpu %d, %s @ %x\n",
		    (integer_t) lock_seq, (integer_t) mycpu,
		    (integer_t) op_name, (integer_t) pc, 0);
		lock_seq++;
	}
}


#endif				/* USLOCK_DEBUG */

/*
 * The C portion of the shared/exclusive locks package.
 */

/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
#if	__SMP__
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
	lck_rw_word_t	word;

	word.data = ordered_load_rw(lck);
	if (word.can_sleep) {
		if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
			/*
			 * there are already threads waiting on this lock... this
			 * implies that they have spun beyond their deadlines waiting for
			 * the desired state to show up so we will not bother spinning at this time...
			 *   or
			 * the current number of threads sharing this lock exceeds our capacity to run them
			 * concurrently and since all states we're going to spin for require the rw_shared_count
			 * to be at 0, we'll not bother spinning since the latency for this to happen is
			 * unpredictable...
			 */
			return (mach_absolute_time());
		}
		return (mach_absolute_time() + MutexSpin);
	} else
		return (mach_absolute_time() + (100000LL * 1000000000LL));
}
#endif	// __SMP__

static boolean_t
lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
{
#if	__SMP__
	uint64_t	deadline = 0;
	uint32_t	data;

	if (wait)
		deadline = lck_rw_deadline_for_spin(lock);

	for ( ; ; ) {
		data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
		if ((data & status_mask) == 0)
			break;
		if (wait)
			wait_for_event();
		else
			clear_exclusive();
		if (!wait || (mach_absolute_time() >= deadline))
			return FALSE;
	}
	clear_exclusive();
	return TRUE;
#else
	uint32_t	data;

	data = ordered_load_rw(lock);
	if ((data & status_mask) == 0)
		return TRUE;
	else
		return FALSE;
#endif	// __SMP__
}

/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(lck_rw_t *lock)
{
#if __SMP__
	uint32_t	data;

	for ( ; ; ) {
		data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
		if (data & LCK_RW_INTERLOCK)
			wait_for_event();
		else {
			clear_exclusive();
			return;
		}
	}
#else
	panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
#endif
}

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
	boolean_t	istate;

	istate = ml_set_interrupts_enabled(FALSE);	
	lck_rw_ilk_lock(lck);
	return istate;
}

static inline void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
	lck_rw_ilk_unlock(lck);
	ml_set_interrupts_enabled(istate);
}
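
/*
 * Interlock pattern sketch (example only), as used by the slow paths
 * below: take the interlock with interrupts disabled, examine or update
 * the lock word under it, then drop it and restore the saved interrupt
 * state.
 */
#if 0	/* example only */
static void
example_interlock_usage(lck_rw_t *lock)
{
	boolean_t	istate;
	lck_rw_word_t	word;

	istate = lck_interlock_lock(lock);
	word.data = ordered_load_rw(lock);
	/* ... inspect or modify 'word' and store it back if needed ... */
	lck_interlock_unlock(lock, istate);
}
#endif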


#define LCK_RW_GRAB_WANT	0
#define LCK_RW_GRAB_SHARED	1

static boolean_t
lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
{
	uint64_t	deadline = 0;
	uint32_t	data, prev;
	boolean_t	do_exch;

#if __SMP__
	if (wait)
		deadline = lck_rw_deadline_for_spin(lock);
#else
	wait = FALSE;	// Don't spin on UP systems
#endif

	for ( ; ; ) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		do_exch = FALSE;
		if (mode == LCK_RW_GRAB_WANT) {
			if ((data & LCK_RW_WANT_EXCL) == 0) {
				data |= LCK_RW_WANT_EXCL;
				do_exch = TRUE;
			}
		} else {	// LCK_RW_GRAB_SHARED
			if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
				(((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
				data += LCK_RW_SHARED_READER;
				do_exch = TRUE;
			}
		}
		if (do_exch) {
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
				return TRUE;
		} else {
			if (wait)						// Wait with monitor held
				wait_for_event();
			else
				atomic_exchange_abort();
			if (!wait || (mach_absolute_time() >= deadline))
				return FALSE;
		}
	}
}


/*
 *      Routine:        lck_rw_alloc_init
 */
lck_rw_t *
lck_rw_alloc_init(
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	lck_rw_t	*lck;

	if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
		lck_rw_init(lck, grp, attr);

	return lck;
}

/*
 *      Routine:        lck_rw_free
 */
void
lck_rw_free(
	lck_rw_t	*lck,
	lck_grp_t	*grp)
{
	lck_rw_destroy(lck, grp);
	kfree(lck, sizeof(lck_rw_t));
}

/*
 *      Routine:        lck_rw_init
 */
void
lck_rw_init(
	lck_rw_t	*lck,
	lck_grp_t	*grp,
	lck_attr_t	*attr)
{
	if (attr == LCK_ATTR_NULL)
		attr = &LockDefaultLckAttr;
	memset(lck, 0, sizeof(lck_rw_t));
	lck->lck_rw_can_sleep = TRUE;
	if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0)
		lck->lck_rw_priv_excl = TRUE;

	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}


/*
 *      Routine:        lck_rw_destroy
 */
void
lck_rw_destroy(
	lck_rw_t	*lck,
	lck_grp_t	*grp)
{
	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
		return;
#if MACH_LDEBUG
	lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
#endif
	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
	lck_grp_deallocate(grp);
	return;
}
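
/*
 * Lifecycle sketch (example only) for an embedded rw lock: initialize
 * with lck_rw_init() and tear down with lck_rw_destroy(); the lock must
 * not be held when it is destroyed.
 */
#if 0	/* example only */
static lck_rw_t	example_rw_lock;

static void
example_rw_lifecycle(lck_grp_t *grp)
{
	lck_rw_init(&example_rw_lock, grp, LCK_ATTR_NULL);
	/* ... the lock is usable here ... */
	lck_rw_destroy(&example_rw_lock, grp);
}
#endif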

/*
 *	Routine:	lck_rw_lock
 */
void
lck_rw_lock(
	lck_rw_t		*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		lck_rw_lock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		lck_rw_lock_exclusive(lck);
	else
		panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
}

/*
 *	Routine:	lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(lck_rw_t *lock)
{
	thread_t	thread = current_thread();

	thread->rwlock_count++;
	if (atomic_test_and_set32(&lock->lck_rw_data,
		(LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
		LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
#if	CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif	/* CONFIG_DTRACE */
	} else
		lck_rw_lock_exclusive_gen(lock);
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, thread);
}

/*
 *	Routine:	lck_rw_lock_shared
 */
void
lck_rw_lock_shared(lck_rw_t *lock)
{
	uint32_t	data, prev;

	current_thread()->rwlock_count++;
	for ( ; ; ) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
			atomic_exchange_abort();
			lck_rw_lock_shared_gen(lock);
			break;
		}
		data += LCK_RW_SHARED_READER;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
			break;
		cpu_pause();
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif	/* CONFIG_DTRACE */
	return;
}
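
/*
 * Reader usage sketch (example only): shared acquisitions pair with
 * lck_rw_unlock_shared() (or lck_rw_done()), and multiple readers may
 * hold the lock concurrently as long as no writer owns it.
 */
#if 0	/* example only */
static void
example_reader(lck_rw_t *lock)
{
	lck_rw_lock_shared(lock);
	/* ... read-only access to the protected data ... */
	lck_rw_unlock_shared(lock);
}
#endif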

/*
 *	Routine:	lck_rw_lock_shared_to_exclusive
 */
boolean_t
lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
{
	uint32_t	data, prev;

	for ( ; ; ) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
		}
		if (data & LCK_RW_WANT_UPGRADE) {
			data -= LCK_RW_SHARED_READER;
			if ((data & LCK_RW_SHARED_MASK) == 0)		/* we were the last reader */
				data &= ~(LCK_RW_W_WAITING);		/* so clear the wait indicator */
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
				return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
		} else {
			data |= LCK_RW_WANT_UPGRADE;		/* ask for WANT_UPGRADE */
			data -= LCK_RW_SHARED_READER;		/* and shed our read count */
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
				break;
		}
		cpu_pause();
	}
										/* we now own the WANT_UPGRADE */
	if (data & LCK_RW_SHARED_MASK) 		/* check to see if all of the readers are drained */
		lck_rw_lock_shared_to_exclusive_success(lock);	/* if not, we need to go wait */
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, current_thread());
#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif	/* CONFIG_DTRACE */
	return TRUE;
}
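
/*
 * Upgrade pattern sketch (example only): when the upgrade fails, the
 * shared hold has already been dropped, so the caller must not assume it
 * still holds the lock in any form and typically re-acquires it
 * exclusively from scratch (revalidating any state read under the old
 * shared hold).
 */
#if 0	/* example only */
static void
example_upgrade(lck_rw_t *lock)
{
	lck_rw_lock_shared(lock);
	/* ... decide that the protected data must be modified ... */
	if (!lck_rw_lock_shared_to_exclusive(lock)) {
		/* shared hold was lost; start over exclusively */
		lck_rw_lock_exclusive(lock);
	}
	/* ... exclusive access here ... */
	lck_rw_unlock_exclusive(lock);
}
#endif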


/*
 *	Routine:	lck_rw_lock_shared_to_exclusive_failure
 *	Function:
 *		Fast path code has already dropped our read
 *		count and determined that someone else owns 'lck_rw_want_upgrade';
 *		if 'lck_rw_shared_count' == 0, it has also already cleared 'lck_w_waiting'.
 *		All we need to do here is determine if a wakeup is needed.
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
	lck_rw_t	*lck,
	uint32_t	prior_lock_state)
{
	thread_t	thread = current_thread();
	uint32_t	rwlock_count;

	/* Check if dropping the lock means that we need to unpromote */
	rwlock_count = thread->rwlock_count--;
#if MACH_LDEBUG
	if (rwlock_count == 0) {
		panic("rw lock count underflow for thread %p", thread);
	}
#endif
	if ((prior_lock_state & LCK_RW_W_WAITING) &&
		((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
		/*
		 *	Someone else has requested upgrade.
		 *	Since we've released the read lock, wake
		 *	him up if he's blocked waiting
		 */
		thread_wakeup(LCK_RW_WRITER_EVENT(lck));
	}

	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
		     VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

	return (FALSE);
}

/*
 *	Routine:	lck_rw_lock_shared_to_exclusive_success
 *	Function:
 *		Fast path code has already dropped our read
 *		count and successfully acquired 'lck_rw_want_upgrade';
 *		we just need to wait for the rest of the readers to drain
 *		and then we can return as the exclusive holder of this lock.
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_success(
	lck_rw_t	*lock)
{
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	int			slept = 0;
	lck_rw_word_t		word;
	wait_result_t		res;
	boolean_t		istate;
	boolean_t		not_shared;

#if	CONFIG_DTRACE
	uint64_t		wait_interval = 0;
	int			readers_at_sleep = 0;
	boolean_t		dtrace_ls_initialized = FALSE;
	boolean_t		dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

	while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {

		word.data = ordered_load_rw(lock);
#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
			dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 *  start a timing of our delay interval now.
				 */
				readers_at_sleep = word.shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
			     trace_lck, word.shared_count, 0, 0, 0);

		not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
			     trace_lck, lock->lck_rw_shared_count, 0, 0, 0);

		if (not_shared)
			break;

		/*
		 * if we get here, the spin deadline in lck_rw_drain_status()
		 * has expired w/o the rw_shared_count having drained to 0;
		 * check to see if we're allowed to do a thread_block
		 */
		if (word.can_sleep) {
			
			istate = lck_interlock_lock(lock);
			
			word.data = ordered_load_rw(lock);
			if (word.shared_count != 0) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
					     trace_lck, word.shared_count, 0, 0, 0);

				word.w_waiting = 1;
				ordered_store_rw(lock, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
				res = assert_wait(LCK_RW_WRITER_EVENT(lock), THREAD_UNINT);
				lck_interlock_unlock(lock, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
					     trace_lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lock, istate);
				break;
			}
		}
	}
#if	CONFIG_DTRACE
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
#endif
	return (TRUE);
}


/*
 *	Routine:	lck_rw_lock_exclusive_to_shared
 */

void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
{
	uint32_t	data, prev;

	assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
	ordered_store_rw_owner(lock, THREAD_NULL);
	for ( ; ; ) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
		if (data & LCK_RW_INTERLOCK) {
#if __SMP__
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);	/* wait for interlock to clear */
			continue;
#else
			panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
#endif // __SMP__
		}
		data += LCK_RW_SHARED_READER;
		if (data & LCK_RW_WANT_UPGRADE)
			data &= ~(LCK_RW_WANT_UPGRADE);
		else
			data &= ~(LCK_RW_WANT_EXCL);
		if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
			data &= ~(LCK_RW_W_WAITING);
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
			break;
		cpu_pause();
	}
	return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
}
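
/*
 * Downgrade pattern sketch (example only): a writer that has finished
 * mutating can hand the lock back to readers without any window in which
 * the lock is unheld.
 */
#if 0	/* example only */
static void
example_downgrade(lck_rw_t *lock)
{
	lck_rw_lock_exclusive(lock);
	/* ... modify the protected data ... */
	lck_rw_lock_exclusive_to_shared(lock);
	/* ... continue reading under the shared hold ... */
	lck_rw_unlock_shared(lock);
}
#endif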

/*
 *      Routine:        lck_rw_lock_exclusive_to_shared_gen
 * 	Function:
 *		Fast path has already dropped
 *		our exclusive state and bumped lck_rw_shared_count
 *		all we need to do here is determine if anyone
 *		needs to be awakened.
 */
static void
lck_rw_lock_exclusive_to_shared_gen(
	lck_rw_t	*lck,
	uint32_t	prior_lock_state)
{
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	lck_rw_word_t	fake_lck;

	/*
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a lock word from it and
	 * carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck.data = prior_lock_state;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
			     trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);

	/*
	 * don't wake up anyone waiting to take the lock exclusively
	 * since we hold a read count... when the read count drops to 0,
	 * the writers will be woken.
	 *
	 * wake up any waiting readers if we don't have any writers waiting,
	 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
	 */
	if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
		thread_wakeup(LCK_RW_READER_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
			     trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}


/*
 *      Routine:        lck_rw_try_lock
 */
boolean_t
lck_rw_try_lock(
	lck_rw_t		*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		return lck_rw_try_lock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		return lck_rw_try_lock_exclusive(lck);
	else
		panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
	return FALSE;
}

/*
 *	Routine:	lck_rw_try_lock_shared
 */

boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
{
	uint32_t	data, prev;

	for ( ; ; ) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
#if __SMP__
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
#else
			panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
#endif
		}
		if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
			atomic_exchange_abort();
			return FALSE;						/* lock is busy */
		}
		data += LCK_RW_SHARED_READER;			/* Increment reader refcount */
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
			break;
		cpu_pause();
	}
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	current_thread()->rwlock_count++;
#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif	/* CONFIG_DTRACE */
	return TRUE;
}


/*
 *	Routine:	lck_rw_try_lock_exclusive
 */

boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
{
	uint32_t	data, prev;
	thread_t	thread;

	for ( ; ; ) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
		if (data & LCK_RW_INTERLOCK) {
#if __SMP__
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
#else
			panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
#endif
		}
		if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
			atomic_exchange_abort();
			return FALSE;
		}
		data |= LCK_RW_WANT_EXCL;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
			break;
		cpu_pause();
	}
	thread = current_thread();
	thread->rwlock_count++;
#if MACH_ASSERT
	thread_t owner = ordered_load_rw_owner(lock);
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
	ordered_store_rw_owner(lock, thread);
#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif	/* CONFIG_DTRACE */
	return TRUE;
}
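
/*
 * Try-lock sketch (example only): lck_rw_try_lock_exclusive() never
 * blocks or spins on contention, so it is usable in contexts that cannot
 * wait; the caller must handle the FALSE case itself.
 */
#if 0	/* example only */
static boolean_t
example_try_update(lck_rw_t *lock)
{
	if (!lck_rw_try_lock_exclusive(lock))
		return FALSE;		/* contended; caller retries later */
	/* ... modify the protected data ... */
	lck_rw_unlock_exclusive(lock);
	return TRUE;
}
#endif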


/*
 *	Routine:	lck_rw_unlock
 */
void
lck_rw_unlock(
	lck_rw_t		*lck,
	lck_rw_type_t	lck_rw_type)
{
	if (lck_rw_type == LCK_RW_TYPE_SHARED)
		lck_rw_unlock_shared(lck);
	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
		lck_rw_unlock_exclusive(lck);
	else
		panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
}


/*
 *	Routine:	lck_rw_unlock_shared
 */
void
lck_rw_unlock_shared(
	lck_rw_t	*lck)
{
	lck_rw_type_t	ret;

	assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
	assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_SHARED)
		panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
}


/*
 *	Routine:	lck_rw_unlock_exclusive
 */
void
lck_rw_unlock_exclusive(
	lck_rw_t	*lck)
{
	lck_rw_type_t	ret;

	assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
	ret = lck_rw_done(lck);

	if (ret != LCK_RW_TYPE_EXCLUSIVE)
		panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
}


/*
 *      Routine:        lck_rw_lock_exclusive_gen
 */
static void
lck_rw_lock_exclusive_gen(
	lck_rw_t	*lock)
{
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	lck_rw_word_t		word;
	int			slept = 0;
	boolean_t		gotlock = 0;
	boolean_t		not_shared_or_upgrade = 0;
	wait_result_t		res = 0;
	boolean_t		istate;

#if	CONFIG_DTRACE
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
#endif

	/*
	 *	Try to acquire the lck_rw_want_excl bit.
	 */
	while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {

#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 *  start a timing of our delay interval now.
				 */
				readers_at_sleep = lock->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

		gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		word.data = ordered_load_rw(lock);
		if (word.can_sleep) {

			istate = lck_interlock_lock(lock);
			word.data = ordered_load_rw(lock);

			if (word.want_excl) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

				word.w_waiting = 1;
				ordered_store_rw(lock, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
				res = assert_wait(LCK_RW_WRITER_EVENT(lock), THREAD_UNINT);
				lck_interlock_unlock(lock, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
			} else {
				word.want_excl = 1;
				ordered_store_rw(lock, word.data);
				lck_interlock_unlock(lock, istate);
				break;
			}
		}
	}
	/*
	 * Wait for readers (and upgrades) to finish...
	 */
	while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {

#if	CONFIG_DTRACE
		/*
		 * Either sleeping or spinning is happening, start
		 * a timing of our delay interval now.  If we set it
		 * to -1 we don't have accurate data so we cannot later
		 * decide to record a dtrace spin or sleep event.
		 */
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
			dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 *  start a timing of our delay interval now.
				 */
				readers_at_sleep = lock->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

		not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);

		if (not_shared_or_upgrade)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock exclusively
		 * check to see if we're allowed to do a thread_block
		 */
		word.data = ordered_load_rw(lock);
		if (word.can_sleep) {

			istate = lck_interlock_lock(lock);
			word.data = ordered_load_rw(lock);

			if (word.shared_count != 0 || word.want_upgrade) {
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

				word.w_waiting = 1;
				ordered_store_rw(lock, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
				res = assert_wait(LCK_RW_WRITER_EVENT(lock), THREAD_UNINT);
				lck_interlock_unlock(lock, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
			} else {
				lck_interlock_unlock(lock, istate);
				/*
				 * must own the lock now, since we checked for
				 * readers or upgrade owner behind the interlock
				 * no need for a call to 'lck_rw_drain_status'
				 */
				break;
			}
		}
	}

#if	CONFIG_DTRACE
	/*
	 * Decide what latencies we suffered that are Dtrace events.
	 * If we have set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * of the tracing.
	 * If we have set wait_interval to -1, then dtrace was not enabled when we
	 * started sleeping/spinning so we don't record this event.
	 */
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
			    mach_absolute_time() - wait_interval, 1);
		} else {
			/*
			 * For the blocking case, we also record if when we blocked
			 * it was held for read or write, and how many readers.
			 * Notice that above we recorded this before we dropped
			 * the interlock so the count is accurate.
			 */
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
#endif	/* CONFIG_DTRACE */
}

/*
 *      Routine:        lck_rw_done
 */

lck_rw_type_t lck_rw_done(lck_rw_t *lock)
{
	uint32_t	data, prev;
	boolean_t	once = FALSE;

	for ( ; ; ) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
		if (data & LCK_RW_INTERLOCK) {		/* wait for interlock to clear */
#if __SMP__
			atomic_exchange_abort();
			lck_rw_interlock_spin(lock);
			continue;
#else
			panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
#endif // __SMP__
		}
		if (data & LCK_RW_SHARED_MASK) {	/* lock is held shared */
			assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
			data -= LCK_RW_SHARED_READER;
			if ((data & LCK_RW_SHARED_MASK) == 0)	/* if reader count has now gone to 0, check for waiters */
				goto check_waiters;
		} else {					/* if reader count == 0, must be exclusive lock */
			if (data & LCK_RW_WANT_UPGRADE) {
				data &= ~(LCK_RW_WANT_UPGRADE);
			} else {
				if (data & LCK_RW_WANT_EXCL)
					data &= ~(LCK_RW_WANT_EXCL);
				else					/* lock is not 'owned', panic */
					panic("Releasing non-exclusive RW lock without a reader refcount!");
			}
			if (!once) {
				// Only check for holder and clear it once
				assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
				ordered_store_rw_owner(lock, THREAD_NULL);
				once = TRUE;
			}
check_waiters:
			/*
			 * test the original values to match what
			 * lck_rw_done_gen is going to do to determine
			 * which wakeups need to happen...
			 *
			 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
			 */
			if (prev & LCK_RW_W_WAITING) {
				data &= ~(LCK_RW_W_WAITING);
				if ((prev & LCK_RW_PRIV_EXCL) == 0)
					data &= ~(LCK_RW_R_WAITING);
			} else
				data &= ~(LCK_RW_R_WAITING);
		}
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
			break;
		cpu_pause();
	}
	return lck_rw_done_gen(lock, prev);
}

/*
 *      Routine:        lck_rw_done_gen
 *
 *	called from lck_rw_done()...
 *	prior_lock_state is the value in the 1st
 *	word of the lock at the time of a successful
 *	atomic compare and exchange with the new value...
 *	it represents the state of the lock before we
 *	decremented the rw_shared_count or cleared either
 *	rw_want_upgrade or rw_want_write and
 *	the lck_x_waiting bits...  since the caller
 *	has already changed the state atomically,
 *	we just need to decide if we should
 *	wake up anyone and what value to return... we do
 *	this by examining the state of the lock before
 *	we changed it
 */
static lck_rw_type_t
lck_rw_done_gen(
	lck_rw_t	*lck,
	uint32_t	prior_lock_state)
{
	lck_rw_word_t	fake_lck;
	lck_rw_type_t	lock_type;
	thread_t		thread;
	uint32_t		rwlock_count;

	/*
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll fake up a lock word from it and
	 * carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
	fake_lck.data = prior_lock_state;

	if (fake_lck.shared_count <= 1) {
		if (fake_lck.w_waiting)
			thread_wakeup(LCK_RW_WRITER_EVENT(lck));

		if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
			thread_wakeup(LCK_RW_READER_EVENT(lck));
	}
	if (fake_lck.shared_count)
		lock_type = LCK_RW_TYPE_SHARED;
	else
		lock_type = LCK_RW_TYPE_EXCLUSIVE;

	/* Check if dropping the lock means that we need to unpromote */
	thread = current_thread();
	rwlock_count = thread->rwlock_count--;
#if MACH_LDEBUG
	if (rwlock_count == 0)
		panic("rw lock count underflow for thread %p", thread);
#endif
	if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		/* sched_flags checked without lock, but will be rechecked while clearing */
		lck_rw_clear_promotion(thread);
	}
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
#endif
	return lock_type;
}

/*
 *	Routine:	lck_rw_lock_shared_gen
 *	Function:
 *		Fast path code has determined that this lock
 *		is held exclusively... this is where we spin/block
 *		until we can acquire the lock in the shared mode
 */
static void
lck_rw_lock_shared_gen(
	lck_rw_t	*lck)
{
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	lck_rw_word_t		word;
	boolean_t		gotlock = 0;
	int			slept = 0;
	wait_result_t		res = 0;
	boolean_t		istate;

#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int readers_at_sleep = 0;
	boolean_t dtrace_ls_initialized = FALSE;
	boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
#endif /* CONFIG_DTRACE */

	while ( !lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {

#if	CONFIG_DTRACE
		if (dtrace_ls_initialized == FALSE) {
			dtrace_ls_initialized = TRUE;
			dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
			dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
			dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
			if (dtrace_ls_enabled) {
				/*
				 * Either sleeping or spinning is happening,
				 * start timing our delay interval now.
				 */
				readers_at_sleep = lck->lck_rw_shared_count;
				wait_interval = mach_absolute_time();
			}
		}
#endif

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
			     trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);

		gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
			     trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);

		if (gotlock)
			break;
		/*
		 * if we get here, the deadline has expired w/o us
		 * being able to grab the lock for read;
		 * check to see if we're allowed to do a thread_block
		 */
		if (lck->lck_rw_can_sleep) {

			istate = lck_interlock_lock(lck);

			word.data = ordered_load_rw(lck);
			if ((word.want_excl || word.want_upgrade) &&
			    ((word.shared_count == 0) || word.priv_excl)) {

				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
					     trace_lck, word.want_excl, word.want_upgrade, 0, 0);

				word.r_waiting = 1;
				ordered_store_rw(lck, word.data);

				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
				res = assert_wait(LCK_RW_READER_EVENT(lck), THREAD_UNINT);
				lck_interlock_unlock(lck, istate);

				if (res == THREAD_WAITING) {
					res = thread_block(THREAD_CONTINUE_NULL);
					slept++;
				}
				KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
					     trace_lck, res, slept, 0, 0);
			} else {
				word.shared_count++;
				ordered_store_rw(lck, word.data);
				lck_interlock_unlock(lck, istate);
				break;
			}
		}
	}

#if	CONFIG_DTRACE
	if (dtrace_ls_enabled == TRUE) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif	/* CONFIG_DTRACE */
}
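
/*
 *	Usage sketch for the shared (read) side served by the fast path and,
 *	under contention, by lck_rw_lock_shared_gen() above.  A minimal,
 *	hypothetical example: "my_rwlock" and "my_data" are assumed names, not
 *	part of this file.
 */
#if 0	/* illustrative sketch only */
static int
example_read_side(lck_rw_t *my_rwlock, const int *my_data)
{
	int value;

	lck_rw_lock_shared(my_rwlock);		// spins/blocks if a writer holds or wants the lock
	value = *my_data;			// read-only critical section
	lck_rw_unlock_shared(my_rwlock);
	return value;
}
#endif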


void
lck_rw_assert(
	lck_rw_t		*lck,
	unsigned int	type)
{
	switch (type) {
	case LCK_RW_ASSERT_SHARED:
		if ((lck->lck_rw_shared_count != 0) &&
		    (lck->lck_rw_owner == THREAD_NULL)) {
			return;
		}
		break;
	case LCK_RW_ASSERT_EXCLUSIVE:
		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
			(lck->lck_rw_shared_count == 0) &&
		    (lck->lck_rw_owner == current_thread())) {
			return;
		}
		break;
	case LCK_RW_ASSERT_HELD:
		if (lck->lck_rw_shared_count != 0)
			return;		// Held shared
		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    (lck->lck_rw_owner == current_thread())) {
			return;		// Held exclusive
		}
		break;
	case LCK_RW_ASSERT_NOTHELD:
		if ((lck->lck_rw_shared_count == 0) &&
		   !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    (lck->lck_rw_owner == THREAD_NULL)) {
			return;
		}
		break;
	default:
		break;
	}
	panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
}


/*
 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck) {
	if (not_in_kdp) {
		panic("panic: rw lock exclusive check done outside of kernel debugger");
	}
	return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
}

/*
 * The C portion of the mutex package.  These routines are only invoked
 * if the optimized assembler routines can't do the work.
 */

/*
 * Forward declaration
 */

void 
lck_mtx_ext_init(
		 lck_mtx_ext_t * lck,
		 lck_grp_t * grp,
		 lck_attr_t * attr);

/*
 *      Routine:        lck_mtx_alloc_init
 */
lck_mtx_t      *
lck_mtx_alloc_init(
		   lck_grp_t * grp,
		   lck_attr_t * attr)
{
	lck_mtx_t      *lck;

	if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0)
		lck_mtx_init(lck, grp, attr);

	return (lck);
}

/*
 *      Routine:        lck_mtx_free
 */
void
lck_mtx_free(
	     lck_mtx_t * lck,
	     lck_grp_t * grp)
{
	lck_mtx_destroy(lck, grp);
	kfree((void *) lck, sizeof(lck_mtx_t));
}
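
/*
 *	Lifecycle sketch for a dynamically allocated mutex using the routines
 *	above.  Minimal and hypothetical: "my_grp" is an assumed, caller-supplied
 *	lock group.
 */
#if 0	/* illustrative sketch only */
static void
example_mutex_lifecycle(lck_grp_t *my_grp)
{
	lck_mtx_t *m = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);

	lck_mtx_lock(m);
	/* ... critical section ... */
	lck_mtx_unlock(m);

	lck_mtx_free(m, my_grp);	// lck_mtx_destroy() plus kfree()
}
#endif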

/*
 *      Routine:        lck_mtx_init
 */
void
lck_mtx_init(
	     lck_mtx_t * lck,
	     lck_grp_t * grp,
	     lck_attr_t * attr)
{
#ifdef	BER_XXX
	lck_mtx_ext_t  *lck_ext;
#endif
	lck_attr_t     *lck_attr;

	if (attr != LCK_ATTR_NULL)
		lck_attr = attr;
	else
		lck_attr = &LockDefaultLckAttr;

#ifdef	BER_XXX
	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
			lck_mtx_ext_init(lck_ext, grp, lck_attr);
			lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
			lck->lck_mtx_ptr = lck_ext;
			lck->lck_mtx_type = LCK_MTX_TYPE;
		}
	} else
#endif
	{
		lck->lck_mtx_ptr = NULL;		// Clear any padding in the union fields below
		lck->lck_mtx_waiters = 0;
		lck->lck_mtx_pri = 0;
		lck->lck_mtx_type = LCK_MTX_TYPE;
		ordered_store_mtx(lck, 0);
	}
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}

/*
 *      Routine:        lck_mtx_init_ext
 */
void
lck_mtx_init_ext(
		 lck_mtx_t * lck,
		 lck_mtx_ext_t * lck_ext,
		 lck_grp_t * grp,
		 lck_attr_t * attr)
{
	lck_attr_t     *lck_attr;

	if (attr != LCK_ATTR_NULL)
		lck_attr = attr;
	else
		lck_attr = &LockDefaultLckAttr;

	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck_mtx_ext_init(lck_ext, grp, lck_attr);
		lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
		lck->lck_mtx_ptr = lck_ext;
		lck->lck_mtx_type = LCK_MTX_TYPE;
	} else {
		lck->lck_mtx_waiters = 0;
		lck->lck_mtx_pri = 0;
		lck->lck_mtx_type = LCK_MTX_TYPE;
		ordered_store_mtx(lck, 0);
	}
	lck_grp_reference(grp);
	lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
}

/*
 *      Routine:        lck_mtx_ext_init
 */
void
lck_mtx_ext_init(
		 lck_mtx_ext_t * lck,
		 lck_grp_t * grp,
		 lck_attr_t * attr)
{
	bzero((void *) lck, sizeof(lck_mtx_ext_t));

	lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;

	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
		lck->lck_mtx_deb.type = MUTEX_TAG;
		lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
	}
	lck->lck_mtx_grp = grp;

	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
		lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
}

/* The slow versions */
static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);

/*
 *	Routine:	lck_mtx_verify
 *
 *	Verify if a mutex is valid
 */
static inline void
lck_mtx_verify(lck_mtx_t *lock)
{
	if (lock->lck_mtx_type != LCK_MTX_TYPE)
		panic("Invalid mutex %p", lock);
#if	DEVELOPMENT || DEBUG
	if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
		panic("Mutex destroyed %p", lock);
#endif	/* DEVELOPMENT || DEBUG */
}

/*
 *	Routine:	lck_mtx_check_preemption
 *
 *	Verify preemption is enabled when attempting to acquire a mutex.
 */

static inline void
lck_mtx_check_preemption(lck_mtx_t *lock)
{
#if	DEVELOPMENT || DEBUG
	int pl = get_preemption_level();

	if (pl != 0)
		panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
#else
	(void)lock;
#endif
}
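
/*
 *	For example: calling lck_mtx_lock() while holding a spin lock, or from
 *	any other context that has disabled preemption, trips this check on
 *	DEVELOPMENT/DEBUG builds, since acquiring a mutex may block.
 */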

/*
 *	Routine:	lck_mtx_lock
 */
void
lck_mtx_lock(lck_mtx_t *lock)
{
	thread_t	thread;

	lck_mtx_verify(lock);
	lck_mtx_check_preemption(lock);
	thread = current_thread();
	if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
					memory_order_acquire_smp, FALSE)) {
#if	CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
		return;
	}
	lck_mtx_lock_contended(lock, thread, FALSE);
}

/*
 *	This is the slow version of mutex locking.
 */
static void NOINLINE
lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
{
	thread_t	holding_thread;
	uintptr_t	state;
	int		waiters;

	if (interlocked)
		goto interlock_held;

	for ( ; ; ) {
		if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
						memory_order_acquire_smp, FALSE))
			return;
		interlock_lock(lock);
interlock_held:
		state = ordered_load_mtx(lock);
		holding_thread = LCK_MTX_STATE_TO_THREAD(state);
		if (holding_thread == NULL)
			break;
		ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
		lck_mtx_lock_wait(lock, holding_thread);
	}
	waiters = lck_mtx_lock_acquire(lock);
	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0)
		state |= ARM_LCK_WAITERS;
#if __SMP__
	state |= LCK_ILOCK;				// Preserve interlock
	ordered_store_mtx(lock, state);	// Set ownership
	interlock_unlock(lock);			// Release interlock, enable preemption
#else
	ordered_store_mtx(lock, state);	// Set ownership
	enable_preemption();
#endif
	load_memory_barrier();

#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
}

/*
 *	Common code for mutex locking as spinlock
 */
static inline void
lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
{
	uintptr_t	state;

	interlock_lock(lock);
	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state)) {
		if (allow_held_as_mutex)
			lck_mtx_lock_contended(lock, current_thread(), TRUE);
		else
			// "Always" variants can never block. If the lock is held and blocking is not allowed
			// then someone is mixing always and non-always calls on the same lock, which is
			// forbidden.
			panic("Attempting to block on a lock taken as spin-always %p", lock);
		return;
	}
	state &= ARM_LCK_WAITERS;						// Preserve waiters bit
	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);	// Add spin tag and maintain interlock
	ordered_store_mtx(lock, state);
	load_memory_barrier();

#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
}

/*
 *	Routine:	lck_mtx_lock_spin
 */
void
lck_mtx_lock_spin(lck_mtx_t *lock)
{
	lck_mtx_check_preemption(lock);
	lck_mtx_lock_spin_internal(lock, TRUE);
}

/*
 *	Routine:	lck_mtx_lock_spin_always
 */
void
lck_mtx_lock_spin_always(lck_mtx_t *lock)
{
	lck_mtx_lock_spin_internal(lock, FALSE);
}

/*
 *	Routine:	lck_mtx_try_lock
 */
boolean_t
lck_mtx_try_lock(lck_mtx_t *lock)
{
	thread_t	thread = current_thread();

	lck_mtx_verify(lock);
	if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
					memory_order_acquire_smp, FALSE)) {
#if	CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
		return TRUE;
	}
	return lck_mtx_try_lock_contended(lock, thread);
}

static boolean_t NOINLINE
lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
{
	thread_t	holding_thread;
	uintptr_t	state;
	int		waiters;

#if	__SMP__
	interlock_lock(lock);
	state = ordered_load_mtx(lock);
	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
	if (holding_thread) {
		interlock_unlock(lock);
		return FALSE;
	}
#else
	disable_preemption_for_thread(thread);
	state = ordered_load_mtx(lock);
	if (state & LCK_ILOCK)
		panic("Unexpected interlock set (%p)", lock);
	holding_thread = LCK_MTX_STATE_TO_THREAD(state);
	if (holding_thread) {
		enable_preemption();
		return FALSE;
	}
	state |= LCK_ILOCK;
	ordered_store_mtx(lock, state);
#endif	// __SMP__
	waiters = lck_mtx_lock_acquire(lock);
	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0)
		state |= ARM_LCK_WAITERS;
#if __SMP__
	state |= LCK_ILOCK;				// Preserve interlock
	ordered_store_mtx(lock, state);	// Set ownership
	interlock_unlock(lock);			// Release interlock, enable preemption
#else
	ordered_store_mtx(lock, state);	// Set ownership
	enable_preemption();
#endif
	load_memory_barrier();
	return TRUE;
}

static inline boolean_t
lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
{
	uintptr_t	state;

	if (!interlock_try(lock))
		return FALSE;
	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state)) {
		// Lock is held as mutex
		if (allow_held_as_mutex)
			interlock_unlock(lock);
		else
			// "Always" variants can never block. If the lock is held as a normal mutex
			// then someone is mixing always and non-always calls on the same lock, which is
			// forbidden.
			panic("Spin-mutex held as full mutex %p", lock);
		return FALSE;
	}
	state &= ARM_LCK_WAITERS;						// Preserve waiters bit
	state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK);	// Add spin tag and maintain interlock
	ordered_store_mtx(lock, state);
	load_memory_barrier();

#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
#endif /* CONFIG_DTRACE */
	return TRUE;
}

/*
 *	Routine: lck_mtx_try_lock_spin
 */
boolean_t
lck_mtx_try_lock_spin(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_spin_internal(lock, TRUE);
}

/*
 *	Routine: lck_mtx_try_lock_spin_always
 */
boolean_t
lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_spin_internal(lock, FALSE);
}
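
/*
 *	Usage sketch for the non-blocking try-lock paths above.  Minimal and
 *	hypothetical: "my_mtx" and "my_counter" are assumed names.
 */
#if 0	/* illustrative sketch only */
static boolean_t
example_try_increment(lck_mtx_t *my_mtx, int *my_counter)
{
	if (!lck_mtx_try_lock(my_mtx))
		return FALSE;		// lock was busy; do not block here
	(*my_counter)++;		// acquired without blocking
	lck_mtx_unlock(my_mtx);
	return TRUE;
}
#endif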



/*
 *	Routine:	lck_mtx_unlock
 */
void
lck_mtx_unlock(lck_mtx_t *lock)
{
	thread_t	thread = current_thread();
	uintptr_t	state;
	boolean_t	ilk_held = FALSE;

	lck_mtx_verify(lock);

	state = ordered_load_mtx(lock);
	if (state & LCK_ILOCK) {
		if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG)
			ilk_held = TRUE;	// Interlock is held by (presumably) this thread
		goto slow_case;
	}
	// Locked as a mutex
	if (atomic_compare_exchange(&lock->lck_mtx_data, LCK_MTX_THREAD_TO_STATE(thread), 0,
					memory_order_release_smp, FALSE)) {
#if	CONFIG_DTRACE
		LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
		return;
	}
slow_case:
	lck_mtx_unlock_contended(lock, thread, ilk_held);
}

static void NOINLINE
lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
{
	uintptr_t	state;

	if (ilk_held) {
		state = ordered_load_mtx(lock);
	} else {
#if	__SMP__
		interlock_lock(lock);
		state = ordered_load_mtx(lock);
		if (thread != LCK_MTX_STATE_TO_THREAD(state))
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
#else
		disable_preemption_for_thread(thread);
		state = ordered_load_mtx(lock);
		if (state & LCK_ILOCK)
			panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
		if (thread != LCK_MTX_STATE_TO_THREAD(state))
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
		state |= LCK_ILOCK;
		ordered_store_mtx(lock, state);
#endif
	}
	if (state & ARM_LCK_WAITERS) {
		lck_mtx_unlock_wakeup(lock, thread);
		state = ordered_load_mtx(lock);
	} else {
		assertf(lock->lck_mtx_pri == 0, "pri=0x%x", lock->lck_mtx_pri);
	}
	state &= ARM_LCK_WAITERS;		// Retain waiters bit
#if __SMP__
	state |= LCK_ILOCK;
	ordered_store_mtx(lock, state);
	interlock_unlock(lock);
#else
	ordered_store_mtx(lock, state);
	enable_preemption();
#endif

#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
}

/*
 *	Routine:	lck_mtx_assert
 */
void
lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
{
	thread_t	thread, holder;
	uintptr_t	state;

	state = ordered_load_mtx(lock);
	holder = LCK_MTX_STATE_TO_THREAD(state);
	if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
		// Lock is held in spin mode, owner is unknown.
		return;	// Punt
	}
	thread = current_thread();
	if (type == LCK_MTX_ASSERT_OWNED) {
		if (thread != holder)
			panic("lck_mtx_assert(): mutex (%p) not owned", lock);
	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
		if (thread == holder)
			panic("lck_mtx_assert(): mutex (%p) owned", lock);
	} else
		panic("lck_mtx_assert(): invalid arg (%u)", type);
}
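
/*
 *	Usage sketch for lck_mtx_assert(): a routine whose caller is required to
 *	hold the mutex can assert ownership on entry.  Minimal and hypothetical:
 *	"my_mtx" and "my_state" are assumed names.
 */
#if 0	/* illustrative sketch only */
static void
example_requires_lock_held(lck_mtx_t *my_mtx, int *my_state)
{
	lck_mtx_assert(my_mtx, LCK_MTX_ASSERT_OWNED);	// panics if not owned by this thread
	(*my_state)++;					// protected by my_mtx
}
#endif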

/*
 *	Routine:	lck_mtx_ilk_unlock
 */
boolean_t
lck_mtx_ilk_unlock(lck_mtx_t *lock)
{
	interlock_unlock(lock);
	return TRUE;
}

/*
 *	Routine:	lck_mtx_convert_spin
 *
 *	Convert a mutex held for spin into a held full mutex
 */
void
lck_mtx_convert_spin(lck_mtx_t *lock)
{
	thread_t	thread = current_thread();
	uintptr_t	state;
	int			waiters;

	state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state) == thread)
		return;		// Already owned as mutex, return
	if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG))
		panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
	state &= ~(LCK_MTX_THREAD_MASK);		// Clear the spin tag
	ordered_store_mtx(lock, state);
	waiters = lck_mtx_lock_acquire(lock);	// Acquire to manage priority boosts
	state = LCK_MTX_THREAD_TO_STATE(thread);
	if (waiters != 0)
		state |= ARM_LCK_WAITERS;
#if __SMP__
	state |= LCK_ILOCK;
	ordered_store_mtx(lock, state);			// Set ownership
	interlock_unlock(lock);					// Release interlock, enable preemption
#else
	ordered_store_mtx(lock, state);			// Set ownership
	enable_preemption();
#endif
}
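
/*
 *	Usage sketch for the spin-then-convert pattern: take the lock in spin
 *	mode for a short look, and convert to a full mutex hold only when longer
 *	(possibly blocking) work is needed.  Minimal and hypothetical: "my_mtx"
 *	and "need_full_hold" are assumed names.
 */
#if 0	/* illustrative sketch only */
static void
example_spin_then_convert(lck_mtx_t *my_mtx, boolean_t need_full_hold)
{
	lck_mtx_lock_spin(my_mtx);		// interlock held, preemption disabled
	if (!need_full_hold) {
		lck_mtx_unlock(my_mtx);		// quick exit; never converted
		return;
	}
	lck_mtx_convert_spin(my_mtx);		// now held as a full mutex
	/* ... longer critical section, blocking allowed ... */
	lck_mtx_unlock(my_mtx);
}
#endif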


/*
 *      Routine:        lck_mtx_destroy
 */
void
lck_mtx_destroy(
		lck_mtx_t * lck,
		lck_grp_t * grp)
{
	if (lck->lck_mtx_type != LCK_MTX_TYPE)
		panic("Destroying invalid mutex %p", lck);
	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
		panic("Destroying previously destroyed lock %p", lck);
	lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
	lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
	lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
	lck_grp_deallocate(grp);
	return;
}

/*
 *	Routine:	lck_spin_assert
 */
void
lck_spin_assert(lck_spin_t *lock, unsigned int type)
{
	thread_t	thread, holder;
	uintptr_t	state;

	if (lock->type != LCK_SPIN_TYPE)
		panic("Invalid spinlock %p", lock);

	state = lock->lck_spin_data;
	holder = (thread_t)(state & ~LCK_ILOCK);
	thread = current_thread();
	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0)
			panic("Lock not owned %p = %lx", lock, state);
		if (holder != thread)
			panic("Lock not owned by current thread %p = %lx", lock, state);
		if ((state & LCK_ILOCK) == 0)
			panic("Lock bit not set %p = %lx", lock, state);
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != 0) {
			if (holder == thread)
				panic("Lock owned by current thread %p = %lx", lock, state);
			else
				panic("Lock %p owned by thread %p", lock, holder);
		}
		if (state & LCK_ILOCK)
			panic("Lock bit set %p = %lx", lock, state);
	} else
		panic("lck_spin_assert(): invalid arg (%u)", type);
}

boolean_t
lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
{
	lck_rw_word_t	word;

	lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);

	word.data = ordered_load_rw(lck);
	if (word.want_excl || word.want_upgrade || force_yield) {
		lck_rw_unlock_shared(lck);
		mutex_pause(2);
		lck_rw_lock_shared(lck);
		return TRUE;
	}

	return FALSE;
}
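
/*
 *	Usage sketch for lck_rw_lock_yield_shared(): a long scan under the shared
 *	lock periodically offers the lock to waiting writers.  Minimal and
 *	hypothetical: "my_rwlock" and "my_entries" are assumed names.
 */
#if 0	/* illustrative sketch only */
static void
example_long_shared_scan(lck_rw_t *my_rwlock, int my_entries)
{
	int i;

	lck_rw_lock_shared(my_rwlock);
	for (i = 0; i < my_entries; i++) {
		/* ... examine one entry under the shared lock ... */

		// If a writer is waiting, this drops and re-takes the shared
		// lock; any state cached while it was held may now be stale.
		(void) lck_rw_lock_yield_shared(my_rwlock, FALSE);
	}
	lck_rw_unlock_shared(my_rwlock);
}
#endif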

/*
 * Routine: kdp_lck_mtx_lock_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
	uintptr_t	state;

	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	state = ordered_load_mtx(lck);
	if (state == LCK_MTX_TAG_DESTROYED)
		return FALSE;
	if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK))
		return TRUE;
	return FALSE;
}

void
kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
	uintptr_t state   = ordered_load_mtx(mutex);
	thread_t holder   = LCK_MTX_STATE_TO_THREAD(state);
	if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
		waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
	} else {
		assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
		assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
		waitinfo->owner = thread_tid(holder);
	}
}

void
kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	lck_rw_t	*rwlck = NULL;
	switch (waitinfo->wait_type) {
		case kThreadWaitKernelRWLockRead:
			rwlck = READ_EVENT_TO_RWLOCK(event);
			break;
		case kThreadWaitKernelRWLockWrite:
		case kThreadWaitKernelRWLockUpgrade:
			rwlck = WRITE_EVENT_TO_RWLOCK(event);
			break;
		default:
			panic("%s was called with an invalid blocking type", __FUNCTION__);
			break;
	}
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
	waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
}