#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>
#include <mach/mach_types.h>
#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/zalloc.h>
#include <kern/debug.h>
#include <pexpert/pexpert.h>
#define XNU_TEST_BITMAP
#include <kern/bits.h>
#include <sys/ulock.h>
/* Lock group that every mutex created in this file is registered with. */
static lck_grp_t *ull_lck_grp;
/* Global mutex taken around hash-bucket lookups, insertions and removals. */
static lck_mtx_t ull_table_lock;

#define ull_global_lock()       lck_mtx_lock(&ull_table_lock)
#define ull_global_unlock()     lck_mtx_unlock(&ull_table_lock)

/*
 * Per-entry mutex helpers.  The macro argument is parenthesized so that any
 * pointer-valued expression (not just a plain identifier) expands correctly.
 */
#define ull_lock(ull)           lck_mtx_lock(&(ull)->ull_lock)
#define ull_unlock(ull)         lck_mtx_unlock(&(ull)->ull_lock)
#define ull_assert_owned(ull)   LCK_MTX_ASSERT(&(ull)->ull_lock, LCK_MTX_ASSERT_OWNED)

/* The address of the ull_t itself is used as the wait event. */
#define ULOCK_TO_EVENT(ull)     ((event_t)(ull))
#define EVENT_TO_ULOCK(event)   ((ull_t *)(event))
/*
 * Hash-table key identifying one user lock: the user-space address of the
 * lock word plus the pid of the process it belongs to.
 *
 * The struct is packed; the whole object is hashed as raw bytes (see
 * ULL_INDEX and the accompanying static_assert), so it must not contain
 * padding.
 */
typedef struct __attribute__((packed)) {
user_addr_t ulk_addr;
pid_t ulk_pid;
} ulk_t;
/*
 * Compare two lock keys.
 *
 * Returns true only when both the pid and the user address are equal.
 */
inline static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
	if (a->ulk_pid != b->ulk_pid) {
		return false;
	}
	return a->ulk_addr == b->ulk_addr;
}
/*
 * Kernel-side state for one user lock that currently has (or recently had)
 * waiters.  Entries live in the ull_bucket hash table, keyed by ull_key.
 */
typedef struct ull {
thread_t ull_owner; /* owner recorded by ull_promote_owner_locked(); holds a thread reference while non-NULL */
ulk_t ull_key; /* lookup key; zeroed by the last waiter so the entry no longer matches lookups */
ulk_t ull_saved_key; /* copy of the key taken at allocation; printed by ull_dump() */
lck_mtx_t ull_lock; /* per-entry mutex (see ull_lock()/ull_unlock()) */
int32_t ull_nwaiters; /* threads currently inside ulock_wait() on this entry */
int32_t ull_max_nwaiters; /* high-water mark of ull_nwaiters */
int32_t ull_refcount; /* starts at 1 in ull_alloc(); +1 per ull_get(); entry is freed by ull_put() at 0 */
struct promote_token ull_promote_token; /* passed to thread_user_promotion_add/update for the owner */
queue_chain_t ull_hash_link; /* linkage within a hash bucket */
uint8_t ull_opcode; /* opcode latched by the first waiter; 0 means not yet set */
} ull_t;
/* Compile-time switch: when true, the syscalls below trace via kprintf. */
static const bool ull_debug = false;
/* One-time setup of the lock group, hash table and zone. */
extern void ulock_initialize(void);
/* ull_get() flag: only look up an existing entry; return NULL rather than allocating. */
#define ULL_MUST_EXIST 0x0001
/* Look up (or create) the entry for a key; on success it is returned locked with a reference. */
static ull_t *ull_get(ulk_t *, uint32_t);
/* Drop a reference and unlock; frees the entry when the count reaches zero. */
static void ull_put(ull_t *);
/* Change the recorded owner; returns the previous owner (if any) for the caller to deallocate. */
static thread_t ull_promote_owner_locked(ull_t* ull, thread_t thread);
#if DEVELOPMENT || DEBUG
/*
 * Fault-injection control, set through UL_DEBUG_SIMULATE_COPYIN_FAULT in
 * ulock_wake(): when equal to 1 or to the caller's pid, ulock_wait()
 * periodically turns a successful copyin into EFAULT.
 */
static int ull_simulate_copyin_fault = 0;

/*
 * Print every field of one ull_t with kprintf, one field per line.
 */
static void
ull_dump(ull_t *ull)
{
	const ulk_t *cur_key = &ull->ull_key;
	const ulk_t *orig_key = &ull->ull_saved_key;
	const struct promote_token *token = &ull->ull_promote_token;

	kprintf("ull\t%p\n", ull);

	/* Current key, then the key captured at allocation time. */
	kprintf("ull_key.ulk_pid\t%d\n", cur_key->ulk_pid);
	kprintf("ull_key.ulk_addr\t%p\n", (void *)(cur_key->ulk_addr));
	kprintf("ull_saved_key.ulk_pid\t%d\n", orig_key->ulk_pid);
	kprintf("ull_saved_key.ulk_addr\t%p\n", (void *)(orig_key->ulk_addr));

	/* Counters. */
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_max_nwaiters\t%d\n", ull->ull_max_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);

	/* Lock kind, owner and promotion state. */
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_promote_token\t%d, %d\n\n", token->pt_basepri, token->pt_qos);
}
#endif
/* Number of hash buckets; set in ulock_initialize() to a power of two. */
static int ull_hash_buckets;
/* Array of ull_hash_buckets queue heads, allocated in ulock_initialize(). */
static queue_head_t *ull_bucket;
/* Count of ull_alloc() calls; reset to 0 by a full hash dump (ull_hash_dump(0)). */
static uint32_t ull_nzalloc = 0;
/* Zone that ull_t objects are allocated from. */
static zone_t ull_zone;
/*
 * Map a key, treated as `length` raw bytes, to a bucket index.
 *
 * The Jenkins hash of the bytes is reduced with a bit mask, which relies on
 * ull_hash_buckets being a power of two.
 */
static __inline__ uint32_t
ull_hash_index(char *key, size_t length)
{
	const uint32_t bucket_mask = (ull_hash_buckets - 1);

	return jenkins_hash(key, length) & bucket_mask;
}
/* The key is hashed as raw bytes, so it must not contain any padding. */
static_assert(sizeof(ulk_t) == sizeof(user_addr_t) + sizeof(pid_t));
/*
 * Bucket index for the key pointed to by keyp.  The argument is
 * parenthesized so that arbitrary pointer expressions expand correctly.
 */
#define ULL_INDEX(keyp) ull_hash_index((char *)(keyp), sizeof *(keyp))
void
ulock_initialize(void)
{
ull_lck_grp = lck_grp_alloc_init("ulocks", NULL);
lck_mtx_init(&ull_table_lock, ull_lck_grp, NULL);
assert(thread_max > 16);
ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));
kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
assert(ull_hash_buckets >= thread_max/4);
ull_bucket = (queue_head_t *)kalloc(sizeof(queue_head_t) * ull_hash_buckets);
assert(ull_bucket != NULL);
for (int i = 0; i < ull_hash_buckets; i++) {
queue_init(&ull_bucket[i]);
}
ull_zone = zinit(sizeof(ull_t),
thread_max * sizeof(ull_t),
0, "ulocks");
zone_change(ull_zone, Z_NOENCRYPT, TRUE);
}
#if DEVELOPMENT || DEBUG
/*
 * Dump hash-table entries with kprintf while holding the global table lock.
 *
 * pid == 0 dumps every entry, prints BEGIN/END and per-bucket headers, and
 * resets the ull_nzalloc counter; any other pid dumps only the entries whose
 * key carries that pid.
 *
 * Returns the number of entries dumped.
 */
static int
ull_hash_dump(pid_t pid)
{
	const bool dump_all = (pid == 0);
	int ndumped = 0;

	ull_global_lock();

	if (dump_all) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}

	for (int bucket = 0; bucket < ull_hash_buckets; bucket++) {
		queue_head_t *head = &ull_bucket[bucket];
		ull_t *entry;

		if (queue_empty(head)) {
			continue;
		}
		if (dump_all) {
			kprintf("%s>index %d:\n", __FUNCTION__, bucket);
		}
		qe_foreach_element(entry, head, ull_hash_link) {
			if (dump_all || (entry->ull_key.ulk_pid == pid)) {
				ull_dump(entry);
				ndumped++;
			}
		}
	}

	if (dump_all) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}

	ull_global_unlock();
	return ndumped;
}
#endif
/*
 * Allocate and initialize a new ull_t for `key`.
 *
 * The new entry starts with a reference count of 1, no owner, no waiters and
 * an unset opcode; its mutex is initialized but not held.  The caller
 * (ull_get) holds the global table lock across this call, which also covers
 * the ull_nzalloc statistic.
 *
 * Returns the new entry, or NULL if the zone allocation fails.  The failure
 * is reported to the caller (which already handles NULL) instead of relying
 * on an assert() that is compiled out of release kernels.
 */
static ull_t *
ull_alloc(ulk_t *key)
{
	ull_t *ull = (ull_t *)zalloc(ull_zone);

	if (ull == NULL) {
		return NULL;
	}

	ull->ull_refcount = 1;
	ull->ull_key = *key;
	ull->ull_saved_key = *key;
	ull->ull_nwaiters = 0;
	ull->ull_max_nwaiters = 0;
	ull->ull_opcode = 0;

	ull->ull_owner = THREAD_NULL;
	ull->ull_promote_token = PROMOTE_TOKEN_INIT;

	lck_mtx_init(&ull->ull_lock, ull_lck_grp, NULL);

	ull_nzalloc++;
	return ull;
}
/*
 * Destroy an entry's mutex and return the entry to its zone.
 *
 * The entry must have no recorded owner and its mutex must not be held by
 * the caller.
 */
static void
ull_free(ull_t *ull)
{
	lck_mtx_t *entry_mtx = &ull->ull_lock;

	assert(ull->ull_owner == THREAD_NULL);
	LCK_MTX_ASSERT(entry_mtx, LCK_ASSERT_NOTOWNED);

	lck_mtx_destroy(entry_mtx, ull_lck_grp);
	zfree(ull_zone, ull);
}
/*
 * Find the entry for `key`, creating it unless ULL_MUST_EXIST is set.
 *
 * The bucket is searched under the global table lock; each candidate is
 * locked while its key is compared, and the matching one stays locked.
 *
 * On success the entry is returned LOCKED with its reference count bumped;
 * the caller releases both with ull_put().  Returns NULL when the entry does
 * not exist and ULL_MUST_EXIST was given, or when allocation fails.
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags)
{
	const uint bucket = ULL_INDEX(key);
	ull_t *found = NULL;
	ull_t *cursor;

	ull_global_lock();

	qe_foreach_element(cursor, &ull_bucket[bucket], ull_hash_link) {
		ull_lock(cursor);
		if (ull_key_match(&cursor->ull_key, key)) {
			/* Keep the matching entry locked. */
			found = cursor;
			break;
		}
		ull_unlock(cursor);
	}

	if (found == NULL) {
		/* No existing entry: allocate one unless the caller forbids it. */
		if (!(flags & ULL_MUST_EXIST)) {
			found = ull_alloc(key);
		}
		if (found == NULL) {
			ull_global_unlock();
			return NULL;
		}
		ull_lock(found);
		enqueue(&ull_bucket[bucket], &found->ull_hash_link);
	}

	found->ull_refcount++;
	ull_global_unlock();

	return found;
}
/*
 * Drop one reference on a locked entry and unlock it.
 *
 * When the count reaches zero the entry is unlinked from its hash bucket
 * under the global table lock and freed.  By then the key must already have
 * been zeroed (asserted), so no lookup can still match it.
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);

	int refcount = --ull->ull_refcount;
	assert(refcount == 0 ? (ull->ull_key.ulk_pid == 0 && ull->ull_key.ulk_addr == 0) : 1);
	ull_unlock(ull);

	if (refcount <= 0) {
		/* Last reference: remove from the table, then release the memory. */
		ull_global_lock();
		remqueue(&ull->ull_hash_link);
		ull_global_unlock();

#if DEVELOPMENT || DEBUG
		if (ull_debug) {
			kprintf("%s>", __FUNCTION__);
			ull_dump(ull);
		}
#endif
		ull_free(ull);
	}
}
/*
 * ulock_wait: block the calling thread on the user lock at args->addr,
 * provided the 32-bit word there still equals args->value.
 *
 * args->operation carries an opcode (UL_UNFAIR_LOCK or UL_COMPARE_AND_WAIT)
 * and ULF_* flags; args->timeout is a relative timeout (0 = none).
 *
 * Returns 0 or an errno value: EINVAL (bad flags, opcode or address),
 * ENOMEM (no table entry), EDOM (opcode differs from the one latched on the
 * entry), a copyin error, EOWNERDEAD (lock value does not name a thread),
 * ETIMEDOUT, or EINTR.  On paths that reach `out`, *retval receives the
 * number of waiters remaining on the entry.  With ULF_NO_ERRNO, errors are
 * instead returned as a negative *retval with a 0 return value.
 */
int
ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
uint opcode = args->operation & UL_OPCODE_MASK;
uint flags = args->operation & UL_FLAGS_MASK;
int ret = 0;
thread_t self = current_thread();
/* Used only to tag debug output. */
int id = thread_tid(self);
ulk_t key;
/* Thread references collected under the lock and released after it is dropped. */
thread_t owner_thread = THREAD_NULL;
thread_t old_owner = THREAD_NULL;
thread_t old_lingering_owner = THREAD_NULL;
sched_call_t workq_callback = NULL;
if (ull_debug) {
kprintf("[%d]%s>ENTER opcode %d addr %llx value %llx timeout %d flags %x\n", id, __FUNCTION__, opcode, (unsigned long long)(args->addr), args->value, args->timeout, flags);
}
/* Reject any flag bit outside ULF_WAIT_MASK. */
if ((flags & ULF_WAIT_MASK) != flags) {
ret = EINVAL;
goto munge_retval;
}
/* Only UL_UNFAIR_LOCK records an owner thread for the lock. */
boolean_t set_owner = FALSE;
switch (opcode) {
case UL_UNFAIR_LOCK:
set_owner = TRUE;
break;
case UL_COMPARE_AND_WAIT:
break;
default:
if (ull_debug) {
kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
id, __FUNCTION__, opcode,
(unsigned long long)(args->addr), flags);
}
ret = EINVAL;
goto munge_retval;
}
uint32_t value = 0;
/* The lock word must be non-NULL and aligned for an atomic 32-bit access. */
if ((args->addr == 0) || (args->addr % _Alignof(_Atomic(typeof(value))))) {
ret = EINVAL;
goto munge_retval;
}
key.ulk_pid = p->p_pid;
key.ulk_addr = args->addr;
/*
 * Disable the workqueue scheduler callback on this thread; the value
 * returned here is handed back to thread_reenable_sched_call() at
 * munge_retval.
 */
if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
workq_callback = workqueue_get_sched_callback();
workq_callback = thread_disable_sched_call(self, workq_callback);
}
/* Look up or create the entry; it comes back locked and referenced. */
ull_t *ull = ull_get(&key, 0);
if (ull == NULL) {
ret = ENOMEM;
goto munge_retval;
}
/* From here on every exit goes through `out`, which undoes this increment. */
ull->ull_nwaiters++;
if (ull->ull_nwaiters > ull->ull_max_nwaiters) {
ull->ull_max_nwaiters = ull->ull_nwaiters;
}
/* The first waiter latches the opcode; later waiters must use the same one. */
if (ull->ull_opcode == 0) {
ull->ull_opcode = opcode;
} else if (ull->ull_opcode != opcode) {
ull_unlock(ull);
ret = EDOM;
goto out;
}
/*
 * Read the user lock word while holding the entry lock.  Only
 * sizeof(value) bytes are requested; the result is then narrowed to 32 bits.
 * NOTE(review): preemption is disabled around the copyin, presumably so
 * the copy fails instead of blocking on a page fault while the mutex is
 * held - confirm against copyin_word()'s contract.
 */
uint64_t val64;
disable_preemption();
int copy_ret = copyin_word(args->addr, &val64, sizeof(value));
enable_preemption();
value = (uint32_t)val64;
#if DEVELOPMENT || DEBUG
/* Fault injection: turn every 73rd otherwise-successful copyin into EFAULT. */
if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
static _Atomic int fault_inject = 0;
if (__c11_atomic_fetch_add(&fault_inject, 1, __ATOMIC_RELAXED) % 73 == 0) {
copy_ret = EFAULT;
}
}
#endif
/* The copy failed: hand the error back to the caller. */
if (copy_ret != 0) {
ull_unlock(ull);
ret = copy_ret;
goto out;
}
/* The lock word no longer holds the expected value: return without waiting (ret stays 0). */
if (value != args->value) {
ull_unlock(ull);
if (ull_debug) {
kprintf("[%d]%s>Lock value %d has changed from expected %d so bail out\n",
id, __FUNCTION__, value, (uint32_t)(args->value));
}
goto out;
}
if (set_owner) {
/* The lock value encodes the owner's thread port name; translate it to a thread. */
mach_port_name_t owner_name = ulock_owner_value_to_port_name(args->value);
owner_thread = port_name_to_thread_for_ulock(owner_name);
/* A failed translation is an error, except when the name is MACH_PORT_DEAD. */
if (owner_name != MACH_PORT_DEAD && owner_thread == THREAD_NULL) {
ull_unlock(ull);
ret = EOWNERDEAD;
goto out;
}
/* Record the owner; any previously recorded owner is deallocated after unlocking. */
old_owner = ull_promote_owner_locked(ull, owner_thread);
}
wait_result_t wr;
uint32_t timeout = args->timeout;
thread_set_pending_block_hint(self, kThreadWaitUserLock);
/* Queue this thread on the entry's event before dropping the entry lock. */
if (timeout) {
/* The timeout is scaled by NSEC_PER_USEC. */
wr = assert_wait_timeout(ULOCK_TO_EVENT(ull), THREAD_ABORTSAFE, timeout, NSEC_PER_USEC);
} else {
wr = assert_wait(ULOCK_TO_EVENT(ull), THREAD_ABORTSAFE);
}
ull_unlock(ull);
if (ull_debug) {
kprintf("[%d]%s>after assert_wait() returned %d\n", id, __FUNCTION__, wr);
}
if (set_owner && owner_thread != THREAD_NULL && wr == THREAD_WAITING) {
/*
 * Block by handing off to the lock owner.  owner_thread is cleared so
 * it is not deallocated below - presumably thread_handoff() consumes
 * the reference (TODO confirm).
 */
wr = thread_handoff(owner_thread);
owner_thread = THREAD_NULL;
} else {
wr = thread_block(NULL);
}
if (ull_debug) {
kprintf("[%d]%s>thread_block() returned %d\n", id, __FUNCTION__, wr);
}
/* Translate the wait result into an errno value. */
switch (wr) {
case THREAD_AWAKENED:
break;
case THREAD_TIMED_OUT:
ret = ETIMEDOUT;
break;
case THREAD_INTERRUPTED:
case THREAD_RESTART:
default:
ret = EINTR;
break;
}
out:
/* Every path arriving here has already dropped the entry lock; retake it. */
ull_lock(ull);
*retval = --ull->ull_nwaiters;
if (ull->ull_nwaiters == 0) {
/* Last waiter: clear any owner still recorded on the entry. */
if (ull->ull_owner != THREAD_NULL) {
old_lingering_owner = ull_promote_owner_locked(ull, THREAD_NULL);
}
assert(ull->ull_owner == THREAD_NULL);
/* Zero the key so lookups stop matching this entry (ull_put asserts this at refcount 0). */
ull->ull_key.ulk_pid = 0;
ull->ull_key.ulk_addr = 0;
/* Drop the reference the entry was created with in ull_alloc(). */
ull->ull_refcount--;
assert(ull->ull_refcount > 0);
}
/* Drop this call's own reference and the entry lock. */
ull_put(ull);
/* Release thread references only after the entry lock has been dropped. */
if (owner_thread != THREAD_NULL) {
thread_deallocate(owner_thread);
}
if (old_owner != THREAD_NULL) {
thread_deallocate(old_owner);
}
if (old_lingering_owner != THREAD_NULL) {
thread_deallocate(old_lingering_owner);
}
assert(*retval >= 0);
munge_retval:
if (workq_callback) {
thread_reenable_sched_call(self, workq_callback);
}
/* ULF_NO_ERRNO: report the error as a negative *retval and return success. */
if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
*retval = -ret;
ret = 0;
}
return ret;
}
/*
 * ulock_wake: wake one, all, or a specific thread waiting on the user lock
 * at args->addr in process p.
 *
 * args->operation carries an opcode and ULF_* flags; with ULF_WAKE_THREAD,
 * args->wake_value is a thread port name selecting the thread to wake.
 *
 * Returns 0 or an errno value: EINVAL (bad flags, opcode, address, or
 * ULF_WAKE_THREAD combined with ULF_WAKE_ALL), ESRCH (wake_value does not
 * name a thread), ENOENT (no entry exists for this lock), EDOM (opcode
 * differs from the one latched on the entry), or EALREADY (the named thread
 * was not waiting).  With ULF_NO_ERRNO, errors are instead returned as a
 * negative *retval with a 0 return value.
 *
 * NOTE(review): retval is marked __unused but is written on the
 * ULF_NO_ERRNO path and by the debug opcodes.
 */
int
ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
{
uint opcode = args->operation & UL_OPCODE_MASK;
uint flags = args->operation & UL_FLAGS_MASK;
int ret = 0;
/* Used only to tag debug output. */
int id = thread_tid(current_thread());
ulk_t key;
/* Thread references released after the entry lock is dropped. */
thread_t wake_thread = THREAD_NULL;
thread_t old_owner = THREAD_NULL;
if (ull_debug) {
kprintf("[%d]%s>ENTER opcode %d addr %llx flags %x\n",
id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
}
/* Reject any flag bit outside ULF_WAKE_MASK. */
if ((flags & ULF_WAKE_MASK) != flags) {
ret = EINVAL;
goto munge_retval;
}
#if DEVELOPMENT || DEBUG
/*
 * Debug-only opcodes: dump the hash table or configure copyin fault
 * injection.  They return directly, bypassing the ULF_NO_ERRNO handling
 * (ret is always 0 on these paths).
 */
if (opcode == UL_DEBUG_HASH_DUMP_PID) {
*retval = ull_hash_dump(p->p_pid);
return ret;
} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
*retval = ull_hash_dump(0);
return ret;
} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
ull_simulate_copyin_fault = (int)(args->wake_value);
return ret;
}
#endif
if (args->addr == 0) {
ret = EINVAL;
goto munge_retval;
}
if (flags & ULF_WAKE_THREAD) {
/* Waking one specific thread and waking all are mutually exclusive. */
if (flags & ULF_WAKE_ALL) {
ret = EINVAL;
goto munge_retval;
}
/* Resolve the target before taking any lock; the result is deallocated on every later exit path. */
mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
wake_thread = port_name_to_thread_for_ulock(wake_thread_name);
if (wake_thread == THREAD_NULL) {
ret = ESRCH;
goto munge_retval;
}
}
key.ulk_pid = p->p_pid;
key.ulk_addr = args->addr;
/* Lookup only: a wake never creates an entry.  On success it is locked and referenced. */
ull_t *ull = ull_get(&key, ULL_MUST_EXIST);
if (ull == NULL) {
if (wake_thread != THREAD_NULL) {
thread_deallocate(wake_thread);
}
ret = ENOENT;
goto munge_retval;
}
/*
 * The opcode is validated only after the lookup, so the error paths
 * below leave through out_locked, which drops the lock and reference.
 */
boolean_t clear_owner = FALSE;
switch (opcode) {
case UL_UNFAIR_LOCK:
clear_owner = TRUE;
break;
case UL_COMPARE_AND_WAIT:
break;
default:
if (ull_debug) {
kprintf("[%d]%s>EINVAL opcode %d addr 0x%llx flags 0x%x\n",
id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
}
ret = EINVAL;
goto out_locked;
}
/* The waker must use the same opcode the waiters latched on the entry. */
if (opcode != ull->ull_opcode) {
if (ull_debug) {
kprintf("[%d]%s>EDOM - opcode mismatch - opcode %d addr 0x%llx flags 0x%x\n",
id, __FUNCTION__, opcode, (unsigned long long)(args->addr), flags);
}
ret = EDOM;
goto out_locked;
}
/* Only UL_UNFAIR_LOCK entries ever record an owner. */
if (!clear_owner) {
assert(ull->ull_owner == THREAD_NULL);
}
if (flags & ULF_WAKE_ALL) {
thread_wakeup(ULOCK_TO_EVENT(ull));
} else if (flags & ULF_WAKE_THREAD) {
kern_return_t kr = thread_wakeup_thread(ULOCK_TO_EVENT(ull), wake_thread);
/* The only expected failure is that the target was not waiting on this event. */
if (kr != KERN_SUCCESS) {
assert(kr == KERN_NOT_WAITING);
ret = EALREADY;
}
} else {
/* Default: wake one waiter (NOTE(review): WAITQ_SELECT_MAX_PRI presumably picks the highest-priority one - confirm). */
thread_wakeup_one_with_pri(ULOCK_TO_EVENT(ull), WAITQ_SELECT_MAX_PRI);
}
/*
 * Clear the recorded owner only if it is the calling thread, so an
 * owner recorded for a different thread is left untouched.
 */
if (ull->ull_owner == current_thread()) {
old_owner = ull_promote_owner_locked(ull, THREAD_NULL);
}
out_locked:
/* Drops the entry lock and this call's reference. */
ull_put(ull);
/* Release thread references only after the entry lock has been dropped. */
if (wake_thread != THREAD_NULL) {
thread_deallocate(wake_thread);
}
if (old_owner != THREAD_NULL) {
thread_deallocate(old_owner);
}
munge_retval:
/* ULF_NO_ERRNO: report the error as a negative *retval and return success. */
if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
*retval = -ret;
ret = 0;
}
return ret;
}
/*
 * Change the owner recorded on `ull`.  Both callers in this file hold the
 * entry lock.
 *
 *  - new_owner is already the owner: refresh its promotion and return
 *    THREAD_NULL.
 *  - new_owner is a different thread: take a reference on it, record it and
 *    add a promotion for it.
 *  - new_owner is THREAD_NULL: clear the owner and reset the promote token.
 *
 * In the last two cases the previous owner, if any, has its promotion
 * dropped and is returned; the caller must thread_deallocate() it.
 */
static thread_t
ull_promote_owner_locked(ull_t* ull,
                         thread_t new_owner)
{
	thread_t prev_owner = ull->ull_owner;

	if (new_owner == THREAD_NULL) {
		ull->ull_owner = THREAD_NULL;
		ull->ull_promote_token = PROMOTE_TOKEN_INIT;
	} else if (prev_owner == new_owner) {
		thread_user_promotion_update(new_owner, current_thread(), &ull->ull_promote_token);
		return THREAD_NULL;
	} else {
		/* The entry holds its own reference on the recorded owner. */
		thread_reference(new_owner);
		ull->ull_owner = new_owner;
		thread_user_promotion_add(new_owner, current_thread(), &ull->ull_promote_token);
	}

	if (prev_owner != THREAD_NULL) {
		thread_user_promotion_drop(prev_owner);
	}

	return prev_owner;
}
/*
 * Fill in `waitinfo` for a thread blocked on a ulock event.
 *
 * The event is the address of the ull_t.  The context is always the user
 * address from the entry's key; the owner is the recorded owner's thread id
 * for UL_UNFAIR_LOCK and 0 for UL_COMPARE_AND_WAIT.  Any other opcode
 * panics.
 */
void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);
	assert(kdp_is_in_zone(ull, "ulocks"));

	switch (ull->ull_opcode) {
	case UL_UNFAIR_LOCK:
		waitinfo->owner = thread_tid(ull->ull_owner);
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	case UL_COMPARE_AND_WAIT:
		waitinfo->owner = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	default:
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
	}
}