#include <zone_debug.h>
#include <mach/boolean.h>
#include <mach/machine/vm_types.h>
#include <mach/vm_param.h>
#include <kern/misc_protos.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <libkern/OSMalloc.h>
#include <sys/kdebug.h>
#include <san/kasan.h>
#ifdef MACH_BSD
/* Forward declaration; the definition further down is guarded by the same MACH_BSD conditional. */
zone_t kalloc_zone(vm_size_t);
#endif
/*
 * Bounds applied to the size of the kalloc submap in kalloc_init().
 * The upper bound is only enforced when __LP64__ is not set.
 */
#define KALLOC_MAP_SIZE_MIN (16 * 1024 * 1024)
#define KALLOC_MAP_SIZE_MAX (128 * 1024 * 1024)
/* Submap of kernel_map created by kalloc_init(). */
vm_map_t kalloc_map;
/* Zones are only created for element sizes strictly below this value. */
vm_size_t kalloc_max;
/* Requests of this size or more bypass the zones and go to kmem_alloc_flags(). */
vm_size_t kalloc_max_prerounded;
/* Large requests of this size or more are taken from kernel_map instead of kalloc_map. */
vm_size_t kalloc_kernmap_size;
/* Incremented each time a large allocation fails in kalloc_map and is retried in kernel_map. */
unsigned long kalloc_fallback_count;
/* Number of large (non-zone) allocations currently outstanding. */
unsigned int kalloc_large_inuse;
/* Bytes currently held by large allocations. */
vm_size_t kalloc_large_total;
/* Highest value kalloc_large_total has reached. */
vm_size_t kalloc_large_max;
/*
 * Largest large-allocation size seen so far (seeded with kalloc_kernmap_size
 * in kalloc_init()); kfree() ignores frees whose size exceeds it.
 */
vm_size_t kalloc_largest_allocated = 0;
/* Running sum of the sizes of all large allocations ever made. */
uint64_t kalloc_large_sum;
/* NOTE(review): not referenced anywhere in this file; purpose must be confirmed at its users. */
int kalloc_fake_zone_index = -1;
/* First and last address of kalloc_map (max is min + size - 1), set in kalloc_init(). */
vm_offset_t kalloc_map_min;
vm_offset_t kalloc_map_max;
#ifdef MUTEX_ZONE
/* Zone named "lck_mtx", created at the end of kalloc_init(). */
zone_t lck_mtx_zone;
#endif
/*
 * Ledger hooks for large (non-zone) kalloc allocations.  Both operate on the
 * tkm_shared entry of the current thread's ledger and must mirror each other:
 * an allocation credits the ledger, the matching free debits it.
 */

/*
 * Record that `bytes` of shared kernel memory were allocated on behalf of the
 * current thread.
 */
static void
KALLOC_ZINFO_SALLOC(vm_size_t bytes)
{
	thread_t thr = current_thread();

	/* Allocation increases the balance. */
	ledger_credit(thr->t_ledger, task_ledgers.tkm_shared, bytes);
}

/*
 * Record that `bytes` of shared kernel memory were released by the current
 * thread.
 */
static void
KALLOC_ZINFO_SFREE(vm_size_t bytes)
{
	thread_t thr = current_thread();

	/* Free decreases the balance. */
	ledger_debit(thr->t_ledger, task_ledgers.tkm_shared, bytes);
}
/*
 * Zone element sizes and the matching zone names, chosen according to
 * KALLOC_MINSIZE / KALLOC_LOG2_MINALIGN.  K_ZONE_SIZES and K_ZONE_NAMES must
 * list the same sizes in the same order: they initialise k_zone_size[] and
 * k_zone_name[], which kalloc_init() indexes together.  Sizes are listed in
 * ascending order, which the zone lookup loops rely on.
 */
#if KALLOC_MINSIZE == 16 && KALLOC_LOG2_MINALIGN == 4
/* Element sizes for a 16-byte minimum size and alignment. */
#define K_ZONE_SIZES \
16, \
32, \
48, \
64, \
80, \
96, \
128, \
160, \
192, \
224, \
256, \
288, \
368, \
400, \
512, \
576, \
768, \
1024, \
1152, \
1280, \
1664, \
2048, \
4096, \
6144
/* Zone names, one per entry of K_ZONE_SIZES above. */
#define K_ZONE_NAMES \
"kalloc.16", \
"kalloc.32", \
"kalloc.48", \
"kalloc.64", \
"kalloc.80", \
"kalloc.96", \
"kalloc.128", \
"kalloc.160", \
"kalloc.192", \
"kalloc.224", \
"kalloc.256", \
"kalloc.288", \
"kalloc.368", \
"kalloc.400", \
"kalloc.512", \
"kalloc.576", \
"kalloc.768", \
"kalloc.1024", \
"kalloc.1152", \
"kalloc.1280", \
"kalloc.1664", \
"kalloc.2048", \
"kalloc.4096", \
"kalloc.6144"
#elif KALLOC_MINSIZE == 8 && KALLOC_LOG2_MINALIGN == 3
/* Element sizes for an 8-byte minimum size and alignment. */
#define K_ZONE_SIZES \
8, \
16, 24, \
32, 40, 48, \
64, 72, 88, 112, \
128, 192, \
256, 288, 384, 440, \
512, 576, 768, \
1024, 1152, 1536, \
2048, 2128, 3072, \
4096, 6144
/* Zone names, one per entry of K_ZONE_SIZES above. */
#define K_ZONE_NAMES \
"kalloc.8", \
"kalloc.16", "kalloc.24", \
"kalloc.32", "kalloc.40", "kalloc.48", \
"kalloc.64", "kalloc.72", "kalloc.88", "kalloc.112", \
"kalloc.128", "kalloc.192", \
"kalloc.256", "kalloc.288", "kalloc.384", "kalloc.440", \
"kalloc.512", "kalloc.576", "kalloc.768", \
"kalloc.1024", "kalloc.1152", "kalloc.1536", \
"kalloc.2048", "kalloc.2128", "kalloc.3072", \
"kalloc.4096", "kalloc.6144"
#else
/* Any other size/alignment combination has no table and fails the build. */
#error missing or invalid zone size parameters for kalloc
#endif
/* Minimum alignment in bytes, derived from its log2. */
#define KALLOC_MINALIGN (1 << KALLOC_LOG2_MINALIGN)
/* Converts a number of kibibytes into bytes. */
#define KiB(x) (1024 * (x))
/*
 * Element size of every candidate kalloc zone, in ascending order: the
 * configuration-dependent K_ZONE_SIZES followed by 8, 16 and 32 KiB.
 */
static const int k_zone_size[] = {
K_ZONE_SIZES,
KiB(8),
KiB(16),
KiB(32)
};
/* Number of entries in k_zone_size[]. */
#define MAX_K_ZONE (sizeof (k_zone_size) / sizeof (k_zone_size[0]))
/* Zone names, parallel to k_zone_size[]. */
static const char *k_zone_name[MAX_K_ZONE] = {
K_ZONE_NAMES,
"kalloc.8192",
"kalloc.16384",
"kalloc.32768"
};
/* Index into k_zone_dlut[] for a request: size rounded up to KALLOC_MINALIGN units. */
#define INDEX_ZDLUT(size) \
(((size) + KALLOC_MINALIGN - 1) / KALLOC_MINALIGN)
/* Number of entries in the direct lookup table. */
#define N_K_ZDLUT (2048 / KALLOC_MINALIGN)
/* Requests strictly smaller than this are resolved through the direct lookup table. */
#define MAX_SIZE_ZDLUT ((N_K_ZDLUT - 1) * KALLOC_MINALIGN)
/* Direct lookup table: maps INDEX_ZDLUT(size) to an index into k_zone[]; filled by kalloc_init(). */
static int8_t k_zone_dlut[N_K_ZDLUT];
/* Index at which the linear zone search starts for sizes not covered by the table; set by kalloc_init(). */
static int k_zindex_start;
/* Zones created by kalloc_init(); entries whose size is not below kalloc_max are never initialised. */
static zone_t k_zone[MAX_K_ZONE];
/*
 * Lock group and mutex taken around updates to the large-allocation
 * statistics (kalloc_large_* and kalloc_largest_allocated).
 */
lck_grp_t kalloc_lck_grp;
lck_mtx_t kalloc_lock;
#define kalloc_spin_lock() lck_mtx_lock_spin(&kalloc_lock)
#define kalloc_unlock() lck_mtx_unlock(&kalloc_lock)
/* Queue of OSMalloc tags: OSMalloc_Tagalloc() appends, the release paths remove. */
static
queue_head_t OSMalloc_tag_list;
/* Lock group and mutex protecting OSMalloc_tag_list; set up by OSMalloc_init(). */
lck_grp_t *OSMalloc_tag_lck_grp;
lck_mtx_t OSMalloc_tag_lock;
#define OSMalloc_tag_spin_lock() lck_mtx_lock_spin(&OSMalloc_tag_lock)
#define OSMalloc_tag_unlock() lck_mtx_unlock(&OSMalloc_tag_lock)
/* Prototypes for routines defined later in this file. */
void OSMalloc_init(void);
void OSMalloc_Tagref(OSMallocTag tag);
void OSMalloc_Tagrele(OSMallocTag tag);
/*
 * Initialise the kalloc allocator.
 *
 * Creates the kalloc submap inside kernel_map, derives the size thresholds
 * (kalloc_max, kalloc_max_prerounded, kalloc_kernmap_size), creates one zone
 * for every entry of k_zone_size[] that is below kalloc_max, fills the direct
 * lookup table, initialises the lock used for large-allocation statistics
 * and finally calls OSMalloc_init().  Panics if the submap cannot be created.
 */
void
kalloc_init(
void)
{
kern_return_t retval;
vm_offset_t min;
vm_size_t size, kalloc_map_size;
int i;
vm_map_kernel_flags_t vmk_flags;
/* Start from 1/32 of sane_size, then clamp to the configured bounds. */
kalloc_map_size = (vm_size_t)(sane_size >> 5);
#if !__LP64__
if (kalloc_map_size > KALLOC_MAP_SIZE_MAX)
kalloc_map_size = KALLOC_MAP_SIZE_MAX;
#endif
if (kalloc_map_size < KALLOC_MAP_SIZE_MIN)
kalloc_map_size = KALLOC_MAP_SIZE_MIN;
vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
vmk_flags.vmkf_permanent = TRUE;
retval = kmem_suballoc(kernel_map, &min, kalloc_map_size,
FALSE,
(VM_FLAGS_ANYWHERE),
vmk_flags,
VM_KERN_MEMORY_KALLOC,
&kalloc_map);
if (retval != KERN_SUCCESS)
panic("kalloc_init: kmem_suballoc failed");
/* Remember the submap's address range; kalloc_map_max is the last valid byte. */
kalloc_map_min = min;
kalloc_map_max = min + kalloc_map_size - 1;
/* kalloc_max is four pages, but never less than 16 KiB. */
kalloc_max = PAGE_SIZE << 2;
if (kalloc_max < KiB(16)) {
kalloc_max = KiB(16);
}
/* NOTE(review): presumably tied to the largest sizes listed in k_zone_size[] - confirm. */
assert(kalloc_max <= KiB(64));
/* Requests of more than half of kalloc_max are not served from zones. */
kalloc_max_prerounded = kalloc_max / 2 + 1;
/* Requests larger than 16 * kalloc_max are allocated from kernel_map directly. */
kalloc_kernmap_size = (kalloc_max * 16) + 1;
/* Seed the sanity bound that kfree() uses to reject bogus sizes. */
kalloc_largest_allocated = kalloc_kernmap_size;
/* Create one zone per table entry whose element size is below kalloc_max. */
for (i = 0; i < (int)MAX_K_ZONE && (size = k_zone_size[i]) < kalloc_max; i++) {
k_zone[i] = zinit(size, size, size, k_zone_name[i]);
zone_change(k_zone[i], Z_CALLERACCT, FALSE);
#if VM_MAX_TAG_ZONES
if (zone_tagging_on) zone_change(k_zone[i], Z_TAGS_ENABLED, TRUE);
#endif
zone_change(k_zone[i], Z_KASAN_QUARANTINE, FALSE);
}
/*
 * Build the direct lookup table: for every multiple of KALLOC_MINALIGN,
 * store the index of the first zone whose element size is at least that
 * large.  The extra iteration (i == N_K_ZDLUT) stores nothing; it records
 * where the linear search must start for sizes beyond the table.
 */
for (i = 0, size = 0; i <= N_K_ZDLUT; i++, size += KALLOC_MINALIGN) {
int zindex = 0;
while ((vm_size_t)k_zone_size[zindex] < size)
zindex++;
if (i == N_K_ZDLUT) {
k_zindex_start = zindex;
break;
}
k_zone_dlut[i] = (int8_t)zindex;
}
#ifdef KALLOC_DEBUG
/*
 * Debug aid: for each zone size minus one, print which zone the lookup
 * selects and how many comparisons the lookup needed.
 */
printf("kalloc_init: k_zindex_start %d\n", k_zindex_start);
for (i = 0; i < (int)MAX_K_ZONE; i++) {
vm_size_t testsize = (vm_size_t)k_zone_size[i] - 1;
int compare = 0;
int zindex;
if (testsize < MAX_SIZE_ZDLUT) {
compare += 1;
long dindex = INDEX_ZDLUT(testsize);
zindex = (int)k_zone_dlut[dindex];
} else if (testsize < kalloc_max_prerounded) {
compare += 2;
zindex = k_zindex_start;
while ((vm_size_t)k_zone_size[zindex] < testsize) {
zindex++;
compare++;
}
compare++;
} else
break;
zone_t z = k_zone[zindex];
printf("kalloc_init: req size %4lu: %11s took %d compare%s\n",
(unsigned long)testsize, z->zone_name, compare,
compare == 1 ? "" : "s");
}
#endif
/* Lock used for the large-allocation statistics. */
lck_grp_init(&kalloc_lck_grp, "kalloc.large", LCK_GRP_ATTR_NULL);
lck_mtx_init(&kalloc_lock, &kalloc_lck_grp, LCK_ATTR_NULL);
OSMalloc_init();
#ifdef MUTEX_ZONE
lck_mtx_zone = zinit(sizeof(struct _lck_mtx_), 1024*256, 4096, "lck_mtx");
#endif
}
/*
 * Resolve a small request size to its zone through the direct lookup table.
 * Callers in this file only use it for sizes below MAX_SIZE_ZDLUT.
 */
static __inline zone_t
get_zone_dlut(vm_size_t size)
{
	const long slot = INDEX_ZDLUT(size);

	return k_zone[(int)k_zone_dlut[slot]];
}
/*
 * Find the zone for `size` by walking k_zone_size[] upwards from `zindex`
 * until an element size that is at least `size` is reached.  `size` must be
 * below kalloc_max_prerounded.
 */
static __inline zone_t
get_zone_search(vm_size_t size, int zindex)
{
	assert(size < kalloc_max_prerounded);

	for (; (vm_size_t)k_zone_size[zindex] < size; zindex++) {
		/* skip zones that are too small for the request */
	}

	assert((unsigned)zindex < MAX_K_ZONE &&
	    (vm_size_t)k_zone_size[zindex] < kalloc_max);

	return k_zone[zindex];
}
/*
 * Return the size of the map entry that starts exactly at `addr` in `map`.
 * As the name indicates, the caller is expected to hold the map lock.
 * Panics when no entry contains the address, when the address is not the
 * start of its entry, or when the entry is not marked atomic.
 */
static vm_size_t
vm_map_lookup_kalloc_entry_locked(
	vm_map_t map,
	void *addr)
{
	vm_map_entry_t entry = NULL;

	if (!vm_map_lookup_entry(map, (vm_map_offset_t)addr, &entry)) {
		panic("Attempting to lookup/free an address not allocated via kalloc! (vm_map_lookup_entry() failed map: %p, addr: %p)\n",
		    map, addr);
	}

	if ((vm_map_offset_t)addr != entry->vme_start) {
		panic("Attempting to lookup/free the middle of a kalloc'ed element! (map: %p, addr: %p, entry: %p)\n",
		    map, addr, entry);
	}

	if (!entry->vme_atomic) {
		panic("Attempting to lookup/free an address not managed by kalloc! (map: %p, addr: %p, entry: %p)\n",
		    map, addr, entry);
	}

	return (entry->vme_end - entry->vme_start);
}
#if KASAN_KALLOC
vm_size_t
kalloc_size(void *addr)
{
(void)vm_map_lookup_kalloc_entry_locked;
return kasan_user_size((vm_offset_t)addr);
}
#else
vm_size_t
kalloc_size(
void *addr)
{
vm_map_t map;
vm_size_t size;
size = zone_element_size(addr, NULL);
if (size) {
return size;
}
if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) {
map = kalloc_map;
} else {
map = kernel_map;
}
vm_map_lock_read(map);
size = vm_map_lookup_kalloc_entry_locked(map, addr);
vm_map_unlock_read(map);
return size;
}
#endif
/*
 * Return the number of bytes a request of `size` occupies: the element size
 * of the zone that would serve it, or, for requests too large for a zone,
 * `size` rounded up to the page size of the map that would be used.
 */
vm_size_t
kalloc_bucket_size(
	vm_size_t size)
{
	vm_map_t backing;

	if (size < MAX_SIZE_ZDLUT) {
		return get_zone_dlut(size)->elem_size;
	}

	if (size < kalloc_max_prerounded) {
		return get_zone_search(size, k_zindex_start)->elem_size;
	}

	backing = (size >= kalloc_kernmap_size) ? kernel_map : kalloc_map;

	return vm_map_round_page(size, VM_MAP_PAGE_MASK(backing));
}
#if KASAN_KALLOC
vm_size_t
kfree_addr(void *addr)
{
vm_size_t origsz = kalloc_size(addr);
kfree(addr, origsz);
return origsz;
}
#else
vm_size_t
kfree_addr(
void *addr)
{
vm_map_t map;
vm_size_t size = 0;
kern_return_t ret;
zone_t z;
size = zone_element_size(addr, &z);
if (size) {
zfree(z, addr);
return size;
}
if (((vm_offset_t)addr >= kalloc_map_min) && ((vm_offset_t)addr < kalloc_map_max)) {
map = kalloc_map;
} else {
map = kernel_map;
}
if ((vm_offset_t)addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
panic("kfree on an address not in the kernel & kext address range! addr: %p\n", addr);
}
vm_map_lock(map);
size = vm_map_lookup_kalloc_entry_locked(map, addr);
ret = vm_map_remove_locked(map,
vm_map_trunc_page((vm_map_offset_t)addr,
VM_MAP_PAGE_MASK(map)),
vm_map_round_page((vm_map_offset_t)addr + size,
VM_MAP_PAGE_MASK(map)),
VM_MAP_REMOVE_KUNWIRE);
if (ret != KERN_SUCCESS) {
panic("vm_map_remove_locked() failed for kalloc vm_entry! addr: %p, map: %p ret: %d\n",
addr, map, ret);
}
vm_map_unlock(map);
kalloc_spin_lock();
kalloc_large_total -= size;
kalloc_large_inuse--;
kalloc_unlock();
KALLOC_ZINFO_SFREE(size);
return size;
}
#endif
/*
 * Core kalloc allocation routine.
 *
 * psize:    in/out.  On entry, the requested size.  In non-KASAN builds it is
 *           overwritten with the size actually reserved: the zone element
 *           size, or the page-rounded size for large allocations (this is
 *           also written when the large allocation fails).  It is left
 *           untouched when a large request is refused because !canblock.
 * canblock: when FALSE, requests too large for a zone return NULL at once;
 *           for zone requests the flag is passed on to zalloc_canblock_tag().
 * site:     optional allocation site; when given (and, for zones, when the
 *           zone has tags enabled) it supplies the VM tag instead of
 *           VM_KERN_MEMORY_KALLOC.
 *
 * Returns the allocated address, or NULL on failure.
 */
void *
kalloc_canblock(
vm_size_t * psize,
boolean_t canblock,
vm_allocation_site_t * site)
{
zone_t z;
vm_size_t size;
void *addr;
vm_tag_t tag;
tag = VM_KERN_MEMORY_KALLOC;
size = *psize;
#if KASAN_KALLOC
/* Keep the caller's size; `size` becomes whatever kasan_alloc_resize() returns for it. */
vm_size_t req_size = size;
size = kasan_alloc_resize(req_size);
#endif
/* Pick a zone by table lookup or linear search; otherwise take the large path. */
if (size < MAX_SIZE_ZDLUT)
z = get_zone_dlut(size);
else if (size < kalloc_max_prerounded)
z = get_zone_search(size, k_zindex_start);
else {
vm_map_t alloc_map;
/* Large requests are refused outright when the caller cannot block. */
if (!canblock) {
return(NULL);
}
#if KASAN_KALLOC
/* Large KASAN allocations are sized as the request plus two pages, page-rounded. */
size = round_page(req_size + 2 * PAGE_SIZE);
assert(size >= MAX_SIZE_ZDLUT && size >= kalloc_max_prerounded);
#endif
/* Very large requests go straight to kernel_map, the rest to kalloc_map. */
if (size >= kalloc_kernmap_size)
alloc_map = kernel_map;
else
alloc_map = kalloc_map;
if (site) tag = vm_tag_alloc(site);
/* If kalloc_map cannot satisfy the request, retry once in kernel_map. */
if (kmem_alloc_flags(alloc_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS) {
if (alloc_map != kernel_map) {
/* Only the first fallback is logged; every fallback is counted. */
if (kalloc_fallback_count++ == 0) {
printf("%s: falling back to kernel_map\n", __func__);
}
if (kmem_alloc_flags(kernel_map, (vm_offset_t *)&addr, size, tag, KMA_ATOMIC) != KERN_SUCCESS)
addr = NULL;
}
else
addr = NULL;
}
if (addr != NULL) {
/* Update the large-allocation statistics under the kalloc lock. */
kalloc_spin_lock();
/* Track the largest size handed out; kfree() uses it to reject bogus sizes. */
if (size > kalloc_largest_allocated)
kalloc_largest_allocated = size;
kalloc_large_inuse++;
kalloc_large_total += size;
kalloc_large_sum += size;
if (kalloc_large_total > kalloc_large_max)
kalloc_large_max = kalloc_large_total;
kalloc_unlock();
KALLOC_ZINFO_SALLOC(size);
}
#if KASAN_KALLOC
addr = (void *)kasan_alloc((vm_offset_t)addr, size, req_size, PAGE_SIZE);
#else
/* Report the page-rounded size back to the caller. */
*psize = round_page(size);
#endif
return(addr);
}
#ifdef KALLOC_DEBUG
if (size > z->elem_size)
panic("%s: z %p (%s) but requested size %lu", __func__,
z, z->zone_name, (unsigned long)size);
#endif
assert(size <= z->elem_size);
#if VM_MAX_TAG_ZONES
if (z->tags && site)
{
tag = vm_tag_alloc(site);
/* Non-blocking callers fall back to the default tag when the tag has no zone totals entry yet. */
if (!canblock && !vm_allocation_zone_totals[tag]) tag = VM_KERN_MEMORY_KALLOC;
}
#endif
addr = zalloc_canblock_tag(z, canblock, size, tag);
#if KASAN_KALLOC
addr = (void *)kasan_alloc((vm_offset_t)addr, z->elem_size, req_size, KASAN_GUARD_SIZE);
#else
/* Report the zone's element size back to the caller. */
*psize = z->elem_size;
#endif
return addr;
}
/*
 * Function form of kalloc: allocates `size` bytes through kalloc_tag_bt()
 * with the VM_KERN_MEMORY_KALLOC tag.
 */
void *
kalloc_external(
	vm_size_t size);

void *
kalloc_external(
	vm_size_t size)
{
	return kalloc_tag_bt(size, VM_KERN_MEMORY_KALLOC);
}
/* Number of kfree() calls ignored because the size exceeded kalloc_largest_allocated. */
volatile SInt32 kfree_nop_count = 0;

/*
 * Free `data`, which was allocated with a request of `size` bytes.  Sizes
 * that map to a zone are returned to that zone; larger ones are released
 * with kmem_free() from kalloc_map or kernel_map, depending on the address.
 * A large free whose size exceeds every size ever allocated is counted and
 * ignored.
 */
void
kfree(
	void *data,
	vm_size_t size)
{
	zone_t z;

#if KASAN_KALLOC
	vm_size_t user_size = size;
	kasan_check_free((vm_address_t)data, size, KASAN_HEAP_KALLOC);
	data = (void *)kasan_dealloc((vm_address_t)data, &size);
	kasan_free(&data, &size, KASAN_HEAP_KALLOC, NULL, user_size, true);
	if (!data) {
		return;
	}
#endif

	if (size >= MAX_SIZE_ZDLUT && size >= kalloc_max_prerounded) {
		/* Too large for any zone: this is a kmem allocation. */
		const vm_offset_t where = (vm_offset_t)data;
		vm_map_t owner = kernel_map;

		if (where >= kalloc_map_min && where <= kalloc_map_max) {
			owner = kalloc_map;
		}

		if (size > kalloc_largest_allocated) {
			/* Size cannot belong to a real allocation; count it and do nothing. */
			OSAddAtomic(1, &kfree_nop_count);
			return;
		}

		kmem_free(owner, (vm_offset_t)data, size);

		kalloc_spin_lock();
		kalloc_large_total -= size;
		kalloc_large_inuse--;
		kalloc_unlock();

		KALLOC_ZINFO_SFREE(size);
		return;
	}

	if (size < MAX_SIZE_ZDLUT) {
		z = get_zone_dlut(size);
	} else {
		z = get_zone_search(size, k_zindex_start);
	}

#ifdef KALLOC_DEBUG
	if (size > z->elem_size) {
		panic("%s: z %p (%s) but requested size %lu", __func__,
		    z, z->zone_name, (unsigned long)size);
	}
#endif
	assert(size <= z->elem_size);

	zfree(z, data);
}
#ifdef MACH_BSD
/*
 * Return the zone that serves allocations of `size` bytes, or ZONE_NULL when
 * the size is too large to be served from a zone.
 *
 * The linear search is only valid for sizes below kalloc_max_prerounded:
 * get_zone_search() asserts exactly that, and no zone is ever created with an
 * element size of kalloc_max or more.  Using kalloc_max as the bound would
 * trip the assertion and, with assertions compiled out, could walk past the
 * end of k_zone_size[] when kalloc_max exceeds the largest table entry.
 * Sizes in [kalloc_max_prerounded, kalloc_max] therefore yield ZONE_NULL.
 */
zone_t
kalloc_zone(
	vm_size_t size)
{
	if (size < MAX_SIZE_ZDLUT) {
		return (get_zone_dlut(size));
	}
	if (size < kalloc_max_prerounded) {
		return (get_zone_search(size, k_zindex_start));
	}
	return (ZONE_NULL);
}
#endif
/*
 * Set up the OSMalloc tag machinery: the lock group and mutex that protect
 * the tag list, and the (empty) tag list itself.
 */
void
OSMalloc_init(
	void)
{
	/* The mutex needs its group, so the group is allocated first. */
	OSMalloc_tag_lck_grp = lck_grp_alloc_init("OSMalloc_tag", LCK_GRP_ATTR_NULL);
	lck_mtx_init(&OSMalloc_tag_lock, OSMalloc_tag_lck_grp, LCK_ATTR_NULL);

	queue_init(&OSMalloc_tag_list);
}
/*
 * Create a new OSMalloc tag named `str` (copied with strlcpy, bounded by
 * OSMT_MAX_NAME).  The tag starts with a reference count of one, is marked
 * pageable when `flags` contains OSMT_PAGEABLE, is appended to the global tag
 * list and is then marked valid.  Returns the new tag.
 */
OSMallocTag
OSMalloc_Tagalloc(
	const char *str,
	uint32_t flags)
{
	OSMallocTag fresh;

	fresh = (OSMallocTag)kalloc(sizeof(*fresh));
	bzero((void *)fresh, sizeof(*fresh));

	if (flags & OSMT_PAGEABLE) {
		fresh->OSMT_attr = OSMT_ATTR_PAGEABLE;
	}
	fresh->OSMT_refcnt = 1;
	strlcpy(fresh->OSMT_name, str, OSMT_MAX_NAME);

	/* Publish the tag on the global list. */
	OSMalloc_tag_spin_lock();
	enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)fresh);
	OSMalloc_tag_unlock();

	fresh->OSMT_state = OSMT_VALID;

	return fresh;
}
/*
 * Take one reference on `tag`.  Panics when the tag is not in the valid
 * state.
 */
void
OSMalloc_Tagref(
	OSMallocTag tag)
{
	if ((tag->OSMT_state & OSMT_VALID_MASK) != OSMT_VALID) {
		panic("OSMalloc_Tagref():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state);
	}

	(void)hw_atomic_add(&tag->OSMT_refcnt, 1);
}
/*
 * Drop one reference on `tag`.
 *
 * Panics when the tag is not in the valid state.  When the count reaches
 * zero the tag must already be in the OSMT_VALID|OSMT_RELEASED state (the
 * state OSMalloc_Tagfree() moves it to); it is then unlinked from the tag
 * list and freed.  Reaching zero in any other state is a reference-counting
 * error and panics.
 */
void
OSMalloc_Tagrele(
	OSMallocTag tag)
{
	/* The message names this function so a panic log points at the right routine. */
	if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) {
		panic("OSMalloc_Tagrele():'%s' has bad state 0x%08X\n", tag->OSMT_name, tag->OSMT_state);
	}

	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) == 0) {
		/* Old and new value are identical: this only checks the state atomically. */
		if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) {
			OSMalloc_tag_spin_lock();
			(void)remque((queue_entry_t)tag);
			OSMalloc_tag_unlock();
			kfree((void*)tag, sizeof(*tag));
		} else {
			panic("OSMalloc_Tagrele():'%s' has refcnt 0\n", tag->OSMT_name);
		}
	}
}
/*
 * Release a tag obtained from OSMalloc_Tagalloc().  Moves the tag from
 * OSMT_VALID to OSMT_VALID|OSMT_RELEASED (panicking if it was in any other
 * state) and drops one reference; when that was the last reference the tag
 * is unlinked from the tag list and freed.
 */
void
OSMalloc_Tagfree(
	OSMallocTag tag)
{
	if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) {
		panic("OSMalloc_Tagfree():'%s' has bad state 0x%08X \n", tag->OSMT_name, tag->OSMT_state);
	}

	if (hw_atomic_sub(&tag->OSMT_refcnt, 1) != 0) {
		/* Other references remain; the last OSMalloc_Tagrele() frees the tag. */
		return;
	}

	OSMalloc_tag_spin_lock();
	(void)remque((queue_entry_t)tag);
	OSMalloc_tag_unlock();
	kfree((void*)tag, sizeof(*tag));
}
/*
 * Allocate `size` bytes against `tag`.  A reference is taken on the tag for
 * the lifetime of the allocation and dropped again if the allocation fails.
 * Pageable tags use kmem_alloc_pageable_external() when `size` has any bit
 * set above PAGE_MASK; every other request goes through kalloc_tag_bt().
 * Returns the address, or NULL on failure.
 */
void *
OSMalloc(
	uint32_t size,
	OSMallocTag tag)
{
	void *addr = NULL;

	OSMalloc_Tagref(tag);

	if ((tag->OSMT_attr & OSMT_PAGEABLE) && (size & ~PAGE_MASK)) {
		if (kmem_alloc_pageable_external(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) {
			addr = NULL;
		}
	} else {
		addr = kalloc_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
	}

	if (addr == NULL) {
		OSMalloc_Tagrele(tag);
	}

	return addr;
}
/*
 * Non-blocking allocation of `size` bytes against `tag`.  Pageable tags are
 * refused (NULL).  The tag reference taken for the allocation is dropped
 * again when the allocation fails.
 */
void *
OSMalloc_nowait(
	uint32_t size,
	OSMallocTag tag)
{
	void *addr;

	if (tag->OSMT_attr & OSMT_PAGEABLE) {
		return NULL;
	}

	OSMalloc_Tagref(tag);

	addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
	if (!addr) {
		OSMalloc_Tagrele(tag);
	}

	return addr;
}
/*
 * Allocate `size` bytes against `tag` without blocking.  Returns NULL for
 * pageable tags and on allocation failure; in the failure case the tag
 * reference taken for the allocation is released again.
 */
void *
OSMalloc_noblock(
	uint32_t size,
	OSMallocTag tag)
{
	void *addr;

	if (tag->OSMT_attr & OSMT_PAGEABLE) {
		return NULL;
	}

	OSMalloc_Tagref(tag);

	addr = kalloc_noblock_tag_bt((vm_size_t)size, VM_KERN_MEMORY_KALLOC);
	if (!addr) {
		OSMalloc_Tagrele(tag);
	}

	return addr;
}
/*
 * Free memory obtained from OSMalloc() and drop the tag reference that the
 * allocation held.  The release path mirrors the choice made by OSMalloc():
 * kmem_free() for pageable tags when `size` has a bit set above PAGE_MASK,
 * kfree() otherwise.
 */
void
OSFree(
	void *addr,
	uint32_t size,
	OSMallocTag tag)
{
	if ((tag->OSMT_attr & OSMT_PAGEABLE) && (size & ~PAGE_MASK)) {
		kmem_free(kernel_map, (vm_offset_t)addr, size);
	} else {
		kfree((void *)addr, size);
	}

	OSMalloc_Tagrele(tag);
}
uint32_t
OSMalloc_size(
void *addr)
{
return (uint32_t)kalloc_size(addr);
}