#include <mach_cluster_stats.h>
#include <mach_pagemap.h>
#include <mach_kdb.h>
#include <libkern/OSAtomic.h>
#include <mach/mach_types.h>
#include <mach/kern_return.h>
#include <mach/message.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/memory_object.h>
#include <mach/sdt.h>
#include <kern/kern_types.h>
#include <kern/host_statistics.h>
#include <kern/counters.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/host.h>
#include <kern/xpr.h>
#include <kern/mach_param.h>
#include <kern/macro_help.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_external.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_shared_region.h>
/* Compile-time switches for optional fault classification / tracing code. */
#define VM_FAULT_CLASSIFY 0
#define TRACEFAULTPAGE 0

/* Max concurrent pageins allowed on an external object before faulters back off. */
int vm_object_pagein_throttle = 16;

boolean_t thread_is_io_throttled(void);

/* Resident-size threshold (bytes) above which a task may be hard-throttled;
 * computed from physical memory size in vm_fault_init(). */
uint64_t vm_hard_throttle_threshold;

extern unsigned int dp_pages_free, dp_pages_reserve;

/*
 * Hard-throttle a faulting task when either:
 *  - the default pager is nearly out of backing pages (< 2000 free+reserve)
 *    and this (non-kernel) task is above the resident-size threshold, or
 *  - free pages are below the throttle limit, the thread is I/O throttled,
 *    and the task is above the resident-size threshold.
 */
#define NEED_TO_HARD_THROTTLE_THIS_TASK() (((dp_pages_free + dp_pages_reserve < 2000) && \
(get_task_resident_size(current_task()) > vm_hard_throttle_threshold) && \
(current_task() != kernel_task) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) || \
(vm_page_free_count < vm_page_throttle_limit && thread_is_io_throttled() && \
(get_task_resident_size(current_task()) > vm_hard_throttle_threshold)))

/* Throttle delays, in microseconds (passed to delay()). */
#define HARD_THROTTLE_DELAY 20000
#define SOFT_THROTTLE_DELAY 2000

extern int cs_debug;

#if MACH_KDB
extern struct db_watchpoint *db_watchpoint_list;
#endif

boolean_t current_thread_aborted(void);

/* Forward declarations for routines defined elsewhere in this module. */
extern kern_return_t vm_fault_wire_fast(
	vm_map_t map,
	vm_map_offset_t va,
	vm_map_entry_t entry,
	pmap_t pmap,
	vm_map_offset_t pmap_addr);
extern void vm_fault_continue(void);
extern void vm_fault_copy_cleanup(
	vm_page_t page,
	vm_page_t top_page);
extern void vm_fault_copy_dst_cleanup(
	vm_page_t page);
#if VM_FAULT_CLASSIFY
extern void vm_fault_classify(vm_object_t object,
	vm_object_offset_t offset,
	vm_prot_t fault_type);
extern void vm_fault_classify_init(void);
#endif

/* Global fault/code-signing statistics counters. */
unsigned long vm_pmap_enter_blocked = 0;
unsigned long vm_cs_validates = 0;
unsigned long vm_cs_revalidates = 0;
unsigned long vm_cs_query_modified = 0;
unsigned long vm_cs_validated_dirtied = 0;
unsigned long vm_cs_bitmap_validated = 0;

#if CONFIG_ENFORCE_SIGNED_CODE
int cs_enforcement_disable=0;
#else
/* Without CONFIG_ENFORCE_SIGNED_CODE, enforcement is permanently disabled. */
static const int cs_enforcement_disable=1;
#endif
/*
 * vm_fault_init:
 *
 * One-time initialization for the fault-handling module: pick up
 * code-signing boot-args (non-secure kernels only) and compute the
 * hard-throttle resident-size threshold from physical memory size.
 */
void
vm_fault_init(void)
{
#if !SECURE_KERNEL
#if CONFIG_ENFORCE_SIGNED_CODE
	PE_parse_boot_argn("cs_enforcement_disable", &cs_enforcement_disable,
			   sizeof (cs_enforcement_disable));
#endif
	PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug));
#endif
	/*
	 * Threshold scales with memory: 35% of sane_size, reduced by
	 * one percentage point per GB of memory, capped at a 25-point
	 * reduction (i.e. never below 10%).
	 */
	int gigs = (int)(sane_size / (1024*1024*1024));
	int percent = 35 - MIN(gigs, 25);

	vm_hard_throttle_threshold = sane_size * percent / 100;
}
/*
 * vm_fault_cleanup:
 *
 * Finish the paging operation on `object` and drop its lock; if a
 * top-level page was held across the fault, free it and finish the
 * paging operation on its object as well.
 *
 * On entry: `object` is locked with a paging reference held.
 */
void
vm_fault_cleanup(
	register vm_object_t object,
	register vm_page_t top_page)
{
	vm_object_paging_end(object);
	vm_object_unlock(object);

	if (top_page == VM_PAGE_NULL)
		return;

	/* Release the busy page held in the first-level object. */
	object = top_page->object;

	vm_object_lock(object);
	VM_PAGE_FREE(top_page);
	vm_object_paging_end(object);
	vm_object_unlock(object);
}
#if MACH_CLUSTER_STATS
/* Per-cluster-size histograms of pagein clustering behavior. */
#define MAXCLUSTERPAGES 16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats_in[MAXCLUSTERPAGES];
#define CLUSTER_STAT(clause) clause
#define CLUSTER_STAT_HIGHER(x) \
	((cluster_stats_in[(x)].pages_at_higher_offsets)++)
#define CLUSTER_STAT_LOWER(x) \
	((cluster_stats_in[(x)].pages_at_lower_offsets)++)
#define CLUSTER_STAT_CLUSTER(x) \
	((cluster_stats_in[(x)].pages_in_cluster)++)
#else
/* Stats disabled: the clause compiles away entirely. */
#define CLUSTER_STAT(clause)
#endif

#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0)

/* Tunables for the "deactivate behind" heuristic used on sequential faults. */
boolean_t vm_page_deactivate_behind = TRUE;
#define VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW 128
#define VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER 16
int vm_default_behind = VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW;

/* Cap (in bytes) on the sequential-run counter kept per object. */
#define MAX_SEQUENTIAL_RUN (1024 * 1024 * 1024)
/*
 * vm_fault_is_sequential:
 *
 * Update the object's sequential-access run length based on the
 * relationship between this fault's offset and the previous one.
 * Positive runs track forward access, negative runs reverse access;
 * both are capped at MAX_SEQUENTIAL_RUN bytes.
 *
 * The run counter is updated with a compare-and-swap; if another
 * faulting thread won the race, we leave last_alloc alone as well.
 */
static
void
vm_fault_is_sequential(
	vm_object_t object,
	vm_object_offset_t offset,
	vm_behavior_t behavior)
{
	vm_object_offset_t prev_offset = object->last_alloc;
	int run = object->sequential;
	int saved_run = run;
	/* Did this fault land one page after (forward) or before (reverse)
	 * the previous fault on this object? */
	boolean_t next_in_line = (offset && prev_offset == offset - PAGE_SIZE_64);
	boolean_t prev_in_line = (prev_offset && prev_offset == offset + PAGE_SIZE_64);

	switch (behavior) {
	case VM_BEHAVIOR_RANDOM:
		/* Declared random: never accumulate a run. */
		run = 0;
		break;
	case VM_BEHAVIOR_SEQUENTIAL:
		if (next_in_line) {
			if (run < MAX_SEQUENTIAL_RUN)
				run += PAGE_SIZE;
		} else {
			run = 0;
		}
		break;
	case VM_BEHAVIOR_RSEQNTL:
		if (prev_in_line) {
			if (run > -MAX_SEQUENTIAL_RUN)
				run -= PAGE_SIZE;
		} else {
			run = 0;
		}
		break;
	case VM_BEHAVIOR_DEFAULT:
	default:
		/* Infer direction: a direction change resets the run first. */
		if (next_in_line) {
			if (run < 0)
				run = 0;
			if (run < MAX_SEQUENTIAL_RUN)
				run += PAGE_SIZE;
		} else if (prev_in_line) {
			if (run > 0)
				run = 0;
			if (run > -MAX_SEQUENTIAL_RUN)
				run -= PAGE_SIZE;
		} else {
			run = 0;
		}
		break;
	}
	if (run != saved_run &&
	    !OSCompareAndSwap(saved_run, run, (UInt32 *)&object->sequential)) {
		/* Lost the race with another faulter; don't touch last_alloc. */
		return;
	}
	object->last_alloc = offset;
}
int vm_page_deactivate_behind_count = 0;

/*
 * vm_fault_deactivate_behind:
 *
 * When an object is being accessed sequentially, deactivate pages
 * "behind" the current fault offset so they are reclaimed first,
 * since a sequential reader is unlikely to revisit them.
 *
 * Returns TRUE if any pages were deactivated, FALSE otherwise.
 * Called with the object locked (pages are looked up and their
 * flags consulted under that lock).
 */
static
boolean_t
vm_fault_deactivate_behind(
	vm_object_t object,
	vm_object_offset_t offset,
	vm_behavior_t behavior)
{
	int n;
	int pages_in_run = 0;
	int max_pages_in_run = 0;
	int sequential_run;
	int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
	vm_object_offset_t run_offset = 0;
	vm_object_offset_t pg_offset = 0;
	vm_page_t m;
	vm_page_t page_run[VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER];

	pages_in_run = 0;
#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind);
#endif
	/* Never applied to the kernel object, or when globally disabled. */
	if (object == kernel_object || vm_page_deactivate_behind == FALSE) {
		return FALSE;
	}
	/* Normalize the object's run to a magnitude + direction. */
	if ((sequential_run = object->sequential)) {
		if (sequential_run < 0) {
			sequential_behavior = VM_BEHAVIOR_RSEQNTL;
			sequential_run = 0 - sequential_run;
		} else {
			sequential_behavior = VM_BEHAVIOR_SEQUENTIAL;
		}
	}
	switch (behavior) {
	case VM_BEHAVIOR_RANDOM:
		/* Random access: nothing to deactivate. */
		break;
	case VM_BEHAVIOR_SEQUENTIAL:
		/* Explicit forward-sequential: deactivate the single page
		 * just behind this fault. NOTE: run_offset relies on
		 * unsigned wraparound (0 - PAGE_SIZE_64 acts as -PAGE_SIZE). */
		if (sequential_run >= (int)PAGE_SIZE) {
			run_offset = 0 - PAGE_SIZE_64;
			max_pages_in_run = 1;
		}
		break;
	case VM_BEHAVIOR_RSEQNTL:
		/* Explicit reverse-sequential: "behind" is the next higher page. */
		if (sequential_run >= (int)PAGE_SIZE) {
			run_offset = PAGE_SIZE_64;
			max_pages_in_run = 1;
		}
		break;
	case VM_BEHAVIOR_DEFAULT:
	default:
	{	vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64;

		/* Default policy: once the inferred run exceeds the window,
		 * deactivate a whole cluster each time the run crosses a
		 * cluster boundary. */
		if ((uint64_t)sequential_run >= behind && (sequential_run % (VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER * PAGE_SIZE)) == 0) {
			if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) {
				if (offset >= behind) {
					run_offset = 0 - behind;
					pg_offset = PAGE_SIZE_64;
					max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER;
				}
			} else {
				/* NOTE(review): unsigned compare against -behind —
				 * appears intended to exclude offsets that would
				 * wrap; confirm against callers. */
				if (offset < -behind) {
					run_offset = behind;
					pg_offset = 0 - PAGE_SIZE_64;
					max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER;
				}
			}
		}
		break;
	}
	}
	/* Collect eligible resident pages and clear their reference bits. */
	for (n = 0; n < max_pages_in_run; n++) {
		m = vm_page_lookup(object, offset + run_offset + (n * pg_offset));

		if (m && !m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) {
			page_run[pages_in_run++] = m;
			pmap_clear_reference(m->phys_page);
		}
	}
	if (pages_in_run) {
		/* Take the page-queue lock once for the whole batch. */
		vm_page_lockspin_queues();

		for (n = 0; n < pages_in_run; n++) {
			m = page_run[n];

			vm_page_deactivate_internal(m, FALSE);

			vm_page_deactivate_behind_count++;
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m);
#endif
		}
		vm_page_unlock_queues();

		return TRUE;
	}
	return FALSE;
}
/*
 * vm_page_throttled:
 *
 * Decide whether the current thread should be delayed for creating
 * pages too quickly while memory is scarce.
 *
 * Returns 0 (no delay), SOFT_THROTTLE_DELAY, or HARD_THROTTLE_DELAY
 * (microseconds for the caller to stall).
 */
static int
vm_page_throttled(void)
{
	clock_sec_t now_sec;
	clock_usec_t now_usec;
	clock_sec_t secs_elapsed;
	thread_t self = current_thread();

	/* Privileged VM threads are exempt from throttling. */
	if (self->options & TH_OPT_VMPRIV)
		return (0);

	self->t_page_creation_count++;

	if (NEED_TO_HARD_THROTTLE_THIS_TASK())
		return (HARD_THROTTLE_DELAY);

	/* Only consider soft throttling when free memory is low and this
	 * thread has exceeded its page-creation budget. */
	if (vm_page_free_count >= vm_page_throttle_limit ||
	    self->t_page_creation_count <= vm_page_creation_throttle)
		return (0);

	clock_get_system_microtime(&now_sec, &now_usec);
	secs_elapsed = now_sec - self->t_page_creation_time;

	if (secs_elapsed > 6 &&
	    (self->t_page_creation_count / secs_elapsed) < (vm_page_creation_throttle / 6)) {
		/* Creation rate is acceptable: start a fresh window. */
		self->t_page_creation_time = now_sec;
		self->t_page_creation_count = 0;
		return (0);
	}
	if (secs_elapsed >= 60) {
		/* Stale window: slide it forward but leave most of the
		 * budget consumed so a burst is still throttled. */
		self->t_page_creation_time = now_sec;
		self->t_page_creation_count = (vm_page_creation_throttle / 6) * 5;
	}
	++vm_page_throttle_count;

	return (SOFT_THROTTLE_DELAY);
}
/*
 * vm_fault_check:
 *
 * Common pre-zero-fill checks: fail the fault if the object's data
 * has been lost, retry if backing store is critically low for an
 * unprivileged task, and stall if page creation is being throttled.
 *
 * On any non-success return, `m` (if any) has been freed, the fault
 * state cleaned up, and the interrupt level restored.
 */
static vm_fault_return_t
vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t interruptible_state)
{
	int throttle_delay;

	/* A severed shadow chain or a purged purgeable object means the
	 * data backing this range is gone for good. */
	if (object->shadow_severed ||
	    VM_OBJECT_PURGEABLE_FAULT_ERROR(object)) {
		if (m != VM_PAGE_NULL)
			VM_PAGE_FREE(m);
		vm_fault_cleanup(object, first_m);

		thread_interrupt_level(interruptible_state);

		return (VM_FAULT_MEMORY_ERROR);
	}

	/* Backing store is low: unprivileged tasks wait for relief, then retry. */
	if (vm_backing_store_low &&
	    !(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) {
		if (m != VM_PAGE_NULL)
			VM_PAGE_FREE(m);
		vm_fault_cleanup(object, first_m);

		assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT);
		thread_block(THREAD_CONTINUE_NULL);

		thread_interrupt_level(interruptible_state);

		return (VM_FAULT_RETRY);
	}

	throttle_delay = vm_page_throttled();
	if (throttle_delay) {
		/* Page creation is throttled: stall, then report shortage
		 * (or interruption if the thread was aborted meanwhile). */
		if (m != VM_PAGE_NULL)
			VM_PAGE_FREE(m);
		vm_fault_cleanup(object, first_m);

		VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);

		delay(throttle_delay);

		if (current_thread_aborted()) {
			thread_interrupt_level(interruptible_state);
			return VM_FAULT_INTERRUPTED;
		}
		thread_interrupt_level(interruptible_state);

		return (VM_FAULT_MEMORY_SHORTAGE);
	}
	return (VM_FAULT_SUCCESS);
}
/*
 * vm_fault_zero_page:
 *
 * Satisfy a fault with a zero-filled (or intentionally unfilled)
 * page. Marks the page mapped and clears its code-signing state,
 * then either zero-fills it or records a no-zero-fill fault.
 *
 * Returns the DBG_* fault type for tracing.
 */
static int
vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill)
{
	int fault_kind = DBG_ZERO_FILL_FAULT;

	/* About to be entered into a pmap; any prior CS validation is void. */
	m->pmapped = TRUE;

	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;

	if (no_zero_fill == TRUE) {
		fault_kind = DBG_NZF_PAGE_FAULT;
	} else {
		vm_page_zero_fill(m);

		VM_STAT_INCR(zero_fill_count);
		DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL);
	}
	assert(!m->laundry);
	assert(m->object != kernel_object);

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
	    (m->object->purgable == VM_PURGABLE_DENY ||
	     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     m->object->purgable == VM_PURGABLE_VOLATILE )) {
		/* No dynamic pager to push this page to: park it on the
		 * throttled queue instead of the normal paging queues. */
		vm_page_lockspin_queues();

		assert(!VM_PAGE_WIRED(m));

		VM_PAGE_QUEUES_REMOVE(m);

		queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
		m->throttled = TRUE;
		vm_page_throttled_count++;

		vm_page_unlock_queues();
	} else if (current_thread()->t_page_creation_count > vm_page_creation_throttle) {
		/* Heavy zero-fill producer: tag the page for ZF accounting. */
		m->zero_fill = TRUE;
		VM_ZF_COUNT_INCR();
	}
	return (fault_kind);
}
/* Count of faults that had to wait for a blocked (access-restricted) object. */
unsigned int vm_fault_page_blocked_access = 0;

/*
 * vm_fault_page:
 *
 * Find the resident page for the (object, offset) pair, paging it in
 * from the object's pager or descending the shadow chain as needed,
 * performing copy-on-write into the first object and pushing pages
 * to the first object's copy object when required.
 *
 * On entry: first_object is locked with a paging reference held.
 * On VM_FAULT_SUCCESS: *result_page is the busy page (object locked);
 * *top_page, if not VM_PAGE_NULL, is a busy page held in first_object
 * that the caller must release via vm_fault_cleanup().
 * On any other return, all locks and paging references have been
 * dropped and the caller's interrupt level restored.
 */
vm_fault_return_t
vm_fault_page(
	vm_object_t first_object,
	vm_object_offset_t first_offset,
	vm_prot_t fault_type,
	boolean_t must_be_resident,
	vm_prot_t *protection,
	vm_page_t *result_page,
	vm_page_t *top_page,
	int *type_of_fault,
	kern_return_t *error_code,
	boolean_t no_zero_fill,
#if MACH_PAGEMAP
	boolean_t data_supply,
#else
	__unused boolean_t data_supply,
#endif
	vm_object_fault_info_t fault_info)
{
	vm_page_t m;
	vm_object_t object;
	vm_object_offset_t offset;
	vm_page_t first_m;
	vm_object_t next_object;
	vm_object_t copy_object;
	boolean_t look_for_page;
	vm_prot_t access_required = fault_type;
	vm_prot_t wants_copy_flag;
	CLUSTER_STAT(int pages_at_higher_offsets;)
	CLUSTER_STAT(int pages_at_lower_offsets;)
	kern_return_t wait_result;
	boolean_t interruptible_state;
	vm_fault_return_t error;
	int my_fault;
	uint32_t try_failed_count;
	int interruptible;
	memory_object_t pager;
	vm_fault_return_t retval;

/*
 * With MACH_PAGEMAP, the existence map lets us skip asking the pager
 * for offsets known to be absent; without it we must always ask and
 * can never prove a page is paged out.
 */
#if MACH_PAGEMAP
#define MUST_ASK_PAGER(o, f) (vm_external_state_get((o)->existence_map, (f)) \
	!= VM_EXTERNAL_STATE_ABSENT)
#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
	== VM_EXTERNAL_STATE_EXISTS)
#else
#define MUST_ASK_PAGER(o, f) (TRUE)
#define PAGED_OUT(o, f) (FALSE)
#endif

/* Wake waiters on m and make sure it lands on a paging queue. */
#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive && !m->throttled) {		\
		vm_page_lockspin_queues();				\
		if (!m->active && !m->inactive && !m->throttled)	\
			vm_page_activate(m);				\
		vm_page_unlock_queues();				\
	}							\
	MACRO_END

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset);
#endif

#if MACH_KDB
	/*
	 * With kernel-debugger watchpoints active, don't hand out write
	 * access unless the fault itself was a write.
	 */
	if (db_watchpoint_list) {
		if (!(fault_type & VM_PROT_WRITE))
			*protection &= ~VM_PROT_WRITE;
	}
#endif
	interruptible = fault_info->interruptible;
	interruptible_state = thread_interrupt_level(interruptible);

	object = first_object;
	offset = first_offset;
	first_m = VM_PAGE_NULL;
	access_required = fault_type;

	XPR(XPR_VM_FAULT,
	    "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n",
	    object, offset, fault_type, *protection, 0);

	/* Assume a cache hit until we learn otherwise. */
	my_fault = DBG_CACHE_HIT_FAULT;

	/*
	 * Main loop: walk down the shadow chain until a page is found,
	 * paged in, or zero-filled. Each iteration holds `object` locked
	 * with a paging reference.
	 */
	while (TRUE) {
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0);
#endif
		if (!object->alive) {
			/* Object terminated underneath us. */
			vm_fault_cleanup(object, first_m);
			thread_interrupt_level(interruptible_state);

			return (VM_FAULT_MEMORY_ERROR);
		}
		if (!object->pager_created && object->phys_contiguous) {
			/* Physically contiguous object with no pager: there
			 * is no vm_page_t to return; finish with m == NULL. */
			m = VM_PAGE_NULL;
			goto phys_contig_object;
		}
		if (object->blocked_access) {
			/* Access temporarily blocked; wait until unblocked.
			 * Trade the paging reference for an activity
			 * reference while sleeping. */
			vm_object_activity_begin(object);
			vm_object_paging_end(object);

			while (object->blocked_access) {
				vm_object_sleep(object,
						VM_OBJECT_EVENT_UNBLOCKED,
						THREAD_UNINT);
			}
			vm_fault_page_blocked_access++;

			vm_object_paging_begin(object);
			vm_object_activity_end(object);
		}

		/*
		 * See whether the page at this offset is resident.
		 */
		m = vm_page_lookup(object, offset);
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object);
#endif
		if (m != VM_PAGE_NULL) {

			if (m->busy) {
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0);
#endif
				if (m->list_req_pending) {
					if (m->absent) {
						/* Absent placeholder queued for
						 * pagein elsewhere: free it and
						 * retry the lookup. */
						VM_PAGE_FREE(m);

						continue;
					}
					if (m->pageout || m->cleaning) {
						/* Reclaim the page from the
						 * pageout queue for our use. */
						vm_pageout_queue_steal(m, FALSE);
						PAGE_WAKEUP_DONE(m);
					}
				} else {
					/* Busy for some other reason: sleep
					 * until it's released, then retry. */
					wait_result = PAGE_SLEEP(object, m, interruptible);
					XPR(XPR_VM_FAULT,
					    "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n",
					    object, offset,
					    m, 0, 0);
					counter(c_vm_fault_page_block_busy_kernel++);

					if (wait_result != THREAD_AWAKENED) {
						vm_fault_cleanup(object, first_m);
						thread_interrupt_level(interruptible_state);

						if (wait_result == THREAD_RESTART)
							return (VM_FAULT_RETRY);
						else
							return (VM_FAULT_INTERRUPTED);
					}
					continue;
				}
			}
			if (m->phys_page == vm_page_guard_addr) {
				/*
				 * Guard page: only a "find the page" fault
				 * (VM_PROT_NONE) may succeed; real access
				 * is a memory error.
				 */
				if (fault_type == VM_PROT_NONE) {
					m->busy = TRUE;
					*result_page = m;
					assert(first_m == VM_PAGE_NULL);
					*top_page = first_m;
					if (type_of_fault)
						*type_of_fault = DBG_GUARD_FAULT;
					return VM_FAULT_SUCCESS;
				} else {
					vm_fault_cleanup(object, first_m);
					thread_interrupt_level(interruptible_state);

					return VM_FAULT_MEMORY_ERROR;
				}
			}
			if (m->error) {
				/* Pager reported an error for this page. */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code);
#endif
				if (error_code)
					*error_code = KERN_MEMORY_ERROR;
				VM_PAGE_FREE(m);

				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);

				return (VM_FAULT_MEMORY_ERROR);
			}
			if (m->restart) {
				/* Page marked for restart (e.g. object copy in
				 * progress): discard and retry the fault. */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0);
#endif
				VM_PAGE_FREE(m);

				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);

				return (VM_FAULT_RETRY);
			}
			if (m->absent) {
				/*
				 * Placeholder page with no data: either zero
				 * fill it (no shadow) or descend to the shadow
				 * object, keeping the placeholder busy in the
				 * first object as first_m.
				 */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow);
#endif
				next_object = object->shadow;

				if (next_object == VM_OBJECT_NULL) {
					assert(!must_be_resident);

					error = vm_fault_check(object, m, first_m, interruptible_state);
					if (error != VM_FAULT_SUCCESS)
						return (error);

					XPR(XPR_VM_FAULT,
					    "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n",
					    object, offset,
					    m,
					    first_object, 0);

					if (object != first_object) {
						/* Zero fill belongs in the first
						 * object: drop this one and move
						 * the fault back up. */
						VM_PAGE_FREE(m);
						vm_object_paging_end(object);
						vm_object_unlock(object);

						m = first_m;
						first_m = VM_PAGE_NULL;
						object = first_object;
						offset = first_offset;
						vm_object_lock(object);
					} else {
						m->absent = FALSE;
						m->busy = TRUE;
					}
					my_fault = vm_fault_zero_page(m, no_zero_fill);

					if (fault_info->mark_zf_absent && no_zero_fill == TRUE)
						m->absent = TRUE;
					break;
				} else {
					if (must_be_resident)
						vm_object_paging_end(object);
					else if (object != first_object) {
						vm_object_paging_end(object);
						VM_PAGE_FREE(m);
					} else {
						/* Keep the placeholder busy in
						 * the first object while we
						 * search the shadow chain. */
						first_m = m;
						m->absent = FALSE;
						m->busy = TRUE;

						vm_page_lockspin_queues();
						VM_PAGE_QUEUES_REMOVE(m);
						vm_page_unlock_queues();
					}
					XPR(XPR_VM_FAULT,
					    "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n",
					    object, offset,
					    next_object,
					    offset+object->vo_shadow_offset,0);

					offset += object->vo_shadow_offset;
					fault_info->lo_offset += object->vo_shadow_offset;
					fault_info->hi_offset += object->vo_shadow_offset;
					access_required = VM_PROT_READ;

					vm_object_lock(next_object);
					vm_object_unlock(object);
					object = next_object;
					vm_object_paging_begin(object);

					my_fault = DBG_CACHE_HIT_FAULT;

					continue;
				}
			}
			if ((m->cleaning)
			    && ((object != first_object) || (object->copy != VM_OBJECT_NULL))
			    && (fault_type & VM_PROT_WRITE)) {
				/*
				 * Writing to a page being cleaned that is
				 * visible through a copy: back off and wait
				 * for the clean to finish, then retry.
				 */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset);
#endif
				XPR(XPR_VM_FAULT,
				    "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n",
				    object, offset,
				    m, 0, 0);
				/* Take a real reference so the object survives
				 * the cleanup, then re-check under the lock. */
				vm_object_reference_locked(object);

				vm_fault_cleanup(object, first_m);

				counter(c_vm_fault_page_block_backoff_kernel++);
				vm_object_lock(object);
				assert(object->ref_count > 0);

				m = vm_page_lookup(object, offset);

				if (m != VM_PAGE_NULL && m->cleaning) {
					PAGE_ASSERT_WAIT(m, interruptible);

					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);

					goto backoff;
				} else {
					vm_object_unlock(object);

					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);

					return (VM_FAULT_RETRY);
				}
			}
			if (type_of_fault == NULL && m->speculative &&
			    !(fault_info != NULL && fault_info->stealth)) {
				/* Internal (no fault-type reporting) lookup of
				 * a speculative page: pull it off the
				 * speculative queue (unless in stealth mode). */
				vm_page_lockspin_queues();
				VM_PAGE_QUEUES_REMOVE(m);
				vm_page_unlock_queues();
			}

			if (m->encrypted) {
				/* Decrypt in place before handing out the page. */
				m->busy = TRUE;
				vm_page_decrypt(m, 0);
				assert(object == m->object);
				assert(m->busy);
				PAGE_WAKEUP_DONE(m);

				/* Retry from the top in case things changed
				 * while the page was busy for decryption. */
				continue;
			}
			ASSERT_PAGE_DECRYPTED(m);

			if (m->object->code_signed) {
				/* Code-signed object: CS validation is handled
				 * later, at vm_fault_enter() time. */
			}

#if TRACEFAULTPAGE
			dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0);
#endif
			XPR(XPR_VM_FAULT,
			    "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n",
			    object, offset, m, 0, 0);
			/* Found a usable resident page: mark it busy and exit
			 * the search loop. */
			assert(!m->busy);
			assert(!m->absent);

			m->busy = TRUE;
			break;
		}

		/*
		 * No resident page. Decide whether to ask the pager.
		 */
		look_for_page = (object->pager_created && (MUST_ASK_PAGER(object, offset) == TRUE) && !data_supply);

#if TRACEFAULTPAGE
		dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object);
#endif
		if ((look_for_page || (object == first_object)) && !must_be_resident && !object->phys_contiguous) {
			/* Allocate a placeholder page for the pagein (or for
			 * the eventual zero fill in the first object). */
			m = vm_page_grab();
#if TRACEFAULTPAGE
			dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object);
#endif
			if (m == VM_PAGE_NULL) {

				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);

				return (VM_FAULT_MEMORY_SHORTAGE);
			}
			vm_page_insert(m, object, offset);
		}
		if (look_for_page && !must_be_resident) {
			kern_return_t rc;

			if (!object->pager_ready) {
				/* Pager not yet initialized: back off and wait
				 * for it, holding a real object reference. */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0);
#endif
				if (m != VM_PAGE_NULL)
					VM_PAGE_FREE(m);

				XPR(XPR_VM_FAULT,
				    "vm_f_page: ready wait obj 0x%X, offset 0x%X\n",
				    object, offset, 0, 0, 0);

				vm_object_reference_locked(object);

				vm_fault_cleanup(object, first_m);
				counter(c_vm_fault_page_block_backoff_kernel++);

				vm_object_lock(object);
				assert(object->ref_count > 0);

				if (!object->pager_ready) {
					wait_result = vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGER_READY, interruptible);

					vm_object_unlock(object);
					if (wait_result == THREAD_WAITING)
						wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);

					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);

					return (VM_FAULT_RETRY);
				}
			}
			if (!object->internal && !object->phys_contiguous && object->paging_in_progress > vm_object_pagein_throttle) {
				/* Too many pageins in flight on this external
				 * object: back off until the count drops. */
#if TRACEFAULTPAGE
				dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0);
#endif
				if (m != VM_PAGE_NULL)
					VM_PAGE_FREE(m);

				vm_object_reference_locked(object);

				vm_fault_cleanup(object, first_m);

				counter(c_vm_fault_page_block_backoff_kernel++);

				vm_object_lock(object);
				assert(object->ref_count > 0);

				if (object->paging_in_progress >= vm_object_pagein_throttle) {
					vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGING_ONLY_IN_PROGRESS, interruptible);

					vm_object_unlock(object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(object);

					goto backoff;
				} else {
					vm_object_unlock(object);
					vm_object_deallocate(object);
					thread_interrupt_level(interruptible_state);

					return (VM_FAULT_RETRY);
				}
			}
			if (m != VM_PAGE_NULL) {
				/* Mark the placeholder as an in-flight pagein
				 * request so other faulters wait on it. */
				m->list_req_pending = TRUE;
				m->absent = TRUE;
			}

#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0);
#endif
			pager = object->pager;

			if (pager == MEMORY_OBJECT_NULL) {
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);

				return VM_FAULT_MEMORY_ERROR;
			}
			/* Drop the object lock across the pager upcall.
			 * NOTE(review): copy_strategy is read after the unlock
			 * — presumably it cannot change once set; confirm. */
			vm_object_unlock(object);

			if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL && object != first_object)
				wants_copy_flag = VM_PROT_WANTS_COPY;
			else
				wants_copy_flag = VM_PROT_NONE;

			XPR(XPR_VM_FAULT,
			    "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n",
			    object, offset, m,
			    access_required | wants_copy_flag, 0);

			/* Ask the pager to supply the data for this page. */
			rc = memory_object_data_request(
				pager,
				offset + object->paging_offset,
				PAGE_SIZE,
				access_required | wants_copy_flag,
				(memory_object_fault_info_t)fault_info);

#if TRACEFAULTPAGE
			dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc);
#endif
			vm_object_lock(object);

			if (rc != KERN_SUCCESS) {

				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);

				return ((rc == MACH_SEND_INTERRUPTED) ?
					VM_FAULT_INTERRUPTED :
					VM_FAULT_MEMORY_ERROR);
			} else {
				/* Successful pagein request: reset this
				 * thread's page-creation throttle window. */
				clock_sec_t tv_sec;
				clock_usec_t tv_usec;

				clock_get_system_microtime(&tv_sec, &tv_usec);
				current_thread()->t_page_creation_time = tv_sec;
				current_thread()->t_page_creation_count = 0;
			}
			if ((interruptible != THREAD_UNINT) && (current_thread()->sched_flags & TH_SFLAG_ABORT)) {

				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);

				return (VM_FAULT_INTERRUPTED);
			}
			if (m == VM_PAGE_NULL && object->phys_contiguous) {
				/* Physically contiguous object: no vm_page_t
				 * to return; we're done. */
phys_contig_object:
				goto done;
			}
			/* Loop back to pick up the page the pager supplied. */
			my_fault = DBG_PAGEIN_FAULT;

			continue;
		}

		/*
		 * No pager (or existence map says absent): descend the
		 * shadow chain, or zero-fill at the end of it.
		 */
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m);
#endif
		if (object == first_object)
			first_m = m;
		else
			assert(m == VM_PAGE_NULL);

		XPR(XPR_VM_FAULT,
		    "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n",
		    object, offset, m,
		    object->shadow, 0);

		next_object = object->shadow;

		if (next_object == VM_OBJECT_NULL) {
			/*
			 * Bottom of the chain: zero-fill in the first object.
			 */
			assert(!must_be_resident);

			if (object != first_object) {
				vm_object_paging_end(object);
				vm_object_unlock(object);

				object = first_object;
				offset = first_offset;
				vm_object_lock(object);
			}
			m = first_m;
			assert(m->object == object);
			first_m = VM_PAGE_NULL;

			error = vm_fault_check(object, m, first_m, interruptible_state);
			if (error != VM_FAULT_SUCCESS)
				return (error);

			if (m == VM_PAGE_NULL) {
				m = vm_page_grab();

				if (m == VM_PAGE_NULL) {
					vm_fault_cleanup(object, VM_PAGE_NULL);
					thread_interrupt_level(interruptible_state);

					return (VM_FAULT_MEMORY_SHORTAGE);
				}
				vm_page_insert(m, object, offset);
			}
			my_fault = vm_fault_zero_page(m, no_zero_fill);

			if (fault_info->mark_zf_absent && no_zero_fill == TRUE)
				m->absent = TRUE;
			break;
		} else {
			/* Move down one level of the shadow chain. */
			if ((object != first_object) || must_be_resident)
				vm_object_paging_end(object);

			offset += object->vo_shadow_offset;
			fault_info->lo_offset += object->vo_shadow_offset;
			fault_info->hi_offset += object->vo_shadow_offset;
			access_required = VM_PROT_READ;

			vm_object_lock(next_object);
			vm_object_unlock(object);

			object = next_object;
			vm_object_paging_begin(object);
		}
	}

	/*
	 * A busy, non-absent page has been found (in `object`).
	 */
#if TRACEFAULTPAGE
	dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m);
#endif
#if EXTRA_ASSERTIONS
	assert(m->busy && !m->absent);
	assert((first_m == VM_PAGE_NULL) ||
	       (first_m->busy && !first_m->absent &&
		!first_m->active && !first_m->inactive));
#endif

	ASSERT_PAGE_DECRYPTED(m);

	XPR(XPR_VM_FAULT,
	    "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n",
	    object, offset, m,
	    first_object, first_m);

	if (object != first_object) {
		/*
		 * Page came from a shadow object. For a write fault, copy
		 * it up into the first object (copy-on-write); for a read,
		 * just strip write permission.
		 */
#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type);
#endif
		if (fault_type & VM_PROT_WRITE) {
			vm_page_t copy_m;

			assert(!must_be_resident);

			if (vm_backing_store_low) {
				/* Backing store low: unprivileged tasks wait
				 * and retry rather than consume a page. */
				if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) {

					RELEASE_PAGE(m);
					vm_fault_cleanup(object, first_m);

					assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT);

					thread_block(THREAD_CONTINUE_NULL);
					thread_interrupt_level(interruptible_state);

					return (VM_FAULT_RETRY);
				}
			}
			copy_m = vm_page_grab();

			if (copy_m == VM_PAGE_NULL) {
				RELEASE_PAGE(m);

				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);

				return (VM_FAULT_MEMORY_SHORTAGE);
			}
			XPR(XPR_VM_FAULT,
			    "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n",
			    object, offset,
			    m, copy_m, 0);

			vm_page_copy(m, copy_m);

			/* Disconnect all mappings of the source page so no
			 * one keeps writing to it behind the copy. */
			if (m->pmapped)
				pmap_disconnect(m->phys_page);

			assert(!m->cleaning);

			/* Done with the shadow page. */
			PAGE_WAKEUP_DONE(m);
			vm_object_paging_end(object);
			vm_object_unlock(object);

			my_fault = DBG_COW_FAULT;
			VM_STAT_INCR(cow_faults);
			DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL);
			current_task()->cow_faults++;

			/* Install the copy in the first object, replacing the
			 * absent placeholder (first_m). */
			object = first_object;
			offset = first_offset;

			vm_object_lock(object);

			VM_PAGE_FREE(first_m);
			first_m = VM_PAGE_NULL;
			assert(copy_m->busy);
			vm_page_insert(copy_m, object, offset);

			copy_m->dirty = TRUE;

			m = copy_m;

			/* Try to collapse the shadow chain now that the first
			 * object has its own copy of the page. */
			vm_object_paging_end(object);
			vm_object_collapse(object, offset, TRUE);
			vm_object_paging_begin(object);
		} else
			*protection &= (~VM_PROT_WRITE);
	}

	/*
	 * If the first object has a copy object, a write must first be
	 * pushed to the copy object so the copy sees the old data.
	 */
	try_failed_count = 0;

	while ((copy_object = first_object->copy) != VM_OBJECT_NULL) {
		vm_object_offset_t copy_offset;
		vm_page_t copy_m;

#if TRACEFAULTPAGE
		dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type);
#endif
		if ((fault_type & VM_PROT_WRITE) == 0) {
			/* Read fault: just deny write access so a later write
			 * comes back through here. */
			*protection &= ~VM_PROT_WRITE;
			break;
		}
		if (must_be_resident)
			break;

		/* Lock ordering: try-lock the copy object; on contention,
		 * drop `object` briefly and retry with backoff. */
		if (!vm_object_lock_try(copy_object)) {

			vm_object_unlock(object);
			try_failed_count++;

			mutex_pause(try_failed_count);

			vm_object_lock(object);

			continue;
		}
		try_failed_count = 0;

		vm_object_reference_locked(copy_object);

		copy_offset = first_offset - copy_object->vo_shadow_offset;

		if (copy_object->vo_size <= copy_offset)
			/* Outside the copy object: nothing to push. */
			;
		else if ((copy_m = vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) {
			if (copy_m->busy) {
				/* Someone else is working on the copy page:
				 * back off and wait for it. */
				RELEASE_PAGE(m);

				vm_object_reference_locked(copy_object);
				vm_object_unlock(copy_object);
				vm_fault_cleanup(object, first_m);

				counter(c_vm_fault_page_block_backoff_kernel++);

				vm_object_lock(copy_object);
				assert(copy_object->ref_count > 0);
				VM_OBJ_RES_DECR(copy_object);
				vm_object_lock_assert_exclusive(copy_object);
				copy_object->ref_count--;
				assert(copy_object->ref_count > 0);

				copy_m = vm_page_lookup(copy_object, copy_offset);

				if (copy_m != VM_PAGE_NULL && copy_m->busy) {
					PAGE_ASSERT_WAIT(copy_m, interruptible);

					vm_object_unlock(copy_object);
					wait_result = thread_block(THREAD_CONTINUE_NULL);
					vm_object_deallocate(copy_object);

					goto backoff;
				} else {
					vm_object_unlock(copy_object);
					vm_object_deallocate(copy_object);
					thread_interrupt_level(interruptible_state);

					return (VM_FAULT_RETRY);
				}
			}
			/* Copy page exists and is not busy: the copy already
			 * has the data; nothing more to do. */
		}
		else if (!PAGED_OUT(copy_object, copy_offset)) {
			/*
			 * Copy object has neither a resident page nor (as far
			 * as we can tell) a paged-out copy: push the old data.
			 */
			if (vm_backing_store_low) {
				if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) {

					assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT);
					RELEASE_PAGE(m);

					VM_OBJ_RES_DECR(copy_object);
					vm_object_lock_assert_exclusive(copy_object);
					copy_object->ref_count--;
					assert(copy_object->ref_count > 0);

					vm_object_unlock(copy_object);
					vm_fault_cleanup(object, first_m);
					thread_block(THREAD_CONTINUE_NULL);
					thread_interrupt_level(interruptible_state);

					return (VM_FAULT_RETRY);
				}
			}
			copy_m = vm_page_alloc(copy_object, copy_offset);

			if (copy_m == VM_PAGE_NULL) {
				RELEASE_PAGE(m);

				VM_OBJ_RES_DECR(copy_object);
				vm_object_lock_assert_exclusive(copy_object);
				copy_object->ref_count--;
				assert(copy_object->ref_count > 0);

				vm_object_unlock(copy_object);
				vm_fault_cleanup(object, first_m);
				thread_interrupt_level(interruptible_state);

				return (VM_FAULT_MEMORY_SHORTAGE);
			}
			vm_page_copy(m, copy_m);

			/* Disconnect the source so further writes refault. */
			if (m->pmapped)
				pmap_disconnect(m->phys_page);

			if ((!copy_object->pager_created)
#if MACH_PAGEMAP
			    || vm_external_state_get(copy_object->existence_map, copy_offset) == VM_EXTERNAL_STATE_ABSENT
#endif
			    ) {
				/* No pager to clean to: just activate the
				 * dirty copy page. */
				vm_page_lockspin_queues();
				assert(!m->cleaning);
				vm_page_activate(copy_m);
				vm_page_unlock_queues();

				copy_m->dirty = TRUE;
				PAGE_WAKEUP_DONE(copy_m);
			}
			else {
				/* Pager exists: start cleaning the copy page
				 * out to it (consumes the busy). */
				assert(copy_m->busy == TRUE);
				assert(!m->cleaning);

				copy_m->dirty = TRUE;

				vm_object_unlock(object);
				vm_pageout_initialize_page(copy_m);

				/* If the copy object changed identity while we
				 * had `object` unlocked, start over. */
				if ((copy_object->shadow != object) || (copy_object->ref_count == 1)) {
					vm_object_unlock(copy_object);
					vm_object_deallocate(copy_object);
					vm_object_lock(object);

					continue;
				}
				vm_object_lock(object);
			}
			if (m->wanted) {
				/* Waiters on m must retry: the world changed. */
				m->wanted = FALSE;
				thread_wakeup_with_result((event_t) m, THREAD_RESTART);
			}
		}
		/* Drop the extra reference taken above. */
		vm_object_lock_assert_exclusive(copy_object);
		copy_object->ref_count--;
		assert(copy_object->ref_count > 0);

		VM_OBJ_RES_DECR(copy_object);
		vm_object_unlock(copy_object);

		break;
	}

done:
	*result_page = m;
	*top_page = first_m;

	XPR(XPR_VM_FAULT,
	    "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n",
	    object, offset, m, first_m, 0);

	if (m != VM_PAGE_NULL) {
		retval = VM_FAULT_SUCCESS;
		if (my_fault == DBG_PAGEIN_FAULT) {
			/* Account the pagein and refine the fault type by
			 * whether the object was internal (anonymous). */
			VM_STAT_INCR(pageins);
			DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL);
			DTRACE_VM2(maj_fault, int, 1, (uint64_t *), NULL);
			current_task()->pageins++;

			if (m->object->internal) {
				DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL);
				my_fault = DBG_PAGEIND_FAULT;
			} else {
				DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL);
				my_fault = DBG_PAGEINV_FAULT;
			}

			/* Feed the sequential-access heuristics. */
			vm_fault_is_sequential(object, offset, fault_info->behavior);

			vm_fault_deactivate_behind(object, offset, fault_info->behavior);
		}
		if (type_of_fault)
			*type_of_fault = my_fault;
	} else {
		retval = VM_FAULT_SUCCESS_NO_VM_PAGE;
		assert(first_m == VM_PAGE_NULL);
		assert(object == first_object);
	}

	thread_interrupt_level(interruptible_state);

#if TRACEFAULTPAGE
	dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0);
#endif
	return retval;

backoff:
	/* Common exit for all "waited for something, must retry" paths. */
	thread_interrupt_level(interruptible_state);

	if (wait_result == THREAD_INTERRUPTED)
		return (VM_FAULT_INTERRUPTED);
	return (VM_FAULT_RETRY);

#undef RELEASE_PAGE
}
/*
 * A page needs code-signing validation before being mapped when:
 * it is going into a user pmap, is not already known-tainted, belongs
 * to a code-signed object, and is either not yet validated or has been
 * mapped writable (wpmapped) since validation.
 */
#define VM_FAULT_NEED_CS_VALIDATION(pmap, page) \
	((pmap) != kernel_pmap && \
	 !(page)->cs_tainted && \
	 (page)->object->code_signed && \
	 (!(page)->cs_validated || (page)->wpmapped ))

/* Counters for tainted pages rejected vs. tolerated at map-in time. */
unsigned long cs_enter_tainted_rejected = 0;
unsigned long cs_enter_tainted_accepted = 0;
/*
 * vm_fault_enter:
 *
 * Enter page "m" into pmap "pmap" at virtual address "vaddr" with
 * protection "prot", performing code-signing policy checks and the
 * wiring / page-queue bookkeeping associated with resolving a fault.
 *
 * On entry the page's object must be locked (exclusively for zero-fill
 * faults and whenever validation may modify page state); it remains
 * locked across the call, though it may be dropped and retaken if the
 * pmap layer needs to block.  The page-queues lock must NOT be held.
 *
 * Returns KERN_SUCCESS or KERN_CODESIGN_ERROR.  "*type_of_fault" may be
 * refined (e.g. DBG_CACHE_HIT_FAULT -> DBG_PAGEIND/PAGEINV_FAULT) for
 * accounting of speculative cluster page-ins.
 */
kern_return_t
vm_fault_enter(vm_page_t m,
	       pmap_t pmap,
	       vm_map_offset_t vaddr,
	       vm_prot_t prot,
	       vm_prot_t fault_type,
	       boolean_t wired,
	       boolean_t change_wiring,
	       boolean_t no_cache,
	       boolean_t cs_bypass,
	       int *type_of_fault)
{
	kern_return_t	kr, pe_result;
	boolean_t	previously_pmapped = m->pmapped;
	boolean_t	must_disconnect = 0;
	boolean_t	map_is_switched, map_is_switch_protected;

	vm_object_lock_assert_held(m->object);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/* Guard pages are fictitious and are never entered into the pmap. */
	if (m->phys_page == vm_page_guard_addr) {
		assert(m->fictitious);
		return KERN_SUCCESS;
	}
	if (*type_of_fault == DBG_ZERO_FILL_FAULT) {
		vm_object_lock_assert_exclusive(m->object);
	} else if ((fault_type & VM_PROT_WRITE) == 0) {
		/*
		 * Read fault: map read-only so a later write faults again
		 * and can take the copy-on-write path.
		 */
		prot &= ~VM_PROT_WRITE;
	}
	if (m->pmapped == FALSE) {
		/*
		 * First time this page is entered into any pmap.  If it was
		 * brought in as part of a speculative cluster, account the
		 * page-in to this task now that the page is actually used.
		 */
		if ((*type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) {
			VM_STAT_INCR(pageins);
			DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL);
			if (m->object->internal) {
				DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL);
				*type_of_fault = DBG_PAGEIND_FAULT;
			} else {
				DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL);
				*type_of_fault = DBG_PAGEINV_FAULT;
			}
			current_task()->pageins++;
		}
		VM_PAGE_CONSUME_CLUSTERED(m);
	}
	if (*type_of_fault != DBG_COW_FAULT) {
		DTRACE_VM2(as_fault, int, 1, (uint64_t *), NULL);
		if (pmap == kernel_pmap) {
			DTRACE_VM2(kernel_asflt, int, 1, (uint64_t *), NULL);
		}
	}
	if (VM_FAULT_NEED_CS_VALIDATION(pmap, m)) {
		vm_object_lock_assert_exclusive(m->object);
		if (m->cs_validated) {
			vm_cs_revalidates++;
		}
		vm_page_validate_cs(m);
	}
	/* For switch-protected maps, any validated page is treated as immutable. */
#define page_immutable(m,prot) ((m)->cs_validated )
	map_is_switched = ((pmap != vm_map_pmap(current_task()->map)) &&
			   (pmap == vm_map_pmap(current_thread()->map)));
	map_is_switch_protected = current_thread()->map->switch_protect;
	/*
	 * Refuse write access to immutable pages in a switched, switch-
	 * protected map (unless CS enforcement is globally disabled).
	 */
	if(!cs_enforcement_disable && map_is_switched &&
	   map_is_switch_protected && page_immutable(m, prot) &&
	   (prot & VM_PROT_WRITE))
	{
		return KERN_CODESIGN_ERROR;
	}
	/*
	 * Code-signing policy: the page is suspect if it is already tainted,
	 * or (with enforcement on and no explicit bypass) it is either an
	 * unvalidated page being made executable, or an "immutable" page
	 * being made writable / already writably mapped.
	 */
	if (m->cs_tainted ||
	    (( !cs_enforcement_disable && !cs_bypass ) &&
	     (
	      (!m->cs_validated && (prot & VM_PROT_EXECUTE)) ||
	      (page_immutable(m, prot) && ((prot & VM_PROT_WRITE) || m->wpmapped))
	      ))
	    )
	{
		boolean_t reject_page;
		if(map_is_switched) {
			/* Cannot consult the task's CS state while switched. */
			assert(pmap==vm_map_pmap(current_thread()->map));
			assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE));
			reject_page = FALSE;
		} else {
			reject_page = cs_invalid_page((addr64_t) vaddr);
		}
		if (reject_page) {
			/* reject the tainted page: abort the fault */
			kr = KERN_CODESIGN_ERROR;
			cs_enter_tainted_rejected++;
		} else {
			/* proceed, but mark the page tainted from here on */
			kr = KERN_SUCCESS;
			must_disconnect = !m->cs_tainted;
			m->cs_tainted = TRUE;
			cs_enter_tainted_accepted++;
		}
		if (cs_debug || kr != KERN_SUCCESS) {
			printf("CODESIGNING: vm_fault_enter(0x%llx): "
			       "page %p obj %p off 0x%llx *** INVALID PAGE ***\n",
			       (long long)vaddr, m, m->object, m->offset);
		}
	} else {
		/* proceed with the valid page */
		kr = KERN_SUCCESS;
	}
	if (kr == KERN_SUCCESS) {
		m->pmapped = TRUE;
		if(vm_page_is_slideable(m)) {
			boolean_t was_busy = m->busy;
			/* vm_page_slide() requires the page busy */
			m->busy = TRUE;
			kr = vm_page_slide(m, 0);
			assert(m->busy);
			if(!was_busy) {
				PAGE_WAKEUP_DONE(m);
			}
			if (kr != KERN_SUCCESS) {
				goto after_the_pmap_enter;
			}
		}
		if (fault_type & VM_PROT_WRITE) {
			if (m->wpmapped == FALSE) {
				vm_object_lock_assert_exclusive(m->object);
				m->wpmapped = TRUE;
			}
			if (must_disconnect) {
				/*
				 * The page was just marked tainted: tear down all
				 * existing mappings so other tasks re-fault and see
				 * the new CS state; also drop EXECUTE here.
				 */
				assert(cs_enforcement_disable == FALSE);
				pmap_disconnect(m->phys_page);
				if (!cs_bypass){
					prot &= ~VM_PROT_EXECUTE;
				}
			}
		}
		/* Try a non-blocking pmap enter first (object lock is held). */
		PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, 0,
				   wired, PMAP_OPTIONS_NOWAIT, pe_result);
		if(pe_result == KERN_RESOURCE_SHORTAGE) {
			/*
			 * The pmap layer would have to block: mark the page busy,
			 * drop the object lock, and retry with a blocking enter.
			 */
			boolean_t was_busy = m->busy;
			m->busy = TRUE;
			vm_object_unlock(m->object);
			PMAP_ENTER(pmap, vaddr, m, prot, 0, wired);
			vm_object_lock(m->object);
			assert(m->busy);
			if(!was_busy) {
				PAGE_WAKEUP_DONE(m);
			}
			vm_pmap_enter_blocked++;
		}
	}
after_the_pmap_enter:
	if (change_wiring) {
		vm_page_lockspin_queues();
		if (wired) {
			if (kr == KERN_SUCCESS) {
				vm_page_wire(m);
			}
		} else {
			vm_page_unwire(m, TRUE);
		}
		vm_page_unlock_queues();
	} else {
		if (kr != KERN_SUCCESS) {
			/* the fault failed: keep the page off the active queue */
			vm_page_lockspin_queues();
			vm_page_deactivate(m);
			vm_page_unlock_queues();
		} else {
			if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m) && !m->throttled) {
				if ( vm_page_local_q && !no_cache && (*type_of_fault == DBG_COW_FAULT || *type_of_fault == DBG_ZERO_FILL_FAULT) ) {
					/*
					 * Freshly created pages go on this CPU's local
					 * queue, avoiding the global page-queues lock.
					 */
					struct vpl	*lq;
					uint32_t	lid;

					lid = cpu_number();
					lq = &vm_page_local_q[lid].vpl_un.vpl;
					VPL_LOCK(&lq->vpl_lock);
					queue_enter(&lq->vpl_queue, m, vm_page_t, pageq);
					m->local = TRUE;
					m->local_id = lid;
					lq->vpl_count++;
					VPL_UNLOCK(&lq->vpl_lock);
					if (lq->vpl_count > vm_page_local_q_soft_limit) {
						/* drain the local queue onto the global ones */
						vm_page_reactivate_local(lid, FALSE, FALSE);
					}
					return kr;
				}
				vm_page_lockspin_queues();
				/* re-check queue state now that we hold the lock */
				if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m)) {
					if (no_cache && (!previously_pmapped || m->no_cache)) {
						m->no_cache = TRUE;
						if (!m->speculative)
							vm_page_speculate(m, FALSE);
					} else if (!m->active && !m->inactive)
						vm_page_activate(m);
				}
				vm_page_unlock_queues();
			}
		}
	}
	return kr;
}
extern int _map_enter_debug;	/* debug knob defined elsewhere -- TODO confirm owner */
/* Statistics for shadow-chain collapse attempts on the COW fast path. */
unsigned long vm_fault_collapse_total = 0;
unsigned long vm_fault_collapse_skipped = 0;
/*
 * vm_fault:
 *
 * Handle a page fault at "vaddr" in "map" for access "fault_type".
 * Tries a lock-cheap fast path first (page resident, optional in-line
 * copy-on-write or zero-fill), and falls back to vm_fault_page() for
 * anything that requires pager interaction.  If "caller_pmap" is
 * non-NULL the page is entered there at "caller_pmap_addr" instead of
 * the faulting map's pmap (used by the wiring code).
 *
 * "change_wiring" selects wire/unwire semantics; "interruptible"
 * controls whether waits may be aborted.  Returns KERN_SUCCESS or a
 * kern_return_t describing the failure.  Must not be called with
 * preemption disabled.
 */
kern_return_t
vm_fault(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type,
	boolean_t	change_wiring,
	int		interruptible,
	pmap_t		caller_pmap,
	vm_map_offset_t	caller_pmap_addr)
{
	vm_map_version_t	version;	/* map version for re-verify after unlocking */
	boolean_t		wired;		/* is the mapping wired down? */
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	vm_object_t		old_copy_object;
	vm_page_t		result_page;
	vm_page_t		top_page;
	kern_return_t		kr;
	vm_page_t		m;
	kern_return_t		error_code;
	vm_object_t		cur_object;
	vm_object_offset_t	cur_offset;
	vm_page_t		cur_m;
	vm_object_t		new_object;
	int			type_of_fault;
	pmap_t			pmap;
	boolean_t		interruptible_state;
	vm_map_t		real_map = map;
	vm_map_t		original_map = map;
	vm_prot_t		original_fault_type;
	struct vm_object_fault_info	fault_info;
	boolean_t		need_collapse = FALSE;
	int			object_lock_type = 0;
	int			cur_object_lock_type;
	vm_object_t		top_object = VM_OBJECT_NULL;
	int			throttle_delay;

	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START,
			      (int)((uint64_t)vaddr >> 32),
			      (int)vaddr,
			      (map == kernel_map),
			      0,
			      0);
	/* Page faults may block; refuse to proceed with preemption disabled. */
	if (get_preemption_level() != 0) {
		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
				      (int)((uint64_t)vaddr >> 32),
				      (int)vaddr,
				      KERN_FAILURE,
				      0,
				      0);
		return (KERN_FAILURE);
	}
	interruptible_state = thread_interrupt_level(interruptible);
	VM_STAT_INCR(faults);
	current_task()->faults++;
	original_fault_type = fault_type;
	/* Writers need the top object exclusive from the start. */
	if (fault_type & VM_PROT_WRITE)
		object_lock_type = OBJECT_LOCK_EXCLUSIVE;
	else
		object_lock_type = OBJECT_LOCK_SHARED;
	cur_object_lock_type = OBJECT_LOCK_SHARED;
RetryFault:
	/*
	 * assume we will hit a page in the cache
	 * otherwise, explicitly override with the real fault type
	 */
	type_of_fault = DBG_CACHE_HIT_FAULT;
	fault_type = original_fault_type;
	map = original_map;
	vm_map_lock_read(map);
	/* Find the backing object/offset and allowed protection for vaddr. */
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  object_lock_type, &version,
				  &object, &offset, &prot, &wired,
				  &fault_info,
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		goto done;
	}
	pmap = real_map->pmap;
	fault_info.interruptible = interruptible;
	fault_info.stealth = FALSE;
	fault_info.io_sync = FALSE;
	fault_info.mark_zf_absent = FALSE;
	/*
	 * If the page is wired, we must fault for the current protection
	 * value, to avoid further faults.
	 */
	if (wired) {
		fault_type = prot | VM_PROT_WRITE;
		/* wired pages need the top object locked exclusively */
		if (object_lock_type == OBJECT_LOCK_SHARED) {
			object_lock_type = OBJECT_LOCK_EXCLUSIVE;
			if (vm_object_lock_upgrade(object) == FALSE) {
				/* lost the lock during upgrade; retake exclusively */
				vm_object_lock(object);
			}
		}
	}
#if	VM_FAULT_CLASSIFY
	vm_fault_classify(object, offset, fault_type);
#endif
	/*
	 * A symmetric-copy write fault must go through the slow path so the
	 * copy object is handled correctly.
	 */
	if (object->copy_strategy == MEMORY_OBJECT_COPY_DELAY &&
	    object->copy != VM_OBJECT_NULL && (fault_type & VM_PROT_WRITE))
		goto handle_copy_delay;
	cur_object = object;
	cur_offset = offset;
	/*
	 * FAST PATH: walk down the shadow chain looking for a resident page
	 * that can be mapped without talking to a pager.  Any condition we
	 * cannot handle cheaply breaks out to the slow path below.
	 */
	while (TRUE) {
		if (!cur_object->pager_created &&
		    cur_object->phys_contiguous)	/* superpage: slow path */
			break;
		if (cur_object->blocked_access) {
			/* access to this object has been blocked; slow path */
			break;
		}
		m = vm_page_lookup(cur_object, cur_offset);
		if (m != VM_PAGE_NULL) {
			if (m->busy) {
				wait_result_t	result;
				/*
				 * The page is busy (I/O in flight).  We need the
				 * object exclusive to sleep on the page, so upgrade
				 * (retrying the whole fault if the upgrade drops the
				 * lock), then wait and retry.
				 */
				if (object != cur_object) {
					vm_object_unlock(object);
					if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
						cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
						if (vm_object_lock_upgrade(cur_object) == FALSE) {
							vm_map_unlock_read(map);
							if (real_map != map)
								vm_map_unlock(real_map);
							goto RetryFault;
						}
					}
				} else if (object_lock_type == OBJECT_LOCK_SHARED) {
					object_lock_type = OBJECT_LOCK_EXCLUSIVE;
					if (vm_object_lock_upgrade(object) == FALSE) {
						vm_object_lock(object);
						continue;
					}
				}
				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);
				result = PAGE_ASSERT_WAIT(m, interruptible);
				vm_object_unlock(cur_object);
				if (result == THREAD_WAITING) {
					result = thread_block(THREAD_CONTINUE_NULL);
					counter(c_vm_fault_page_block_busy_kernel++);
				}
				if (result == THREAD_AWAKENED || result == THREAD_RESTART)
					goto RetryFault;
				kr = KERN_ABORTED;
				goto done;
			}
			if (m->phys_page == vm_page_guard_addr) {
				/* guard page: let the slow path deal with it */
				break;
			}
			if (m->unusual && (m->error || m->restart || m->private || m->absent)) {
				/* unusual page state: slow path */
				break;
			}
			if (VM_OBJECT_PURGEABLE_FAULT_ERROR(m->object)) {
				/* purged object: accessing it is an error */
				if (object != cur_object)
					vm_object_unlock(object);
				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);
				vm_object_unlock(cur_object);
				kr = KERN_MEMORY_ERROR;
				goto done;
			}
			if (m->encrypted) {
				/*
				 * ENCRYPTED SWAP: the page must be decrypted before
				 * use; that requires the exclusive object lock and
				 * may block, so decrypt then retry the fault.
				 */
				if (object != cur_object) {
					vm_object_unlock(object);
					if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
						cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
						if (vm_object_lock_upgrade(cur_object) == FALSE) {
							vm_map_unlock_read(map);
							if (real_map != map)
								vm_map_unlock(real_map);
							goto RetryFault;
						}
					}
				} else if (object_lock_type == OBJECT_LOCK_SHARED) {
					object_lock_type = OBJECT_LOCK_EXCLUSIVE;
					if (vm_object_lock_upgrade(object) == FALSE) {
						vm_object_lock(object);
						continue;
					}
				}
				m->busy = TRUE;
				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);
				vm_page_decrypt(m, 0);
				assert(m->busy);
				PAGE_WAKEUP_DONE(m);
				vm_object_unlock(cur_object);
				goto RetryFault;
			}
			ASSERT_PAGE_DECRYPTED(m);
			if(vm_page_is_slideable(m)) {
				/*
				 * sliding needs the exclusive object lock; upgrading
				 * here would be messy, so just drop everything and
				 * retry the fault with exclusive intent.
				 */
				if (object != cur_object) {
					if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
						vm_object_unlock(object);
						vm_object_unlock(cur_object);
						cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
						vm_map_unlock_read(map);
						if (real_map != map)
							vm_map_unlock(real_map);
						goto RetryFault;
					}
				} else if (object_lock_type == OBJECT_LOCK_SHARED) {
					vm_object_unlock(object);
					object_lock_type = OBJECT_LOCK_EXCLUSIVE;
					vm_map_unlock_read(map);
					goto RetryFault;
				}
			}
			if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m)) {
upgrade_for_validation:
				/*
				 * CS validation (done later by vm_fault_enter) needs
				 * the exclusive object lock; acquire it now, retrying
				 * the fault if we cannot upgrade in place.
				 */
				if (object != cur_object) {
					if (cur_object_lock_type == OBJECT_LOCK_SHARED) {
						vm_object_unlock(object);
						vm_object_unlock(cur_object);
						cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE;
						vm_map_unlock_read(map);
						if (real_map != map)
							vm_map_unlock(real_map);
						goto RetryFault;
					}
				} else if (object_lock_type == OBJECT_LOCK_SHARED) {
					object_lock_type = OBJECT_LOCK_EXCLUSIVE;
					if (vm_object_lock_upgrade(object) == FALSE) {
						vm_object_lock(object);
						continue;
					}
				}
			}
			/*
			 * Page resident in the top object with no copy object:
			 * map it directly, regardless of access type.
			 */
			if (object == cur_object && object->copy == VM_OBJECT_NULL) {
				goto FastPmapEnter;
			}
			if ((fault_type & VM_PROT_WRITE) == 0) {
				/*
				 * Read fault on a page found in a shadow object:
				 * map it read-only from where it sits.  Keep the top
				 * object locked (as "top_object") while entering the
				 * pmap so paging activity stays quiesced.
				 */
				if (object != cur_object) {
					top_object = object;
					object = cur_object;
					object_lock_type = cur_object_lock_type;
				}
FastPmapEnter:
				/* "object"/"m" are what we map; locks still held */
				if (caller_pmap) {
					kr = vm_fault_enter(m,
							    caller_pmap,
							    caller_pmap_addr,
							    prot,
							    fault_type,
							    wired,
							    change_wiring,
							    fault_info.no_cache,
							    fault_info.cs_bypass,
							    &type_of_fault);
				} else {
					kr = vm_fault_enter(m,
							    pmap,
							    vaddr,
							    prot,
							    fault_type,
							    wired,
							    change_wiring,
							    fault_info.no_cache,
							    fault_info.cs_bypass,
							    &type_of_fault);
				}
				if (top_object != VM_OBJECT_NULL) {
					vm_object_unlock(top_object);
					top_object = VM_OBJECT_NULL;
				}
				if (need_collapse == TRUE)
					vm_object_collapse(object, offset, TRUE);
				if (type_of_fault == DBG_PAGEIND_FAULT || type_of_fault == DBG_PAGEINV_FAULT || type_of_fault == DBG_CACHE_HIT_FAULT) {
					/* sequential-access heuristics and read-behind reclaim */
					vm_fault_is_sequential(object, cur_offset, fault_info.behavior);
					vm_fault_deactivate_behind(object, cur_offset, fault_info.behavior);
				}
				if (m->busy)
					PAGE_WAKEUP_DONE(m);
				vm_object_unlock(object);
				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);
				goto done;
			}
			/*
			 * WRITE fault from here on.  The top object lock must
			 * already be exclusive (taken at lookup time for writes).
			 */
			assert(object_lock_type == OBJECT_LOCK_EXCLUSIVE);
			if ((throttle_delay = vm_page_throttled())) {
				/* memory pressure: back off before doing the COW copy */
				if (object != cur_object)
					vm_object_unlock(cur_object);
				vm_object_unlock(object);
				vm_map_unlock_read(map);
				if (real_map != map)
					vm_map_unlock(real_map);
				VM_DEBUG_EVENT(vmf_cowdelay, VMF_COWDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
				delay(throttle_delay);
				if (!current_thread_aborted() && vm_page_wait((change_wiring) ?
						 THREAD_UNINT :
						 THREAD_ABORTSAFE))
					goto RetryFault;
				kr = KERN_ABORTED;
				goto done;
			}
			if (cur_object == object) {
				/* writable page already in the top object; but a copy
				 * object exists, so the slow path must handle it */
				break;
			}
			if ((cur_object_lock_type == OBJECT_LOCK_SHARED) &&
			    VM_FAULT_NEED_CS_VALIDATION(NULL, m)) {
				goto upgrade_for_validation;
			}
			/*
			 * In-line copy-on-write: copy the shadow page up into the
			 * top object and map the new copy writable.
			 */
			cur_m = m;
			m = vm_page_grab();
			if (m == VM_PAGE_NULL) {
				/* no free pages: punt to the slow path */
				break;
			}
			vm_page_copy(cur_m, m);
			vm_page_insert(m, object, offset);
			m->dirty = TRUE;
			/*
			 * If the source page is still mapped elsewhere, remove those
			 * mappings so nobody keeps writing the old copy.
			 */
			if (object->ref_count > 1 && cur_m->pmapped)
				pmap_disconnect(cur_m->phys_page);
			/*
			 * Collapsing after the copy is usually worthwhile, except
			 * for the external-object cases below where it would churn.
			 */
			need_collapse = TRUE;
			if (!cur_object->internal &&
			    cur_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY) {
				if (cur_object->copy == object) {
					need_collapse = FALSE;
				} else if (cur_object->copy == object->shadow &&
					   object->shadow->resident_page_count == 0) {
					need_collapse = FALSE;
				}
			}
			vm_object_unlock(cur_object);
			if (need_collapse == FALSE)
				vm_fault_collapse_skipped++;
			vm_fault_collapse_total++;
			type_of_fault = DBG_COW_FAULT;
			VM_STAT_INCR(cow_faults);
			DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL);
			current_task()->cow_faults++;
			goto FastPmapEnter;
		} else {
			/*
			 * No page at cur_offset in cur_object.
			 */
			if (cur_object->pager_created) {
				if (MUST_ASK_PAGER(cur_object, cur_offset) == TRUE) {
					/* the pager may have the data: slow path */
					break;
				}
			}
			if (cur_object->shadow == VM_OBJECT_NULL) {
				/*
				 * Bottom of the chain: zero-fill.
				 */
				if (cur_object->shadow_severed ||
				    VM_OBJECT_PURGEABLE_FAULT_ERROR(cur_object))
				{
					if (object != cur_object)
						vm_object_unlock(cur_object);
					vm_object_unlock(object);
					vm_map_unlock_read(map);
					if (real_map != map)
						vm_map_unlock(real_map);
					kr = KERN_MEMORY_ERROR;
					goto done;
				}
				if ((throttle_delay = vm_page_throttled())) {
					/* memory pressure: back off before zero-filling */
					if (object != cur_object)
						vm_object_unlock(cur_object);
					vm_object_unlock(object);
					vm_map_unlock_read(map);
					if (real_map != map)
						vm_map_unlock(real_map);
					VM_DEBUG_EVENT(vmf_zfdelay, VMF_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0);
					delay(throttle_delay);
					if (!current_thread_aborted() && vm_page_wait((change_wiring) ?
							 THREAD_UNINT :
							 THREAD_ABORTSAFE))
						goto RetryFault;
					kr = KERN_ABORTED;
					goto done;
				}
				if (vm_backing_store_low) {
					/* backing store scarce: only privileged tasks may
					 * consume pages on the fast path */
					if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV))
						break;
				}
				/* the new zero-fill page goes in the top object */
				if (cur_object != object) {
					vm_object_unlock(cur_object);
					cur_object = object;
				}
				if (object_lock_type == OBJECT_LOCK_SHARED) {
					object_lock_type = OBJECT_LOCK_EXCLUSIVE;
					if (vm_object_lock_upgrade(object) == FALSE) {
						vm_map_unlock_read(map);
						if (real_map != map)
							vm_map_unlock(real_map);
						goto RetryFault;
					}
				}
				m = vm_page_alloc(object, offset);
				if (m == VM_PAGE_NULL) {
					/* no free pages: slow path */
					break;
				}
				type_of_fault = vm_fault_zero_page(m, map->no_zero_fill);
				goto FastPmapEnter;
			}
			/*
			 * Descend to the next object in the shadow chain,
			 * keeping the top object locked throughout.
			 */
			cur_offset += cur_object->vo_shadow_offset;
			new_object = cur_object->shadow;
			if (cur_object_lock_type == OBJECT_LOCK_SHARED)
				vm_object_lock_shared(new_object);
			else
				vm_object_lock(new_object);
			if (cur_object != object)
				vm_object_unlock(cur_object);
			cur_object = new_object;
			continue;
		}
	}
	/*
	 * SLOW PATH: cleanup after the fast-path loop, then go through
	 * vm_fault_page(), which can talk to the pager.  The top object
	 * must be locked exclusively for that.
	 */
	if (object != cur_object)
		vm_object_unlock(cur_object);
	if (object_lock_type == OBJECT_LOCK_SHARED) {
		object_lock_type = OBJECT_LOCK_EXCLUSIVE;
		if (vm_object_lock_upgrade(object) == FALSE) {
			vm_object_lock(object);
		}
	}
handle_copy_delay:
	vm_map_unlock_read(map);
	if (real_map != map)
		vm_map_unlock(real_map);
	/*
	 * Keep a reference and mark the object "in paging" so it cannot be
	 * collapsed or terminated while the map is unlocked.
	 */
	vm_object_reference_locked(object);
	vm_object_paging_begin(object);
	XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0);
	error_code = 0;
	kr = vm_fault_page(object, offset, fault_type,
			   (change_wiring && !wired),
			   &prot, &result_page, &top_page,
			   &type_of_fault,
			   &error_code, map->no_zero_fill,
			   FALSE, &fault_info);
	if (kr != VM_FAULT_SUCCESS &&
	    kr != VM_FAULT_SUCCESS_NO_VM_PAGE) {
		vm_object_deallocate(object);
		switch (kr) {
		case VM_FAULT_MEMORY_SHORTAGE:
			if (vm_page_wait((change_wiring) ?
					 THREAD_UNINT :
					 THREAD_ABORTSAFE))
				goto RetryFault;
			/* fall through: the wait was interrupted */
		case VM_FAULT_INTERRUPTED:
			kr = KERN_ABORTED;
			goto done;
		case VM_FAULT_RETRY:
			goto RetryFault;
		case VM_FAULT_MEMORY_ERROR:
			if (error_code)
				kr = error_code;
			else
				kr = KERN_MEMORY_ERROR;
			goto done;
		default:
			panic("vm_fault: unexpected error 0x%x from "
			      "vm_fault_page()\n", kr);
		}
	}
	m = result_page;
	if (m != VM_PAGE_NULL) {
		assert((change_wiring && !wired) ?
		       (top_page == VM_PAGE_NULL) :
		       ((top_page == VM_PAGE_NULL) == (m->object == object)));
	}
#define RELEASE_PAGE(m)					\
	MACRO_BEGIN					\
	PAGE_WAKEUP_DONE(m);				\
	if (!m->active && !m->inactive && !m->throttled) {		\
		vm_page_lockspin_queues();				\
		if (!m->active && !m->inactive && !m->throttled)	\
			vm_page_activate(m);				\
		vm_page_unlock_queues();				\
	}							\
	MACRO_END
	if (m != VM_PAGE_NULL) {
		old_copy_object = m->object->copy;
		vm_object_unlock(m->object);
	} else {
		old_copy_object = VM_OBJECT_NULL;
		vm_object_unlock(object);
	}
	/*
	 * The map was unlocked while vm_fault_page() ran; re-verify it.
	 * If anything changed, re-do the lookup and check that it still
	 * resolves to the same object/offset, otherwise retry the fault.
	 */
	if ((map != original_map) || !vm_map_verify(map, &version)) {
		vm_object_t		retry_object;
		vm_object_offset_t	retry_offset;
		vm_prot_t		retry_prot;

		map = original_map;
		vm_map_lock_read(map);
		kr = vm_map_lookup_locked(&map, vaddr,
					  fault_type & ~VM_PROT_WRITE,
					  OBJECT_LOCK_EXCLUSIVE, &version,
					  &retry_object, &retry_offset, &retry_prot,
					  &wired,
					  &fault_info,
					  &real_map);
		pmap = real_map->pmap;
		if (kr != KERN_SUCCESS) {
			vm_map_unlock_read(map);
			if (m != VM_PAGE_NULL) {
				vm_object_lock(m->object);
				RELEASE_PAGE(m);
				vm_fault_cleanup(m->object, top_page);
			} else {
				vm_object_lock(object);
				vm_fault_cleanup(object, top_page);
			}
			vm_object_deallocate(object);
			goto done;
		}
		vm_object_unlock(retry_object);
		if ((retry_object != object) || (retry_offset != offset)) {
			vm_map_unlock_read(map);
			if (real_map != map)
				vm_map_unlock(real_map);
			if (m != VM_PAGE_NULL) {
				vm_object_lock(m->object);
				RELEASE_PAGE(m);
				vm_fault_cleanup(m->object, top_page);
			} else {
				vm_object_lock(object);
				vm_fault_cleanup(object, top_page);
			}
			vm_object_deallocate(object);
			goto RetryFault;
		}
		/* the new lookup may allow less access than before */
		prot &= retry_prot;
	}
	if (m != VM_PAGE_NULL) {
		vm_object_lock(m->object);
		if (m->object->copy != old_copy_object) {
			/*
			 * A copy object appeared while we were unlocked: map the
			 * page read-only so the next write triggers proper COW.
			 */
			prot &= ~VM_PROT_WRITE;
		}
	} else
		vm_object_lock(object);
	/* A wired mapping must be faulted at its full protection. */
	if (wired && (fault_type != (prot | VM_PROT_WRITE))) {
		vm_map_verify_done(map, &version);
		if (real_map != map)
			vm_map_unlock(real_map);
		if (m != VM_PAGE_NULL) {
			RELEASE_PAGE(m);
			vm_fault_cleanup(m->object, top_page);
		} else
			vm_fault_cleanup(object, top_page);
		vm_object_deallocate(object);
		goto RetryFault;
	}
	if (m != VM_PAGE_NULL) {
		/* normal case: enter the page we got into the pmap */
		if (caller_pmap) {
			kr = vm_fault_enter(m,
					    caller_pmap,
					    caller_pmap_addr,
					    prot,
					    fault_type,
					    wired,
					    change_wiring,
					    fault_info.no_cache,
					    fault_info.cs_bypass,
					    &type_of_fault);
		} else {
			kr = vm_fault_enter(m,
					    pmap,
					    vaddr,
					    prot,
					    fault_type,
					    wired,
					    change_wiring,
					    fault_info.no_cache,
					    fault_info.cs_bypass,
					    &type_of_fault);
		}
		if (kr != KERN_SUCCESS) {
			/* abort this page fault */
			vm_map_verify_done(map, &version);
			if (real_map != map)
				vm_map_unlock(real_map);
			PAGE_WAKEUP_DONE(m);
			vm_fault_cleanup(m->object, top_page);
			vm_object_deallocate(object);
			goto done;
		}
	} else {
		/*
		 * No page: the object is physically contiguous
		 * (VM_FAULT_SUCCESS_NO_VM_PAGE).  Map the largest aligned
		 * block around vaddr that stays within one map entry.
		 */
		vm_map_entry_t		entry;
		vm_map_offset_t		laddr;
		vm_map_offset_t		ldelta, hdelta;
#ifdef ppc
		/* While we do not worry about execution protection in
		 * general, certain pages may not be executable on ppc. */
		if ((fault_type & VM_PROT_EXECUTE) &&
		    (!pmap_eligible_for_execute((ppnum_t)(object->vo_shadow_offset >> 12)))) {
			vm_map_verify_done(map, &version);
			if (real_map != map)
				vm_map_unlock(real_map);
			vm_fault_cleanup(object, top_page);
			vm_object_deallocate(object);
			kr = KERN_PROTECTION_FAILURE;
			goto done;
		}
#endif	/* ppc */
		if (real_map != map)
			vm_map_unlock(real_map);
		if (original_map != map) {
			vm_map_unlock_read(map);
			vm_map_lock_read(original_map);
			map = original_map;
		}
		real_map = map;
		laddr = vaddr;
		hdelta = 0xFFFFF000;
		ldelta = 0xFFFFF000;
		/* descend through submaps, tracking the distance to the
		 * entry boundaries on either side of laddr */
		while (vm_map_lookup_entry(map, laddr, &entry)) {
			if (ldelta > (laddr - entry->vme_start))
				ldelta = laddr - entry->vme_start;
			if (hdelta > (entry->vme_end - laddr))
				hdelta = entry->vme_end - laddr;
			if (entry->is_sub_map) {
				laddr = (laddr - entry->vme_start)
					+ entry->offset;
				vm_map_lock_read(entry->object.sub_map);
				if (map != real_map)
					vm_map_unlock_read(map);
				if (entry->use_pmap) {
					vm_map_unlock_read(real_map);
					real_map = entry->object.sub_map;
				}
				map = entry->object.sub_map;
			} else {
				break;
			}
		}
		if (vm_map_lookup_entry(map, laddr, &entry) &&
		    (entry->object.vm_object != NULL) &&
		    (entry->object.vm_object == object)) {
			int superpage = (!object->pager_created && object->phys_contiguous)? VM_MEM_SUPERPAGE : 0;
			if (caller_pmap) {
				/* Set up a block mapped area */
				assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12));
				pmap_map_block(caller_pmap,
					       (addr64_t)(caller_pmap_addr - ldelta),
					       (ppnum_t)((((vm_map_offset_t) (entry->object.vm_object->vo_shadow_offset)) +
							  entry->offset + (laddr - entry->vme_start) - ldelta) >> 12),
					       (uint32_t)((ldelta + hdelta) >> 12), prot,
					       (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0);
			} else {
				/* Set up a block mapped area in this map's pmap */
				assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12));
				pmap_map_block(real_map->pmap,
					       (addr64_t)(vaddr - ldelta),
					       (ppnum_t)((((vm_map_offset_t)(entry->object.vm_object->vo_shadow_offset)) +
							  entry->offset + (laddr - entry->vme_start) - ldelta) >> 12),
					       (uint32_t)((ldelta + hdelta) >> 12), prot,
					       (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0);
			}
		}
	}
	/* final cleanup: release locks, the page and the paging reference */
	vm_map_verify_done(map, &version);
	if (real_map != map)
		vm_map_unlock(real_map);
	if (m != VM_PAGE_NULL) {
		PAGE_WAKEUP_DONE(m);
		vm_fault_cleanup(m->object, top_page);
	} else
		vm_fault_cleanup(object, top_page);
	vm_object_deallocate(object);
#undef	RELEASE_PAGE
	kr = KERN_SUCCESS;
done:
	thread_interrupt_level(interruptible_state);
	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END,
			      (int)((uint64_t)vaddr >> 32),
			      (int)vaddr,
			      kr,
			      type_of_fault,
			      0);
	return (kr);
}
/*
 * vm_fault_wire:
 *
 * Wire down the pages of map entry "entry", entering them in "pmap"
 * starting at "pmap_addr".  The entry must be marked in_transition.
 * Tries vm_fault_wire_fast() per page first, falling back to the full
 * vm_fault() when the fast path fails.  On failure, the prefix that
 * was already wired is unwired before returning the error.
 */
kern_return_t
vm_fault_wire(
	vm_map_t	map,
	vm_map_entry_t	entry,
	pmap_t		pmap,
	vm_map_offset_t	pmap_addr)
{
	register vm_map_offset_t	va;
	register vm_map_offset_t	end_addr = entry->vme_end;
	register kern_return_t	rc;

	assert(entry->in_transition);

	/* Physically contiguous objects never page; nothing to wire. */
	if ((entry->object.vm_object != NULL) &&
	    !entry->is_sub_map &&
	    entry->object.vm_object->phys_contiguous) {
		return KERN_SUCCESS;
	}
	/*
	 * Tell the pmap layer the range may not fault, so page tables
	 * and related structures can be locked down as well.
	 */
	pmap_pageable(pmap, pmap_addr,
		      pmap_addr + (end_addr - entry->vme_start), FALSE);
	/*
	 * Fault in and wire each page.  Kernel wirings are uninterruptible;
	 * user wirings may be aborted.
	 */
	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		if ((rc = vm_fault_wire_fast(
			     map, va, entry, pmap,
			     pmap_addr + (va - entry->vme_start)
			     )) != KERN_SUCCESS) {
			rc = vm_fault(map, va, VM_PROT_NONE, TRUE,
				      (pmap == kernel_pmap) ?
				      THREAD_UNINT : THREAD_ABORTSAFE,
				      pmap, pmap_addr + (va - entry->vme_start));
			DTRACE_VM2(softlock, int, 1, (uint64_t *), NULL);
		}
		if (rc != KERN_SUCCESS) {
			struct vm_map_entry	tmp_entry = *entry;

			/* undo what has been wired so far before failing */
			tmp_entry.vme_end = va;
			vm_fault_unwire(map,
					&tmp_entry, FALSE, pmap, pmap_addr);
			return rc;
		}
	}
	return KERN_SUCCESS;
}
/*
 * vm_fault_unwire:
 *
 * Unwire the pages of map entry "entry", undoing a vm_fault_wire().
 * If "deallocate" is true the pages are freed outright (after being
 * disconnected from all pmaps); otherwise they are merely unwired and
 * woken.  "pmap"/"pmap_addr" identify where the wiring was entered.
 * Uses "stealth" faults so lookups do not disturb pageout heuristics.
 */
void
vm_fault_unwire(
	vm_map_t	map,
	vm_map_entry_t	entry,
	boolean_t	deallocate,
	pmap_t		pmap,
	vm_map_offset_t	pmap_addr)
{
	register vm_map_offset_t	va;
	register vm_map_offset_t	end_addr = entry->vme_end;
	vm_object_t		object;
	struct vm_object_fault_info fault_info;

	object = (entry->is_sub_map)
		? VM_OBJECT_NULL : entry->object.vm_object;

	/* physically contiguous objects were never really wired */
	if (object != VM_OBJECT_NULL && object->phys_contiguous)
		return;

	fault_info.interruptible = THREAD_UNINT;
	fault_info.behavior = entry->behavior;
	fault_info.user_tag = entry->alias;
	fault_info.lo_offset = entry->offset;
	fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
	fault_info.no_cache = entry->no_cache;
	fault_info.stealth = TRUE;	/* don't affect pageout state */
	fault_info.io_sync = FALSE;
	fault_info.cs_bypass = FALSE;
	fault_info.mark_zf_absent = FALSE;

	/*
	 * Since the pages are wired down, we must be able to
	 * get their mappings from the physical map system.
	 */
	for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
		if (object == VM_OBJECT_NULL) {
			/* submap: recurse via a VM_PROT_NONE unwiring fault */
			if (pmap) {
				pmap_change_wiring(pmap,
						   pmap_addr + (va - entry->vme_start), FALSE);
			}
			(void) vm_fault(map, va, VM_PROT_NONE,
					TRUE, THREAD_UNINT, pmap, pmap_addr);
		} else {
			vm_prot_t	prot;
			vm_page_t	result_page;
			vm_page_t	top_page;
			vm_object_t	result_object;
			vm_fault_return_t result;

			/* clamp the cluster size, guarding against overflow */
			if (end_addr - va > (vm_size_t) -1) {
				/* 32-bit overflow */
				fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE);
			} else {
				fault_info.cluster_size = (vm_size_t) (end_addr - va);
				assert(fault_info.cluster_size == end_addr - va);
			}
			do {
				prot = VM_PROT_NONE;
				vm_object_lock(object);
				vm_object_paging_begin(object);
				XPR(XPR_VM_FAULT,
				    "vm_fault_unwire -> vm_fault_page\n",
				    0,0,0,0,0);
				result = vm_fault_page(
					object,
					entry->offset + (va - entry->vme_start),
					VM_PROT_NONE, TRUE,
					&prot, &result_page, &top_page,
					(int *)0,
					NULL, map->no_zero_fill,
					FALSE, &fault_info);
			} while (result == VM_FAULT_RETRY);
			/*
			 * If the object has been terminated (e.g. the memory
			 * object died), there is nothing left to unwire here.
			 */
			if (result == VM_FAULT_MEMORY_ERROR && !object->alive)
				continue;
			if (result != VM_FAULT_SUCCESS)
				panic("vm_fault_unwire: failure");
			result_object = result_page->object;
			if (deallocate) {
				assert(result_page->phys_page !=
				       vm_page_fictitious_addr);
				pmap_disconnect(result_page->phys_page);
				VM_PAGE_FREE(result_page);
			} else {
				if ((pmap) && (result_page->phys_page != vm_page_guard_addr))
					pmap_change_wiring(pmap,
							   pmap_addr + (va - entry->vme_start), FALSE);
				if (VM_PAGE_WIRED(result_page)) {
					vm_page_lockspin_queues();
					vm_page_unwire(result_page, TRUE);
					vm_page_unlock_queues();
				}
				if(entry->zero_wired_pages) {
					/* scrub pages that were wired for zero-on-unwire */
					pmap_zero_page(result_page->phys_page);
					entry->zero_wired_pages = FALSE;
				}
				PAGE_WAKEUP_DONE(result_page);
			}
			vm_fault_cleanup(result_object, top_page);
		}
	}
	/*
	 * Inform the physical mapping system that the range
	 * of addresses may fault, so that page tables and
	 * such may be unwired themselves.
	 */
	pmap_pageable(pmap, pmap_addr,
		      pmap_addr + (end_addr - entry->vme_start), TRUE);
}
/*
 * vm_fault_wire_fast:
 *
 * Fast-path wiring of one page at "va" of "entry": only succeeds when
 * the page is already resident in the entry's top-level object and in
 * an ordinary state.  Anything else (busy, encrypted, absent, error,
 * submap, COW needed) returns KERN_FAILURE so the caller can fall back
 * to the full vm_fault() path.  On success the page is wired and
 * entered into "pmap" at "pmap_addr".
 */
kern_return_t
vm_fault_wire_fast(
	__unused vm_map_t	map,
	vm_map_offset_t	va,
	vm_map_entry_t	entry,
	pmap_t		pmap,
	vm_map_offset_t	pmap_addr)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	register vm_page_t	m;
	vm_prot_t		prot;
	thread_t		thread = current_thread();
	int			type_of_fault;
	kern_return_t		kr;

	VM_STAT_INCR(faults);

	if (thread != THREAD_NULL && thread->task != TASK_NULL)
		thread->task->faults++;

	/* local cleanup macros: unwire/wake the page, end paging, drop ref */
#undef	RELEASE_PAGE
#define RELEASE_PAGE(m)	{				\
	PAGE_WAKEUP_DONE(m);				\
	vm_page_lockspin_queues();			\
	vm_page_unwire(m, TRUE);			\
	vm_page_unlock_queues();			\
}

#undef	UNLOCK_THINGS
#define UNLOCK_THINGS	{				\
	vm_object_paging_end(object);			   \
	vm_object_unlock(object);			   \
}

#undef	UNLOCK_AND_DEALLOCATE
#define UNLOCK_AND_DEALLOCATE	{			\
	UNLOCK_THINGS;					\
	vm_object_deallocate(object);			\
}

#define GIVE_UP	{					\
	UNLOCK_AND_DEALLOCATE;				\
	return(KERN_FAILURE);				\
}

	/* submaps require the full fault path */
	if (entry->is_sub_map)
		return(KERN_FAILURE);

	object = entry->object.vm_object;
	offset = (va - entry->vme_start) + entry->offset;
	prot = entry->protection;

	/* take a paging reference so the object stays stable */
	vm_object_lock(object);
	vm_object_reference_locked(object);
	vm_object_paging_begin(object);

	/* only an ordinary resident page qualifies for the fast path */
	m = vm_page_lookup(object, offset);
	if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) ||
	    (m->unusual && ( m->error || m->restart || m->absent))) {
		GIVE_UP;
	}
	ASSERT_PAGE_DECRYPTED(m);

	/* guard pages are "wired" trivially; nothing to enter */
	if (m->fictitious &&
	    m->phys_page == vm_page_guard_addr) {
		kr = KERN_SUCCESS;
		goto done;
	}
	/* wire the page down, then mark it busy for the pmap enter */
	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	assert(!m->busy);
	m->busy = TRUE;
	assert(!m->absent);

	/* a pending copy-on-write means the slow path must handle it */
	if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) {
		RELEASE_PAGE(m);
		GIVE_UP;
	}
	/* enter it in the physical map (wired) */
	type_of_fault = DBG_CACHE_HIT_FAULT;

	kr = vm_fault_enter(m,
			    pmap,
			    pmap_addr,
			    prot,
			    prot,
			    TRUE,
			    FALSE,
			    FALSE,
			    FALSE,
			    &type_of_fault);

done:
	PAGE_WAKEUP_DONE(m);
	UNLOCK_AND_DEALLOCATE;

	return kr;
}
/*
 * vm_fault_copy_cleanup:
 *
 * Release a page obtained through vm_fault_page() on behalf of
 * vm_fault_copy(): wake any waiters on the page, put it back on a
 * pageout queue if it is on none, and finish the paging operation
 * on its object (releasing "top_page" as well).
 */
void
vm_fault_copy_cleanup(
	vm_page_t	page,
	vm_page_t	top_page)
{
	vm_object_t	obj = page->object;
	boolean_t	queued;

	vm_object_lock(obj);
	PAGE_WAKEUP_DONE(page);

	/* double-checked: re-test the queue state under the queues lock */
	queued = page->active || page->inactive || page->throttled;
	if (!queued) {
		vm_page_lockspin_queues();
		if (!(page->active || page->inactive || page->throttled))
			vm_page_activate(page);
		vm_page_unlock_queues();
	}
	vm_fault_cleanup(obj, top_page);
}
/*
 * vm_fault_copy_dst_cleanup:
 *
 * Release a destination page that vm_fault_copy() had wired down:
 * unwire it and end the paging operation on its object.  A NULL
 * page is tolerated and ignored.
 */
void
vm_fault_copy_dst_cleanup(
	vm_page_t	page)
{
	vm_object_t	obj;

	if (page == VM_PAGE_NULL)
		return;

	obj = page->object;
	vm_object_lock(obj);
	vm_page_lockspin_queues();
	vm_page_unwire(page, TRUE);
	vm_page_unlock_queues();
	vm_object_paging_end(obj);
	vm_object_unlock(obj);
}
/*
 * vm_fault_copy:
 *
 * Copy "*copy_size" bytes from (src_object, src_offset) to
 * (dst_object, dst_offset), faulting in source and destination pages
 * page-by-page as needed.  A NULL src_object means zero-fill the
 * destination.  Offsets and size need not be page aligned; partial
 * pages are handled with vm_page_part_copy()/vm_page_part_zero_fill().
 *
 * "dst_map"/"dst_version" are used to re-verify the destination map
 * after each page (the copy stops early if the map changed).  On
 * return "*copy_size" is updated to the number of bytes actually
 * copied.  Returns KERN_SUCCESS, MACH_SEND_INTERRUPTED, or an error
 * from the underlying faults.
 */
kern_return_t
vm_fault_copy(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_map_size_t		*copy_size,
	vm_object_t		dst_object,
	vm_object_offset_t	dst_offset,
	vm_map_t		dst_map,
	vm_map_version_t	*dst_version,
	int			interruptible)
{
	vm_page_t		result_page;
	vm_page_t		src_page;
	vm_page_t		src_top_page;
	vm_prot_t		src_prot;
	vm_page_t		dst_page;
	vm_page_t		dst_top_page;
	vm_prot_t		dst_prot;
	vm_map_size_t		amount_left;
	vm_object_t		old_copy_object;
	kern_return_t		error = 0;
	vm_fault_return_t	result;
	vm_map_size_t		part_size;
	struct vm_object_fault_info fault_info_src;
	struct vm_object_fault_info fault_info_dst;

	/* report back how much was actually copied before returning */
#define	RETURN(x)					\
	MACRO_BEGIN					\
	*copy_size -= amount_left;			\
	MACRO_RETURN(x);				\
	MACRO_END

	amount_left = *copy_size;

	fault_info_src.interruptible = interruptible;
	fault_info_src.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info_src.user_tag  = 0;
	fault_info_src.lo_offset = vm_object_trunc_page(src_offset);
	fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left;
	fault_info_src.no_cache   = FALSE;
	fault_info_src.stealth = TRUE;
	fault_info_src.io_sync = FALSE;
	fault_info_src.cs_bypass = FALSE;
	fault_info_src.mark_zf_absent = FALSE;

	fault_info_dst.interruptible = interruptible;
	fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL;
	fault_info_dst.user_tag  = 0;
	fault_info_dst.lo_offset = vm_object_trunc_page(dst_offset);
	fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left;
	fault_info_dst.no_cache   = FALSE;
	fault_info_dst.stealth = TRUE;
	fault_info_dst.io_sync = FALSE;
	fault_info_dst.cs_bypass = FALSE;
	fault_info_dst.mark_zf_absent = FALSE;

	do { /* while (amount_left > 0) */
		/*
		 * STEP 1: fault in the destination page (writable) and wire
		 * it so it cannot be stolen while the source is faulted.
		 */
	RetryDestinationFault: ;
		dst_prot = VM_PROT_WRITE|VM_PROT_READ;

		vm_object_lock(dst_object);
		vm_object_paging_begin(dst_object);

		/* clamp the cluster size, guarding against 32-bit overflow */
		if (amount_left > (vm_size_t) -1) {
			/* 32-bit overflow */
			fault_info_dst.cluster_size = (vm_size_t) (0 - PAGE_SIZE);
		} else {
			fault_info_dst.cluster_size = (vm_size_t) amount_left;
			assert(fault_info_dst.cluster_size == amount_left);
		}
		XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0);
		result = vm_fault_page(dst_object,
				       vm_object_trunc_page(dst_offset),
				       VM_PROT_WRITE|VM_PROT_READ,
				       FALSE,
				       &dst_prot, &dst_page, &dst_top_page,
				       (int *)0,
				       &error,
				       dst_map->no_zero_fill,
				       FALSE, &fault_info_dst);
		switch (result) {
		case VM_FAULT_SUCCESS:
			break;
		case VM_FAULT_RETRY:
			goto RetryDestinationFault;
		case VM_FAULT_MEMORY_SHORTAGE:
			if (vm_page_wait(interruptible))
				goto RetryDestinationFault;
			/* fall thru: the wait was interrupted */
		case VM_FAULT_INTERRUPTED:
			RETURN(MACH_SEND_INTERRUPTED);
		case VM_FAULT_SUCCESS_NO_VM_PAGE:
			/* success but no VM page: fail the copy */
			vm_object_paging_end(dst_object);
			vm_object_unlock(dst_object);
			/*FALLTHROUGH*/
		case VM_FAULT_MEMORY_ERROR:
			if (error)
				return (error);
			else
				return(KERN_MEMORY_ERROR);
		default:
			panic("vm_fault_copy: unexpected error 0x%x from "
			      "vm_fault_page()\n", result);
		}
		assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE);

		/*
		 * Remember the copy object so we can detect a COW change
		 * while the object is unlocked; wire the page instead of
		 * keeping it busy, so the source fault can proceed.
		 */
		old_copy_object = dst_page->object->copy;

		vm_page_lockspin_queues();
		vm_page_wire(dst_page);
		vm_page_unlock_queues();
		PAGE_WAKEUP_DONE(dst_page);
		vm_object_unlock(dst_page->object);

		if (dst_top_page != VM_PAGE_NULL) {
			vm_object_lock(dst_object);
			VM_PAGE_FREE(dst_top_page);
			vm_object_paging_end(dst_object);
			vm_object_unlock(dst_object);
		}

		/*
		 * STEP 2: fault in the source page (read-only), unless we are
		 * zero-filling (src_object == NULL) or source and destination
		 * are the same page.
		 */
	RetrySourceFault: ;
		if (src_object == VM_OBJECT_NULL) {
			/* no source object: zero-fill the destination page */
			src_page = VM_PAGE_NULL;
			result_page = VM_PAGE_NULL;
		} else {
			vm_object_lock(src_object);
			src_page = vm_page_lookup(src_object,
						  vm_object_trunc_page(src_offset));
			if (src_page == dst_page) {
				src_prot = dst_prot;
				result_page = VM_PAGE_NULL;
			} else {
				src_prot = VM_PROT_READ;
				vm_object_paging_begin(src_object);

				/* clamp the cluster size, guarding against overflow */
				if (amount_left > (vm_size_t) -1) {
					/* 32-bit overflow */
					fault_info_src.cluster_size = (vm_size_t) (0 - PAGE_SIZE);
				} else {
					fault_info_src.cluster_size = (vm_size_t) amount_left;
					assert(fault_info_src.cluster_size == amount_left);
				}
				XPR(XPR_VM_FAULT,
				    "vm_fault_copy(2) -> vm_fault_page\n",
				    0,0,0,0,0);
				result = vm_fault_page(
					src_object,
					vm_object_trunc_page(src_offset),
					VM_PROT_READ, FALSE,
					&src_prot,
					&result_page, &src_top_page,
					(int *)0, &error, FALSE,
					FALSE, &fault_info_src);
				switch (result) {
				case VM_FAULT_SUCCESS:
					break;
				case VM_FAULT_RETRY:
					goto RetrySourceFault;
				case VM_FAULT_MEMORY_SHORTAGE:
					if (vm_page_wait(interruptible))
						goto RetrySourceFault;
					/* fall thru: the wait was interrupted */
				case VM_FAULT_INTERRUPTED:
					vm_fault_copy_dst_cleanup(dst_page);
					RETURN(MACH_SEND_INTERRUPTED);
				case VM_FAULT_SUCCESS_NO_VM_PAGE:
					/* success but no VM page: fail the copy */
					vm_object_paging_end(src_object);
					vm_object_unlock(src_object);
					/*FALLTHROUGH*/
				case VM_FAULT_MEMORY_ERROR:
					vm_fault_copy_dst_cleanup(dst_page);
					if (error)
						return (error);
					else
						return(KERN_MEMORY_ERROR);
				default:
					panic("vm_fault_copy(2): unexpected "
					      "error 0x%x from "
					      "vm_fault_page()\n", result);
				}
				assert((src_top_page == VM_PAGE_NULL) ==
				       (result_page->object == src_object));
			}
			assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE);
			vm_object_unlock(result_page->object);
		}
		/*
		 * STEP 3: re-verify the destination map; if it changed (or the
		 * destination object grew a new copy object) stop the copy
		 * and report how much was done.
		 */
		if (!vm_map_verify(dst_map, dst_version)) {
			if (result_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(result_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
			break;
		}
		vm_object_lock(dst_page->object);
		if (dst_page->object->copy != old_copy_object) {
			vm_object_unlock(dst_page->object);
			vm_map_verify_done(dst_map, dst_version);
			if (result_page != VM_PAGE_NULL && src_page != dst_page)
				vm_fault_copy_cleanup(result_page, src_top_page);
			vm_fault_copy_dst_cleanup(dst_page);
			break;
		}
		vm_object_unlock(dst_page->object);
		/*
		 * STEP 4: perform the copy (or zero-fill), partial-page when
		 * offsets/size are not page aligned.
		 */
		if (!page_aligned(src_offset) ||
		    !page_aligned(dst_offset) ||
		    !page_aligned(amount_left)) {
			vm_object_offset_t	src_po,
						dst_po;

			src_po = src_offset - vm_object_trunc_page(src_offset);
			dst_po = dst_offset - vm_object_trunc_page(dst_offset);

			/* copy up to the nearer of the two page boundaries */
			if (dst_po > src_po) {
				part_size = PAGE_SIZE - dst_po;
			} else {
				part_size = PAGE_SIZE - src_po;
			}
			if (part_size > (amount_left)){
				part_size = amount_left;
			}
			if (result_page == VM_PAGE_NULL) {
				assert((vm_offset_t) dst_po == dst_po);
				assert((vm_size_t) part_size == part_size);
				vm_page_part_zero_fill(dst_page,
						       (vm_offset_t) dst_po,
						       (vm_size_t) part_size);
			} else {
				assert((vm_offset_t) src_po == src_po);
				assert((vm_offset_t) dst_po == dst_po);
				assert((vm_size_t) part_size == part_size);
				vm_page_part_copy(result_page,
						  (vm_offset_t) src_po,
						  dst_page,
						  (vm_offset_t) dst_po,
						  (vm_size_t)part_size);
				if(!dst_page->dirty){
					/*
					 * NOTE(review): locks dst_object but unlocks
					 * dst_page->object -- presumably the same object at
					 * this point (the copy check above passed); confirm.
					 */
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		} else {
			part_size = PAGE_SIZE;

			if (result_page == VM_PAGE_NULL)
				vm_page_zero_fill(dst_page);
			else{
				vm_page_copy(result_page, dst_page);
				if(!dst_page->dirty){
					/* NOTE(review): same asymmetric lock/unlock pair
					 * as above -- presumably dst_page->object ==
					 * dst_object here; confirm. */
					vm_object_lock(dst_object);
					dst_page->dirty = TRUE;
					vm_object_unlock(dst_page->object);
				}
			}
		}
		/*
		 * STEP 5: release pages and advance to the next chunk.
		 */
		vm_map_verify_done(dst_map, dst_version);

		if (result_page != VM_PAGE_NULL && src_page != dst_page)
			vm_fault_copy_cleanup(result_page, src_top_page);
		vm_fault_copy_dst_cleanup(dst_page);

		amount_left -= part_size;
		src_offset += part_size;
		dst_offset += part_size;
	} while (amount_left > 0);

	RETURN(KERN_SUCCESS);
#undef	RETURN
}
#if VM_FAULT_CLASSIFY
#define VM_FAULT_TYPES_MAX 5
#define VM_FAULT_LEVEL_MAX 8
int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX];
#define VM_FAULT_TYPE_ZERO_FILL 0
#define VM_FAULT_TYPE_MAP_IN 1
#define VM_FAULT_TYPE_PAGER 2
#define VM_FAULT_TYPE_COPY 3
#define VM_FAULT_TYPE_OTHER 4
/*
 * vm_fault_classify:
 *
 * Debug/statistics helper (compiled only under VM_FAULT_CLASSIFY): walk the
 * shadow chain starting at "object"/"offset" and classify what kind of fault
 * resolution would occur (zero-fill, map-in, pager, copy, other), bucketed by
 * the shadow-chain depth at which the page was found.  Results accumulate in
 * vm_fault_stats[type][level].
 *
 * NOTE(review): the caller is presumed to hold whatever lock makes the
 * object/shadow chain stable for the duration of the walk — confirm at
 * call sites.
 */
void
vm_fault_classify(vm_object_t object,
		  vm_object_offset_t offset,
		  vm_prot_t fault_type)
{
	int type, level = 0;
	vm_page_t m;

	while (TRUE) {
		m = vm_page_lookup(object, offset);
		if (m != VM_PAGE_NULL) {
			/* Page exists but is in a transient state: can't classify further. */
			if (m->busy || m->error || m->restart || m->absent) {
				type = VM_FAULT_TYPE_OTHER;
				break;
			}
			/*
			 * Read fault, or a write fault on the top-level object
			 * with no copy object: the page can be mapped in directly.
			 */
			if (((fault_type & VM_PROT_WRITE) == 0) ||
			    ((level == 0) && object->copy == VM_OBJECT_NULL)) {
				type = VM_FAULT_TYPE_MAP_IN;
				break;
			}
			/* Write fault that requires a copy. */
			type = VM_FAULT_TYPE_COPY;
			break;
		}
		else {
			/* No resident page: a pager would have to supply it. */
			if (object->pager_created) {
				type = VM_FAULT_TYPE_PAGER;
				break;
			}
			/* End of the shadow chain with no pager: zero-fill. */
			if (object->shadow == VM_OBJECT_NULL) {
				type = VM_FAULT_TYPE_ZERO_FILL;
				break;
			}
			/* Descend one level down the shadow chain. */
			offset += object->vo_shadow_offset;
			object = object->shadow;
			level++;
			continue;
		}
	}

	/*
	 * Clamp the depth to the last valid row index.  The second dimension
	 * of vm_fault_stats is VM_FAULT_LEVEL_MAX, so valid indices are
	 * 0 .. VM_FAULT_LEVEL_MAX-1; clamping to VM_FAULT_LEVEL_MAX (as the
	 * original ">" comparison did) would write one element out of bounds
	 * for chains of depth >= VM_FAULT_LEVEL_MAX.
	 */
	if (level >= VM_FAULT_LEVEL_MAX)
		level = VM_FAULT_LEVEL_MAX - 1;

	vm_fault_stats[type][level] += 1;

	return;
}
/*
 * vm_fault_classify_init:
 *
 * Reset every counter in the vm_fault_stats classification matrix to zero.
 */
void
vm_fault_classify_init(void)
{
	int l, t;

	for (l = 0; l < VM_FAULT_LEVEL_MAX; l++) {
		for (t = 0; t < VM_FAULT_TYPES_MAX; t++) {
			vm_fault_stats[t][l] = 0;
		}
	}
}
#endif
extern int cs_validation;
/*
 * vm_page_validate_cs_mapped:
 *
 * Validate the code signature of "page", which the caller has already mapped
 * into kernel space at "kaddr".  On return, page->cs_validated and
 * page->cs_tainted reflect the result of the check.
 *
 * Caller requirements (asserted below): the page is busy, and the page's
 * object is locked exclusively.  The object must also have an active paging
 * reference (paging_in_progress), which keeps the pager alive while we query
 * its code-signing blobs.
 */
void
vm_page_validate_cs_mapped(
vm_page_t page,
const void *kaddr)
{
vm_object_t object;
vm_object_offset_t offset;
kern_return_t kr;
memory_object_t pager;
void *blobs;
boolean_t validated, tainted;
assert(page->busy);
vm_object_lock_assert_exclusive(page->object);
/* Code-signing validation globally disabled: nothing to do. */
if (!cs_validation) {
return;
}
/*
 * The page has been mapped writable at some point (wpmapped), so its
 * contents may no longer match the signed blob.  Mark it validated AND
 * tainted rather than re-checking: once writable, it can't be trusted.
 */
if (page->wpmapped && !page->cs_tainted) {
page->cs_validated = TRUE;
page->cs_tainted = TRUE;
if (cs_debug) {
printf("CODESIGNING: vm_page_validate_cs: "
"page %p obj %p off 0x%llx "
"was modified\n",
page, page->object, page->offset);
}
vm_cs_validated_dirtied++;
}
/* Already validated (possibly just above): done. */
if (page->cs_validated) {
return;
}
vm_cs_validates++;
object = page->object;
assert(object->code_signed);
offset = page->offset;
/*
 * Object is dying or has lost its pager: we can no longer reach the
 * signature blobs, so leave the page unvalidated.
 */
if (!object->alive || object->terminating || object->pager == NULL) {
return;
}
assert(!object->internal);
assert(object->pager != NULL);
assert(object->pager_ready);
pager = object->pager;
/* Caller's paging reference keeps "pager" stable across this call. */
assert(object->paging_in_progress);
kr = vnode_pager_get_object_cs_blobs(pager, &blobs);
if (kr != KERN_SUCCESS) {
/* No blobs available; cs_validate_page is passed NULL in that case. */
blobs = NULL;
}
/* Check the mapped page contents against the signature blobs. */
validated = cs_validate_page(blobs,
offset + object->paging_offset,
(const void *)kaddr,
&tainted);
page->cs_validated = validated;
/* "tainted" is only meaningful when validation succeeded. */
if (validated) {
page->cs_tainted = tainted;
}
}
/*
 * vm_page_validate_cs:
 *
 * Validate the code signature of "page".  This is the general entry point:
 * it temporarily maps the page into kernel space, delegates the actual check
 * to vm_page_validate_cs_mapped(), then unmaps and restores the page state.
 *
 * Caller requirement (asserted below): the page's object lock is held; it
 * must be held exclusively on any path that modifies page state.
 */
void
vm_page_validate_cs(
vm_page_t page)
{
vm_object_t object;
vm_object_offset_t offset;
vm_map_offset_t koffset;
vm_map_size_t ksize;
vm_offset_t kaddr;
kern_return_t kr;
boolean_t busy_page;
vm_object_lock_assert_held(page->object);
/* Code-signing validation globally disabled: nothing to do. */
if (!cs_validation) {
return;
}
/*
 * Page was mapped writable at some point: treat it as modified and
 * mark it validated+tainted without re-checking (same policy as
 * vm_page_validate_cs_mapped).
 */
if (page->wpmapped && !page->cs_tainted) {
vm_object_lock_assert_exclusive(page->object);
page->cs_validated = TRUE;
page->cs_tainted = TRUE;
if (cs_debug) {
printf("CODESIGNING: vm_page_validate_cs: "
"page %p obj %p off 0x%llx "
"was modified\n",
page, page->object, page->offset);
}
vm_cs_validated_dirtied++;
}
/* Already validated (possibly just above): done. */
if (page->cs_validated) {
return;
}
#if CHECK_CS_VALIDATION_BITMAP
/*
 * Fast path: the pager's validation bitmap says this page was already
 * validated clean; trust it and skip the expensive map+check.
 */
if ( vnode_pager_cs_check_validation_bitmap( page->object->pager, trunc_page(page->offset + page->object->paging_offset), CS_BITMAP_CHECK ) == KERN_SUCCESS) {
page->cs_validated = TRUE;
page->cs_tainted = FALSE;
vm_cs_bitmap_validated++;
return;
}
#endif
vm_object_lock_assert_exclusive(page->object);
object = page->object;
assert(object->code_signed);
offset = page->offset;
/* Mark the page busy (if not already) so it can't change under us. */
busy_page = page->busy;
if (!busy_page) {
page->busy = TRUE;
}
/* Take a paging reference so the object's pager stays alive. */
vm_object_paging_begin(object);
/* Map the page into kernel space for inspection. */
koffset = 0;
ksize = PAGE_SIZE_64;
kr = vm_paging_map_object(&koffset,
page,
object,
offset,
&ksize,
VM_PROT_READ,
FALSE);
if (kr != KERN_SUCCESS) {
panic("vm_page_validate_cs: could not map page: 0x%x\n", kr);
}
kaddr = CAST_DOWN(vm_offset_t, koffset);
/* Perform the actual signature check on the mapped contents. */
vm_page_validate_cs_mapped(page, (const void *) kaddr);
#if CHECK_CS_VALIDATION_BITMAP
/* Record a clean validation in the pager's bitmap for future fast paths. */
if ( page->cs_validated == TRUE && page->cs_tainted == FALSE ) {
vnode_pager_cs_check_validation_bitmap( object->pager, trunc_page( offset + object->paging_offset), CS_BITMAP_SET );
}
#endif
assert(page->busy);
assert(object == page->object);
vm_object_lock_assert_exclusive(object);
/* Only wake waiters if we were the ones who set the busy bit. */
if (!busy_page) {
PAGE_WAKEUP_DONE(page);
}
/* Tear down the temporary kernel mapping, if one was created. */
if (koffset != 0) {
vm_paging_unmap_object(object, koffset, koffset + ksize);
koffset = 0;
ksize = 0;
kaddr = 0;
}
vm_object_paging_end(object);
}