#include <libkern/libkern.h>
#include <mach/mach_types.h>
#include <mach/task.h>
#include <sys/proc_internal.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <kern/locks.h>
#include <sys/queue.h>
#include <kern/vm_pressure.h>
#include <sys/malloc.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <vm/vm_pageout.h>
#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif
#define VM_PRESSURE_MINIMUM_RSIZE 10 /* MB of resident/footprint size required to be a notification candidate */
#define VM_PRESSURE_NOTIFY_WAIT_PERIOD 10000 /* minimum ms between notifications to the same process */
void vm_pressure_klist_lock(void);
void vm_pressure_klist_unlock(void);
static void vm_dispatch_memory_pressure(void);
void vm_reset_active_list(void);
#if CONFIG_MEMORYSTATUS
static kern_return_t vm_try_pressure_candidates(boolean_t target_foreground_process);
#endif
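/*
 * Registered knotes waiting for a pressure event live on vm_pressure_klist.
 * Once a process has been notified, its knote is parked on the dormant list
 * until vm_reset_active_list() moves everything back to the active list.
 */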
static lck_mtx_t vm_pressure_klist_mutex;
struct klist vm_pressure_klist;
struct klist vm_pressure_klist_dormant;
#if DEBUG
#define VM_PRESSURE_DEBUG(cond, format, ...) \
do { \
if (cond) { printf(format, ##__VA_ARGS__); } \
} while(0)
#else
#define VM_PRESSURE_DEBUG(cond, format, ...)
#endif
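/* Initialize the mutex that protects both pressure klists. */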
void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr) {
lck_mtx_init(&vm_pressure_klist_mutex, grp, attr);
}
void vm_pressure_klist_lock(void) {
lck_mtx_lock(&vm_pressure_klist_mutex);
}
void vm_pressure_klist_unlock(void) {
lck_mtx_unlock(&vm_pressure_klist_mutex);
}
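/*
 * Attach a knote to the active pressure list.  Only NOTE_VM_PRESSURE
 * registrations are accepted; anything else returns ENOTSUP.
 */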
int vm_knote_register(struct knote *kn) {
int rv = 0;
vm_pressure_klist_lock();
if ((kn->kn_sfflags) & (NOTE_VM_PRESSURE)) {
KNOTE_ATTACH(&vm_pressure_klist, kn);
} else {
rv = ENOTSUP;
}
vm_pressure_klist_unlock();
return rv;
}
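/* Detach a knote from whichever list (active or dormant) currently holds it. */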
void vm_knote_unregister(struct knote *kn) {
struct knote *kn_temp;
vm_pressure_klist_lock();
VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);
SLIST_FOREACH(kn_temp, &vm_pressure_klist, kn_selnext) {
if (kn_temp == kn) {
KNOTE_DETACH(&vm_pressure_klist, kn);
vm_pressure_klist_unlock();
return;
}
}
SLIST_FOREACH(kn_temp, &vm_pressure_klist_dormant, kn_selnext) {
if (kn_temp == kn) {
KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
vm_pressure_klist_unlock();
return;
}
}
vm_pressure_klist_unlock();
}
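/* Called on process exit: remove the exiting process's pressure knote, if any. */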
void vm_pressure_proc_cleanup(proc_t p)
{
struct knote *kn = NULL;
vm_pressure_klist_lock();
VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d exiting pressure notification\n", p->p_pid);
SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
if (kn->kn_kq->kq_p == p) {
KNOTE_DETACH(&vm_pressure_klist, kn);
vm_pressure_klist_unlock();
return;
}
}
SLIST_FOREACH(kn, &vm_pressure_klist_dormant, kn_selnext) {
if (kn->kn_kq->kq_p == p) {
KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
vm_pressure_klist_unlock();
return;
}
}
vm_pressure_klist_unlock();
}
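/* Trigger a round of pressure notifications; the work is deferred to the memorystatus machinery. */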
void consider_vm_pressure_events(void)
{
vm_dispatch_memory_pressure();
}
#if CONFIG_MEMORYSTATUS
struct knote *vm_find_knote_from_pid(pid_t, struct klist *);
struct knote *vm_find_knote_from_pid(pid_t pid, struct klist *list) {
struct knote *kn = NULL;
SLIST_FOREACH(kn, list, kn_selnext) {
struct proc *p;
pid_t current_pid;
p = kn->kn_kq->kq_p;
current_pid = p->p_pid;
if (current_pid == pid) {
break;
}
}
return kn;
}
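/*
 * Deliver a pressure note directly to a single pid.  Returns EINVAL if the
 * pid has no knote on either list.  'locked' indicates the caller already
 * holds the klist mutex.
 */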
int vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked) {
int ret = EINVAL;
struct knote *kn;
VM_PRESSURE_DEBUG(1, "vm_dispatch_pressure_note_to_pid(): pid %d\n", pid);
if (!locked) {
vm_pressure_klist_lock();
}
kn = vm_find_knote_from_pid(pid, &vm_pressure_klist);
if (kn) {
KNOTE(&vm_pressure_klist, pid);
ret = 0;
} else {
kn = vm_find_knote_from_pid(pid, &vm_pressure_klist_dormant);
if (kn) {
KNOTE(&vm_pressure_klist_dormant, pid);
ret = 0;
}
}
if (!locked) {
vm_pressure_klist_unlock();
}
return ret;
}
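/*
 * Notify every registered process that memorystatus considers foreground,
 * moving each notified knote to the dormant list.
 */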
void vm_find_pressure_foreground_candidates(void)
{
struct knote *kn, *kn_tmp;
struct klist dispatch_klist = { NULL };
vm_pressure_klist_lock();
proc_list_lock();
SLIST_FOREACH_SAFE(kn, &vm_pressure_klist, kn_selnext, kn_tmp) {
proc_t p = kn->kn_kq->kq_p;
if (memorystatus_is_foreground_locked(p)) {
KNOTE_DETACH(&vm_pressure_klist, kn);
KNOTE_ATTACH(&dispatch_klist, kn);
}
}
SLIST_FOREACH_SAFE(kn, &vm_pressure_klist_dormant, kn_selnext, kn_tmp) {
proc_t p = kn->kn_kq->kq_p;
if (memorystatus_is_foreground_locked(p)) {
KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
KNOTE_ATTACH(&dispatch_klist, kn);
}
}
proc_list_unlock();
SLIST_FOREACH_SAFE(kn, &dispatch_klist, kn_selnext, kn_tmp) {
proc_t p = kn->kn_kq->kq_p;
proc_list_lock();
if (p != proc_ref_locked(p)) {
proc_list_unlock();
KNOTE_DETACH(&dispatch_klist, kn);
KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
continue;
}
proc_list_unlock();
VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d\n", kn->kn_kq->kq_p->p_pid);
KNOTE(&dispatch_klist, p->p_pid);
KNOTE_DETACH(&dispatch_klist, kn);
KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
microuptime(&p->vm_pressure_last_notify_tstamp);
memorystatus_send_pressure_note(p->p_pid);
proc_rele(p);
}
vm_pressure_klist_unlock();
}
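/*
 * Pick the candidate with the largest resident size (at least
 * VM_PRESSURE_MINIMUM_RSIZE MB, not notified within the last
 * VM_PRESSURE_NOTIFY_WAIT_PERIOD ms, and eligible per
 * memorystatus_bg_pressure_eligible()) and notify it.
 */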
void vm_find_pressure_candidate(void)
{
struct knote *kn = NULL, *kn_max = NULL;
unsigned int resident_max = 0;
pid_t target_pid = -1;
struct klist dispatch_klist = { NULL };
struct timeval curr_tstamp = {0, 0};
int elapsed_msecs = 0;
proc_t target_proc = PROC_NULL;
kern_return_t kr = KERN_SUCCESS;
microuptime(&curr_tstamp);
vm_pressure_klist_lock();
SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
struct mach_task_basic_info basic_info;
mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT;
unsigned int resident_size = 0;
proc_t p = PROC_NULL;
struct task* t = TASK_NULL;
p = kn->kn_kq->kq_p;
proc_list_lock();
if (p != proc_ref_locked(p)) {
p = PROC_NULL;
proc_list_unlock();
continue;
}
proc_list_unlock();
t = (struct task *)(p->task);
/* Compute the elapsed time into a scratch copy so curr_tstamp is not consumed by timevalsub() across iterations. */
struct timeval since_notify_tstamp = curr_tstamp;
timevalsub(&since_notify_tstamp, &p->vm_pressure_last_notify_tstamp);
elapsed_msecs = since_notify_tstamp.tv_sec * 1000 + since_notify_tstamp.tv_usec / 1000;
if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) {
proc_rele(p);
continue;
}
if (!memorystatus_bg_pressure_eligible(p)) {
VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
proc_rele(p);
continue;
}
if ((kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS) {
VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed\n", p->p_pid);
proc_rele(p);
continue;
}
resident_size = (basic_info.resident_size) / (1024 * 1024); /* bytes -> MB */
if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
if (resident_size > resident_max) {
resident_max = resident_size;
kn_max = kn;
target_pid = p->p_pid;
target_proc = p;
}
} else {
VM_PRESSURE_DEBUG(1, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
}
proc_rele(p);
}
if (kn_max == NULL || target_pid == -1) {
VM_PRESSURE_DEBUG(1, "[vm_pressure] - no target found!\n");
goto exit;
}
VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
KNOTE_DETACH(&vm_pressure_klist, kn_max);
target_proc = proc_find(target_pid);
if (target_proc != PROC_NULL) {
KNOTE_ATTACH(&dispatch_klist, kn_max);
KNOTE(&dispatch_klist, target_pid);
KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
memorystatus_send_pressure_note(target_pid);
microuptime(&target_proc->vm_pressure_last_notify_tstamp);
proc_rele(target_proc);
}
exit:
vm_pressure_klist_unlock();
}
#endif
struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process);
kern_return_t vm_pressure_notification_without_levels(boolean_t target_foreground_process);
kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process);
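/*
 * Try to notify one candidate from the active list.  If the active list is
 * empty, it is first recharged from the dormant list.
 */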
kern_return_t
vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process)
{
vm_pressure_klist_lock();
if (SLIST_EMPTY(&vm_pressure_klist)) {
vm_reset_active_list();
}
if (!SLIST_EMPTY(&vm_pressure_klist)) {
VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n");
if (KERN_SUCCESS == vm_try_pressure_candidates(target_foreground_process)) {
vm_pressure_klist_unlock();
return KERN_SUCCESS;
}
}
VM_PRESSURE_DEBUG(1, "[vm_pressure] could not find suitable event candidate\n");
vm_pressure_klist_unlock();
return KERN_FAILURE;
}
static void vm_dispatch_memory_pressure(void)
{
memorystatus_update_vm_pressure(FALSE);
}
extern vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);
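/*
 * Choose the best knote to notify for the given pressure level.
 * level > 0: importance-driven; the least-important task wins while pressure
 *   is rising, the most-important task wins while it is easing, with resident
 *   footprint breaking ties; tasks already notified at this level are skipped.
 * level == 0: the most-important task that was previously told about warning
 *   or critical pressure, so it can learn that pressure has subsided.
 * level == -1: the largest footprint above the minimum wins; no level
 *   filtering, but the per-process wait period applies.
 */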
struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process)
{
struct knote *kn = NULL, *kn_max = NULL;
unsigned int resident_max = 0;
struct timeval curr_tstamp = {0, 0};
int elapsed_msecs = 0;
int selected_task_importance = 0;
static int pressure_snapshot = -1;
boolean_t pressure_increase = FALSE;
if (level != -1) {
if (pressure_snapshot == -1) {
pressure_snapshot = level;
pressure_increase = TRUE;
} else {
if (level >= pressure_snapshot) {
pressure_increase = TRUE;
} else {
pressure_increase = FALSE;
}
pressure_snapshot = level;
}
}
if ((level > 0) && (pressure_increase == TRUE)) {
selected_task_importance = INT_MAX;
} else {
selected_task_importance = 0;
}
microuptime(&curr_tstamp);
SLIST_FOREACH(kn, candidate_list, kn_selnext) {
unsigned int resident_size = 0;
proc_t p = PROC_NULL;
struct task* t = TASK_NULL;
int curr_task_importance = 0;
boolean_t consider_knote = FALSE;
p = kn->kn_kq->kq_p;
proc_list_lock();
if (p != proc_ref_locked(p)) {
p = PROC_NULL;
proc_list_unlock();
continue;
}
proc_list_unlock();
#if CONFIG_MEMORYSTATUS
if (target_foreground_process == TRUE && !memorystatus_is_foreground_locked(p)) {
proc_rele(p);
continue;
}
#endif
t = (struct task *)(p->task);
/* As in vm_find_pressure_candidate(), use a scratch copy so curr_tstamp survives across iterations. */
struct timeval since_notify_tstamp = curr_tstamp;
timevalsub(&since_notify_tstamp, &p->vm_pressure_last_notify_tstamp);
elapsed_msecs = since_notify_tstamp.tv_sec * 1000 + since_notify_tstamp.tv_usec / 1000;
if ((level == -1) && (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD)) {
proc_rele(p);
continue;
}
if (level != -1) {
vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level);
if ((kn->kn_sfflags & dispatch_level) == 0) {
proc_rele(p);
continue;
}
}
#if CONFIG_MEMORYSTATUS
if (target_foreground_process == FALSE && !memorystatus_bg_pressure_eligible(p)) {
VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
proc_rele(p);
continue;
}
#endif
curr_task_importance = task_importance_estimate(t);
resident_size = (get_task_phys_footprint(t)) / (1024 * 1024ULL); /* bytes -> MB */
if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
if (level > 0) {
if (pressure_increase) {
if ((curr_task_importance < selected_task_importance) ||
((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
if (task_has_been_notified(t, level) == FALSE) {
consider_knote = TRUE;
}
}
} else {
if ((curr_task_importance > selected_task_importance) ||
((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
if (task_has_been_notified(t, level) == FALSE) {
consider_knote = TRUE;
}
}
}
} else if (level == 0) {
if ((curr_task_importance > selected_task_importance) ||
((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
if ((task_has_been_notified(t, kVMPressureWarning) == TRUE) || (task_has_been_notified(t, kVMPressureCritical) == TRUE)) {
consider_knote = TRUE;
}
}
} else if (level == -1) {
if (resident_size > resident_max) {
consider_knote = TRUE;
}
}
if (consider_knote) {
resident_max = resident_size;
kn_max = kn;
selected_task_importance = curr_task_importance;
consider_knote = FALSE;
}
} else {
VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
}
proc_rele(p);
}
if (kn_max) {
VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
}
return kn_max;
}
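/*
 * Level-agnostic notification: pick one candidate (falling back to the
 * dormant list when targeting foreground processes), send the event, and
 * park the knote on the dormant list.
 */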
kern_return_t vm_pressure_notification_without_levels(boolean_t target_foreground_process)
{
struct knote *kn_max = NULL;
pid_t target_pid = -1;
struct klist dispatch_klist = { NULL };
proc_t target_proc = PROC_NULL;
struct klist *candidate_list = NULL;
candidate_list = &vm_pressure_klist;
kn_max = vm_pressure_select_optimal_candidate_to_notify(candidate_list, -1, target_foreground_process);
if (kn_max == NULL) {
if (target_foreground_process) {
candidate_list = &vm_pressure_klist_dormant;
kn_max = vm_pressure_select_optimal_candidate_to_notify(candidate_list, -1, target_foreground_process);
}
if (kn_max == NULL) {
return KERN_FAILURE;
}
}
target_proc = kn_max->kn_kq->kq_p;
KNOTE_DETACH(candidate_list, kn_max);
if (target_proc != PROC_NULL) {
target_pid = target_proc->p_pid;
memoryshot(VM_PRESSURE_EVENT, DBG_FUNC_NONE);
KNOTE_ATTACH(&dispatch_klist, kn_max);
KNOTE(&dispatch_klist, target_pid);
KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
#if CONFIG_MEMORYSTATUS
memorystatus_send_pressure_note(target_pid);
#endif
microuptime(&target_proc->vm_pressure_last_notify_tstamp);
}
return KERN_SUCCESS;
}
static kern_return_t vm_try_pressure_candidates(boolean_t target_foreground_process)
{
return (vm_pressure_notification_without_levels(target_foreground_process));
}
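/* Move every dormant knote back onto the active list so all registrants become eligible again. */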
void vm_reset_active_list(void) {
if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
struct knote *kn;
VM_PRESSURE_DEBUG(1, "[vm_pressure] recharging main list from dormant list\n");
while (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
kn = SLIST_FIRST(&vm_pressure_klist_dormant);
SLIST_REMOVE_HEAD(&vm_pressure_klist_dormant, kn_selnext);
SLIST_INSERT_HEAD(&vm_pressure_klist, kn, kn_selnext);
}
}
}