/*
 * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 * 
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/upl.h>

#include <kern/ipc_kobject.h>
#include <kern/kalloc.h>
#include <kern/queue.h>

#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>


/* 
 * APPLE SWAPFILE MEMORY PAGER 
 *
 * This external memory manager (EMM) handles mappings of the swap files.
 * Swap files are not regular files and are used solely to store contents of
 * anonymous memory mappings while not resident in memory.
 * There's no valid reason to map a swap file.  This just puts extra burden
 * on the system, is potentially a security issue and is not reliable since
 * the contents can change at any time with pageout operations.
 * Here are some of the issues with mapping a swap file.
 * * PERFORMANCE:
 *   Each page in the swap file belongs to an anonymous memory object. Mapping
 *   the swap file makes those pages also accessible via a vnode memory
 *   object and each page can now be resident twice.
 * * SECURITY:
 *   Mapping a swap file allows access to other processes' memory.  Swap files
 *   are only accessible by the "root" super-user, who can already access any
 *   process's memory, so this is not a real issue, but if the permissions on
 *   the swap file were ever changed, it could become one.
 *   Swap files are not "zero-filled" on creation, so until their contents are
 *   overwritten with pageout operations, they still contain whatever was on
 *   the disk blocks they were allocated from.  The "super-user" could see the
 *   contents of free blocks anyway, so this is not a new security issue, but
 *   it may be perceived as one.
 * * ENCRYPTED SWAP:
 *   When swap is encrypted, one does not expect to find any clear contents
 *   in the swap files.  Since unused blocks are not scrubbed, they could still
 *   contain clear contents.  If these contents are visible through a mapping
 *   of the swap file, it makes it look like swap is not really encrypted.
 *   
 * We can't legitimately prevent a user process with appropriate privileges
 * from mapping a swap file, but we can prevent it from accessing its actual
 * contents.
 * This pager mostly handles page-in requests (from memory_object_data_request())
 * for swap file mappings and just returns bogus data.
 * Pageouts are not handled, so mmap() has to make sure it does not allow
 * writable (i.e. MAP_SHARED and PROT_WRITE) mappings of swap files.
 */
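
/*
 * Illustration only -- the enforcement described above lives in the
 * mmap() path (in the BSD layer), not in this pager.  A minimal sketch,
 * assuming a vnode_isswap()-style predicate for "vp is a swap file":
 *
 *	if (vnode_isswap(vp) &&
 *	    (flags & MAP_SHARED) &&
 *	    (prot & PROT_WRITE)) {
 *		return EPERM;
 *	}
 *
 * Read-only mappings are allowed to proceed and end up backed by this
 * pager, which supplies bogus zero-filled pages instead of the swap
 * file's real contents.
 */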

/* forward declarations */
void swapfile_pager_reference(memory_object_t mem_obj);
void swapfile_pager_deallocate(memory_object_t mem_obj);
kern_return_t swapfile_pager_init(memory_object_t mem_obj,
				       memory_object_control_t control,
				       memory_object_cluster_size_t pg_size);
kern_return_t swapfile_pager_terminate(memory_object_t mem_obj);
kern_return_t swapfile_pager_data_request(memory_object_t mem_obj,
					       memory_object_offset_t offset,
					       memory_object_cluster_size_t length,
					       vm_prot_t protection_required,
					       memory_object_fault_info_t fault_info);
kern_return_t swapfile_pager_data_return(memory_object_t mem_obj,
					      memory_object_offset_t offset,
					      memory_object_cluster_size_t	data_cnt,
					      memory_object_offset_t *resid_offset,
					      int *io_error,
					      boolean_t dirty,
					      boolean_t kernel_copy,
					      int upl_flags);
kern_return_t swapfile_pager_data_initialize(memory_object_t mem_obj,
						  memory_object_offset_t offset,
						  memory_object_cluster_size_t data_cnt);
kern_return_t swapfile_pager_data_unlock(memory_object_t mem_obj,
					      memory_object_offset_t offset,
					      memory_object_size_t size,
					      vm_prot_t desired_access);
kern_return_t swapfile_pager_synchronize(memory_object_t mem_obj,
					      memory_object_offset_t offset,
					      memory_object_size_t length,
					      vm_sync_t sync_flags);
kern_return_t swapfile_pager_map(memory_object_t mem_obj,
				      vm_prot_t prot);
kern_return_t swapfile_pager_last_unmap(memory_object_t mem_obj);

/*
 * Vector of VM operations for this EMM.
 * These routines are invoked by VM via the memory_object_*() interfaces.
 */
const struct memory_object_pager_ops swapfile_pager_ops = {
	swapfile_pager_reference,
	swapfile_pager_deallocate,
	swapfile_pager_init,
	swapfile_pager_terminate,
	swapfile_pager_data_request,
	swapfile_pager_data_return,
	swapfile_pager_data_initialize,
	swapfile_pager_data_unlock,
	swapfile_pager_synchronize,
	swapfile_pager_map,
	swapfile_pager_last_unmap,
	NULL, /* data_reclaim */
	"swapfile pager"
};
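
/*
 * For context: the VM layer reaches these routines by dispatching
 * through the pager-ops vector of the memory object.  Roughly, a
 * memory_object_*() wrapper in osfmk/vm/memory_object.c does:
 *
 *	return (mem_obj->mo_pager_ops->memory_object_data_request)(
 *		mem_obj, offset, length, desired_access, fault_info);
 *
 * so the entries above are positional and must match the
 * memory_object_pager_ops layout; a NULL entry (like "data_reclaim"
 * here) marks an operation this pager does not implement.
 */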

/*
 * The "swapfile_pager" describes a memory object backed by
 * the "swapfile" EMM.
 */
typedef struct swapfile_pager {
	struct ipc_object_header pager_header;	/* fake ip_kotype() */
	memory_object_pager_ops_t pager_ops;	/* == &swapfile_pager_ops */
	queue_chain_t		pager_queue;	/* next & prev pagers */
	unsigned int		ref_count;	/* reference count */
	boolean_t		is_ready;	/* is this pager ready ? */
	boolean_t		is_mapped;	/* is this pager mapped ? */
	memory_object_control_t pager_control;	/* mem object control handle */
	struct vnode 		*swapfile_vnode;/* the swapfile's vnode */
} *swapfile_pager_t;
#define	SWAPFILE_PAGER_NULL	((swapfile_pager_t) NULL)
#define pager_ikot pager_header.io_bits	/* fake ip_kotype(); see swapfile_pager_create() */

/*
 * List of memory objects managed by this EMM.
 * The list is protected by the "swapfile_pager_lock" lock.
 */
int swapfile_pager_count = 0;		/* number of pagers */
queue_head_t swapfile_pager_queue;
decl_lck_mtx_data(,swapfile_pager_lock)

/*
 * Statistics & counters.
 */
int swapfile_pager_count_max = 0;


lck_grp_t		swapfile_pager_lck_grp;
lck_grp_attr_t		swapfile_pager_lck_grp_attr;
lck_attr_t		swapfile_pager_lck_attr;


/* internal prototypes */
swapfile_pager_t swapfile_pager_create(struct vnode *vp);
swapfile_pager_t swapfile_pager_lookup(memory_object_t mem_obj);
void swapfile_pager_dequeue(swapfile_pager_t pager);
void swapfile_pager_deallocate_internal(swapfile_pager_t pager,
					boolean_t locked);
void swapfile_pager_terminate_internal(swapfile_pager_t pager);


#if DEBUG
int swapfile_pagerdebug = 0;
#define PAGER_ALL		0xffffffff
#define	PAGER_INIT		0x00000001
#define	PAGER_PAGEIN		0x00000002

#define PAGER_DEBUG(LEVEL, A)						\
	MACRO_BEGIN							\
	if ((swapfile_pagerdebug & LEVEL) == LEVEL) {		\
		printf A;						\
	}								\
	MACRO_END
#else
#define PAGER_DEBUG(LEVEL, A)
#endif
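
/*
 * Usage sketch (DEBUG kernels only): set "swapfile_pagerdebug" -- e.g.
 * from the kernel debugger -- to a mask of the PAGER_* bits above.  A
 * call such as
 *
 *	PAGER_DEBUG(PAGER_PAGEIN, ("page-in of %d bytes\n", length));
 *
 * prints only when all the bits of its level are set in the mask; on
 * non-DEBUG kernels the macro compiles away entirely.
 */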


void
swapfile_pager_bootstrap(void)
{
	lck_grp_attr_setdefault(&swapfile_pager_lck_grp_attr);
	lck_grp_init(&swapfile_pager_lck_grp, "swapfile pager", &swapfile_pager_lck_grp_attr);
	lck_attr_setdefault(&swapfile_pager_lck_attr);
	lck_mtx_init(&swapfile_pager_lock, &swapfile_pager_lck_grp, &swapfile_pager_lck_attr);
	queue_init(&swapfile_pager_queue);
}

/*
 * swapfile_pager_init()
 *
 * Initialize the memory object and make it ready to be used and mapped.
 */
kern_return_t
swapfile_pager_init(
	memory_object_t		mem_obj, 
	memory_object_control_t	control, 
#if !DEBUG
	__unused
#endif
	memory_object_cluster_size_t pg_size)
{
	swapfile_pager_t	pager;
	kern_return_t   	kr;
	memory_object_attr_info_data_t  attributes;

	PAGER_DEBUG(PAGER_ALL,
		    ("swapfile_pager_init: %p, %p, %x\n",
		     mem_obj, control, pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	pager = swapfile_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	pager->pager_control = control;

	/* the pager's bogus pages need not be cached or preserved */
	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = FALSE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("swapfile_pager_init: "
		      "memory_object_change_attributes() failed");

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_data_return()
 *
 * Handles page-out requests from VM.  This should never happen since
 * the pages provided by this EMM are not supposed to be dirty or dirtied
 * and VM should simply discard the contents and reclaim the pages if it
 * needs to.
 */
kern_return_t
swapfile_pager_data_return(
        __unused memory_object_t	mem_obj,
        __unused memory_object_offset_t	offset,
        __unused memory_object_cluster_size_t		data_cnt,
        __unused memory_object_offset_t	*resid_offset,
	__unused int			*io_error,
	__unused boolean_t		dirty,
	__unused boolean_t		kernel_copy,
	__unused int			upl_flags)  
{
	panic("swapfile_pager_data_return: should never get called");
	return KERN_FAILURE;
}

kern_return_t
swapfile_pager_data_initialize(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_cluster_size_t		data_cnt)
{
	panic("swapfile_pager_data_initialize: should never get called");
	return KERN_FAILURE;
}

kern_return_t
swapfile_pager_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t		size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}

/*
 * swapfile_pager_data_request()
 *
 * Handles page-in requests from VM.
 */
kern_return_t	
swapfile_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t		length,
#if !DEBUG
	__unused
#endif
	vm_prot_t		protection_required,
	__unused memory_object_fault_info_t mo_fault_info)
{
	swapfile_pager_t	pager;
	memory_object_control_t	mo_control;
	upl_t			upl;
	int			upl_flags;
	upl_size_t		upl_size;
	upl_page_info_t		*upl_pl = NULL;
	unsigned int		pl_count;
	vm_object_t		dst_object;
	kern_return_t		kr, retval;
	vm_map_offset_t		kernel_mapping;
	vm_offset_t		dst_vaddr;
	char			*dst_ptr;
	vm_offset_t		cur_offset;
	vm_map_entry_t		map_entry;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_data_request: %p, %llx, %x, %x\n", mem_obj, offset, length, protection_required));

	kernel_mapping = 0;
	upl = NULL;
	upl_pl = NULL;

	pager = swapfile_pager_lookup(mem_obj);
	assert(pager->is_ready);
	assert(pager->ref_count > 1); /* pager is alive and mapped */

	PAGER_DEBUG(PAGER_PAGEIN, ("swapfile_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager));

	/*
	 * Gather in a UPL all the VM pages requested by VM.
	 */
	mo_control = pager->pager_control;

	upl_size = length;
	upl_flags =
		UPL_RET_ONLY_ABSENT |
		UPL_SET_LITE |
		UPL_NO_SYNC |
		UPL_CLEAN_IN_PLACE |	/* triggers UPL_CLEAR_DIRTY */
		UPL_SET_INTERNAL;
	pl_count = 0;
	kr = memory_object_upl_request(mo_control,
				       offset, upl_size,
				       &upl, NULL, NULL, upl_flags);
	if (kr != KERN_SUCCESS) {
		retval = kr;
		goto done;
	}
	dst_object = mo_control->moc_object;
	assert(dst_object != VM_OBJECT_NULL);


	/*
	 * Reserve a virtual page in the kernel address space to map each
	 * destination physical page when it's its turn to be processed.
	 */
	vm_object_reference(kernel_object);	/* ref. for mapping */
	kr = vm_map_find_space(kernel_map,
			       &kernel_mapping,
			       PAGE_SIZE_64,
			       0,
			       0,
			       &map_entry);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(kernel_object);
		retval = kr;
		goto done;
	}
	map_entry->object.vm_object = kernel_object;
	map_entry->offset = kernel_mapping - VM_MIN_KERNEL_ADDRESS;
	vm_map_unlock(kernel_map);
	dst_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping);
	dst_ptr = (char *) dst_vaddr;

	/*
	 * Fill in the contents of the pages requested by VM.
	 */
	upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
	pl_count = length / PAGE_SIZE;
	for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) {
		ppnum_t dst_pnum;

		if (!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) {
			/* this page is not in the UPL: skip it */
			continue;
		}

		/*
		 * Establish an explicit pmap mapping of the destination
		 * physical page.
		 * We can't do a regular VM mapping because the VM page
		 * is "busy".
		 */
		dst_pnum = (ppnum_t)
			upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE));
		assert(dst_pnum != 0);
		pmap_enter(kernel_pmap,
			   kernel_mapping,
			   dst_pnum,
			   VM_PROT_READ | VM_PROT_WRITE,
			   VM_PROT_NONE,
			   0,
			   TRUE);

		memset(dst_ptr, '\0', PAGE_SIZE);
		/* add an end-of-line to keep line counters happy */
		dst_ptr[PAGE_SIZE-1] = '\n';
		
		/*
		 * Remove the pmap mapping of the destination page
		 * in the kernel.
		 */
		pmap_remove(kernel_pmap,
			    (addr64_t) kernel_mapping,
			    (addr64_t) (kernel_mapping + PAGE_SIZE_64));

	}

	retval = KERN_SUCCESS;
done:
	if (upl != NULL) {
		/* clean up the UPL */

		/*
		 * The pages are currently dirty because we've just been
		 * writing on them, but as far as we're concerned, they're
		 * clean since they contain their "original" contents as
		 * provided by us, the pager.
		 * Tell the UPL to mark them "clean".
		 */
		upl_clear_dirty(upl, TRUE);

		/* abort or commit the UPL */
		if (retval != KERN_SUCCESS) {
			upl_abort(upl, 0);
		} else {
			boolean_t empty;
			upl_commit_range(upl, 0, upl->size, 
					 UPL_COMMIT_CS_VALIDATED,
					 upl_pl, pl_count, &empty);
		}

		/* and deallocate the UPL */
		upl_deallocate(upl);
		upl = NULL;
	}
	if (kernel_mapping != 0) {
		/* clean up the mapping of the source and destination pages */
		kr = vm_map_remove(kernel_map,
				   kernel_mapping,
				   kernel_mapping + PAGE_SIZE_64,
				   VM_MAP_NO_FLAGS);
		assert(kr == KERN_SUCCESS);
		kernel_mapping = 0;
		dst_vaddr = 0;
	}

	return retval;
}

/*
 * swapfile_pager_reference()
 *
 * Get a reference on this memory object.
 * For external usage only.  Assumes that the initial reference count is not 0,
 * i.e. one should not "revive" a dead pager this way.
 */
void
swapfile_pager_reference(
	memory_object_t		mem_obj)
{	
	swapfile_pager_t	pager;

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	assert(pager->ref_count > 0);
	pager->ref_count++;
	lck_mtx_unlock(&swapfile_pager_lock);
}


/*
 * swapfile_pager_dequeue:
 *
 * Removes a pager from the list of pagers.
 *
 * The caller must hold "swapfile_pager_lock".
 */
void
swapfile_pager_dequeue(
	swapfile_pager_t pager)
{
	assert(!pager->is_mapped);

	queue_remove(&swapfile_pager_queue,
		     pager,
		     swapfile_pager_t,
		     pager_queue);
	pager->pager_queue.next = NULL;
	pager->pager_queue.prev = NULL;
	
	swapfile_pager_count--;
}

/*
 * swapfile_pager_terminate_internal:
 *
 * Trigger the asynchronous termination of the memory object associated
 * with this pager.
 * When the memory object is terminated, there will be one more call
 * to memory_object_deallocate() (i.e. swapfile_pager_deallocate())
 * to finish the clean up.
 *
 * "swapfile_pager_lock" should not be held by the caller.
 * We don't need the lock because the pager has already been removed from
 * the pagers' list and is now ours exclusively.
 */
void
swapfile_pager_terminate_internal(
	swapfile_pager_t pager)
{
	assert(pager->is_ready);
	assert(!pager->is_mapped);

	if (pager->swapfile_vnode != NULL) {
		pager->swapfile_vnode = NULL;
	}

	/* trigger the destruction of the memory object */
	memory_object_destroy(pager->pager_control, 0);
}

/*
 * swapfile_pager_deallocate_internal()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 * Can be called with swapfile_pager_lock held or not but always returns
 * with it unlocked.
 */
void
swapfile_pager_deallocate_internal(
	swapfile_pager_t	pager,
	boolean_t		locked)
{
	if (! locked) {
		lck_mtx_lock(&swapfile_pager_lock);
	}

	/* drop a reference on this pager */
	pager->ref_count--;

	if (pager->ref_count == 1) {
		/*
		 * Only the "named" reference is left, which means that
		 * no one is really holding on to this pager anymore.
		 * Terminate it.
		 */
		swapfile_pager_dequeue(pager);
		/* the pager is all ours: no need for the lock now */
		lck_mtx_unlock(&swapfile_pager_lock);
		swapfile_pager_terminate_internal(pager);
	} else if (pager->ref_count == 0) {
		/*
		 * Dropped the existence reference;  the memory object has
		 * been terminated.  Do some final cleanup and release the
		 * pager structure.
		 */
		lck_mtx_unlock(&swapfile_pager_lock);
		if (pager->pager_control != MEMORY_OBJECT_CONTROL_NULL) {
			memory_object_control_deallocate(pager->pager_control);
			pager->pager_control = MEMORY_OBJECT_CONTROL_NULL;
		}
		kfree(pager, sizeof (*pager));
		pager = SWAPFILE_PAGER_NULL;
	} else {
		/* there are still plenty of references:  keep going... */
		lck_mtx_unlock(&swapfile_pager_lock);
	}

	/* caution: lock is not held on return... */
}
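
/*
 * Reference-count states, as handled above (sketch):
 *
 *	ref_count >  1	pager still in use: just drop the reference
 *	ref_count == 1	only the "named" reference from
 *			memory_object_create_named() is left: dequeue
 *			the pager and trigger the termination of its
 *			memory object
 *	ref_count == 0	termination completed: release the control
 *			handle and free the pager structure
 */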

/*
 * swapfile_pager_deallocate()
 *
 * Release a reference on this pager and free it when the last
 * reference goes away.
 */
void
swapfile_pager_deallocate(
	memory_object_t		mem_obj)
{
	swapfile_pager_t	pager;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_deallocate: %p\n", mem_obj));
	pager = swapfile_pager_lookup(mem_obj);
	swapfile_pager_deallocate_internal(pager, FALSE);
}

/*
 * swapfile_pager_terminate()
 *
 * Nothing to do here: the real cleanup happens via
 * swapfile_pager_deallocate() when the last reference goes away.
 */
kern_return_t
swapfile_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_terminate: %p\n", mem_obj));

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_synchronize()
 */
kern_return_t
swapfile_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t		length,
	__unused vm_sync_t		sync_flags)
{
	swapfile_pager_t	pager;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_synchronize: %p\n", mem_obj));

	pager = swapfile_pager_lookup(mem_obj);

	memory_object_synchronize_completed(pager->pager_control,
					    offset, length);

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_map()
 *
 * This allows VM to let us, the EMM, know that this memory object
 * is currently mapped one or more times.  This is called by VM each time
 * the memory object gets mapped and we take one extra reference on the
 * memory object to account for all its mappings.
 */
kern_return_t
swapfile_pager_map(
	memory_object_t		mem_obj,
	__unused vm_prot_t	prot)
{
	swapfile_pager_t	pager;

	PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_map: %p\n", mem_obj));

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	assert(pager->is_ready);
	assert(pager->ref_count > 0); /* pager is alive */
	if (pager->is_mapped == FALSE) {
		/*
		 * First mapping of this pager:  take an extra reference
		 * that will remain until all the mappings of this pager
		 * are removed.
		 */
		pager->is_mapped = TRUE;
		pager->ref_count++;
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	return KERN_SUCCESS;
}

/*
 * swapfile_pager_last_unmap()
 *
 * This is called by VM when this memory object is no longer mapped anywhere.
 */
kern_return_t
swapfile_pager_last_unmap(
	memory_object_t		mem_obj)
{
	swapfile_pager_t	pager;

	PAGER_DEBUG(PAGER_ALL,
		    ("swapfile_pager_last_unmap: %p\n", mem_obj));

	pager = swapfile_pager_lookup(mem_obj);

	lck_mtx_lock(&swapfile_pager_lock);
	if (pager->is_mapped) {
		/*
		 * All the mappings are gone, so let go of the one extra
		 * reference that represents all the mappings of this pager.
		 */
		pager->is_mapped = FALSE;
		swapfile_pager_deallocate_internal(pager, TRUE);
		/* caution: deallocate_internal() released the lock ! */
	} else {
		lck_mtx_unlock(&swapfile_pager_lock);
	}
	
	return KERN_SUCCESS;
}


/*
 * swapfile_pager_lookup()
 *
 * Convert a memory object into the "swapfile_pager" it embeds.
 */
swapfile_pager_t
swapfile_pager_lookup(
	memory_object_t	 mem_obj)
{
	swapfile_pager_t	pager;

	pager = (swapfile_pager_t) mem_obj;
	assert(pager->pager_ops == &swapfile_pager_ops);
	assert(pager->ref_count > 0);
	return pager;
}

swapfile_pager_t
swapfile_pager_create(
	struct vnode 		*vp)
{
	swapfile_pager_t	pager, pager2;
	memory_object_control_t	control;
	kern_return_t		kr;

	pager = (swapfile_pager_t) kalloc(sizeof (*pager));
	if (pager == SWAPFILE_PAGER_NULL) {
		return SWAPFILE_PAGER_NULL;
	}

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the second word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	pager->pager_ops = &swapfile_pager_ops;
	pager->pager_ikot = IKOT_MEMORY_OBJECT;
	pager->is_ready = FALSE;	/* not ready until it has a "name" */
	pager->ref_count = 1;	/* setup reference */
	pager->is_mapped = FALSE;
	pager->pager_control = MEMORY_OBJECT_CONTROL_NULL;
	pager->swapfile_vnode = vp;
	
	lck_mtx_lock(&swapfile_pager_lock);
	/* see if anyone raced us to create a pager for the same object */
	queue_iterate(&swapfile_pager_queue,
		      pager2,
		      swapfile_pager_t,
		      pager_queue) {
		if (pager2->swapfile_vnode == vp) {
			break;
		}
	}
	if (! queue_end(&swapfile_pager_queue,
			(queue_entry_t) pager2)) {
		/* while we hold the lock, transfer our setup ref to winner */
		pager2->ref_count++;
		/* we lost the race, down with the loser... */
		lck_mtx_unlock(&swapfile_pager_lock);
		pager->swapfile_vnode = NULL;
		kfree(pager, sizeof (*pager));
		/* ... and go with the winner */
		pager = pager2;
		/* let the winner make sure the pager gets ready */
		return pager;
	}

	/* enter new pager at the head of our list of pagers */
	queue_enter_first(&swapfile_pager_queue,
			  pager,
			  swapfile_pager_t,
			  pager_queue);
	swapfile_pager_count++;
	if (swapfile_pager_count > swapfile_pager_count_max) {
		swapfile_pager_count_max = swapfile_pager_count;
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	kr = memory_object_create_named((memory_object_t) pager,
					0,
					&control);
	assert(kr == KERN_SUCCESS);

	lck_mtx_lock(&swapfile_pager_lock);
	/* the new pager is now ready to be used */
	pager->is_ready = TRUE;
	lck_mtx_unlock(&swapfile_pager_lock);

	/* wakeup anyone waiting for this pager to be ready */
	thread_wakeup(&pager->is_ready);

	return pager;
}

/*
 * swapfile_pager_setup()
 *
 * Provide the caller with a memory object backed by the swap file's
 * vnode "vp".  If such a memory object already exists, re-use it,
 * otherwise create a new memory object.
 */
memory_object_t
swapfile_pager_setup(
	struct vnode *vp)
{
	swapfile_pager_t	pager;

	lck_mtx_lock(&swapfile_pager_lock);

	queue_iterate(&swapfile_pager_queue,
		      pager,
		      swapfile_pager_t,
		      pager_queue) {
		if (pager->swapfile_vnode == vp) {
			break;
		}
	}
	if (queue_end(&swapfile_pager_queue,
		      (queue_entry_t) pager)) {
		/* no existing pager for this backing object */
		pager = SWAPFILE_PAGER_NULL;
	} else {
		/* make sure pager doesn't disappear */
		pager->ref_count++;
	}

	lck_mtx_unlock(&swapfile_pager_lock);

	if (pager == SWAPFILE_PAGER_NULL) {
		pager = swapfile_pager_create(vp);
		if (pager == SWAPFILE_PAGER_NULL) {
			return MEMORY_OBJECT_NULL;
		}
	}

	lck_mtx_lock(&swapfile_pager_lock);
	while (!pager->is_ready) {
		lck_mtx_sleep(&swapfile_pager_lock,
			LCK_SLEEP_DEFAULT,
			&pager->is_ready,
			THREAD_UNINT);
	}
	lck_mtx_unlock(&swapfile_pager_lock);

	return (memory_object_t) pager;
}	
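
/*
 * Hypothetical caller-side sketch (the real call site lives in the
 * mmap()/vnode plumbing): given the vnode "vp" of a swap file,
 *
 *	memory_object_t pager;
 *
 *	pager = swapfile_pager_setup(vp);
 *	if (pager == MEMORY_OBJECT_NULL)
 *		return ENOMEM;
 *	... map the object, fault pages in, etc ...
 *	memory_object_deallocate(pager);
 *
 * The returned object carries a reference for the caller, which must
 * eventually be dropped with memory_object_deallocate().
 */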

memory_object_control_t
swapfile_pager_control(
	memory_object_t	mem_obj)
{
	swapfile_pager_t	pager;

	pager = swapfile_pager_lookup(mem_obj);

	return pager->pager_control;
}