/*
 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 * 
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
	File:		VolumeAllocation.c

	Contains:	Routines for accessing and modifying the volume bitmap.

	Version:	HFS Plus 1.0

	Copyright:	© 1996-2009 by Apple Computer, Inc., all rights reserved.

*/

/*
Public routines:
	BlockAllocate
					Allocate space on a volume.  Can allocate space contiguously.
					If not contiguous, then the allocation may be less than what was
					asked for.  Returns the starting block number and the number of
					blocks allocated (only a single extent is returned).
	BlockDeallocate
					Deallocate a contiguous run of allocation blocks.
 
	BlockMarkAllocated
					Exported wrapper to mark blocks as in-use.  This will correctly determine
					whether or not the red-black tree is enabled and call the appropriate function 
					if applicable.
	BlockMarkFree
					Exported wrapper to mark blocks as freed.  This will correctly determine whether or
					not the red-black tree is enabled and call the appropriate function if applicable.

 
	ResetVCBFreeExtCache
					Since the red-black tree obviates the need to maintain the free extent cache, we do
					not update it if the tree is also live.  As a result, if we ever need to destroy the trees
					we should reset the free extent cache so it doesn't confuse us when we need to fall back to the
					bitmap scanning allocator.
					We also reset and disable the free extent cache when volume resizing is 
					in flight.
 
	UpdateAllocLimit 
					Adjusts the AllocLimit field in the hfs mount point.  This is used when we need to prevent
					allocations from occupying space in the region we are modifying during a filesystem resize.  
					At other times, it should be consistent with the total number of allocation blocks in the 
					filesystem.  It is also used to shrink or grow the number of blocks that the red-black tree should
					know about. If growing, scan the new range of bitmap, and if shrinking, reduce the
					number of items in the tree that we can allocate from.
 
Internal routines:
	Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE.  This
	cuts down on code size by omitting functions that cannot be used when the red-black tree
	code is not compiled in.
 
	BlockMarkFreeRBTree
					Make an internal call to BlockMarkFreeInternal and then update 
					and/or create Red-Black Tree allocation tree nodes to correspond
					to the free space being generated.
	BlockMarkFreeInternal
					Mark a contiguous range of blocks as free.  The corresponding
					bits in the volume bitmap will be cleared.  This will actually do the work
					of modifying the bitmap for us.
					
	BlockMarkAllocatedRBTree
					Make an internal call to BlockMarkAllocatedInternal, which will update the 
					bitmap on-disk when we allocate blocks.  If that is successful, then
					we'll remove the appropriate entries from the red-black tree.
	BlockMarkAllocatedInternal
					Mark a contiguous range of blocks as allocated.  The cor-
					responding bits in the volume bitmap are set.  Also tests to see
					if any of the blocks were previously unallocated.  
	BlockFindContiguous
					Find a contiguous range of blocks of a given size.  The caller
					specifies where to begin the search (by block number).  The
					block number of the first block in the range is returned.  This is only
					called by the bitmap scanning logic as the red-black tree should be able
					to do this internally by searching its tree. 
	BlockAllocateAny
					Find and allocate a contiguous range of blocks up to a given size.  The
					first range of contiguous free blocks found is allocated, even if there
					are fewer blocks than requested (and even if a contiguous range of blocks
					of the given size exists elsewhere).
	BlockAllocateAnyBitmap
					Finds a range of blocks per the above requirements without using the 
					Allocation RB Tree.  This relies on the bitmap-scanning logic in order to find
					any valid range of free space needed.
	BlockAllocateAnyRBTree
					Finds a valid range of blocks per the above requirements by searching
					the red-black tree.  We can just make an internal call to 
					BlockAllocateContigRBTree to find the valid range.
	BlockAllocateContig
					Find and allocate a contiguous range of blocks of a given size.  If
					a contiguous range of free blocks of the given size isn't found, then
					the allocation fails (i.e. it is "all or nothing").  This routine is
					essentially a wrapper function around its related sub-functions,
					BlockAllocateContigBitmap and BlockAllocateContigRBTree, which use,
					respectively, the original HFS+ bitmap scanning logic and the new 
					Red-Black Tree to search and manage free-space decisions.  This function
					contains logic for when to use which of the allocation algorithms,
					depending on the free space contained in the volume.
	BlockAllocateContigBitmap
					Finds and allocates a range of blocks specified by the size parameters
					using the original HFS+ bitmap scanning logic.  The red-black tree
					will not be updated if this function is used.  
	BlockAllocateContigRBTree
					Finds and allocates a range of blocks specified by the size parameters
					using the new red/black tree data structure and search algorithms
					provided by the tree library.  Updates the red/black tree nodes after
					the on-disk data structure (bitmap) has been updated. 
	BlockAllocateKnown
					Try to allocate space from known free space in the volume's
					free extent cache.

	ReadBitmapBlock
					Given an allocation block number, read the bitmap block that
					contains that allocation block into a caller-supplied buffer.

	ReleaseBitmapBlock
					Release a bitmap block back into the buffer cache.
 
 
Debug/Test Routines
	hfs_isallocated
					Test to see if any blocks in a range are allocated.  Journal or
					allocation file lock must be held.
 
	hfs_isallocated_scan
					Test to see if any blocks in a range are allocated.  Releases and
					invalidates the block used when finished.
	
	hfs_isrbtree_active
					Test to see if the allocation red-black tree is live.  This function
					requires either an exclusive or shared lock on the allocation bitmap file
					in the HFS mount structure, to prevent red-black tree pointers from disappearing.
 
	hfs_isrbtree_allocated
					Test to see if the specified extent is marked as allocated in the red-black tree.
					Multiplexes between the metadata zone trees and the normal allocation zone trees
					depending on the offset of the extent specified.
					
	check_rbtree_extents
					Void function that wraps around the above function (hfs_isrbtree_allocated)
					and checks to see that the return value was appropriate based on the assertion we're
					trying to validate (whether or not the specified extent should be marked as free 
					or allocated).
	
	hfs_validate_rbtree
					Exhaustive search function that will check every allocation block for its status in the
					red-black tree and then check the corresponding status in the bitmap file.  If the two are out
					of sync, it will panic.  Note that this function is extremely expensive and must NEVER
					be run outside of debug code.
 
	hfs_checktreelinks
					Checks the embedded linked list structure of the red black tree for integrity.  The next pointer
					should always point to whatever extent_tree_offset_next returns.
 
 
Red Black Tree Specific Routines
	GenerateTree
					Build a red-black tree for the given filesystem's bitmap.
 
	DestroyTrees
					Destroy the allocation trees on the given filesystem.


	hfs_alloc_scan_block
					Given a starting allocation block number, figures out which physical block contains that 
					allocation block's bit, and scans it from the starting bit until either the ending bit or
					the end of the block.  Free space extents are inserted into the appropriate red-black tree.
					
*/

#include "../../hfs_macos_defs.h"

#include <sys/types.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/disk.h>
#include <sys/ubc.h>
#include <sys/uio.h>
#include <kern/kalloc.h>

#include "../../hfs.h"
#include "../../hfs_dbg.h"
#include "../../hfs_format.h"
#include "../../hfs_endian.h"
#include "../../hfs_macos_defs.h"
#include "../headers/FileMgrInternal.h"
#include "../headers/HybridAllocator.h"
#include "../../hfs_kdebug.h"

#ifndef CONFIG_HFS_TRIM
#define CONFIG_HFS_TRIM 0
#endif

/*
 * Use sysctl vfs.generic.hfs.kdebug.allocation to control which
 * KERNEL_DEBUG_CONSTANT events are enabled at runtime.  (They're
 * disabled by default because there can be a lot of these events,
 * and we don't want to overwhelm the kernel debug buffer.  If you
 * want to watch these events in particular, just set the sysctl.)
 */
static int hfs_kdebug_allocation = 0;
SYSCTL_DECL(_vfs_generic);
SYSCTL_NODE(_vfs_generic, OID_AUTO, hfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS file system");
SYSCTL_NODE(_vfs_generic_hfs, OID_AUTO, kdebug, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "HFS kdebug");
SYSCTL_INT(_vfs_generic_hfs_kdebug, OID_AUTO, allocation, CTLFLAG_RW|CTLFLAG_LOCKED, &hfs_kdebug_allocation, 0, "Enable kdebug logging for HFS allocations");
enum {
	/*
	 * HFSDBG_ALLOC_ENABLED: Log calls to BlockAllocate and
	 * BlockDeallocate, including the internal BlockAllocateXxx
	 * routines so we can see how an allocation was satisfied.
	 *
	 * HFSDBG_EXT_CACHE_ENABLED: Log routines that read or write the
	 * free extent cache.
	 *
	 * HFSDBG_UNMAP_ENABLED: Log events involving the trim list.
	 *
	 * HFSDBG_BITMAP_ENABLED: Log accesses to the volume bitmap (setting
	 * or clearing bits, scanning the bitmap).
	 */
	HFSDBG_ALLOC_ENABLED		= 1,
	HFSDBG_EXT_CACHE_ENABLED	= 2,
	HFSDBG_UNMAP_ENABLED		= 4,
	HFSDBG_BITMAP_ENABLED		= 8
};

enum {
	kBytesPerWord			=	4,
	kBitsPerByte			=	8,
	kBitsPerWord			=	32,

	kBitsWithinWordMask		=	kBitsPerWord-1
};

#define kLowBitInWordMask	0x00000001ul
#define kHighBitInWordMask	0x80000000ul
#define kAllBitsSetInWord	0xFFFFFFFFul
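
/*
 * Worked example (added for exposition): the scanning routines below locate
 * an allocation block's bit within a cached bitmap chunk of vcbVBMIOSize
 * bytes roughly as follows:
 *
 *     bitsPerBlock     = vcbVBMIOSize * kBitsPerByte;
 *     wordIndexInBlock = (block & (bitsPerBlock - 1)) / kBitsPerWord;
 *     bitMask          = kHighBitInWordMask >> (block & kBitsWithinWordMask);
 *
 * With a hypothetical 4096-byte vcbVBMIOSize (32768 bits per chunk),
 * allocation block 32805 lands in the second chunk; 32805 & 32767 = 37, so
 * it is bit 5 of word 1 in that chunk, giving a bitMask of 0x04000000.
 */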


#define ALLOC_DEBUG 0

static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	uintptr_t		*blockRef);

static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty);

static OSErr BlockAllocateAny(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateAnyBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockFindContiguous(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateKnown(
	ExtendedVCB		*vcb,
	u_int32_t		maxBlocks,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockMarkAllocatedInternal (
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks);

static OSErr BlockMarkFreeInternal(
	ExtendedVCB	*vcb,
	u_int32_t	startingBlock,
	u_int32_t	numBlocks, 
	Boolean 	do_validate);

#if CONFIG_HFS_ALLOC_RBTREE

static OSErr ReleaseRBScanBitmapBlock( struct buf *bp );

static OSErr BlockAllocateAnyRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks);

static OSErr BlockAllocateContigRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks,
	u_int32_t 		forceContig);

static OSErr BlockMarkAllocatedRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t	numBlocks);
	
static OSErr BlockMarkFreeRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t	numBlocks);

static int
hfs_isrbtree_allocated (struct hfsmount * hfsmp, 
	u_int32_t startBlock, 
	u_int32_t numBlocks,
	extent_node_t** node1);

extern void
hfs_validate_rbtree (struct hfsmount *hfsmp, 
					 u_int32_t start, 
					 u_int32_t end);

static void hfs_checktreelinks (struct hfsmount *hfsmp);


void check_rbtree_extents (struct hfsmount *hfsmp,
	u_int32_t start,
	u_int32_t numBlocks,
	int shouldBeFree);

int hfs_isallocated_scan (struct hfsmount *hfsmp,
								 u_int32_t startingBlock,
								 u_int32_t *bp_buf);

static int hfs_alloc_scan_block(struct hfsmount *hfsmp, 
								u_int32_t startbit, 
								u_int32_t endBit, 
								u_int32_t *bitToScan);

#define ASSERT_FREE 1
#define ASSERT_ALLOC 0
								
#endif /* CONFIG_HFS_ALLOC_RBTREE */

/* Functions for manipulating free extent cache */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount);
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated);

#if ALLOC_DEBUG
/* 
 * Extra #includes for the debug function below.  These are not normally #included because
 * they would constitute a layering violation
 */
#include <vfs/vfs_journal.h>
#include <sys/disk.h>

/*
 * Validation Routine to verify that the TRIM list maintained by the journal
 * is in good shape relative to what we think the bitmap should have.  We should
 * never encounter allocated blocks in the TRIM list, so if we ever encounter them,
 * we panic.  
 */
int trim_validate_bitmap (struct hfsmount *hfsmp) {
	u_int64_t blockno_offset;
	u_int64_t numblocks;
	int i;
	int count;
	u_int32_t startblk;
	u_int32_t blks;
	int err = 0;
	uint32_t alloccount = 0;

	if (hfsmp->jnl) {
		struct journal *jnl = (struct journal*)hfsmp->jnl;
		if (jnl->active_tr) {
			struct jnl_trim_list *trim = &(jnl->active_tr->trim);
			count = trim->extent_count;
			for (i = 0; i < count; i++) {
				blockno_offset = trim->extents[i].offset;
				blockno_offset = blockno_offset - (uint64_t)hfsmp->hfsPlusIOPosOffset;
				blockno_offset = blockno_offset / hfsmp->blockSize;
				numblocks = trim->extents[i].length / hfsmp->blockSize;

				startblk = (u_int32_t)blockno_offset;
				blks = (u_int32_t) numblocks;
				err = hfs_count_allocated (hfsmp, startblk, blks, &alloccount);

				if (err == 0 && alloccount != 0) {
					panic ("trim_validate_bitmap: %d blocks @ ABN %d are allocated!", alloccount, startblk);
				}
			}
		}
	}
	return 0;
}

#endif

/*
;________________________________________________________________________________
;
; Routine:		hfs_unmap_free_extent
;
; Function:		Make note of a range of allocation blocks that should be
;				unmapped (trimmed).  That is, the given range of blocks no
;				longer have useful content, and the device can unmap the
;				previous contents.  For example, a solid state disk may reuse
;				the underlying storage for other blocks.
;
;				This routine is only supported for journaled volumes.  The extent
;				being freed is passed to the journal code, and the extent will
;				be unmapped after the current transaction is written to disk.
;
; Input Arguments:
;	hfsmp			- The volume containing the allocation blocks.
;	startingBlock	- The first allocation block of the extent being freed.
;	numBlocks		- The number of allocation blocks of the extent being freed.
;________________________________________________________________________________
*/
static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	int err;
	
	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
	
	if (hfsmp->jnl != NULL) {
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;

		err = journal_trim_add_extent(hfsmp->jnl, offset, length);
		if (err) {
			printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err);
		}
	}
	
	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0);
}
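
/*
 * Illustrative example (added for exposition, with hypothetical values):
 * the block-to-byte conversion above is the same one used throughout this
 * file.  With a 4096-byte allocation block and an hfsPlusIOPosOffset of 0,
 * freeing allocation blocks 1000..1003 hands the journal the byte extent
 * { offset = 4096000, length = 16384 }; hfs_trim_callback() performs the
 * inverse conversion once the transaction reaches disk.
 */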


/*
;________________________________________________________________________________
;
; Routine:		hfs_unmap_alloc_extent
;
; Function:		Make note that a range of allocation blocks, some of
;				which may have previously been passed to hfs_unmap_free_extent,
;				is now in use on the volume.  The given blocks will be removed
;				from any pending DKIOCUNMAP.
;
; Input Arguments:
;	hfsmp			- The volume containing the allocation blocks.
;	startingBlock	- The first allocation block of the extent being allocated.
;	numBlocks		- The number of allocation blocks being allocated.
;________________________________________________________________________________
*/
static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	u_int64_t offset;
	u_int64_t length;
	int err;
	
	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);
	
	if (hfsmp->jnl != NULL) {
		offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset;
		length = (u_int64_t) numBlocks * hfsmp->blockSize;
		
		err = journal_trim_remove_extent(hfsmp->jnl, offset, length);
		if (err) {
			printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err);
		}
	}
	
	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0);
}


/*
;________________________________________________________________________________
;
; Routine:		hfs_trim_callback
;
; Function:		This function is called when a transaction that freed extents
;				(via hfs_unmap_free_extent/journal_trim_add_extent) has been
;				written to the on-disk journal.  This routine will add those
;				extents to the free extent cache so that they can be reused.
;
;				CAUTION: This routine is called while the journal's trim lock
;				is held shared, so that no other thread can reuse any portion
;				of those extents.  We must be very careful about which locks
;				we take from within this callback, to avoid deadlock.  The
;				call to add_free_extent_cache will end up taking the cache's
;				lock (just long enough to add these extents to the cache).
;
;				CAUTION: If the journal becomes invalid (eg., due to an I/O
;				error when trying to write to the journal), this callback
;				will stop getting called, even if extents got freed before
;				the journal became invalid!
;
; Input Arguments:
;	arg				- The hfsmount of the volume containing the extents.
;	extent_count	- The number of extents freed in the transaction.
;	extents			- An array of extents (byte ranges) that were freed.
;________________________________________________________________________________
*/
__private_extern__ void
hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents)
{
	uint32_t i;
	uint32_t startBlock, numBlocks;
	struct hfsmount *hfsmp = arg;
	
	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0);
	
	for (i=0; i<extent_count; ++i) {
		/* Convert the byte range in *extents back to a range of allocation blocks. */
		startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize;
		numBlocks = extents[i].length / hfsmp->blockSize;
		(void) add_free_extent_cache(hfsmp, startBlock, numBlocks);
	}

	if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_END, 0, 0, 0, 0, 0);
}


/*
 ;________________________________________________________________________________
 ;
 ; Routine:	   BlockAllocate
 ;
 ; Function:   Allocate space on a volume.	If contiguous allocation is requested,
 ;			   at least the requested number of blocks will be allocated or an
 ;			   error will be returned.	If contiguous allocation is not forced,
 ;			   the space will be allocated from the free extent cache if possible, 
 ;			   or else from the first free extent found at or after the requested 
 ;			   starting allocation block.  If there is not enough room there, an
 ;			   allocation of fewer blocks than requested may be returned.
 ;
 ;			   If the requested starting block is 0 (for new file allocations),
 ;			   the volume's allocation block pointer will be used as a starting
 ;			   point.
 ;
 ; Input Arguments:
 ;	 vcb			 - Pointer to ExtendedVCB for the volume to allocate space on
 ;	 fcb			 - Pointer to FCB for the file for which storage is being allocated
 ;	 startingBlock	 - Preferred starting allocation block, 0 = no preference
 ;	 minBlocks	 	 - Number of blocks requested.	If the allocation is non-contiguous,
 ;					   less than this may actually be allocated
 ;	 maxBlocks	 	 - The maximum number of blocks to allocate.  If there is additional free
 ;					   space after minBlocks, then up to maxBlocks blocks should really
 ;					   be allocated.  (Used by ExtendFileC to round up allocations to a multiple
 ;					   of the file's clump size.)
 ;	 flags           - Flags to specify options like contiguous, use metadata zone, 
 ;					   skip free block check, etc.
 ;
 ; Output:
 ;	 (result)		 - Error code, zero for successful allocation
 ;	 *actualStartBlock	 - Actual starting allocation block
 ;	 *actualNumBlocks	 - Actual number of allocation blocks allocated
 ;
 ; Side effects:
 ;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
 ;________________________________________________________________________________
 */
OSErr BlockAllocate (
	ExtendedVCB		*vcb,				/* which volume to allocate space on */
	u_int32_t		startingBlock,		/* preferred starting block, or 0 for no preference */
	u_int32_t		minBlocks,		/* desired number of blocks to allocate */
	u_int32_t		maxBlocks,		/* maximum number of blocks to allocate */
	u_int32_t		flags,			/* option flags */
	u_int32_t		*actualStartBlock,	/* actual first block of allocation */
	u_int32_t		*actualNumBlocks)	/* number of blocks actually allocated; if HFS_ALLOC_FORCECONTIG */
							/* was not set, then this may represent fewer than minBlocks */
{
	u_int32_t  freeBlocks;
	OSErr			err;
	Boolean			updateAllocPtr = false;		//	true if nextAllocation needs to be updated
	struct hfsmount	*hfsmp;
	Boolean useMetaZone;
	Boolean forceContiguous;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0);
	
	if (flags & HFS_ALLOC_FORCECONTIG) {
		forceContiguous = true;
	} else {
		forceContiguous = false;
	}

	if (flags & HFS_ALLOC_METAZONE) {
		useMetaZone = true;
	} else {
		useMetaZone = false;
	}

	//TODO: Figure out when we need to re-enable the RB-Tree. 
	
	
	//TODO: Make sure we use allocLimit when appropriate.
	
	/*
	 * TODO: Update BlockAllocate and its sub-functions to do cooperative allocation and bitmap scanning
	 * in conjunction with the Generate Tree function.   If the red-black tree does not currently contain
	 * an allocation block of appropriate size, then start scanning blocks FOR the tree generation function until
	 * we find what we need.  We'll update the tree fields when we're done, indicating that we've advanced the
	 * high water mark for the tree.  
	 */
	
	//
	//	Initialize outputs in case we get an error
	//
	*actualStartBlock = 0;
	*actualNumBlocks = 0;
	hfsmp = VCBTOHFS (vcb);
	freeBlocks = hfs_freeblks(hfsmp, 0);
	
	
	/* Skip free block check if blocks are being allocated for relocating 
	 * data during truncating a volume.
	 * 
	 * During hfs_truncatefs(), the volume free block count is updated 
	 * before relocating data to reflect the total number of free blocks 
	 * that will exist on the volume after resize is successful.  This 
	 * means that we have reserved allocation blocks required for relocating 
	 * the data and hence there is no need to check the free blocks.
	 * It will also prevent resize failure when the number of blocks in 
	 * an extent being relocated is more than the free blocks that will 
	 * exist after the volume is resized.
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		//	If the disk is already full, don't bother.
		if (freeBlocks == 0) {
			err = dskFulErr;
			goto Exit;
		}
		if (forceContiguous && freeBlocks < minBlocks) {
			err = dskFulErr;
			goto Exit;
		}

		/*
		 * Clip if necessary so we don't over-subscribe the free blocks.
		 */
		if (minBlocks > freeBlocks) {
			minBlocks = freeBlocks;
		}
		if (maxBlocks > freeBlocks) {
			maxBlocks = freeBlocks;
		}
	}

	//
	//	If caller didn't specify a starting block number, then use the volume's
	//	next block to allocate from.
	//
	if (startingBlock == 0) {
		HFS_MOUNT_LOCK(vcb, TRUE);
		
		/* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */
		if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			startingBlock = vcb->sparseAllocation;
		} 
		else {
			startingBlock = vcb->nextAllocation;
		}
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		updateAllocPtr = true;
	}
	
	
	if (startingBlock >= vcb->allocLimit) {
		startingBlock = 0; /* overflow so start at beginning */
	}

	//
	//	If the request must be contiguous, then find a sequence of free blocks
	//	that is long enough.  Otherwise, find the first free block.
	//
	if (forceContiguous) {
		err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks,
		                          useMetaZone, actualStartBlock, actualNumBlocks);
		/*
		 * If we allocated from a new position then also update the roving allocator.  
		 * This will keep the roving allocation pointer up-to-date even 
		 * if we are using the new R/B tree allocator, since
		 * it doesn't matter to us here, how the underlying allocator found 
		 * the block to vend out.
		 */
		if ((err == noErr) &&
		    (*actualStartBlock > startingBlock) &&
		    ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
	    	     (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			updateAllocPtr = true;
		}
	} else {
#if CONFIG_HFS_ALLOC_RBTREE
		/* 
		 * If the RB-Tree Allocator is live, just go straight for a 
		 * BlockAllocateAny call and return the result.  Otherwise, 
		 * resort to the bitmap scanner.
		 */
		if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
			/* Start by trying to allocate from the starting block forward */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
								   maxBlocks, useMetaZone, actualStartBlock,
								   actualNumBlocks);
			
			/* 
			 * Because the RB-Tree is live, the previous call to BlockAllocateAny
			 * will use the rbtree variant.  As a result, it will automatically search the 
			 * metadata zone for a valid extent if needed.  If we get a return value of 
			 * noErr, we found a valid extent and we can skip to the end.  If the error indicates
			 * the disk is full, that's an equally valid return code and we can skip to the end, too.
			 */
			if (err == noErr || err == dskFulErr) {
				goto Exit; 
			}
			else {
				//TODO: only tear down tree if the tree is finished building.
				//Make sure to handle the ENOSPC condition properly.  We shouldn't error out in that case.
				/* Tear down tree if we encounter an error */
				if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) {
					hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
					DestroyTrees(hfsmp);
					ResetVCBFreeExtCache(hfsmp);				
				}
				else {
					goto Exit;
				}
				// fall through to the normal allocation since the rb-tree allocation failed.
			}
		}
#endif
					
		/*
		 * Scan the bitmap once, gather the N largest free extents, then
		 * allocate from these largest extents.  Repeat as needed until
		 * we get all the space we needed.  We could probably build up
		 * that list when the higher level caller tried (and failed) a
		 * contiguous allocation first.
		 *
		 * Note that the free-extent cache will cease to be updated if
		 * we are using the red-black tree for allocations.  If we jettison 
		 * the tree, then we will reset the free-extent cache and start over.
		 */
		
		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */

		if (err == dskFulErr) {
			/* 
			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
			 * allocation limit.
			 */
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
			                       maxBlocks, useMetaZone, actualStartBlock,
			                       actualNumBlocks);
		}
		if (err == dskFulErr) {
			/*
			 * We may be out of space in the normal zone; go up to the starting block from
			 * the start of the volume.
			 */
			err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
			                       useMetaZone, actualStartBlock,
			                       actualNumBlocks);
		}
	}

Exit:
	// if we actually allocated something then go update the
	// various bits of state that we maintain regardless of
	// whether there was an error (i.e. partial allocations
	// still need to update things like the free block count).
	//
	if (*actualNumBlocks != 0) {
		//
		//	If we used the volume's roving allocation pointer, then we need to update it.
		//	Adding in the length of the current allocation might reduce the next allocate
		//	call by avoiding a re-scan of the already allocated space.  However, the clump
		//	just allocated can quite conceivably end up being truncated or released when
		//	the file is closed or its EOF changed.  Leaving the allocation pointer at the
		//	start of the last allocation will avoid unnecessary fragmentation in this case.
		//
		HFS_MOUNT_LOCK(vcb, TRUE);

		lck_spin_lock(&hfsmp->vcbFreeExtLock);
		if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) {
			vcb->sparseAllocation = *actualStartBlock;
		}
		lck_spin_unlock(&hfsmp->vcbFreeExtLock);
		if (*actualNumBlocks < vcb->hfs_freed_block_count) {
			vcb->hfs_freed_block_count -= *actualNumBlocks;
		} else {
			vcb->hfs_freed_block_count = 0;
		}

		if (updateAllocPtr &&
				((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) ||
				 (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) {
			HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock);
		}

		(void) remove_free_extent_cache(hfsmp, *actualStartBlock, *actualNumBlocks);

		/* 
		 * Update the number of free blocks on the volume 
		 *
		 * Skip updating the free blocks count if the blocks are 
		 * being allocated to relocate data as part of hfs_truncatefs()
		 */
		if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
			vcb->freeBlocks -= *actualNumBlocks;
		}
		MarkVCBDirty(vcb);
		HFS_MOUNT_UNLOCK(vcb, TRUE);

		hfs_generate_volume_notifications(VCBTOHFS(vcb));
	}
	
	if (ALLOC_DEBUG) {
		if (err == noErr) {
			if (*actualStartBlock >= hfsmp->totalBlocks) {
				panic ("BlockAllocate: vending invalid blocks!");
			}
			if (*actualStartBlock >= hfsmp->allocLimit) {
				panic ("BlockAllocate: vending block past allocLimit!");
			}
			
			if ((*actualStartBlock + *actualNumBlocks) > hfsmp->totalBlocks) {	
				panic ("BlockAllocate: vending too many invalid blocks!");
			}
			
			if ((*actualStartBlock + *actualNumBlocks) > hfsmp->allocLimit) {	
				panic ("BlockAllocate: vending too many invalid blocks past allocLimit!");
			}
		}
	}
	
	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}
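
/*
 * Hypothetical caller sketch (added for exposition; not one of the original
 * routines, and kept out of the build).  It shows the intended use of the
 * flags: an all-or-nothing contiguous request, rounded up toward the clump,
 * falling back to a best-effort allocation if the volume is too fragmented.
 * Real callers such as ExtendFileC also hold the allocation file lock and an
 * open transaction before calling in here.
 */
#if 0
static OSErr example_alloc(ExtendedVCB *vcb, u_int32_t blocksNeeded,
			   u_int32_t clumpBlocks, u_int32_t *start, u_int32_t *count)
{
	OSErr err;

	/* First try: an all-or-nothing contiguous run, up to the clump size. */
	err = BlockAllocate(vcb, 0, blocksNeeded, clumpBlocks,
			    HFS_ALLOC_FORCECONTIG, start, count);
	if (err == dskFulErr) {
		/* Fall back: best effort, possibly fewer than blocksNeeded blocks. */
		err = BlockAllocate(vcb, 0, blocksNeeded, clumpBlocks, 0, start, count);
	}
	return err;
}
#endif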


/*
;________________________________________________________________________________
;
; Routine:	   BlockDeallocate
;
; Function:    Update the bitmap to deallocate a run of disk allocation blocks
;
; Input Arguments:
;	 vcb		- Pointer to ExtendedVCB for the volume to free space on
;	 firstBlock	- First allocation block to be freed
;	 numBlocks	- Number of allocation blocks to free up (must be > 0!)
;
; Output:
;	 (result)	- Result code
;
; Side effects:
;	 The volume bitmap is read and updated; the volume bitmap cache may be changed.
;	 The Allocator's red-black trees may also be modified as a result.
;________________________________________________________________________________
*/

OSErr BlockDeallocate (
	ExtendedVCB		*vcb,			//	Which volume to deallocate space on
	u_int32_t		firstBlock,		//	First block in range to deallocate
	u_int32_t		numBlocks, 		//	Number of contiguous blocks to deallocate
	u_int32_t 		flags)
{
	OSErr			err;
	struct hfsmount *hfsmp;
	hfsmp = VCBTOHFS(vcb);
	
	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0);

	//
	//	If no blocks to deallocate, then exit early
	//
	if (numBlocks == 0) {
		err = noErr;
		goto Exit;
	}
	
	
	if (ALLOC_DEBUG) {
		if (firstBlock >= hfsmp->totalBlocks) {
			panic ("BlockDeallocate: freeing invalid blocks!");
		}
		
		if ((firstBlock + numBlocks) > hfsmp->totalBlocks) {	
			panic ("BlockDeallocate: freeing too many invalid blocks!");
		}			
	}
	
	
	

	/*
	 * If we're using the red-black tree code, then try to free the
	 * blocks by marking them in the red-black tree first.  If the tree
	 * is not active for whatever reason (or we're not using the 
	 * R/B Tree code at all), then go straight for the BlockMarkFree 
	 * function. 
	 *
	 * Remember that we can get into this function if the tree isn't finished
	 * building.  In that case, check to see if the block we're de-allocating is
	 * past the high watermark
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		/*
		 * BlockMarkFreeRBTree deals with the case where we are resizing the
		 * filesystem (shrinking), and we need to manipulate the bitmap beyond the portion
		 * that is currently controlled by the r/b tree.
		 */
		
		//TODO: Update multiplexing code for the half-finished case.
		err = BlockMarkFreeRBTree(vcb, firstBlock, numBlocks);
	}
	else {
		err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
	}

#else
	err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true);
#endif
	if (err)
		goto Exit;

	//
	//	Update the volume's free block count, and mark the VCB as dirty.
	//
	HFS_MOUNT_LOCK(vcb, TRUE);
	
	/* 
	 * Do not update the free block count if HFS_ALLOC_SKIPFREEBLKS is 
	 * specified.  This flag is set when a volume is being truncated.  
	 */
	if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) {
		vcb->freeBlocks += numBlocks;
	}

	vcb->hfs_freed_block_count += numBlocks;

	if (vcb->nextAllocation == (firstBlock + numBlocks)) {
		HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks));
	}

	if (hfsmp->jnl == NULL) {
		/*
		 * In the journal case, we'll add the free extent once the journal
		 * calls us back to tell us it wrote the transaction to disk.
		 */
		(void) add_free_extent_cache(vcb, firstBlock, numBlocks);
		
		/*
		 * In the journal case, we'll only update sparseAllocation once the
		 * free extent cache becomes empty (when we remove the last entry
		 * from the cache).  Skipping it here means we're less likely to
		 * find a recently freed extent via the bitmap before it gets added
		 * to the free extent cache.
		 */
		if (firstBlock < vcb->sparseAllocation) {
			vcb->sparseAllocation = firstBlock;
		}
	}
	
	MarkVCBDirty(vcb);
  	HFS_MOUNT_UNLOCK(vcb, TRUE); 

	hfs_generate_volume_notifications(VCBTOHFS(vcb));
Exit:

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}


u_int8_t freebitcount[16] = {
	4, 3, 3, 2, 3, 2, 2, 1,  /* 0 1 2 3 4 5 6 7 */
	3, 2, 2, 1, 2, 1, 1, 0,  /* 8 9 A B C D E F */
};
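
/*
 * The table above gives, for each 4-bit nibble of a bitmap byte, how many of
 * those four bits are clear (i.e. how many allocation blocks are free).
 * Worked example (added for exposition): a bitmap byte of 0xF0 has a low
 * nibble of 0x0 (4 free blocks) and a high nibble of 0xF (0 free blocks),
 * so MetaZoneFreeBlocks() below adds 4 for that byte.
 */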

u_int32_t
MetaZoneFreeBlocks(ExtendedVCB *vcb)
{
	u_int32_t freeblocks;
	u_int32_t *currCache;
	uintptr_t blockRef;
	u_int32_t bit;
	u_int32_t lastbit;
	int bytesleft;
	int bytesperblock;
	u_int8_t byte;
	u_int8_t *buffer;

	blockRef = 0;
	bytesleft = freeblocks = 0;
	buffer = NULL;
	bit = VCBTOHFS(vcb)->hfs_metazone_start;
	if (bit == 1)
		bit = 0;
	
	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
	bytesperblock = vcb->vcbVBMIOSize;

	/*
	 *  Count all the bits from bit to lastbit.
	 */
	while (bit < lastbit) {
		/*
		 *  Get next bitmap block.
		 */
		if (bytesleft == 0) {
			if (blockRef) {
				(void) ReleaseBitmapBlock(vcb, blockRef, false);
				blockRef = 0;
			}
			if (ReadBitmapBlock(vcb, bit, &currCache, &blockRef) != 0) {
				return (0);
			}
			buffer = (u_int8_t *)currCache;
			bytesleft = bytesperblock;
		}
		byte = *buffer++;
		freeblocks += freebitcount[byte & 0x0F];
		freeblocks += freebitcount[(byte >> 4) & 0x0F];
		bit += kBitsPerByte;
		--bytesleft;
	}
	if (blockRef)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	return (freeblocks);
}


/*
 * Obtain the next allocation block (bit) that's
 * outside the metadata allocation zone.
 */
static u_int32_t NextBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit)
{
	struct  hfsmount *hfsmp = VCBTOHFS(vcb);

	if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0)
		return (bit);
	/*
	 * Skip over metadata allocation zone.
	 */
	if ((bit >= hfsmp->hfs_metazone_start) &&
	    (bit <= hfsmp->hfs_metazone_end)) {
		bit = hfsmp->hfs_metazone_end + 1;
	}
	return (bit);
}
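
/*
 * Illustrative example (added for exposition, with hypothetical zone
 * boundaries): if the metadata zone covers allocation blocks 64..100000,
 * NextBitmapBlock(vcb, 5000) returns 100001 (the first block past the
 * zone), while NextBitmapBlock(vcb, 200000) returns 200000 unchanged.
 * Volumes without HFS_METADATA_ZONE set always get the input bit back.
 */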


/*
;_______________________________________________________________________
;
; Routine:	ReadBitmapBlock
;
; Function:	Read in a bitmap block corresponding to a given allocation
;			block (bit).  Return a pointer to the bitmap block.
;
; Inputs:
;	vcb			--	Pointer to ExtendedVCB
;	bit			--	Allocation block whose bitmap block is desired
;
; Outputs:
;	buffer		--	Pointer to bitmap block corresponding to "bit"
;	blockRef
;_______________________________________________________________________
*/
static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	uintptr_t		*blockRef)
{
	OSErr			err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;
	u_int32_t blockSize;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_START, bit, 0, 0, 0, 0);

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);	

	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));

	if (vcb->vcbSigWord == kHFSPlusSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */

	} else /* hfs */ {
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;			/* map to physical block */
	}

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	if (bp) {
		if (err) {
			buf_brelse(bp);
			*blockRef = 0;
			*buffer = NULL;
		} else {
			*blockRef = (uintptr_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_BLOCK | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}
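
/*
 * Worked example (added for exposition, with a hypothetical I/O size): with
 * a vcbVBMIOSize of 4096 bytes, each read covers 4096 * kBitsPerByte = 32768
 * allocation blocks, so the bit for allocation block 100000 lives in bitmap
 * block 100000 / 32768 = 3, exactly as computed above.  On HFS Plus that
 * block is read through the allocation file; on plain HFS it is offset by
 * vcbVBMSt and read through the device vnode.
 */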


/*
;_______________________________________________________________________
;
; Routine:	ReleaseBitmapBlock
;
; Function:	Release a bitmap block. 
;
; Inputs:
;	vcb
;	blockRef
;	dirty
;_______________________________________________________________________
*/
static OSErr ReleaseBitmapBlock(
	ExtendedVCB		*vcb,
	uintptr_t		blockRef,
	Boolean			dirty)
{
	struct buf *bp = (struct buf *)blockRef;
	
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);

	if (blockRef == 0) {
		if (dirty)
			panic("hfs: ReleaseBitmapBlock: missing bp");
		return (0);
	}

	if (bp) {
		if (dirty) {
			// XXXdbg
			struct hfsmount *hfsmp = VCBTOHFS(vcb);
			
			if (hfsmp->jnl) {
				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
			} else {
				buf_bdwrite(bp);
			}
		} else {
			buf_brelse(bp);
		}
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return (0);
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * ReleaseRBScanBitmapBlock is used to release struct bufs that were 
 * created for use by the Red-Black tree generation code.  We want to force 
 * them to be purged out of the buffer cache ASAP, so we'll release them differently
 * than in the ReleaseBitmapBlock case.  Additionally, we know that we're only reading 
 * the blocks, so we will never dirty them as part of the tree-building scan.
 */

static OSErr ReleaseRBScanBitmapBlock(struct buf *bp ) {
	
	if (bp == NULL) {
		return (0);
	}
	
	if (bp) {
		/* Mark the buffer invalid if it isn't locked, then release it */
		if ((buf_flags(bp) & B_LOCKED) == 0) {
			buf_markinvalid(bp);
		}
		buf_brelse(bp);
	}
	
	return (0);
	
	
}

#endif


/*
_______________________________________________________________________

Routine:	BlockAllocateContig

Function:	Allocate a contiguous group of allocation blocks.  The
			allocation is all-or-nothing.  The caller guarantees that
			there are enough free blocks (though they may not be
			contiguous, in which case this call will fail).

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	Preferred first block for allocation
	minBlocks		Minimum number of contiguous blocks to allocate
	maxBlocks		Maximum number of contiguous blocks to allocate
	useMetaZone

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error
_______________________________________________________________________
*/
static OSErr BlockAllocateContig(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{

#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateContigRBTree(vcb, startingBlock, minBlocks, maxBlocks, useMetaZone, 
				actualStartBlock, actualNumBlocks, 1);
	}
#endif
	return BlockAllocateContigBitmap(vcb, startingBlock, minBlocks, 
			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);	
}

/*
 * Variant of BlockAllocateContig that uses the original bitmap-searching logic
 */

static OSErr BlockAllocateContigBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr	err;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0);

	//
	//	Find a contiguous group of blocks at least minBlocks long.
	//	Determine the number of contiguous blocks available (up
	//	to maxBlocks).
	//

	/*
	 * NOTE: If the only contiguous free extent of at least minBlocks
	 * crosses startingBlock (i.e. starts before, ends after), then we
	 * won't find it. Earlier versions *did* find this case by letting
	 * the second search look past startingBlock by minBlocks.  But
	 * with the free extent cache, this can lead to duplicate entries
	 * in the cache, causing the same blocks to be allocated twice.
	 */
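	/*
	 * Concrete illustration of the note above (added for exposition): if
	 * minBlocks is 30 and the only sufficiently large free run spans
	 * blocks 990..1019 while startingBlock is 1000, the forward search
	 * over [1000, allocLimit) sees only 20 free blocks and the wrap-around
	 * search over [1, 1000) only 10, so the contiguous request fails even
	 * though the space exists.
	 */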
	err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit, minBlocks,
	                          maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	if (err == dskFulErr && startingBlock != 0) {
		/*
		 * Constrain the endingBlock so we don't bother looking for ranges
		 * that would overlap those found in the previous call.
		 */
		err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks,
		                          useMetaZone, actualStartBlock, actualNumBlocks);
	}
	//
	//	Now mark those blocks allocated.
	//
	if (err == noErr)
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
	
	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Variant of BlockAllocateContig that uses the newer red-black tree library
 * in order to manage free space extents.  This will search the red-black tree
 * and return results in the same fashion as BlockAllocateContigBitmap.
 * 
 * Note that this function is invoked from both the red-black tree variant of BlockAllocateAny
 * as well as BlockAllocateContig.  In order to determine when we should vend contiguous chunks over
 * locality-based-searches, we use the forceContig argument to determine who called us.
 */

static OSErr BlockAllocateContigRBTree(
						  ExtendedVCB		*vcb,
						  u_int32_t		startingBlock,
						  u_int32_t		minBlocks,
						  u_int32_t		maxBlocks,
						  Boolean			useMetaZone,
						  u_int32_t		*actualStartBlock,
						  u_int32_t		*actualNumBlocks,
						  u_int32_t 	forceContig)
{
	OSErr	err;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t tempnode;
	
	bzero (&tempnode, sizeof(extent_node_t));
	
	/* Begin search at the end of the file, via startingBlock */
	memset (&search_sentinel, 0, sizeof(extent_node_t));
	search_sentinel.offset = startingBlock;
	
	*actualStartBlock = 0;
	*actualNumBlocks = 0;
	
	/* 
	 * Find the first available extent that satisfies the allocation by searching
	 * from the starting point and moving forward
	 */
	node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);
	
	if (node) {
		*actualStartBlock = node->offset;
		*actualNumBlocks = node->length;
	}
	
	 /* If we managed to grab at least minBlocks of space, then we're done. */

	if (*actualNumBlocks >= minBlocks) {
		if (*actualNumBlocks > maxBlocks) {
			*actualNumBlocks = maxBlocks;
		}
		
		
		/* Check to see if blocks are already marked as in-use */
		if (ALLOC_DEBUG) {
			REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
			if (hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
				printf("bad node: %p, offset %d, length %d\n", node, node->offset,node->length);
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
					   *actualStartBlock, *actualNumBlocks);
			}
		}
		
		/*
		 * BlockMarkAllocatedRBTree is responsible for removing the nodes
		 * from the red-black tree after the bitmap has been updated on-disk.
		 */
		err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
		if (err == noErr) {
			
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						   *actualStartBlock, *actualNumBlocks);
				}
				check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);		
			}		
			
			return err;
		}
	}
	
	/*
	 * We may have failed to grow at the end of the file.  We'll try to find 
	 * appropriate free extents, searching by size in the normal allocation zone.
	 * 
	 * However, if we're allocating on behalf of a sparse device that hasn't explicitly
	 * requested a contiguous chunk, then we try to search by offset, even if it 
	 * means fragmenting the file.  We want all available entries starting 
	 * from the front of the disk to avoid creating new bandfiles.  As a result, 
	 * we'll start by searching the offset tree rather than the normal length 
	 * tree. Note that this function can be invoked from BlockAllocateAny, in 
	 * which case the minimum request is 1 block, making it easy to succeed. 
	 */
	search_sentinel.offset = hfsmp->hfs_metazone_end;
	search_sentinel.length = minBlocks;
	
	if ((vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) && (forceContig == 0)) {
		/* just start with the first offset node */
		node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel);		
	}
	else {
		/* 
		 * Otherwise, start from the end of the metadata zone or our next allocation pointer, 
		 * and try to find the first chunk of size >= min.
		 */
		node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
		
		if (node == NULL) {
			extent_node_t *metaend_node;
			/* 
			 * Maybe there's a free extent coalesced with the space still in the metadata 
			 * zone.  If there is, find it and allocate from the middle of it, starting at
			 * the end of the metadata zone.
			 *
			 * If search_prev yields a result that is not offset == metazone_end, then that
			 * means no node existed at that offset.  If the previous node's offset + length crosses
			 * the metazone boundary, then allocate from there.  If it is too small to 
			 * cross the metazone boundary, then it is of no importance and we'd have to 
			 * report ENOSPC.
			 */
			metaend_node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
			
			if ((metaend_node) && (metaend_node->offset < hfsmp->hfs_metazone_end)) {
				u_int32_t node_end = metaend_node->offset + metaend_node->length;
				if (node_end > hfsmp->hfs_metazone_end) {
					u_int32_t modified_length = node_end - hfsmp->hfs_metazone_end;
					if (modified_length >= minBlocks) {
						/* 
						 * Then we can allocate it.  Fill in the contents into tempnode,
						 * and BlockMarkAllocatedRBTree below will take care of the rest.
						 */
						tempnode.offset = hfsmp->hfs_metazone_end;
						tempnode.length = MIN(minBlocks, node_end - tempnode.offset);
						node = &tempnode;
					}
				}
			}
		}
	}
	
	 /* If we can't find anything useful, search the metadata zone as a last resort. */
	
	if ((!node) && useMetaZone) {
		search_sentinel.offset = 0;
		search_sentinel.length = minBlocks;
		node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel);
	}
	
	/* If we found something useful, then go ahead and update the bitmap */
	if ((node) && (node->length >= minBlocks)) {
		*actualStartBlock = node->offset;
		if (node->length >= maxBlocks) {
			*actualNumBlocks = maxBlocks;
		}
		else {
			*actualNumBlocks = node->length;
		}

		err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks);
		
		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						   *actualStartBlock, *actualNumBlocks);
				}
				check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC);		
			}
		}
	}
	else {
		int destroy_trees = 0;
		/*
		 * TODO: Add High-water mark check here.  If we couldn't find anything useful, 
		 * when do we tear down the tree?  Or should the logic be in BlockAllocateContig??
		 */
		if (destroy_trees) {
			DestroyTrees(VCBTOHFS(vcb));
			/* Reset the Free Ext Cache since we'll be using it now. */
			ResetVCBFreeExtCache(VCBTOHFS(vcb));
		}
		
		if (ALLOC_DEBUG) {
			printf("HFS allocator: No space on FS (%s). Node  %p Start %d Min %d, Max %d, Tree still alive.\n", 
				   hfsmp->vcbVN, node, startingBlock, minBlocks, maxBlocks);
			
			/* Dump the list ? */
			extent_tree_offset_print(&hfsmp->offset_tree);
			
			printf("HFS allocator: Done printing list on FS (%s). Min %d, Max %d, Tree still alive.\n", 
				   hfsmp->vcbVN, minBlocks, maxBlocks);


			
		}
		err = dskFulErr;
	}
	
	if (err == noErr) {
		if (ALLOC_DEBUG) {
			if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit)
				panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
		}
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}
	
	return err;
	
}
#endif



/*
_______________________________________________________________________

Routine:	BlockAllocateAny

Function:	Allocate one or more allocation blocks.  If there are fewer
			free blocks than requested, all free blocks will be
			allocated.  The caller guarantees that there is at least
			one free block.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	Preferred first block for allocation
	endingBlock		Last block to check + 1
	maxBlocks		Maximum number of contiguous blocks to allocate
	useMetaZone

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error
_______________________________________________________________________
*/

/*
 * BlockAllocateAny acts as a multiplexer between BlockAllocateAnyRBTree
 * and BlockAllocateAnyBitmap, which uses the bitmap scanning logic.  
 */

static OSErr BlockAllocateAny(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
		return BlockAllocateAnyRBTree(vcb, startingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
	}
#endif
	return BlockAllocateAnyBitmap(vcb, startingBlock, endingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);

}


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * BlockAllocateAnyRBTree finds one or more allocation blocks by using
 * the red-black allocation tree to figure out where the free ranges are.  
 * This function is typically used as a last resort because we were unable to 
 * find the right ranges.  Outputs are the same as BlockAllocateAnyBitmap.
 */
static OSErr BlockAllocateAnyRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{	
	OSErr err;
	
	/* 
	 * BlockAllocateContig 
	 */
	/* If we're using the red-black tree, try searching at the specified offsets. */
	err = BlockAllocateContigRBTree(vcb, startingBlock, 1, maxBlocks, useMetaZone, 
									actualStartBlock, actualNumBlocks, 0);
	return err;
	
}
#endif

/*
 * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out
 * where the free allocation blocks are.  Inputs and outputs are the same as for
 * BlockAllocateAny and BlockAllocateAnyRBTree
 */

static OSErr BlockAllocateAnyBitmap(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	endingBlock,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	register u_int32_t	block;			//	current block number
	register u_int32_t	currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	u_int32_t  *buffer = NULL;
	u_int32_t  *currCache = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	Boolean dirty = false;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0);

	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the 
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else {
				err = dskFulErr;
				goto Exit;
			}
		}
	}

	//	Since this routine doesn't wrap around, limit maxBlocks to the
	//	number of blocks remaining before endingBlock.
	if (maxBlocks > (endingBlock - startingBlock)) {
		maxBlocks = endingBlock - startingBlock;
	}

	//
	//	Pre-read the first bitmap block
	//
	err = ReadBitmapBlock(vcb, startingBlock, &currCache, &blockRef);
	if (err != noErr) goto Exit;
	buffer = currCache;

	//
	//	Set up the current position within the block
	//
	{
		u_int32_t wordIndexInBlock;
		
		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		buffer += wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
		currentWord = SWAP_BE32 (*buffer);
		bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask);
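		/*
		 * Illustrative example: with kHighBitInWordMask == 0x80000000 and
		 * (startingBlock & kBitsWithinWordMask) == 5, bitMask becomes
		 * 0x80000000 >> 5 = 0x04000000, the single bit within this word
		 * that corresponds to startingBlock.
		 */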
	}
	
	//
	//	Find the first unallocated block
	//
	block=startingBlock;
	while (block < endingBlock) {
		if ((currentWord & bitMask) == 0)
			break;

		//	Next bit
		++block;
		bitMask >>= 1;
		if (bitMask == 0) {
			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;

			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					block = NextBitmapBlock(vcb, block);
				}
				if (block >= endingBlock) {
					err = dskFulErr;
					goto Exit;
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) goto Exit;
				buffer = currCache;

				wordsLeft = wordsPerBlock;
			}
			currentWord = SWAP_BE32 (*buffer);
		}
	}

	//	Did we get to the end of the bitmap before finding a free block?
	//	If so, then we couldn't allocate anything.
	if (block >= endingBlock) {
		err = dskFulErr;
		goto Exit;
	}

	//	Return the first block in the allocated range
	*actualStartBlock = block;
	dirty = true;
	
	//	If we could get the desired number of blocks before hitting endingBlock,
	//	then adjust endingBlock so we won't keep looking.  Ideally, the comparison
	//	would be (block + maxBlocks) < endingBlock, but that could overflow.  The
	//	comparison below yields identical results, but without overflow.
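	//	For instance (illustrative): with endingBlock = 0xFFFFFFF0 and maxBlocks = 0x20,
	//	(block + maxBlocks) could wrap past 2^32 for a block near the end of the
	//	bitmap, while (endingBlock - maxBlocks) cannot underflow because maxBlocks
	//	was clamped above to at most (endingBlock - startingBlock).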
	if (block < (endingBlock-maxBlocks)) {
		endingBlock = block + maxBlocks;	//	if we get this far, we've found enough
	}
	
	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	Allocate all of the consecutive blocks
	//
	while ((currentWord & bitMask) == 0) {
		//	Allocate this block
		currentWord |= bitMask;
		
		//	Move to the next block.  If no more, then exit.
		++block;
		if (block == endingBlock)
			break;

		//	Next bit
		bitMask >>= 1;
		if (bitMask == 0) {
			*buffer = SWAP_BE32 (currentWord);					//	update value in bitmap
			
			//	Next word
			bitMask = kHighBitInWordMask;
			++buffer;
			
			if (--wordsLeft == 0) {
				//	Next block
				buffer = currCache = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, true);
				if (err != noErr) goto Exit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					nextBlock = NextBitmapBlock(vcb, block);
					if (nextBlock != block) {
						goto Exit;  /* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
				if (err != noErr) goto Exit;
				buffer = currCache;

				// XXXdbg
				if (hfsmp->jnl) {
					journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
				}
				
				wordsLeft = wordsPerBlock;
			}
			
			currentWord = SWAP_BE32 (*buffer);
		}
	}
	*buffer = SWAP_BE32 (currentWord);							//	update the last change

Exit:
	if (err == noErr) {
		*actualNumBlocks = block - *actualStartBlock;

		// sanity check
		if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) {
			panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN);
		}
		
		/*
		 * Beware!
		 * Because this function directly manipulates the bitmap to mark the
		 * blocks it came across as allocated, we must inform the journal (and
		 * subsequently, the journal's trim list) that we are allocating these 
		 * blocks, just like in BlockMarkAllocatedInternal.  hfs_unmap_alloc_extent
		 * and the functions it calls will serialize behind the journal trim list lock
		 * to ensure that either the asynchronous flush/TRIM/UNMAP happens prior to
		 * us manipulating the trim list, or we get there first and successfully remove
		 * these bitmap blocks before the TRIM happens.
		 */
		hfs_unmap_alloc_extent (vcb, *actualStartBlock, *actualNumBlocks);
	}
	else {
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}
	
    if (currCache)
    	(void) ReleaseBitmapBlock(vcb, blockRef, dirty);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}


/*
_______________________________________________________________________

Routine:	BlockAllocateKnown

Function:	Try to allocate space from known free space in the free
			extent cache.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	maxBlocks		Maximum number of contiguous blocks to allocate

Outputs:
	actualStartBlock	First block of range allocated, or 0 if error
	actualNumBlocks		Number of blocks allocated, or 0 if error

Returns:
	dskFulErr		Free extent cache is empty
_______________________________________________________________________
*/

static OSErr BlockAllocateKnown(
	ExtendedVCB		*vcb,
	u_int32_t		maxBlocks,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr			err;	
	u_int32_t		i;
	u_int32_t		foundBlocks;
	u_int32_t		newStartBlock, newBlockCount;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0);

	HFS_MOUNT_LOCK(vcb, TRUE);
	lck_spin_lock(&vcb->vcbFreeExtLock);
	if ((hfs_isrbtree_active(vcb) == true) || 
		vcb->vcbFreeExtCnt == 0 || 
	    vcb->vcbFreeExt[0].blockCount == 0) {
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		HFS_MOUNT_UNLOCK(vcb, TRUE);
		if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
			KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0);
		return dskFulErr;
	}
	lck_spin_unlock(&vcb->vcbFreeExtLock);
	HFS_MOUNT_UNLOCK(vcb, TRUE);

	lck_spin_lock(&vcb->vcbFreeExtLock);

	//	Just grab up to maxBlocks of the first (largest) free extent.
	*actualStartBlock = vcb->vcbFreeExt[0].startBlock;
	foundBlocks = vcb->vcbFreeExt[0].blockCount;
	if (foundBlocks > maxBlocks)
		foundBlocks = maxBlocks;
	*actualNumBlocks = foundBlocks;
	
	if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		// since sparse volumes keep the free extent list sorted by starting
		// block number, the list won't get re-ordered, it may only shrink
		//
		vcb->vcbFreeExt[0].startBlock += foundBlocks;
		vcb->vcbFreeExt[0].blockCount -= foundBlocks;
		if (vcb->vcbFreeExt[0].blockCount == 0) {
			for(i=1; i < vcb->vcbFreeExtCnt; i++) {
				vcb->vcbFreeExt[i-1] = vcb->vcbFreeExt[i];
			}
			vcb->vcbFreeExtCnt--;
		}

		goto done;
	}

	//	Adjust the start and length of that extent.
	newStartBlock = vcb->vcbFreeExt[0].startBlock + foundBlocks;
	newBlockCount = vcb->vcbFreeExt[0].blockCount - foundBlocks;
		
	
	//	The first extent might not be the largest anymore.  Bubble up any
	//	(now larger) extents to the top of the list.
	for (i=1; i<vcb->vcbFreeExtCnt; ++i)
	{
		if (vcb->vcbFreeExt[i].blockCount > newBlockCount)
		{
			vcb->vcbFreeExt[i-1].startBlock = vcb->vcbFreeExt[i].startBlock;
			vcb->vcbFreeExt[i-1].blockCount = vcb->vcbFreeExt[i].blockCount;
		}
		else
		{
			break;
		}
	}
	
	//	If this is now the smallest known free extent, then it might be smaller than
	//	other extents we didn't keep track of.  So, just forget about this extent.
	//	After the previous loop, (i-1) is the index of the extent we just allocated from.
	if (newBlockCount == 0)
	{
		// then just reduce the number of free extents since this guy got deleted
		--vcb->vcbFreeExtCnt;
	}
	else
	{
		//	It's not the smallest, so store it in its proper place
		vcb->vcbFreeExt[i-1].startBlock = newStartBlock;
		vcb->vcbFreeExt[i-1].blockCount = newBlockCount;
	}

done:
	lck_spin_unlock(&vcb->vcbFreeExtLock);
	// sanity check
	if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) 
	{
		printf ("hfs: BlockAllocateKnown() found allocation overflow on \"%s\"", vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
		err = EIO;
	} 
	else 
	{
		//
		//	Now mark the found extent in the bitmap
		//
		err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks);
	}

	sanity_check_free_ext(vcb, 0);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}

/*
 * BlockMarkAllocated
 * 
 * This is a wrapper function around the internal calls which will actually mark the blocks
 * as in-use.  It will mark the blocks in the red-black tree if appropriate.  We need to do 
 * this logic here to avoid callers having to deal with whether or not the red-black tree
 * is enabled.
 */


OSErr BlockMarkAllocated(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	struct hfsmount *hfsmp;
	
	hfsmp = VCBTOHFS(vcb);
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp)) {
		int err;
		
		if ((startingBlock >= hfsmp->offset_block_end) && 
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			/* 
			 * We're manipulating a portion of the bitmap that is not controlled by the
			 * red-black tree.  Just update the bitmap and don't bother manipulating the tree
			 */
			goto justbitmap;
		}
		
		err = BlockMarkAllocatedRBTree(vcb, startingBlock, numBlocks);
		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
				if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n",
						   startingBlock, numBlocks);
				}
				check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_ALLOC);		
			}
		}
		return err;

	}
justbitmap:
#endif

	return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks);
	
}



/*
_______________________________________________________________________

Routine:	BlockMarkAllocatedInternal

Function:	Mark a contiguous group of blocks as allocated (set in the
			bitmap).  It assumes those bits are currently marked
			deallocated (clear in the bitmap).  Note that this function
			must be called regardless of whether or not the bitmap or
			tree-based allocator is used, as all allocations must correctly
			be marked on-disk.  If the tree-based approach is running, then
			this will be done before the node is removed from the tree.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	First block number to mark as allocated
	numBlocks		Number of blocks to mark as allocated
_______________________________________________________________________
*/
static 
OSErr BlockMarkAllocatedInternal (
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	OSErr			err;
	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	u_int32_t		firstBit;		//	Bit index within word of first bit to allocate
	u_int32_t		numBits;		//	Number of bits in word to allocate
	u_int32_t		*buffer = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
	// XXXdbg
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0);

	hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks);
	
	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
	if (err != noErr) goto Exit;
	//
	//	Initialize currentWord, and wordsLeft.
	//
	{
		u_int32_t wordIndexInBlock;
		
		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}
	
	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	If the first block to allocate doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
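		/*
		 * Worked example (illustrative): firstBit == 29 and numBlocks == 2 gives
		 * bitMask = 0xFFFFFFFF >> 29 = 0x00000007; numBits is then clamped to 2
		 * and the trailing bit is cleared, leaving bitMask = 0x00000006 -- exactly
		 * the two bits for this allocation.
		 */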
		bitMask = kAllBitsSetInWord >> firstBit;	//	turn off all bits before firstBit
		numBits = kBitsPerWord - firstBit;			//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	//	turn off bits after last
		}
#if DEBUG_BUILD
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);		//	set the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to allocate

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Allocate whole words (32 blocks) at a time.
	//

	bitMask = kAllBitsSetInWord;					//	put this in a register for 68K
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block
			
			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG_BUILD
		if (*currentWord != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord = SWAP_BE32 (bitMask);
		numBlocks -= kBitsPerWord;

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}
	
	//
	//	Allocate any remaining blocks.
	//
	
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
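		//	e.g. (illustrative): numBlocks == 3 gives ~(0xFFFFFFFF >> 3) = 0xE0000000,
		//	i.e. only the first three bits of the word.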
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block
			
			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}
			
			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
#if DEBUG_BUILD
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			panic("hfs: BlockMarkAllocatedInternal: blocks already allocated!");
		}
#endif
		*currentWord |= SWAP_BE32 (bitMask);			//	set the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

Exit:

	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * This is a wrapper function around BlockMarkAllocatedInternal.  This function is
 * called when the RB Tree-based allocator needs to mark a block as in-use.
 * This function should take any locks that would not normally be 
 * necessary for the bitmap allocator, and then call the internal function.  Once 
 * the on-disk data structures are updated properly, then this will remove the 
 * appropriate node from the tree.
 */

static OSErr BlockMarkAllocatedRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		numBlocks)
{
	OSErr err;
	struct hfsmount *hfsmp  = VCBTOHFS(vcb);
	int rb_err = 0;

	
	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
		if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n",
				   startingBlock, numBlocks);
		}
		check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);		
	}
	
	err = BlockMarkAllocatedInternal (vcb, startingBlock, numBlocks);
	
	if (err == noErr) {

		if (ALLOC_DEBUG) {
			if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet!\n",
					   startingBlock, numBlocks);
			}
		}
		
		/*
		 * Mark the blocks in the offset tree.
		 */
		rb_err = extent_tree_offset_alloc_space(&hfsmp->offset_tree, numBlocks, startingBlock);
		if (rb_err) {
			if (ALLOC_DEBUG) {
				printf("HFS RBTree Allocator: Could not mark blocks as in-use! %d \n", rb_err);
			}
			
			/* 
			 * We may be called from the BlockMarkAllocated interface, in which case, they would
			 * not be picking extents from their start. Do a check here, find if the specified
			 * extent is free, and if it is, then find the containing node.
			 */
			extent_node_t *node = NULL;
			extent_node_t search_sentinel;
			search_sentinel.offset = startingBlock;
			
			node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
			
			if (node) {
				rb_err = extent_tree_offset_alloc_unaligned (&hfsmp->offset_tree, numBlocks, startingBlock);
			}
			
			if (ALLOC_DEBUG) {
				if (rb_err) {
					printf ("HFS RBTree Allocator: Still Couldn't mark blocks as in-use! %d\n", rb_err);
				}
			}
		}
		if (ALLOC_DEBUG) {
			check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);		
		}
	}
	
	/* 
	 * If we encountered a red-black tree error, for now, we immediately back off and force
	 * destruction of rb-tree.  Set the persistent error-detected bit in the mount point.
	 * That will ensure that even if we reach a low-water-mark in the future we will still
	 * not allow the rb-tree to be used.  On next mount, we will force a re-construction from
	 * on-disk state.  As a fallback, we will now resort to the bitmap-scanning behavior.
	 */
	if (rb_err) {
		/* Mark RB-Trees with error */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
		DestroyTrees(hfsmp);
		/* Reset the Free Ext Cache since we'll be using it now. */
		ResetVCBFreeExtCache(hfsmp);
		printf("HFS: Red-Black Allocator Tree BlockMarkAllocated error\n");
	}
	
	return err;
}
#endif



/*
 * BlockMarkFree
 * 
 * This is a wrapper function around the internal calls which will actually mark the blocks
 * as freed.  It will mark the blocks in the red-black tree if appropriate.  We need to do 
 * this logic here to avoid callers having to deal with whether or not the red-black tree
 * is enabled.
 *
 */
OSErr BlockMarkFree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	struct hfsmount *hfsmp;
	hfsmp = VCBTOHFS(vcb);
#if CONFIG_HFS_ALLOC_RBTREE		
	if (hfs_isrbtree_active(hfsmp)) {		
		int err;
		
		if ((startingBlock >= hfsmp->offset_block_end) && 
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			/* 
			 * We're manipulating a portion of the bitmap that is not controlled by the
			 * red-black tree.  Just update the bitmap and don't bother manipulating the tree
			 */
			goto justbitmap;
		}
		
		err = BlockMarkFreeRBTree(vcb, startingBlock, numBlocks);
		if (err == noErr) {
			if ( ALLOC_DEBUG ) {
				REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
				if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
					panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use!\n",
						   startingBlock, numBlocks);
				}
				check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_FREE);		
			}
		}
		return err;
	}
justbitmap:
#endif
	return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
	
}


/*
 * BlockMarkFreeUnused
 * 
 * Scan the bitmap blocks beyond the end of the current file system for bits 
 * that are marked as used.  If any of the bits are marked as used,
 * this function marks them free.
 *
 * Note:  This was specifically written to mark all bits beyond the 
 * end of the current file system during hfs_extendfs(), which makes
 * sure that all the new blocks added to the file system are 
 * marked as free.   We expect that all the blocks beyond the end of 
 * the current file system are always marked as free, but there might 
 * be cases where they are marked as used.  This function assumes that 
 * the number of blocks marked as used incorrectly is relatively
 * small, otherwise this can overflow the journal transaction size
 * on certain file system configurations (for example, a large unused 
 * bitmap with a relatively small journal). 
 *
 * Input:
 * 	startingBlock: First block of the range to mark unused
 * 	numBlocks: Number of blocks in the range to mark unused
 *
 * Returns: zero on success, non-zero on error.
 */
OSErr BlockMarkFreeUnused(ExtendedVCB *vcb, u_int32_t startingBlock, register u_int32_t	numBlocks)
{
	int error = 0;
	struct hfsmount *hfsmp = VCBTOHFS(vcb);
	u_int32_t curNumBlocks;
	u_int32_t  bitsPerBlock;
	u_int32_t lastBit;

	/* Use the optimal bitmap I/O size instead of bitmap block size */
	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;

	/* 
	 * First clear any non bitmap allocation block aligned bits
	 *
	 * Calculate the first bit in the bitmap block next to 
	 * the bitmap block containing the bit for startingBlock.
	 * Using this value, we calculate the total number of 
	 * bits to be marked unused from startingBlock to the 
	 * end of bitmap block containing startingBlock. 
	 */
	lastBit = ((startingBlock + (bitsPerBlock - 1))/bitsPerBlock) * bitsPerBlock;
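	/*
	 * Worked example (illustrative, assuming a 4 KiB bitmap I/O size, so
	 * bitsPerBlock == 32768): startingBlock == 40000 rounds lastBit up to
	 * 65536, and curNumBlocks == 25536 bits take us to the next bitmap
	 * block boundary.
	 */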
	curNumBlocks = lastBit - startingBlock;
	if (curNumBlocks > numBlocks) {
		curNumBlocks = numBlocks;
	}
	error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
	if (error) {
		return error;
	}
	startingBlock += curNumBlocks;
	numBlocks -= curNumBlocks;

	/* 
	 * Check a full bitmap block for any 'used' bit.  If any bit is used,
	 * mark all the bits only in that bitmap block as free.  This ensures
	 * that we do not write unmodified bitmap blocks and do not 
	 * overwhelm the journal. 
	 *
	 * The code checks one full bitmap block at a time, and marks the 
	 * entire bitmap block as free only if some bit in that bitmap 
	 * block is marked as used.  At the end, it handles the last bitmap 
	 * block, which might be partially full, by checking only up to the 
	 * caller-specified last bit and, if any bit is set, marking only 
	 * that range as free.
	 */
	while (numBlocks) {
		if (numBlocks >= bitsPerBlock) {
			curNumBlocks = bitsPerBlock;
		} else {
			curNumBlocks = numBlocks;
		}
		if (hfs_isallocated(hfsmp, startingBlock, curNumBlocks) == true) {
			error = BlockMarkFreeInternal(vcb, startingBlock, curNumBlocks, false);
			if (error) {
				return error;
			}
		}
		startingBlock += curNumBlocks;
		numBlocks -= curNumBlocks;
	}

	return error;
}

/*
_______________________________________________________________________

Routine:	BlockMarkFreeInternal

Function:	Mark a contiguous group of blocks as free (clear in the
			bitmap).  It assumes those bits are currently marked
			allocated (set in the bitmap).

Inputs:
	vcb				Pointer to volume where space is to be freed
	startingBlock	First block number to mark as freed
	numBlocks		Number of blocks to mark as freed
	do_validate 	If true, validate the blocks being deallocated:
					check that they are within totalBlocks for the
					current volume and that they were allocated
					before they are marked free.
_______________________________________________________________________
*/
static 
OSErr BlockMarkFreeInternal(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock_in,
	register u_int32_t	numBlocks_in,
	Boolean 		do_validate)
{
	OSErr			err;
	u_int32_t	startingBlock = startingBlock_in;
	u_int32_t	numBlocks = numBlocks_in;
	register u_int32_t	*currentWord;	//	Pointer to current word within bitmap block
	register u_int32_t	wordsLeft;		//	Number of words left in this bitmap block
	register u_int32_t	bitMask;		//	Word with given bits already set (ready to OR in)
	u_int32_t			firstBit;		//	Bit index within word of first bit to allocate
	u_int32_t			numBits;		//	Number of bits in word to allocate
	u_int32_t			*buffer = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
    // XXXdbg
	struct hfsmount *hfsmp = VCBTOHFS(vcb);

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0);

	/*
	 * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
	 * need to be able to free blocks being relocated during hfs_truncatefs.
	 */
	if ((do_validate == true) && 
	    (startingBlock + numBlocks > vcb->totalBlocks)) {
		if (ALLOC_DEBUG) {
			panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN);
		}
		
		printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
		hfs_mark_volume_inconsistent(vcb);
		err = EIO;
		goto Exit;
	}

	//
	//	Pre-read the bitmap block containing the first word of allocation
	//

	err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
	if (err != noErr) goto Exit;
	// XXXdbg
	if (hfsmp->jnl) {
		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
	}

	//
	//	Initialize currentWord, and wordsLeft.
	//
	{
		u_int32_t wordIndexInBlock;
		
		bitsPerBlock  = vcb->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}
	
	//
	//	If the first block to free doesn't start on a word
	//	boundary in the bitmap, then treat that first word
	//	specially.
	//

	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;	//	turn off all bits before firstBit
		numBits = kBitsPerWord - firstBit;			//	number of remaining bits in this word
		if (numBits > numBlocks) {
			numBits = numBlocks;					//	entire allocation is inside this one word
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));	//	turn off bits after last
		}
		if ((do_validate == true) && 
		    (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
			goto Corruption;
		}
		*currentWord &= SWAP_BE32 (~bitMask);		//	clear the bits in the bitmap
		numBlocks -= numBits;						//	adjust number of blocks left to free

		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}

	//
	//	Free whole words (32 blocks) at a time.
	//

	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;			//	generate a block number in the next bitmap block
			
			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}

			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((do_validate == true) && 
		    (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) {
			goto Corruption;
		}
		*currentWord = 0;							//	clear the entire word
		numBlocks -= kBitsPerWord;
		
		++currentWord;								//	move to next word
		--wordsLeft;								//	one less word left in this block
	}
	
	//
	//	Free any remaining blocks.
	//
	
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);	//	set first numBlocks bits
		if (wordsLeft == 0) {
			//	Read in the next bitmap block
			startingBlock += bitsPerBlock;				//	generate a block number in the next bitmap block
			
			buffer = NULL;
			err = ReleaseBitmapBlock(vcb, blockRef, true);
			if (err != noErr) goto Exit;

			err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef);
			if (err != noErr) goto Exit;

			// XXXdbg
			if (hfsmp->jnl) {
				journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
			}
			
			//	Readjust currentWord and wordsLeft
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((do_validate == true) && 
		    (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) {
			goto Corruption;
		}
		*currentWord &= SWAP_BE32 (~bitMask);			//	clear the bits in the bitmap

		//	No need to update currentWord or wordsLeft
	}

Exit:

	if (buffer)
		(void)ReleaseBitmapBlock(vcb, blockRef, true);

	if (err == noErr) {
		hfs_unmap_free_extent(vcb, startingBlock_in, numBlocks_in);
	}

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0);

	return err;

Corruption:
#if DEBUG_BUILD
	panic("hfs: BlockMarkFreeInternal: blocks not allocated!");
#else
	printf ("hfs: BlockMarkFreeInternal() trying to free unallocated blocks (%u,%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN);
	hfs_mark_volume_inconsistent(vcb);
	err = EIO;
	goto Exit;
#endif
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * This is a wrapper function around BlockMarkFreeInternal.  This function is
 * called when the RB Tree-based allocator needs to mark a block as no longer
 * in use.  This function should take any locks that would not normally be 
 * necessary for the bitmap deallocator, and then call the internal function.  Once 
 * the on-disk data structures are updated properly, this will update an
 * existing rb-tree node if possible, or else create a new one. 
 */

OSErr BlockMarkFreeRBTree(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	register u_int32_t	numBlocks)
{
	OSErr err;
	struct hfsmount *hfsmp  = VCBTOHFS(vcb);
	int rb_err = 0;
	
	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);
		if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
			panic ("HFS RBTree Allocator: Trying to free blocks starting @ %x for %x but blocks not in use! \n",
				   startingBlock, numBlocks);
		}
		check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC);		
	}	
	
	err = BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true);
	
	if (err == noErr) {
		
		/*
		 * During a filesystem truncation, we may need to relocate files out of the
		 * portion of the bitmap that is no longer controlled by the r/b tree. 
		 * In this case, just update the bitmap and do not attempt to manipulate the tree.
		 */
		if ((startingBlock >= hfsmp->offset_block_end) && 
			(hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) {
			goto free_error;
		}
		
		extent_node_t *newnode;
		
		if (ALLOC_DEBUG) {
			/* 
			 * Validate that the blocks in question are not allocated in the bitmap, and that they're
			 * not in the offset tree, since it should be tracking free extents, rather than allocated 
			 * extents
			 */
			if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) {
				panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks still marked in-use!\n",
					   startingBlock, numBlocks);
			}
		}		
		
		if ((hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) == 0) {
			if (startingBlock >= hfsmp->offset_block_end) {
				/*
				 * If the tree generation code has not yet finished scanning the
				 * bitmap region containing this extent, do nothing.  If the start 
				 * of the range to be deallocated is greater than the current high 
				 * watermark on the offset tree, just bail out and let the scanner catch up with us. 
				 */							
				rb_err = 0;
				goto free_error;
			}
		}
		
		newnode = extent_tree_free_space(&hfsmp->offset_tree, numBlocks, startingBlock);
		if (newnode == NULL) {
			rb_err = 1;
			goto free_error;
		}
		
		if (ALLOC_DEBUG) {
			check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE);
		}
		
	}
	
free_error:
	/* 
	 * We follow the same principle as in BlockMarkAllocatedRB. 
	 * If we encounter an error in adding the extents to the rb-tree, then immediately
	 * back off, destroy the trees, and persistently set a bit in the runtime hfsmp flags
	 * to indicate we should not use the rb-tree until next mount, when we can force a rebuild.
	 */
	if (rb_err) {
		/* Mark RB-Trees with error */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED;
		DestroyTrees(hfsmp);
		/* Reset the Free Ext Cache since we'll be using it now. */
		ResetVCBFreeExtCache(hfsmp);
		printf("HFS: Red-Black Allocator Tree BlockMarkFree error\n");
	}
	
	
	return err;
	
}
#endif

/*
_______________________________________________________________________

Routine:	BlockFindContiguous

Function:	Find a contiguous range of blocks that are free (bits
			clear in the bitmap).  If a contiguous range of the
			minimum size can't be found, an error will be returned.
			This is only needed to support the bitmap-scanning logic,
			as the red-black tree should be able to do this by internally
			searching its tree.

Inputs:
	vcb				Pointer to volume where space is to be allocated
	startingBlock	Preferred first block of range
	endingBlock		Last possible block in range + 1
	minBlocks		Minimum number of blocks needed.  Must be > 0.
	maxBlocks		Maximum (ideal) number of blocks desired
	useMetaZone	OK to dip into metadata allocation zone

Outputs:
	actualStartBlock	First block of range found, or 0 if error
	actualNumBlocks		Number of blocks found, or 0 if error

Returns:
	noErr			Found at least minBlocks contiguous
	dskFulErr		No contiguous space found, or all less than minBlocks
_______________________________________________________________________
*/

static OSErr BlockFindContiguous(
	ExtendedVCB		*vcb,
	u_int32_t		startingBlock,
	u_int32_t		endingBlock,
	u_int32_t		minBlocks,
	u_int32_t		maxBlocks,
	Boolean			useMetaZone,
	u_int32_t		*actualStartBlock,
	u_int32_t		*actualNumBlocks)
{
	OSErr			err;
	register u_int32_t	currentBlock;		//	Block we're currently looking at.
	u_int32_t			firstBlock;			//	First free block in current extent.
	u_int32_t			stopBlock;			//	If we get to this block, stop searching for first free block.
	u_int32_t			foundBlocks;		//	Number of contiguous free blocks in current extent.
	u_int32_t			*buffer = NULL;
	register u_int32_t	*currentWord;
	register u_int32_t	bitMask;
	register u_int32_t	wordsLeft;
	register u_int32_t	tempWord;
	uintptr_t  blockRef;
	u_int32_t  wordsPerBlock;
	u_int32_t  updated_free_extent = 0;

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0);

	/*
	 * When we're skipping the metadata zone and the start/end
	 * range overlaps with the metadata zone then adjust the 
	 * start to be outside of the metadata zone.  If the range
	 * is entirely inside the metadata zone then we can deny the
	 * request (dskFulErr).
	 */
	if (!useMetaZone && (vcb->hfs_flags & HFS_METADATA_ZONE)) {
		if (startingBlock <= vcb->hfs_metazone_end) {
			if (endingBlock > (vcb->hfs_metazone_end + 2))
				startingBlock = vcb->hfs_metazone_end + 1;
			else
				goto DiskFull;
		}
	}

	if ((endingBlock - startingBlock) < minBlocks)
	{
		//	The set of blocks we're checking is smaller than the minimum number
		//	of blocks, so we couldn't possibly find a good range.
		goto DiskFull;
	}

	stopBlock = endingBlock - minBlocks + 1;
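	//	stopBlock is one past the last block at which a run of at least minBlocks
	//	free blocks could still begin; once the search for the start of a free
	//	extent reaches stopBlock, it can stop.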
	currentBlock = startingBlock;
	firstBlock = 0;

	/*
	 * Skip over metadata blocks.
	 */
	if (!useMetaZone)
		currentBlock = NextBitmapBlock(vcb, currentBlock);

	//
	//	Pre-read the first bitmap block.
	//
	err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
	if ( err != noErr ) goto ErrorExit;

	//
	//	Figure out where currentBlock is within the buffer.
	//
	wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord;

	wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1);	// Current index into buffer
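	//	Note: the & (wordsPerBlock-1) mask assumes wordsPerBlock is a power of
	//	two; it is then equivalent to (currentBlock / kBitsPerWord) % wordsPerBlock.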
	currentWord = buffer + wordsLeft;
	wordsLeft = wordsPerBlock - wordsLeft;
	
	do
	{
		foundBlocks = 0;
		
		//============================================================
		//	Look for a free block, skipping over allocated blocks.
		//============================================================

		//
		//	Check an initial partial word (if any)
		//
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{			
			tempWord = SWAP_BE32(*currentWord);			//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (tempWord & bitMask)
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find an unused bit (bitMask != 0), or run out of bits (bitMask == 0)? 
			if (bitMask)
				goto FoundUnused;

			//	Didn't find any unused bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}

		//
		//	Check whole words
		//
		while (currentBlock < stopBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					currentBlock = NextBitmapBlock(vcb, currentBlock);
					if (currentBlock >= stopBlock) {
						goto LoopExit;
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;
				
				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}
			
			//	See if any of the bits are clear
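			//	(tempWord + 1) wraps to zero only when tempWord == 0xFFFFFFFF (all
			//	blocks in this word allocated), so a non-zero result means at least
			//	one bit is clear.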
			if ((tempWord = SWAP_BE32(*currentWord)) + 1)	//	non-zero if any bits were clear
			{
				//	Figure out which bit is clear
				bitMask = kHighBitInWordMask;
				while (tempWord & bitMask)
				{
					bitMask >>= 1;
					++currentBlock;
				}
				
				break;		//	Found the free bit; break out to FoundUnused.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;
		}

FoundUnused:
		//	Make sure the unused bit is early enough to use
		if (currentBlock >= stopBlock)
		{
			break;
		}

		//	Remember the start of the extent
		firstBlock = currentBlock;

		//============================================================
		//	Count the number of contiguous free blocks.
		//============================================================

		//
		//	Check an initial partial word (if any)
		//
		bitMask = currentBlock & kBitsWithinWordMask;
		if (bitMask)
		{
			tempWord = SWAP_BE32(*currentWord);			//	Fetch the current word only once
			bitMask = kHighBitInWordMask >> bitMask;
			while (bitMask && !(tempWord & bitMask))
			{
				bitMask >>= 1;
				++currentBlock;
			}

			//	Did we find a used bit (bitMask != 0), or run out of bits (bitMask == 0)? 
			if (bitMask)
				goto FoundUsed;

			//	Didn't find any used bits, so we're done with this word.
			++currentWord;
			--wordsLeft;
		}
		
		//
		//	Check whole words
		//
		while (currentBlock < endingBlock)
		{
			//	See if it's time to read another block.
			if (wordsLeft == 0)
			{
				buffer = NULL;
				err = ReleaseBitmapBlock(vcb, blockRef, false);
				if (err != noErr) goto ErrorExit;

				/*
				 * Skip over metadata blocks.
				 */
				if (!useMetaZone) {
					u_int32_t nextBlock;

					nextBlock = NextBitmapBlock(vcb, currentBlock);
					if (nextBlock != currentBlock) {
						goto LoopExit;  /* allocation gap, so stop */
					}
				}

				err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef);
				if ( err != noErr ) goto ErrorExit;
				
				currentWord = buffer;
				wordsLeft = wordsPerBlock;
			}
			
			//	See if any of the bits are set
			if ((tempWord = SWAP_BE32(*currentWord)) != 0)
			{
				//	Figure out which bit is set
				bitMask = kHighBitInWordMask;
				while (!(tempWord & bitMask))
				{
					bitMask >>= 1;
					++currentBlock;
				}
				
				break;		//	Found the used bit; break out to FoundUsed.
			}

			//	Keep looking at the next word
			currentBlock += kBitsPerWord;
			++currentWord;
			--wordsLeft;
			
			//	If we found at least maxBlocks, we can quit early.
			if ((currentBlock - firstBlock) >= maxBlocks)
				break;
		}

FoundUsed:
		//	Make sure we didn't run out of bitmap looking for a used block.
		//	If so, pin to the end of the bitmap.
		if (currentBlock > endingBlock)
			currentBlock = endingBlock;

		//	Figure out how many contiguous free blocks there were.
		//	Pin the answer to maxBlocks.
		foundBlocks = currentBlock - firstBlock;
		if (foundBlocks > maxBlocks)
			foundBlocks = maxBlocks;
		if (foundBlocks >= minBlocks)
			break;		//	Found what we needed!

		/* We did not find the total number of blocks we were looking for, but 
		 * let's add this free block run to our free extent cache list.
		 */
		updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);

	} while (currentBlock < stopBlock);
LoopExit:

	//	Return the outputs.
	if (foundBlocks < minBlocks)
	{
DiskFull:
		err = dskFulErr;
ErrorExit:
		*actualStartBlock = 0;
		*actualNumBlocks = 0;
	}
	else
	{
		err = noErr;
		*actualStartBlock = firstBlock;
		*actualNumBlocks = foundBlocks;
		/*
		 * Sanity check for overflow
		 */
		if ((firstBlock + foundBlocks) > vcb->allocLimit) {
			panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x",
				vcb->vcbVN, startingBlock, endingBlock, currentBlock,
				firstBlock, stopBlock, minBlocks, foundBlocks);
		}
	}
	
	if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) {
		int i;
		u_int32_t min_start = vcb->totalBlocks;
			
		// set the nextAllocation pointer to the smallest free block number
		// we've seen so on the next mount we won't rescan unnecessarily
		lck_spin_lock(&vcb->vcbFreeExtLock);
		for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) {
			if (vcb->vcbFreeExt[i].startBlock < min_start) {
				min_start = vcb->vcbFreeExt[i].startBlock;
			}
		}
		lck_spin_unlock(&vcb->vcbFreeExtLock);
		if (min_start != vcb->totalBlocks) {
			if (min_start < vcb->nextAllocation) {
				vcb->nextAllocation = min_start;
			}
			if (min_start < vcb->sparseAllocation) {
				vcb->sparseAllocation = min_start;
			}
		}
	}
	
	if (buffer)
		(void) ReleaseBitmapBlock(vcb, blockRef, false);

	if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0);

	return err;
}


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Wrapper function around hfs_isrbtree_allocated.  This just takes the start offset,
 * the number of blocks, and whether the caller expects those blocks to be
 * free or allocated.  This function is designed to be used primarily with the debug #ifdef
 * enabled, so it results in a panic if anything unexpected occurs.
 *
 * shouldBeFree will be nonzero if the caller expects the zone to be free.
 */

void check_rbtree_extents (struct hfsmount *hfsmp, u_int32_t startBlocks,
								 u_int32_t numBlocks, int shouldBeFree) {
	int alloc;
	extent_node_t *node1 = NULL;
	u_int32_t off1 = 0;
	u_int32_t len1 = 0;
	alloc = hfs_isrbtree_allocated (hfsmp, startBlocks, numBlocks, &node1);
	
	if (node1) {
		off1 = node1->offset;
		len1 = node1->length;
	}
	
	if (shouldBeFree) {
		/* 
		 * If the region should be free, then we expect to see extents in the tree
		 * matching this start and length.  Alloc != 0 means some portion of the extent
		 * specified was allocated. 
		 */ 
		if (alloc != 0){
			panic ("HFS check_rbtree_extents: Node (%p) do not exist! "
				   "node1 off (%d),len(%d),, start(%d) end(%d)\n",
				   node1, off1, len1, startBlocks, numBlocks);
		}
	}
	else {
		/* 
		 * Otherwise, this means that the region should be allocated, and if we find
		 * an extent matching it, that's bad.
		 */
		if (alloc == 0){
			panic ("HFS check_rbtree_extents: Node (%p) exists! "
				   "node1 off (%d),len(%d), start(%d) end(%d)\n",
				   node1, off1, len1, startBlocks, numBlocks);
		}
	}
}
#endif

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Exhaustive validation search.  This function iterates over all allocation blocks and 
 * compares their status in the red-black tree vs. the allocation bitmap.  If the two are out of sync
 * then it will panic.  Bitmap lock must be held while this function is run.
 *
 * Because this function requires a red-black tree search to validate every allocation block, it is
 * very expensive and should ONLY be run in debug mode, and even then, infrequently. 
 * 
 * 'end' is non-inclusive, so it should represent the total number of blocks in the volume.
 * 
 */
void
hfs_validate_rbtree (struct hfsmount *hfsmp, u_int32_t start, u_int32_t end){
	
	u_int32_t current;
	extent_node_t* node1;
	
	hfs_checktreelinks (hfsmp);
	
	for (current = start; current < end; current++) {
		node1 = NULL;
		int rbtree = hfs_isrbtree_allocated(hfsmp, current, 1, &node1);
		int bitmap = hfs_isallocated(hfsmp, current, 1);
		
		if (bitmap != rbtree){
			panic("HFS: Allocator mismatch @ block %d -- bitmap %d : rbtree %d\n", 
				  current, bitmap, rbtree);
		}
	}
}

/*
 * Exhaustive Red-Black Tree Linked List verification routine.  
 *
 * This function iterates through the red-black tree's nodes, and then verifies that the linked list
 * embedded within each of the nodes accurately points to the correct node as its "next" pointer.
 * The bitmap lock must be held while this function is run.
 */

void 
hfs_checktreelinks (struct hfsmount *hfsmp) {
	extent_tree_offset_t *tree = &hfsmp->offset_tree;
	
	extent_node_t *current = NULL;
	extent_node_t *next = NULL;
	extent_node_t *treenext;
	
	current = extent_tree_off_first (tree);
	
	while (current) {
		next = current->offset_next;
		treenext = extent_tree_off_next (tree, current);
		if (next != treenext) {
			panic("hfs_checktreelinks: mismatch for node (%p), next: %p , treenext %p !\n", current, next, treenext);
		}
		current = treenext;
	}
}

#endif


#if CONFIG_HFS_ALLOC_RBTREE
/*
 * Test to see if any free blocks exist at a given offset.
 * If there exists a free-extent node at or before the specified offset, the
 * appropriate node is returned via ret_node.
 *
 * If no such node covers the range, allocated blocks exist at that offset. 
 * 
 * Allocation file lock must be held.
 *
 * Returns:
 *	1 if blocks in the range are allocated.
 *	0 if all blocks in the range are free.
 */

static int
hfs_isrbtree_allocated (struct hfsmount *hfsmp, u_int32_t startBlock, 
						u_int32_t numBlocks, extent_node_t **ret_node) {
	
	extent_node_t search_sentinel;
	extent_node_t *node = NULL;
	extent_node_t *nextnode = NULL;
	
	/*
	 * With only one tree, then we just have to validate that there are entries 
	 * in the R/B tree at the specified offset if it really is free.
	 */
	search_sentinel.offset = startBlock;
	search_sentinel.length = numBlocks;
	
	node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
	if (node) {

		*ret_node = node;
		nextnode = extent_tree_off_next (&hfsmp->offset_tree, node);
		if (nextnode != node->offset_next) {
			panic ("hfs_rbtree_isallocated: Next pointers out of sync!\n");
		}
				
		/* 
		 * Check to see if it is a superset of our target range. Because we started
		 * with the offset or some offset prior to it, then we know the node's offset is 
		 * at least <= startBlock.  So, if the end of the node is greater than the end of
		 * our target range, then the whole range is free.
		 */ 
	
		if ((node->offset + node->length) >= (startBlock + numBlocks)) {
			if (node->offset > startBlock) {
				panic ("hfs_rbtree_isallocated: bad node ordering!");
			}	
			return 0;
		}
	}	
	/* 
	 * We got here if either our node search resulted in a node whose extent 
	 * was strictly before our target offset, or we couldn't find a previous node
	 * at all (the beginning of the volume).  If the former, then we can infer that 
	 * at least one block in the target range is allocated since the next node's offset
	 * must be greater than startBlock.
	 *
	 * Either way, this means that the target range is unavailable to allocate, so
	 * just return 1.
	 */	
	return 1;
}


#endif

/* 
 * Count number of bits set in the given 32-bit unsigned number 
 *
 * Returns:
 * 	Number of bits set
 */
static int num_bits_set(u_int32_t num) 
{
	int count;
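	/*
	 * Each pass of num &= (num - 1) clears the lowest set bit (Kernighan's
	 * method), so the loop runs once per set bit.  For example,
	 * 0x50 -> 0x40 -> 0x00 yields count = 2.
	 */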

	for (count = 0; num; count++) {
		num &= num - 1;
	}

	return count;
}

/* 
 * For a given range of blocks, find the total number of blocks 
 * allocated.  If 'stop_on_first' is true, it stops as soon as it 
 * encounters the first allocated block.  This option is useful 
 * to determine if any block is allocated or not. 
 *
 * Inputs:
 * 	startingBlock	First allocation block number of the range to be scanned.
 * 	numBlocks	Total number of blocks that need to be scanned.
 * 	stop_on_first	Stop the search after the first allocated block is found.
 *
 * Output:
 * 	allocCount	Total number of allocation blocks allocated in the given range.
 *
 * 			On error, it is the number of allocated blocks found 
 * 			before the function got an error. 
 *
 * 			If 'stop_on_first' is set, 
 * 				allocCount = 1 if any allocated block was found.
 * 				allocCount = 0 if no allocated block was found.
 *
 * Returns:
 * 	0 on success, non-zero on failure. 
 */
static int 
hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, 
		u_int32_t numBlocks, Boolean stop_on_first, u_int32_t *allocCount)
{
	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
	u_int32_t  wordsLeft;      // Number of words left in this bitmap block
	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
	u_int32_t  numBits;        // Number of bits in word to allocate
	u_int32_t  *buffer = NULL;
	uintptr_t  blockRef;
	u_int32_t  bitsPerBlock;
	u_int32_t  wordsPerBlock;
 	u_int32_t  blockCount = 0;
	int  error;

	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_START, startingBlock, numBlocks, stop_on_first, 0, 0);

	/*
	 * Pre-read the bitmap block containing the first word of allocation
	 */
	error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
	if (error)
		goto JustReturn;

	/*
	 * Initialize currentWord, and wordsLeft.
	 */
	{
		u_int32_t wordIndexInBlock;
		
		bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
		wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;

		wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
		currentWord = buffer + wordIndexInBlock;
		wordsLeft = wordsPerBlock - wordIndexInBlock;
	}
	
	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	if (firstBit != 0) {
		bitMask = kAllBitsSetInWord >> firstBit;
		numBits = kBitsPerWord - firstBit;
		if (numBits > numBlocks) {
			numBits = numBlocks;
			bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
		numBlocks -= numBits;
		++currentWord;
		--wordsLeft;
	}

	/*
	 * Test whole words (32 blocks) at a time.
	 */
	while (numBlocks >= kBitsPerWord) {
		if (wordsLeft == 0) {
			/* Read in the next bitmap block. */
			startingBlock += bitsPerBlock;
			
			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			/* Readjust currentWord and wordsLeft. */
			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if (*currentWord != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			} 
			blockCount += num_bits_set(*currentWord);
		}
		numBlocks -= kBitsPerWord;
		++currentWord;
		--wordsLeft;
	}
	
	/*
	 * Test any remaining blocks.
	 */
	if (numBlocks != 0) {
		bitMask = ~(kAllBitsSetInWord >> numBlocks);
		if (wordsLeft == 0) {
			/* Read in the next bitmap block */
			startingBlock += bitsPerBlock;
			
			buffer = NULL;
			error = ReleaseBitmapBlock(hfsmp, blockRef, false);
			if (error) goto Exit;

			error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
			if (error) goto Exit;

			currentWord = buffer;
			wordsLeft = wordsPerBlock;
		}
		if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
			if (stop_on_first) {
				blockCount = 1;
				goto Exit;
			}
			blockCount += num_bits_set(*currentWord & SWAP_BE32 (bitMask));
		}
	}
Exit:
	if (buffer) {
		(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
	}
	if (allocCount) {
		*allocCount = blockCount;
	}

JustReturn:
	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_IS_ALLOCATED | DBG_FUNC_END, error, 0, blockCount, 0, 0);

	return (error);
}

/* 
 * Count total number of blocks that are allocated in the given 
 * range from the bitmap.  This is used to preflight total blocks 
 * that need to be relocated during volume resize.  
 *
 * The journal or allocation file lock must be held.
 *
 * Returns:
 * 	0 on success, non-zero on failure.  
 * 	On failure, allocCount is zero. 
 */
int
hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock,
		u_int32_t numBlocks, u_int32_t *allocCount)
{
	return hfs_isallocated_internal(hfsmp, startBlock, numBlocks, false, allocCount);
}

/*
 * Test to see if any blocks in a range are allocated.
 * 
 * Note:  On error, this function returns 1, which means that 
 * one or more blocks in the range are allocated.  This function 
 * is primarily used for volume resize and we do not want 
 * to report to the caller that the blocks are free when we 
 * were not able to deterministically find it out.  So on error, 
 * we always report that the blocks are allocated.  
 *
 * The journal or allocation file lock must be held.
 *
 * Returns 
 *	0 if all blocks in the range are free.
 *	1 if blocks in the range are allocated, or there was an error.
 */
int 
hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks)
{
	int error; 
	u_int32_t allocCount;

	error = hfs_isallocated_internal(hfsmp, startingBlock, numBlocks, true, &allocCount);
	if (error) {
		/* On error, we always say that the blocks are allocated 
		 * so that volume resize does not return false success.
		 */
		return 1;
	} else {
		/* The function was deterministically able to find out 
		 * if there was any block allocated or not.  In that case,
		 * the value in allocCount is good enough to be returned 
		 * back to the caller.
		 */
		return allocCount;
	}
} 

/*
 * Check to see if the red-black tree is live.  Allocation file lock must be held
 * shared or exclusive to call this function. Note that we may call this even if
 * HFS is built without activating the red-black tree code.
 */
__private_extern__
int 
hfs_isrbtree_active(struct hfsmount *hfsmp){
	
	//TODO: Update this function to deal with a truncate/resize coming in when the tree
	//isn't fully finished.  maybe we need to check the flags for something other than ENABLED?
	
#if CONFIG_HFS_ALLOC_RBTREE
	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
	}
	if (hfsmp){
		
		if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
			return 1;
		}
	}
#else
	#pragma unused (hfsmp)
#endif
	/* If the RB Tree code is not enabled, then just always return 0 */
	return 0;
}

#if CONFIG_HFS_ALLOC_RBTREE
/*
 * This function is basically the same as hfs_isallocated, except it's designed for 
 * use with the red-black tree validation code.  It assumes we're only checking whether
 * one bit is active, and that we're going to pass in the buf to use, since GenerateTree
 * calls ReadBitmapBlock and will have that buf locked down for the duration of its operation.
 *
 * This should not be called in general purpose scanning code.
 */
int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) {
	
	u_int32_t  *currentWord;   // Pointer to current word within bitmap block
	u_int32_t  bitMask;        // Word with given bits already set (ready to test)
	u_int32_t  firstBit;       // Bit index within word of first bit to allocate
	u_int32_t  numBits;        // Number of bits in word to allocate
	u_int32_t  bitsPerBlock;
	uintptr_t  blockRef;
	u_int32_t  wordsPerBlock;
	u_int32_t  numBlocks = 1;
	u_int32_t  *buffer = NULL;

	int  inuse = 0;
	int error;
	
	
	if (bp_buf) {
		/* just use passed-in buffer if avail. */
		buffer = bp_buf;
	}
	else {
		/*
		 * Pre-read the bitmap block containing the first word of allocation
		 */
		error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef);
		if (error)
			return (error);
	}
	
	/*
	 * Initialize currentWord to point at the word containing startingBlock.
	 */
	u_int32_t wordIndexInBlock;
	
	bitsPerBlock  = hfsmp->vcbVBMIOSize * kBitsPerByte;
	wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord;
	
	wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord;
	currentWord = buffer + wordIndexInBlock;
		
	/*
	 * First test any non word aligned bits.
	 */
	firstBit = startingBlock % kBitsPerWord;
	bitMask = kAllBitsSetInWord >> firstBit;
	numBits = kBitsPerWord - firstBit;
	if (numBits > numBlocks) {
		numBits = numBlocks;
		bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits));
	}
	if ((*currentWord & SWAP_BE32 (bitMask)) != 0) {
		inuse = 1;
		goto Exit;
	}
	numBlocks -= numBits;
	++currentWord;
	
Exit:
	if(bp_buf == NULL) {
		if (buffer) {
			(void)ReleaseBitmapBlock(hfsmp, blockRef, false);
		}
	}
	return (inuse);
}
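
/*
 * Worked example of the index math in hfs_isallocated_scan, assuming a
 * 4 KB bitmap I/O size (vcbVBMIOSize == 4096, so bitsPerBlock == 32768
 * and wordsPerBlock == 1024); the starting block is hypothetical:
 *
 *	startingBlock    = 70000
 *	wordIndexInBlock = (70000 & 32767) / 32 = 4464 / 32 = 139
 *	firstBit         = 70000 % 32 = 16
 *	bitMask          = (0xFFFFFFFF >> 16) & ~(0xFFFFFFFF >> 17) = 0x00008000
 *
 * i.e. the single bit tested is bit 16 (counting from the most significant
 * bit) of word 139 within the bitmap block, which is the same bit as
 * (1 << (kBitsWithinWordMask - 16)).
 */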

/* 
 * This function scans the specified bitmap block and adds the free extents it finds to
 * the mount point's offset tree.  We break this behavior out of GenerateTree so that an
 * allocating thread can invoke it if the tree does not have enough extents to satisfy 
 * an allocation request.
 * 
 * startbit		- the allocation block at which to start the scan.  For instance, we may
 *				need to start the scan in the middle of an existing bitmap block rather
 *				than at a word or block boundary.
 * endBit		- the allocation block at which to stop this scan (exclusive).
 * bitToScan	- output argument giving the next bit that needs to be scanned.
 *
 * Returns:
 *		0 on success
 *		nonzero on failure. 
 */

static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit, 
						 u_int32_t endBit, u_int32_t *bitToScan) {

	int error;
	u_int32_t curAllocBlock;
	struct buf *blockRef = NULL;
	u_int32_t *buffer = NULL;
	u_int32_t wordIndexInBlock;
	u_int32_t blockSize = (u_int32_t)hfsmp->vcbVBMIOSize;
	u_int32_t wordsPerBlock = blockSize / kBytesPerWord; 
	u_int32_t offset = 0;
	u_int32_t size = 0;

	/* 
	 * Read the appropriate block from the bitmap file.  ReadBitmapBlock
	 * figures out which actual on-disk block corresponds to the bit we're 
	 * looking at.
	 */	
	error = ReadBitmapBlock(hfsmp, startbit, &buffer, (uintptr_t*)&blockRef);
	if (error) {
		return error;
	}
	
	/* curAllocBlock represents the logical block we're analyzing. */
	curAllocBlock = startbit;	

	/*  Figure out which word curAllocBlock corresponds to in the block we read  */
	wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock;
	
	/* Scan a word at a time */
	while (wordIndexInBlock < wordsPerBlock) {
		u_int32_t currentWord = SWAP_BE32(buffer[wordIndexInBlock]);
		u_int32_t curBit;
		
		/* modulate curBit because it may start in the middle of a word */
		for (curBit = curAllocBlock % kBitsPerWord; curBit < kBitsPerWord; curBit++) {
			
			u_int32_t is_allocated = currentWord & (1 << (kBitsWithinWordMask - curBit));
			if (ALLOC_DEBUG) {
				u_int32_t res = hfs_isallocated_scan (hfsmp, curAllocBlock, buffer); 
				if ( ((res) && (!is_allocated)) || ((!res) && (is_allocated))) {
					panic("hfs_alloc_scan: curAllocBit %u, curBit (%d), word (0x%x), is_allocated (0x%x)  res(0x%x) \n",
						  curAllocBlock, curBit, currentWord, is_allocated, res);
				}
			}
			/* 
			 * If curBit is not allocated, keep track of the start of the free range.
			 * Increment a running tally on how many free blocks in a row we've seen.
			 */
			if (!is_allocated) {
				size++;
				if (offset == 0) {
					offset = curAllocBlock;
				}
			}
			else {
				/* 
				 * If we hit an allocated block, insert the extent that tracked the range
				 * we saw, and reset our tally counter.
				 */
				if (size != 0) {
					extent_tree_free_space(&hfsmp->offset_tree, size, offset);	
					size = 0;
					offset = 0;
				}
			}
			curAllocBlock++;
			/*
			 * Exit early if the next bit we'd analyze would take us beyond the end of the 
			 * range that we're supposed to scan.  
			 */
			if (curAllocBlock >= endBit) {
				goto DoneScanning;
			}
		}
		wordIndexInBlock++;
	}
DoneScanning:
	
	/* We may have been tracking a range of free blocks that hasn't been inserted yet. */
	if (size != 0) {
		extent_tree_free_space(&hfsmp->offset_tree, size, offset);	
	}
	/* 
	 * curAllocBlock represents the next block we need to scan while we're in this 
	 * function. 
	 */
	*bitToScan = curAllocBlock;
	
	ReleaseRBScanBitmapBlock(blockRef);

	return 0;
}
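
/*
 * Illustrative example of the scan above (block numbers are hypothetical):
 * with startbit == 100 and endBit == 110, suppose bits 100-102 are free,
 * bit 103 is allocated, and bits 104-109 are free.  The loop accumulates
 * (offset 100, size 3), hits the allocated bit, and inserts that run:
 *
 *	extent_tree_free_space(&hfsmp->offset_tree, 3, 100);
 *
 * It then accumulates (offset 104, size 6) and reaches endBit, so the
 * trailing insertion after DoneScanning adds:
 *
 *	extent_tree_free_space(&hfsmp->offset_tree, 6, 104);
 *
 * *bitToScan comes back as 110, so the next call resumes exactly where
 * this one stopped.
 */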

/*
 * Extern function that is called from mount and upgrade mount routines
 * that enable us to initialize the tree.
 */

__private_extern__
u_int32_t InitTree(struct hfsmount *hfsmp) {
	extent_tree_init (&(hfsmp->offset_tree));
	return 0;
}


/*
 * This function builds the mount point's red-black extent tree.  It uses
 * buf_meta_bread to scan through the bitmap and rebuild the tree state.
 * It is very important to use buf_meta_bread because we need to ensure that we 
 * read the most current version of the blocks that we're scanning.  If we used 
 * cluster_io, then journaled transactions could still be sitting in RAM since they are
 * written to disk in the proper location asynchronously.  
 *
 * Because this could still be running after mount has finished, we need to check,
 * after every allocation block that we work on, whether an unmount or some other 
 * operation that would cause us to tear down has come in (think downgrade mount).
 * If an unmount has come in, then abort whatever we're doing and return -1
 * to indicate we hit an error.  If we don't do this, we'd hold up unmount for
 * a very long time.
 *
 * This function assumes that the bitmap lock is acquired exclusively before being
 * called.  It will drop the lock and then re-acquire it during operation, but 
 * will always return with the lock held.
 */
__private_extern__
u_int32_t GenerateTree(struct hfsmount *hfsmp, u_int32_t endBlock, int *flags, int initialscan) {
	
	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
	
	u_int32_t *cur_block_eof;
	int error = 0;
	
	int USE_FINE_GRAINED_LOCKING = 0;
		
	/* Initialize the block counter while we hold the bitmap lock */
	cur_block_eof = &hfsmp->offset_block_end;
	
	/*
	 * This loop advances over all allocation bitmap blocks of the current region 
	 * to scan them and add the results into the red-black tree.  We use the mount point
	 * variable offset_block_end as our loop counter.  This gives us flexibility
	 * because we can release the allocation bitmap lock and allow a thread that wants 
	 * to make an allocation to grab the lock and do some scanning on our behalf while we're 
	 * waiting to re-acquire the lock.  Then, the allocating thread will only do as much bitmap 
	 * scanning as needed to fulfill its allocation.
	 * 
	 * If the other thread does IO for us, then it will update the offset_block_end 
	 * variable as well, since it will use the same hfs_alloc_scan_block function to do its bit
	 * scanning.  So when we re-grab the lock, our current EOF/loop will immediately skip us to the next 
	 * block that needs scanning.
	 */
	
	while (*cur_block_eof < endBlock) {

		/* 
		 * If the filesystem is being resized before the bitmap has been fully scanned, we'll 
		 * update our endBlock to match the current allocation limit in the hfsmp struct.
		 * The allocLimit field is only updated while holding the bitmap lock, so we won't
		 * be executing this code at the same time that the resize is going on.  
		 */
		if ((initialscan) && (endBlock != hfsmp->allocLimit)) {			
			
			/* If we're past the new/modified allocLimit, then just stop immediately.*/
			if (*cur_block_eof >= hfsmp->allocLimit ) {
				break;
			}
			endBlock = hfsmp->allocLimit;
		}
		
		/* 
		 * TODO: fix unmount stuff!
		 * See rdar://7391404
		 *
		 * Once the RB allocator is checked in, we'll want to augment it to not hold the 
		 * allocation bitmap lock for the entire duration of the tree scan.  For a first check-in
		 * it's ok to do that but we can't leave it like that forever.
		 * 
		 * The gist of the new algorithm will work as follows:
		 * if an unmount is in flight and has been detected:
		 *		abort tree-build.
		 *		unset tree-in-progress bit.
		 *		wakeup unmount thread
		 *		unlock allocation bitmap lock, fail out.
		 *
		 * The corresponding code in the unmount side should already be in place. 
		 */
		
		error = hfs_alloc_scan_block (hfsmp, *cur_block_eof, endBlock, cur_block_eof);
				
		//TODO: Fix this below!
		if (USE_FINE_GRAINED_LOCKING){
			hfs_systemfile_unlock(hfsmp, *flags);
			*flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
		}
		//TODO: Infer that if *flags == 0, we don't actually need to lock/unlock. 
	}
	
	return error;
}
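
/*
 * Illustrative sketch (not compiled into this file): the expected calling
 * pattern is to take the bitmap lock exclusively, build the tree up to the
 * current allocation limit, and drop the lock when done.  The surrounding
 * caller shown here is hypothetical.
 *
 *	int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 *	u_int32_t err = GenerateTree(hfsmp, hfsmp->allocLimit, &flags, 1);
 *	if (err == 0) {
 *		... mark the tree usable, e.g. set HFS_ALLOC_RB_ENABLED ...
 *	}
 *	hfs_systemfile_unlock(hfsmp, flags);
 *
 * Note that GenerateTree may drop and re-take the bitmap lock internally,
 * which is why the lock flags are passed by address.
 */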

/*
 * This function destroys the rb-tree(s) associated with the mount point. 
 */
__private_extern__
void DestroyTrees(struct hfsmount *hfsmp) {
	
	if (ALLOC_DEBUG) {
		REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
		printf("DestroyTrees: Validating red/black tree for vol %s\n", (char*) hfsmp->vcbVN);
		hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end );
	}
	
	/*
	 * extent_tree_destroy will start with the first entry in the tree (by offset), then
	 * iterate through the tree quickly using its embedded linked list.  This results in tree
	 * destruction in O(n) time.
	 */
	
	if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) {
		extent_tree_destroy(&hfsmp->offset_tree);
		
		/* Mark Trees as disabled */
		hfsmp->extent_tree_flags &= ~HFS_ALLOC_RB_ENABLED;		
	}
	
	return;
}	

#endif

/*
 * This function resets all of the data structures relevant to the
 * free extent cache stored in the hfsmount struct.  
 * 
 * If we are using the red-black tree code then we need to account for the fact that 
 * we may encounter situations where we need to jettison the tree.  If that is the 
 * case, then we fail-over to the bitmap scanning logic, but we need to ensure that 
 * the free ext cache is zeroed before we start using it.  
 *
 * We also reset and disable the cache when allocLimit is updated... which 
 * is when a volume is being resized (via hfs_truncatefs() or hfs_extendfs()). 
 * It is independent of the type of allocator being used currently.
 */
void ResetVCBFreeExtCache(struct hfsmount *hfsmp) 
{
	int bytes;
	void *freeExt;

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0);

	lck_spin_lock(&hfsmp->vcbFreeExtLock);
	
	/* reset Free Extent Count */
	hfsmp->vcbFreeExtCnt = 0;
	
	/* reset the actual array */
	bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor);
	freeExt = (void*)(hfsmp->vcbFreeExt);
	
	bzero (freeExt, bytes);
	
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return;
}

/*
 * This function is used to inform the allocator if we have to effectively shrink
 * or grow the total number of allocation blocks via hfs_truncatefs or hfs_extendfs. 
 *
 * The bitmap lock must be held when calling this function.  This function also modifies the
 * allocLimit field in the hfs mount point structure in the general case. 
 * 
 * In the shrinking case, we'll have to remove all free extents from the red-black
 * tree past the specified offset new_end_block.  In the growth case, we'll have to force
 * a re-scan of the new allocation blocks from our current allocLimit to the new end block.
 * 
 * new_end_block represents the total number of blocks available for allocation in the resized
 * filesystem.  Block #new_end_block itself should not be allocatable in the resized filesystem,
 * since only blocks 0 through (new_end_block - 1) are indexable in the bitmap.
 *
 * Returns	0 on success
 *			errno on failure
 */
__private_extern__
u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) {
	
	/* 
	 * Update allocLimit to the argument specified, but don't do anything else 
	 * if the red/black tree is not enabled.
	 */
	hfsmp->allocLimit = new_end_block;

	/* Invalidate the free extent cache completely so that 
	 * it does not have any extents beyond end of current 
	 * volume.
	 */
	ResetVCBFreeExtCache(hfsmp);

#if CONFIG_HFS_ALLOC_RBTREE
	/* Shrinking the existing filesystem */
	if ((new_end_block < hfsmp->offset_block_end) &&
		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {	
		extent_node_t search_sentinel;
		extent_node_t *node = NULL;
		/* Remover points to the current item to free/remove from the tree */
		extent_node_t *remover = NULL;
		
		/* Begin search at the specified offset */
		memset (&search_sentinel, 0, sizeof(extent_node_t));
		search_sentinel.offset = new_end_block;
				
		/* 
		 * Find the free extent that begins at, or closest before, the new alloc limit.
		 * We may need to split apart an existing node if it straddles the new alloc limit.
		 */
		node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel);
		if (node) {
			/* If it's an exact match, then just remove them all from this point forward */
			if (node->offset == new_end_block) {
				/* 
				 * Find the previous entry and update its next pointer to NULL
				 * since this entry is biting the dust.  Update remover to node.
				 */
				extent_node_t *prev = NULL;
				prev = extent_tree_off_prev (&hfsmp->offset_tree, node);
				if (prev) {
					prev->offset_next = NULL;
				}
				remover = node;
			}
			else {
				/* See if we need to split this node */
				if ((node->offset + node->length) > new_end_block) {
					/* 
					 * Update node to reflect its new size up until new_end_block.
					 */
					remover = node->offset_next;
					node->length = new_end_block - node->offset;
					/* node is becoming the last free extent in the volume.  */
					node->offset_next = NULL;
				}
				else {
					if (node->offset_next == NULL) {
						/*
						 * 'node' points to the last free extent in the volume. 
						 * Coincidentally, it is also before the new cut-off point at which 
						 * we will stop representing bitmap values in the tree.  Just bail out now.
						 */
						return 0;
					}
					/* 
					 * Otherwise, point our temp variable 'remover' to the node where
					 * we'll need to start yanking things out of the tree, and make 'node' 
					 * the last element in the tree in the linked list.
					 */
					remover = node->offset_next;
					if (remover->offset <= new_end_block) {
						panic ("UpdateAllocLimit: Invalid RBTree node next ptr!");
					}
					node->offset_next = NULL;
				}
			}
			
			/* 
			 * Remover is our "temp" pointer that points to the current node to remove from 
			 * the offset tree.  We'll simply iterate through the tree linked list, removing the current
			 * element from the tree, freeing them as we come across them.
			 */
			while (remover) {
				extent_node_t *next = remover->offset_next;
				extent_tree_remove_node (&hfsmp->offset_tree, remover);
				free_node (remover);
				remover = next;
			}
			
			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after truncation\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block-1);
			}
			
			/* 
			 * Don't forget to shrink offset_block_end after a successful truncation.
			 * new_end_block should represent the number of blocks available on the 
			 * truncated volume.
			 */
			
			hfsmp->offset_block_end = new_end_block;
			
			return 0;
		}
		else {
			if (ALLOC_DEBUG) {
				panic ("UpdateAllocLimit: no prev!");
			}
			return ENOSPC;
		}
	}
	/* Growing the existing filesystem */
	else if ((new_end_block > hfsmp->offset_block_end) &&
		(hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) {	
		int flags = 0;
		int retval = 0;
		
		if (ALLOC_DEBUG) {
			printf ("UpdateAllocLimit: Validating rbtree prior to growth\n");
			hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end);
		}
		
		
		retval = GenerateTree (hfsmp, new_end_block, &flags, 0);
		
		/*
		 * Don't forget to update offset_block_end after a successful tree extension.
		 */
		if (retval == 0) {
			
			if (ALLOC_DEBUG) {
				printf ("UpdateAllocLimit: Validating rbtree after growth\n");
				hfs_validate_rbtree (hfsmp, 0, new_end_block);
			}
			
			hfsmp->offset_block_end = new_end_block;
		}
		
		return retval;
	}
	/* Otherwise, do nothing; fall through to the code below. */
	printf ("error : off_block_end: %u, alloclimit: %u, new_end_block: %u\n", 
			hfsmp->offset_block_end, hfsmp->allocLimit, new_end_block);
#endif

	return 0;

}
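
/*
 * Worked example (hypothetical numbers): shrinking a volume from 1000 to
 * 600 allocation blocks calls UpdateAllocLimit(hfsmp, 600).  allocLimit
 * becomes 600, so only blocks 0-599 remain allocatable, and the free
 * extent cache is reset so it cannot hand out anything past block 599.
 * If the red-black tree is active and holds a free extent at
 * (offset 550, length 100), that node straddles the new limit, so it is
 * trimmed to (offset 550, length 50) and every free extent after it is
 * removed from the tree.  Growing back to 1000 blocks would instead
 * rescan blocks 600-999 via GenerateTree and push offset_block_end out
 * to 1000.
 */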


/*
 * Remove an entry from free extent cache after it has been allocated.
 *
 * This function does not split cached extents; if the allocated range overlaps a cached 
 * free extent at all, that entire extent is removed from the cache.  
 *
 * Inputs: 
 * 	hfsmp		- mount point structure 
 * 	startBlock	- starting block of the extent to be removed. 
 * 	blockCount	- number of blocks of the extent to be removed.
 */
static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount)
{
	int i, j;
	int extentsRemoved = 0;
	u_int32_t start, end;

#if CONFIG_HFS_ALLOC_RBTREE
	/* If red-black tree is enabled, no free extent cache is necessary */
	if (hfs_isrbtree_active(hfsmp) == true) {
		return;
	}
#endif

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	for (i = 0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
		start = hfsmp->vcbFreeExt[i].startBlock;
		end = start + hfsmp->vcbFreeExt[i].blockCount;

		/* If the extent to remove from free extent list starts within 
		 * this free extent, or, if it starts before this free extent 
		 * but ends in this free extent, remove it by shifting all other
		 * extents.
		 */
		if (((startBlock >= start) && (startBlock < end)) ||
		    ((startBlock < start) && (startBlock + blockCount) > start)) {
			for (j = i; j < (int)hfsmp->vcbFreeExtCnt - 1; j++) {
				hfsmp->vcbFreeExt[j] = hfsmp->vcbFreeExt[j+1];
			}
			hfsmp->vcbFreeExtCnt--;
			/* Decrement the index so that we check the extent 
			 * that just got shifted to the current index.
			 */
			i--;
			extentsRemoved++;
		}
		/* Keep looping, since the allocated range could overlap multiple cached 
		 * extents.  That is unlikely in the normal case, but checking does not hurt.
		 */
	}
	
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);

	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0);

	return;
}
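
/*
 * Worked example of the overlap test above (hypothetical numbers): suppose
 * the cache holds a free extent at (startBlock 200, blockCount 50), i.e.
 * blocks [200, 250).  Allocating blocks [180, 210) overlaps it via the
 * second clause (180 < 200 and 180 + 30 > 200); allocating [240, 260)
 * overlaps it via the first clause (240 >= 200 and 240 < 250).  In both
 * cases the entire cached extent is dropped, even though only part of it
 * was allocated.  Allocating [250, 260) satisfies neither clause, so the
 * cached extent stays.
 */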

/*
 * Add an entry to free extent cache after it has been deallocated.  
 *
 * If the extent provided has blocks beyond the current allocLimit, it 
 * is clipped to allocLimit.  This function does not merge the new extent 
 * with contiguous extents that may already exist in the list.
 *
 * Inputs: 
 * 	hfsmp		- mount point structure 
 * 	startBlock	- starting block of the extent to be added. 
 * 	blockCount	- number of blocks of the extent to be added.
 *
 * Returns:
 * 	true		- if the extent was added successfully to the list
 * 	false		- if the extent was not added to the list, either because 
 * 			  it lies beyond allocLimit or because it is not a good enough 
 * 			  candidate to be kept in the cache.
 */
static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) 
{
	Boolean retval = false;
	u_int32_t start, end;
	int i; 
	
	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0);

	/*
	 * If using the red-black tree allocator, then there's no need to special case 
	 * for the sparse device case.  We'll simply add the region we've recently freed
	 * to the red-black tree, where it will get sorted by offset and length.  The only special 
	 * casing will need to be done on the allocation side, where we may favor free extents
	 * based on offset even if it will cause fragmentation.  This may be true, for example, if
	 * we are trying to reduce the number of bandfiles created in a sparse bundle disk image. 
	 */
#if CONFIG_HFS_ALLOC_RBTREE
	if (hfs_isrbtree_active(hfsmp) == true) {
		goto out_not_locked;
	}
#endif

	/* No need to add extent that is beyond current allocLimit */
	if (startBlock >= hfsmp->allocLimit) {
		goto out_not_locked;
	}

	/* If end of the free extent is beyond current allocLimit, clip the extent */
	if ((startBlock + blockCount) > hfsmp->allocLimit) {
		blockCount = hfsmp->allocLimit - startBlock;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);

	/* If the free extent cache is full and the new extent compares 
	 * unfavorably with the last (worst) entry, skip adding it to the list.
	 */
	if (hfsmp->vcbFreeExtCnt == kMaxFreeExtents) {
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			/* For sparse disks, free extent cache list is sorted by start block, lowest first */
			if (startBlock > hfsmp->vcbFreeExt[kMaxFreeExtents-1].startBlock) {
				goto out;
			} 
		} else {
			/* For normal mounts, free extent cache list is sorted by total blocks, highest first */
			if (blockCount <= hfsmp->vcbFreeExt[kMaxFreeExtents-1].blockCount) {
				goto out;
			} 
		}
	}

	/* Check if the current extent overlaps with any of the existing 
	 * extents.  If yes, just skip adding it to the list.  We have 
	 * to do this check before shifting the extent records.
	 */
	for (i = 0; i < (int)hfsmp->vcbFreeExtCnt; i++) {

		start = hfsmp->vcbFreeExt[i].startBlock;
		end = start + hfsmp->vcbFreeExt[i].blockCount;

		if (((startBlock >= start) && (startBlock < end)) ||
		    ((startBlock < start) && (startBlock + blockCount) > start)) {
			goto out;
		}
	}

	/* Scan the free extent cache array from tail to head till 
	 * we find the entry after which our new entry should be 
	 * inserted.  After we break out of this loop, the new entry 
	 * will be inserted at 'i+1'.
	 */
	for (i = (int)hfsmp->vcbFreeExtCnt-1; i >= 0; i--) {
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			/* For sparse devices, find entry with smaller start block than ours */
			if (hfsmp->vcbFreeExt[i].startBlock < startBlock) {
				break;
			}
		} else {
			/* For normal devices, find entry with greater block count than ours */
			if (hfsmp->vcbFreeExt[i].blockCount >= blockCount) {
				break;
			}
		}

		/* This is not the right spot to insert, so shift this entry 
		 * down one slot (unless it is already in the last slot of the 
		 * array) and continue checking the previous one. 
		 */
		if ((i+1) < kMaxFreeExtents) {
			hfsmp->vcbFreeExt[i+1] = hfsmp->vcbFreeExt[i];
		}
	}
	/* 'i' now indexes the entry after which the new extent should be inserted, at index 'i+1' */
	hfsmp->vcbFreeExt[i+1].startBlock = startBlock;
	hfsmp->vcbFreeExt[i+1].blockCount = blockCount;
	if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents) {
		hfsmp->vcbFreeExtCnt++;
	}
	retval = true;

out:
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
out_not_locked:
	sanity_check_free_ext(hfsmp, 0);

	if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED)
		KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0);

	return retval;
}
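
/*
 * Worked example of the insertion above (hypothetical numbers, normal
 * non-sparse mount, so the list is sorted by block count, largest first):
 * suppose the cache currently holds extents with block counts 500, 120,
 * and 40, and we add a newly freed extent of 200 blocks.  The tail-to-head
 * scan shifts the 40-block and 120-block entries down one slot, stops at
 * the 500-block entry (500 >= 200), and inserts the new extent at index 1,
 * leaving the list ordered 500, 200, 120, 40.  On a sparse device the same
 * walk keys off startBlock instead, keeping the list sorted by starting
 * block, lowest first.
 */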

/* Debug function to check if the free extent cache is good or not */
static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated)
{
	u_int32_t i, j;

	/* Do not do anything if debug is not on, or if we're using the red-black tree */
	if ((ALLOC_DEBUG == 0) || (hfs_isrbtree_active(hfsmp) == true)) {
		return;
	}

	lck_spin_lock(&hfsmp->vcbFreeExtLock);
	
	/* 
	 * Iterate the Free extent cache and ensure no entries are bogus or refer to
	 * allocated blocks.
	 */
	for(i=0; i < hfsmp->vcbFreeExtCnt; i++) {
		u_int32_t start, nblocks;

		start   = hfsmp->vcbFreeExt[i].startBlock;
		nblocks = hfsmp->vcbFreeExt[i].blockCount;

		//printf ("hfs: %p: slot:%d (%u,%u)\n", hfsmp, i, start, nblocks);

		/* Check if any of the blocks in the free extent cache are allocated.  
		 * This should not always be enabled because it can take a very 
		 * long time for the large extents that get added to the list.
		 *
		 * We have to drop vcbFreeExtLock while we call hfs_isallocated
		 * because it is going to do I/O.  Note that the free extent
		 * cache could change.  That's a risk we take when using this
		 * debugging code.  (Another alternative would be to try to
		 * detect when the free extent cache changed, and perhaps
		 * restart if the list changed while we dropped the lock.)
		 */
		if (check_allocated) {
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (hfs_isallocated(hfsmp, start, nblocks)) {
				panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n",
					  hfsmp, i, start, nblocks);
			}
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
		}

		/* Check if any part of the extent is beyond allocLimit */
		if ((start > hfsmp->allocLimit) || ((start + nblocks) > hfsmp->allocLimit)) {
			panic ("hfs: %p: slot %d:(%u,%u) in the free extent array is beyond allocLimit=%u\n",
					hfsmp, i, start, nblocks, hfsmp->allocLimit);
		}

		/* Check if there are any duplicate start blocks */
		for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) {
			if (start == hfsmp->vcbFreeExt[j].startBlock) {
				panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n", 
				      hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock, 
				      hfsmp->vcbFreeExt[j].blockCount);
			}
		}

		/* Check if the entries are out of order */
		if ((i+1) != hfsmp->vcbFreeExtCnt) {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				/* sparse devices are sorted by starting block number (ascending) */
				if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) {
					panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n", 
						hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, 
						hfsmp->vcbFreeExt[i+1].blockCount);
				}
			} else {
				/* normally sorted by block count (descending) */
				if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) {
					panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n", 
						hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, 
						hfsmp->vcbFreeExt[i+1].blockCount);
				}
			}
		}
	}
	lck_spin_unlock(&hfsmp->vcbFreeExtLock);
}