kxld_dict.c   [plain text]


/*
 * Copyright (c) 2007-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 * 
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <string.h>
#include <sys/types.h>

#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld"
#include <AssertMacros.h>

#include "kxld_dict.h"
#include "kxld_util.h"

/*******************************************************************************
* Types and macros
*******************************************************************************/

/* Ratio of num_entries:num_buckets that will cause a resize */
#define RESIZE_NUMER 7
#define RESIZE_DENOM 10
#define RESIZE_THRESHOLD(x) (((x)*RESIZE_NUMER) / RESIZE_DENOM)
#define MIN_BUCKETS(x) (((x)*RESIZE_DENOM) / RESIZE_NUMER) 

/* Selected for good scaling qualities when resizing dictionary
 * ... see: http://www.concentric.net/~ttwang/tech/hashsize.htm
 */
#define DEFAULT_DICT_SIZE 89

typedef struct dict_entry DictEntry;

typedef enum {
    EMPTY = 0,
    USED = 1,
    DELETED = 2
} DictEntryState;

struct dict_entry {
    const void *key;
    void *value;
    DictEntryState state;
};

/*******************************************************************************
* Function prototypes
*******************************************************************************/

static kern_return_t get_locate_index(const KXLDDict *dict, const void *key, 
    u_int *idx);
static kern_return_t get_insert_index(const KXLDDict *dict, const void *key, 
    u_int *idx);
static kern_return_t resize_dict(KXLDDict *dict);

/*******************************************************************************
*******************************************************************************/
kern_return_t
kxld_dict_init(KXLDDict * dict, kxld_dict_hash hash, kxld_dict_cmp cmp, 
    u_int num_entries) 
{
    kern_return_t rval = KERN_FAILURE;
    u_int min_buckets = MIN_BUCKETS(num_entries);
    u_int num_buckets = DEFAULT_DICT_SIZE;
    
    check(dict);
    check(hash);
    check(cmp);
    
    /* We want the number of allocated buckets to be at least twice that of the 
     * number to be inserted.
     */
    while (min_buckets > num_buckets) {
        num_buckets *= 2;
        num_buckets++;
    }
    
    /* Allocate enough buckets for the anticipated number of entries */
    rval = kxld_array_init(&dict->buckets, sizeof(DictEntry), num_buckets);
    require_noerr(rval, finish);
    
    /* Initialize */
    dict->hash = hash;
    dict->cmp = cmp;
    dict->num_entries = 0;
    dict->resize_threshold = RESIZE_THRESHOLD(num_buckets);
    
    rval = KERN_SUCCESS;
    
finish:
    return rval;
}

/*******************************************************************************
*******************************************************************************/
void
kxld_dict_clear(KXLDDict *dict)
{
    check(dict);

    dict->hash = NULL;
    dict->cmp = NULL;
    dict->num_entries = 0;
    dict->resize_threshold = 0;
    kxld_array_clear(&dict->buckets);
    kxld_array_clear(&dict->resize_buckets);
}

/*******************************************************************************
*******************************************************************************/
void
kxld_dict_iterator_init(KXLDDictIterator *iter, const KXLDDict *dict)
{
    check(iter);
    check(dict);

    iter->idx = 0;
    iter->dict = dict;
}

/*******************************************************************************
*******************************************************************************/
void
kxld_dict_deinit(KXLDDict *dict)
{
    check(dict);
    
    kxld_array_deinit(&dict->buckets);
    kxld_array_deinit(&dict->resize_buckets);
}

/*******************************************************************************
*******************************************************************************/
u_int
kxld_dict_get_num_entries(const KXLDDict *dict)
{
    check(dict);

    return dict->num_entries;
}

/*******************************************************************************
*******************************************************************************/
void *
kxld_dict_find(const KXLDDict *dict, const void *key)
{
    kern_return_t rval = KERN_FAILURE;
    DictEntry *entry = NULL;
    u_int idx = 0;
   
    check(dict);
    check(key);
   
    rval = get_locate_index(dict, key, &idx);
    if (rval) return NULL; 

    entry = kxld_array_get_item(&dict->buckets, idx);
    
    return entry->value;
}

/*******************************************************************************
* This dictionary uses linear probing, which means that when there is a
* collision, we just walk along the buckets until a free bucket shows up.
* A consequence of this is that when looking up an item, items that lie between
* its hash value and its actual bucket may have been deleted since it was
* inserted.  Thus, we should only stop a lookup when we've wrapped around the
* dictionary or encountered an EMPTY bucket.
********************************************************************************/
static kern_return_t
get_locate_index(const KXLDDict *dict, const void *key, u_int *_idx)
{
    kern_return_t rval = KERN_FAILURE;
    DictEntry *entry = NULL;
    u_int base, idx;

    base = idx = dict->hash(dict, key);
    
    /* Iterate until we match the key, wrap, or hit an empty bucket */
    entry = kxld_array_get_item(&dict->buckets, idx);
    while (!dict->cmp(entry->key, key)) {
        if (entry->state == EMPTY) goto finish;

        idx = (idx + 1) % dict->buckets.nitems;
        if (idx == base) goto finish;

        entry = kxld_array_get_item(&dict->buckets, idx);
    }

    check(idx < dict->buckets.nitems);

    *_idx = idx;
    rval = KERN_SUCCESS;

finish:
    return rval;
}

/*******************************************************************************
*******************************************************************************/
kern_return_t
kxld_dict_insert(KXLDDict *dict, const void *key, void *value)
{
    kern_return_t rval = KERN_FAILURE;
    DictEntry *entry = NULL;
    u_int idx = 0;
    
    check(dict);
    check(key);
    check(value);
    
    /* Resize if we are greater than the capacity threshold.
     * Note: this is expensive, but the dictionary can be sized correctly at
     * construction to avoid ever having to do this.
     */
    while (dict->num_entries > dict->resize_threshold) { 
        rval = resize_dict(dict); 
        require_noerr(rval, finish);
    }
    
    /* If this function returns FULL after we've already resized appropriately
     * something is very wrong and we should return an error.
     */
    rval = get_insert_index(dict, key, &idx);
    require_noerr(rval, finish);
    
    /* Insert the new key-value pair into the bucket, but only count it as a 
     * new entry if we are not overwriting an existing entry.
     */
    entry = kxld_array_get_item(&dict->buckets, idx);
    if (entry->state != USED) {
        dict->num_entries++;
        entry->key = key;
        entry->state = USED;
    }
    entry->value = value;

    rval = KERN_SUCCESS;
    
finish:
    return rval;
}

/*******************************************************************************
* Increases the hash table's capacity by 2N+1.  Uses dictionary API.  Not
* fast; just correct.
*******************************************************************************/
static kern_return_t
resize_dict(KXLDDict *dict)
{
    kern_return_t rval = KERN_FAILURE;
    KXLDArray tmparray;
    DictEntry *entry = NULL;
    u_int nbuckets = (dict->buckets.nitems * 2 + 1);
    u_int i = 0;

    check(dict);

    /* Initialize a new set of buckets to hold more entries */
    rval = kxld_array_init(&dict->resize_buckets, sizeof(DictEntry), nbuckets);
    require_noerr(rval, finish);

    /* Swap the new buckets with the old buckets */
    tmparray = dict->buckets;
    dict->buckets = dict->resize_buckets;
    dict->resize_buckets = tmparray; 

    /* Reset dictionary parameters */
    dict->num_entries = 0;
    dict->resize_threshold = RESIZE_THRESHOLD(dict->buckets.nitems);

    /* Rehash all of the entries */
    for (i = 0; i < dict->resize_buckets.nitems; ++i) {
        entry = kxld_array_get_item(&dict->resize_buckets, i);
        if (entry->state == USED) {
            rval = kxld_dict_insert(dict, entry->key, entry->value);
            require_noerr(rval, finish);
        }
    }

    /* Clear the old buckets */
    kxld_array_clear(&dict->resize_buckets);

    rval = KERN_SUCCESS;
    
finish:
    return rval;
}

/*******************************************************************************
* Simple function to find the first empty cell
*******************************************************************************/
static kern_return_t
get_insert_index(const KXLDDict *dict, const void *key, u_int *r_index)
{
    kern_return_t rval = KERN_FAILURE;
    DictEntry *entry = NULL;
    u_int base, idx;

    base = idx = dict->hash(dict, key);
    
    /* Iterate through the buckets until we find an EMPTY bucket, a DELETED
     * bucket, or a key match.
     */
    entry = kxld_array_get_item(&dict->buckets, idx);
    while (entry->state == USED && !dict->cmp(entry->key, key)) {
        idx = (idx + 1) % dict->buckets.nitems;
        require_action(base != idx, finish, rval=KERN_FAILURE);
        entry = kxld_array_get_item(&dict->buckets, idx);
    }
    
    *r_index = idx;
    rval = KERN_SUCCESS;
    
finish:
    return rval;
}

/*******************************************************************************
*******************************************************************************/
void
kxld_dict_remove(KXLDDict *dict, const void *key, void **value)
{
    kern_return_t rval = KERN_FAILURE;
    DictEntry *entry = NULL;
    u_int idx = 0;
   
    check(dict);
    check(key);
    
    /* Find the item */
    rval = get_locate_index(dict, key, &idx);
    if (rval) {
        if (value) *value = NULL;
        return;
    }

    entry = kxld_array_get_item(&dict->buckets, idx);

    /* Save the value if requested */    
    if (value) *value = entry->value;

    /* Delete the item from the dictionary */
    entry->key = NULL;
    entry->value = NULL;
    entry->state = DELETED;
    dict->num_entries--;
}

/*******************************************************************************
*******************************************************************************/
void 
kxld_dict_iterator_get_next(KXLDDictIterator *iter, const void **key, 
    void **value)
{
    DictEntry *entry = NULL;

    check(iter);
    check(key);
    check(value);

    *key = NULL;
    *value = NULL;

    /* Walk over the dictionary looking for USED buckets */
    for (; iter->idx < iter->dict->buckets.nitems; ++(iter->idx)) {
        entry = kxld_array_get_item(&iter->dict->buckets, iter->idx);
        if (entry->state == USED) {
            *key = entry->key;
            *value = entry->value;
            ++(iter->idx);
            break;
        }
    }
}

/*******************************************************************************
*******************************************************************************/
void 
kxld_dict_iterator_reset(KXLDDictIterator *iter)
{
    iter->idx = 0;
}

/*******************************************************************************
* This is Daniel Bernstein's hash algorithm from comp.lang.c
* It's fast and distributes well.  Returns an idx into the symbol hash table.
* NOTE: Will not check for a valid pointer - performance
*******************************************************************************/
u_int
kxld_dict_string_hash(const KXLDDict *dict, const void *_key) 
{
    const char *key = _key;
    u_int c = 0;
    u_int hash_val = 5381;

    check(dict);
    check(_key);

    while ((c = *key++)) {
        /* hash(i) = hash(i-1) *33 ^ name[i] */
        hash_val = ((hash_val << 5) + hash_val) ^ c;    
    }
    
    return (hash_val % dict->buckets.nitems);
}

u_int
kxld_dict_uint32_hash(const KXLDDict *dict, const void *_key)
{
    uint32_t key = *(const uint32_t *) _key;

    check(_key);

    return (u_int) (key % dict->buckets.nitems);
}

u_int
kxld_dict_kxldaddr_hash(const KXLDDict *dict, const void *_key)
{
    kxld_addr_t key = *(const kxld_addr_t *) _key;

    check(_key);

    return (u_int) (key % dict->buckets.nitems);
}

u_int
kxld_dict_string_cmp(const void *key1, const void *key2)
{
    return streq(key1, key2);
}

u_int
kxld_dict_uint32_cmp(const void *key1, const void *key2)
{
    const uint32_t *a = key1;
    const uint32_t *b = key2;

    return (a && b && (*a == *b));
}

u_int
kxld_dict_kxldaddr_cmp(const void *key1, const void *key2)
{
    const kxld_addr_t *a = key1;
    const kxld_addr_t *b = key2;

    return (a && b && (*a == *b));
}