/* $Id: htable.h,v 1.3 2008/11/16 17:42:53 culot Exp $ */ /* * Copyright (c) 2008 Frederic Culot * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the * following disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the * following disclaimer in the documentation and/or other * materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HTABLE_H #define HTABLE_H #include #include /* * This file defines data structures for hash tables. * * Hash tables are ideal for applications with datasets needing lots of adding, * searching or removal, as those are normally constant-time operations. * The primary operation it supports efficiently is a lookup: given a key (e.g. * a person's name), find the corresponding value (e.g. that person's telephone * number). It works by transforming the key using a hash function into a hash, * a number that is used as an index in an array to locate the desired location * ("bucket") where the values should be. * * Hash tables support the efficient insertion of new entries, in expected O(1) * time. The time spent in searching depends on the hash function and the load * of the hash table; both insertion and search approach O(1) time with well * chosen values and hashes. * * The collision resolution technique used here is direct chaining, implemented * using singly linked lists (the worst-case time is O(n)). * * This was chosen because performance degradation is linear as the table * fills, so there is almost no need to resize table * (for example, a chaining hash table containing twice its recommended * capacity of data would only be about twice as slow on average as the same * table at its recommended capacity). */ #define HTABLE_HEAD(name, size, type) \ struct name { \ uint32_t noitems; /* Number of items stored in hash table. */ \ uint32_t nosingle; /* Number of items alone in their bucket. */ \ uint32_t nofreebkts; /* Number of free buckets. */ \ struct type *bkts[size]; /* Pointers to user-defined data structures. */ \ } #define HTABLE_ENTRY(type) \ struct { \ struct type *next; /* To build the bucket chain list. */ \ } #define HTABLE_SIZE(head) \ (sizeof (*(head)->bkts) ? sizeof ((head)->bkts) / sizeof (*(head)->bkts) : 0) #define HTABLE_COUNT(head) \ ((head)->noitems ? (head)->noitems : 0) #define HTABLE_EMPTY(head) \ (HTABLE_COUNT((head)) == 0 ? 1 : 0) #define HTABLE_COLLS(head) \ ((head)->noitems ? 100.0 - 100 * (head)->nosingle / (head)->noitems : 0) #define HTABLE_LOAD(head) \ (HTABLE_SIZE((head)) ? \ 100.0 - 100.0 * (head)->nofreebkts / HTABLE_SIZE((head)) : 0) #define HTABLE_INITIALIZER(head) \ { 0, /* noitems */ \ 0, /* nosingle */ \ HTABLE_SIZE((head)) /* nofreebkts */ \ } #define HTABLE_INIT(head) do { \ bzero ((head), sizeof (*(head))); \ (head)->nofreebkts = HTABLE_SIZE((head)); \ } while (0) /* * Generate prototypes. */ #define HTABLE_PROTOTYPE(name, type) \ struct type *name##_HTABLE_INSERT(struct name *, struct type *); \ struct type *name##_HTABLE_REMOVE(struct name *, struct type *); \ struct type *name##_HTABLE_LOOKUP(struct name *, struct type *); /* * Generate function bodies. */ #define HTABLE_GENERATE(name, type, key, cmp) \ uint32_t \ name##_HTABLE_FIND_BKT(struct name *head, struct type *elm) \ { \ uint32_t __bkt; \ char *__key; \ int __len; \ \ (key) (elm, &__key, &__len); \ HTABLE_HASH(__key, __len, HTABLE_SIZE(head), __bkt); \ \ return __bkt; \ } \ \ int \ name##_HTABLE_CHAIN_LEN(struct name *head, uint32_t bkt) \ { \ struct type *__bktp; \ int __len; \ \ __len = 0; \ for (__bktp = (head)->bkts[(bkt)]; __bktp != NULL; __bktp = __bktp->next) \ __len++; \ \ return __len; \ } \ \ struct type * \ name##_HTABLE_INSERT(struct name *head, struct type *elm) \ { \ struct type *__bktp, **__bktpp; \ uint32_t __bkt, __pos; \ \ __pos = 0; \ __bkt = name##_HTABLE_FIND_BKT(head, elm); \ __bktpp = &head->bkts[__bkt]; \ while ((__bktp = *__bktpp)) \ { \ if (!(cmp)(elm, __bktp)) \ return NULL; \ else \ { \ __pos++; \ __bktpp = &__bktp->next; \ } \ } \ __bktp = elm; \ __bktp->next = NULL; \ *__bktpp = __bktp; \ head->noitems++; \ switch (__pos) \ { \ case 0: \ head->nosingle++; \ head->nofreebkts--; \ break; \ case 1: \ head->nosingle--; \ break; \ default: \ break; \ } \ \ return __bktp; \ } \ \ struct type * \ name##_HTABLE_REMOVE(struct name *head, struct type *elm) \ { \ struct type *__bktp, **__bktpp; \ uint32_t __bkt, __pos; \ \ __pos = 0; \ __bkt = name##_HTABLE_FIND_BKT(head, elm); \ __bktpp = &head->bkts[__bkt]; \ while ((__bktp = *__bktpp)) \ { \ if (!(cmp)(elm, __bktp)) \ { \ *__bktpp = __bktp->next; \ elm = __bktp; \ head->noitems--; \ if (__pos <= 1) /* Need to scan list to know if we have */ \ { /* a free bucket or a single item. */ \ int __len; \ \ __len = name##_HTABLE_CHAIN_LEN(head, __bkt); \ switch (__len) \ { \ case 0: \ head->nofreebkts++; \ head->nosingle--; \ break; \ case 1: \ head->nosingle++; \ break; \ } \ } \ return elm; \ } \ __pos++; \ __bktpp = &__bktp->next; \ } \ return NULL; \ } \ \ struct type * \ name##_HTABLE_LOOKUP(struct name *head, struct type *elm) \ { \ struct type *__bktp, **__bktpp; \ uint32_t __bkt; \ \ __bkt = name##_HTABLE_FIND_BKT(head, elm); \ __bktpp = &head->bkts[__bkt]; \ while ((__bktp = *__bktpp)) \ { \ if (!(cmp)(elm, __bktp)) \ return __bktp; \ else \ __bktpp = &__bktp->next; \ } \ \ return NULL; \ } \ \ struct type * \ name##_HTABLE_FIRST_FROM(struct name *head, int bkt) \ { \ struct type *__bktp; \ \ while (bkt < HTABLE_SIZE(head)) \ { \ if ((__bktp = head->bkts[bkt])) \ return __bktp; \ else \ bkt++; \ } \ \ return NULL; \ } \ \ struct type * \ name##_HTABLE_NEXT(struct name *head, struct type *elm) \ { \ struct type *__elmp, *__bktp, **__bktpp; \ uint32_t __bkt; \ \ __elmp = NULL; \ __bkt = name##_HTABLE_FIND_BKT(head, elm); \ __bktpp = &head->bkts[__bkt]; \ while ((__bktp = *__bktpp)) \ { \ if (!(cmp)(elm, __bktp)) \ { \ __elmp = __bktp; \ break; \ } \ else \ __bktpp = &__bktp->next; \ } \ \ if (!__elmp) \ return NULL; \ else if (__elmp->next) \ return __elmp->next; \ else \ return name##_HTABLE_FIRST_FROM(head, ++__bkt); \ } #define FIRST_BKT 0 #define HTABLE_INSERT(name, x, y) name##_HTABLE_INSERT(x, y) #define HTABLE_REMOVE(name, x, y) name##_HTABLE_REMOVE(x, y) #define HTABLE_LOOKUP(name, x, y) name##_HTABLE_LOOKUP(x, y) #define HTABLE_FIRST_FROM(name, x, y) (HTABLE_EMPTY(x) ? NULL \ : name##_HTABLE_FIRST_FROM(x, y)) #define HTABLE_FIRST(name, x) HTABLE_FIRST_FROM(name, x, FIRST_BKT) #define HTABLE_NEXT(name, x, y) (HTABLE_EMPTY(x) ? NULL \ : name##_HTABLE_NEXT(x, y)) #define HTABLE_FOREACH(x, name, head) \ for ((x) = HTABLE_FIRST(name, head); \ (x) != NULL; \ (x) = HTABLE_NEXT(name, head, x)) /* * Hash functions. */ #ifdef HASH_FUNCTION #define HTABLE_HASH HASH_FUNCTION #else #define HTABLE_HASH HASH_JEN #endif #define HASH_JEN_MIX(a, b, c) do { \ a -= b; a -= c; a ^= (c >> 13); \ b -= c; b -= a; b ^= (a << 8); \ c -= a; c -= b; c ^= (b >> 13); \ a -= b; a -= c; a ^= (c >> 12); \ b -= c; b -= a; b ^= (a << 16); \ c -= a; c -= b; c ^= (b >> 5); \ a -= b; a -= c; a ^= (c >> 3); \ b -= c; b -= a; b ^= (a << 10); \ c -= a; c -= b; c ^= (b >> 15); \ } while (0) #define HASH_JEN(key, keylen, num_bkts, bkt) do { \ register uint32_t i, j, k, hash; \ \ hash = 0xfeedbeef; \ i = j = 0x9e3779b9; \ k = keylen; \ while (k >= 12) \ { \ i += (key[0] + ((unsigned)key[1] << 8) \ + ((unsigned)key[2] << 16) \ + ((unsigned)key[3] << 24)); \ j += (key[4] + ((unsigned)key[5] << 8) \ + ((unsigned)key[6] << 16) \ + ((unsigned)key[7] << 24 )); \ hash += (key[8] + ((unsigned)key[9] << 8) \ + ((unsigned)key[10] << 16) \ + ((unsigned)key[11] << 24)); \ \ HASH_JEN_MIX (i, j, hash); \ \ key += 12; \ k -= 12; \ } \ hash += keylen; \ switch (k) \ { \ case 11: \ hash += ((unsigned)key[10] << 24); \ case 10: \ hash += ((unsigned)key[9] << 16); \ case 9: \ hash += ((unsigned)key[8] << 8); \ case 8: \ j += ((unsigned)key[7] << 24); \ case 7: \ j += ((unsigned)key[6] << 16); \ case 6: \ j += ((unsigned)key[5] << 8); \ case 5: \ j += key[4]; \ case 4: \ i += ((unsigned)key[3] << 24); \ case 3: \ i += ((unsigned)key[2] << 16); \ case 2: \ i += ((unsigned)key[1] << 8); \ case 1: \ i += key[0]; \ } \ HASH_JEN_MIX (i, j, hash); \ bkt = hash % (num_bkts); \ } while (0) #define HASH_OAT(key, keylen, num_bkts, bkt) do { \ register uint32_t hash; \ int i; \ \ hash = 0; \ for (i = 0; i < keylen; i++) \ { \ hash += key[i]; \ hash += (hash << 10); \ hash ^= (hash >> 6); \ } \ hash += (hash << 3); \ hash ^= (hash >> 11); \ hash += (hash << 15); \ bkt = hash % (num_bkts); \ } while (0) #endif /* !HTABLE_H */