rsys

Basic data structures and low-level features
git clone git://git.meso-star.fr/rsys.git
Log | Files | Refs | README | LICENSE

commit 25e61caedcce78bbbc1f0be6d52115f2d932b2c8
parent d2e2ea2be72d5c6e8621cb3c8aa6e5f56adf9ff4
Author: vaplv <vaplv@free.fr>
Date:   Sat,  5 Apr 2014 17:05:10 +0200

Implement and test the hash table data structure

Diffstat:
Mcmake/CMakeLists.txt | 1+
Asrc/dynamic_array_char.h | 10++++++++++
Asrc/hash.h | 45+++++++++++++++++++++++++++++++++++++++++++++
Asrc/hash_table.h | 421+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/math.h | 4++--
Asrc/test_hash_table.c | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 552 insertions(+), 2 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -107,6 +107,7 @@ endmacro(new_test) new_test(test_atomic) new_test(test_dynamic_array rsys) new_test(test_free_list rsys) +new_test(test_hash_table rsys) new_test(test_library rsys) new_test(test_list rsys) new_test(test_mem_allocator rsys) diff --git a/src/dynamic_array_char.h b/src/dynamic_array_char.h @@ -0,0 +1,10 @@ +#ifndef DYNAMIC_ARRRAY_CHAR_H +#define DYNAMIC_ARRRAY_CHAR_H + +#include "dynamic_array.h" + +#define DARRAY_NAME char +#define DARRAY_DATA char +#include "dynamic_array.h" + +#endif /* DYNAMIC_ARRRAY_CHAR_H */ diff --git a/src/hash.h b/src/hash.h @@ -0,0 +1,45 @@ +#ifndef HASH_H +#define HASH_H + +#include "hash.h" + +/* 32-bits Fowler/Noll/Vo hash function */ +static INLINE uint32_t +hash_fnv32(const void* data, const size_t len) +{ + const uint32_t FNV32_PRIME = + (uint32_t)(((uint32_t)1<<24) + ((uint32_t)1<<8) + 0x93); + const uint32_t OFFSET32_BASIS = 2166136261u; + const char* octets = (const char*)data; + uint32_t hash = OFFSET32_BASIS; + size_t i; + ASSERT(data); + + FOR_EACH(i, 0, len) { + hash = hash ^ (uint32_t)((unsigned char)octets[i]); + hash = hash * FNV32_PRIME; + } + return hash; +} + +/* 64-bits Fowler/Noll/Vo hash function */ +static INLINE uint64_t +hash_fnv64(const void* data, const size_t len) +{ + const uint64_t FNV64_PRIME = + (uint64_t)(((uint64_t)1<<40) + ((uint64_t)1<<8) + 0xB3); + const uint64_t OFFSET64_BASIS = 14695981039346656037u; + const char* octets = (const char*)data; + uint64_t hash = OFFSET64_BASIS; + size_t i; + ASSERT(data); + + FOR_EACH(i, 0, len) { + hash = hash ^ (uint64_t)((unsigned char)octets[i]); + hash = hash * FNV64_PRIME; + } + return hash; +} + +#endif /* HASH_H */ + diff --git a/src/hash_table.h b/src/hash_table.h @@ -0,0 +1,421 @@ +#if !defined(HTABLE_NAME) && !defined(HTABLE_KEY) && !defined(HTABLE_DATA) +#ifndef HASH_TABLE_H +#define HASH_TABLE_H + +/* Pre include required headers */ +#include "dynamic_array_char.h" +#include "hash.h" +#include "mem_allocator.h" +#include "math.h" +#include "rsys.h" +#include <string.h> + +#endif /* HASH_TABLE_H */ +#else +/* + * Generate the hash table data types and functions with respect to the + * following macros: + * - HTABLE_NAME: prefix of the hash table functions & types; + * - HTABLE_DATA: type of the data registered into the hash table; + * - HTABLE_KEY: type of the key indexing the hash table data; + * TODO + * - HTABLE_FUNCTOR_INIT: init functor on HTABLE_DATA. If not defined, no + * specific treatment is performed on the created data; + * - HTABLE_FUNCTOR_COPY: copy functor on HTABLE_DATA. If not defined a + * bitwise copy is used instead; + * - HTABLE_FUNCTOR_RELEASE: release functor on HTABLE_DATA. If not defined + * nothing is done on the release of an element; + * - HTABLE_FUNCTOR_COPY_AND_RELEASE: Copy and release of a HTABLE_DATA. If + * not defined the copy and the release functors is used. + * + * The name of the generated types are: + * struct htable_<HTABLE_NAME>[_pair|_iterator] + * + * while the generated functions are: + * htable_<HTABLE_NAME>_<FUNCTION_NAME> + * + * The hash table collisions are resolved by the "open addressing" strategy + * with a fixed linear probing of one. + */ +#if !defined( HTABLE_NAME ) +# error "Undefined HTABLE_NAME macro defining the hash table suffix" +#endif +#if !defined( HTABLE_KEY) +# error "Undefined HTABLE_KEY macro defining the hash key type" +#endif +#if !defined( HTABLE_DATA ) +# error "Undefined HTABLE_DATA macro defining the hash data type" +#endif + +#define HTABLE__ CONCAT(htable_, HTABLE_NAME) +#define HTABLE_PAIR__ CONCAT(CONCAT(HTABLE__, _), pair) +#define HTABLE_INTERATOR__ CONCAT(HTABLE, iterator) +#define HTABLE_FUNC__(Func) \ + CONCAT(CONCAT(CONCAT(CONCAT(htable, _), HTABLE_NAME), _), Func) + +/* Internal data structure */ +struct HTABLE_PAIR__ +{ + HTABLE_DATA data; + HTABLE_KEY key; +}; + +#define DARRAY_NAME CONCAT(HTABLE_NAME, __) +#define DARRAY_DATA struct HTABLE_PAIR__ +#include "dynamic_array.h" + +#define HTABLE_DATA__ CONCAT(CONCAT(darray_, HTABLE_NAME), __) +#define HTABLE_DATA_FUNC__(Func) CONCAT(CONCAT(HTABLE_DATA__, _), Func) + +struct HTABLE__ +{ + struct HTABLE_DATA__ table; + struct darray_char table_slot_is_used; + size_t table_size_in_use; + + size_t (*hash)(HTABLE_KEY const*); + char (*eq_key)(HTABLE_KEY const*, HTABLE_KEY const*); + + struct mem_allocator* allocator; +}; + +struct HTABLE_INTERATOR__ +{ + struct HTABLE__* hash_table; + size_t slot; + size_t entries_scanned; +}; + +/******************************************************************************* + * Internal helper functions + ******************************************************************************/ +static INLINE size_t +HTABLE_FUNC__(find_slot__)(const struct HTABLE__* htbl, HTABLE_KEY const* key) +{ + size_t slot = 0; + const struct HTABLE_PAIR__* pairs = NULL; + const char* used_pairs = NULL; + ASSERT(htbl && key); + /* The tbl size must be a pow of 2 */ + ASSERT(is_power_of_2(HTABLE_DATA_FUNC__(size_get)(&htbl->table))); + /* The tbl is not full */ + ASSERT(htbl->table_size_in_use < HTABLE_DATA_FUNC__(size_get)(&htbl->table)); + + pairs = HTABLE_DATA_FUNC__(cdata_get)(&htbl->table); + used_pairs = darray_char_cdata_get(&htbl->table_slot_is_used); + /* slot = hash % size */ + slot = htbl->hash(key) & (HTABLE_DATA_FUNC__(size_get)(&htbl->table) - 1); + + while(used_pairs[slot] && !htbl->eq_key(key, &pairs[slot].key)) { + /* slot = (slot + 1) % size */ + slot = (slot + 1) & (HTABLE_DATA_FUNC__(size_get)(&htbl->table) - 1); + } + return slot; +} + +static INLINE size_t +HTABLE_FUNC__(hash__)(HTABLE_KEY const* key) +{ +#ifdef ARCH_32BITS + return (size_t)hash_fnv32(key, sizeof(HTABLE_KEY)); +#elif defined(ARCH_64BITS) + return (size_t)hash_fnv64(key, sizeof(HTABLE_KEY)); +#else + #error "Unexpected architecture" +#endif +} + +/******************************************************************************* + * Hash table API + ******************************************************************************/ +static INLINE void +HTABLE_FUNC__(init) + (struct HTABLE__* htbl, + /* May be NULL <=> use default allocator */ + struct mem_allocator* allocator, + /* May be NULL <=> use default hash function */ + size_t (*hash)(HTABLE_KEY const*), + char (*eq_key)(HTABLE_KEY const*, HTABLE_KEY const*)) +{ + ASSERT( htbl && eq_key ); + HTABLE_DATA_FUNC__(init)(allocator, &htbl->table); + darray_char_init(allocator, &htbl->table_slot_is_used); + htbl->table_size_in_use = 0; + htbl->hash = hash ? hash : HTABLE_FUNC__(hash__); + htbl->eq_key = eq_key; + htbl->allocator = allocator ? allocator : &mem_default_allocator; +} + +static INLINE void +HTABLE_FUNC__(release)(struct HTABLE__* htbl) +{ + ASSERT(htbl); + HTABLE_DATA_FUNC__(release)(&htbl->table); + darray_char_release(&htbl->table_slot_is_used); +} + +static INLINE void +HTABLE_FUNC__(clear)(struct HTABLE__* htbl) +{ + ASSERT( htbl ); + htbl->table_size_in_use = 0; + memset + (darray_char_data_get(&htbl->table_slot_is_used), 0, + darray_char_size_get(&htbl->table_slot_is_used) * sizeof(char)); +} + +/* Return 0 on success and -1 otherwise */ +static int +HTABLE_FUNC__(reserve)(struct HTABLE__* htbl, const size_t size_submitted) +{ + struct HTABLE_DATA__ tbl; + struct darray_char tbl_slot_is_used; + const struct HTABLE_PAIR__* old_pairs = NULL; + struct HTABLE_PAIR__* new_pairs = NULL; + const char* old_used_pairs = NULL; + char* new_used_pairs = NULL; + size_t islot = 0; + size_t in_use = 0; + size_t size = 0; + int res = 0; + ASSERT(htbl); + + size = round_up_pow2( size_submitted ); + if( size <= HTABLE_DATA_FUNC__(size_get)(&htbl->table)) + goto exit; + + HTABLE_DATA_FUNC__(init)(htbl->allocator, &tbl); + if(HTABLE_DATA_FUNC__(resize)(&tbl, size)) + goto error; + + darray_char_init(htbl->allocator, &tbl_slot_is_used); + if(darray_char_resize(&tbl_slot_is_used, size)) + goto error; + memset(darray_char_data_get(&tbl_slot_is_used), 0, size*sizeof(char)); + + /* Rehash the hash table */ + old_pairs = HTABLE_DATA_FUNC__(cdata_get)(&htbl->table); + new_pairs = HTABLE_DATA_FUNC__(data_get)(&tbl); + old_used_pairs = darray_char_cdata_get(&htbl->table_slot_is_used); + new_used_pairs = darray_char_data_get(&tbl_slot_is_used); + FOR_EACH(islot, 0, HTABLE_DATA_FUNC__(size_get)(&htbl->table)) { + size_t islot_new = 0; + + if(in_use == htbl->table_size_in_use) + break; /* Early stop the rehasing since there is no more slot in htbl */ + + if(!old_used_pairs[islot]) + continue; + + islot_new = htbl->hash(&old_pairs[islot].key) & (size - 1); + while(new_used_pairs[islot_new]) { + ASSERT /* There is at most 1 entry for a given key */ + (!htbl->eq_key(&new_pairs[islot_new].key, &old_pairs[islot].key )); + islot_new = (islot_new + 1) & (size - 1); /* (islot_new + 1) % size */ + } + new_pairs[islot_new] = old_pairs[islot]; + new_used_pairs[islot_new] = 1; + ++in_use; + } + + HTABLE_DATA_FUNC__(copy_and_release)(&htbl->table, &tbl); + darray_char_copy_and_release(&htbl->table_slot_is_used, &tbl_slot_is_used); + +exit: + return res; +error: + HTABLE_DATA_FUNC__(release)(&tbl); + darray_char_release(&tbl_slot_is_used); + res = -1; + goto exit; +} + +/* Return 0 on success and -1 otherwise */ +static INLINE int +HTABLE_FUNC__(set) + ( struct HTABLE__* htbl, + HTABLE_KEY const* key, + HTABLE_DATA const* data ) +{ + size_t tbl_size = 0; + size_t i = 0; + int res = 0; + ASSERT(htbl && key && data); + + /* Increase hash table size when the load factor is too high */ + tbl_size = HTABLE_DATA_FUNC__(size_get)(&htbl->table); + if(htbl->table_size_in_use >= tbl_size * 3/4) { + if(!tbl_size) { + res = HTABLE_FUNC__(reserve)(htbl, 32); + } else { + res = HTABLE_FUNC__(reserve)(htbl, tbl_size * 2 ); + } + if(res) + return res; + } + + i = HTABLE_FUNC__(find_slot__)(htbl, key); + if(darray_char_cdata_get(&htbl->table_slot_is_used)[i]) { + HTABLE_DATA_FUNC__(data_get)(&htbl->table)[i].data = *data; + } else { + HTABLE_DATA_FUNC__(data_get)(&htbl->table)[i].data = *data; + HTABLE_DATA_FUNC__(data_get)(&htbl->table)[i].key = *key; + darray_char_data_get(&htbl->table_slot_is_used)[i] = 1; + ++htbl->table_size_in_use; + } + return 0; +} + +/* Return the number of erased elements, i.e. 0 or 1 */ +static INLINE size_t +HTABLE_FUNC__(erase)(struct HTABLE__* htbl, HTABLE_KEY const* key) +{ + size_t i = 0, j = 0, tbl_size = 0; + struct HTABLE_PAIR__* pairs = NULL; + char* used_pairs = NULL; + ASSERT(htbl && key ); + + pairs = HTABLE_DATA_FUNC__(data_get)(&htbl->table); + tbl_size = HTABLE_DATA_FUNC__(size_get)(&htbl->table); + ASSERT(is_power_of_2(tbl_size)); + + used_pairs = darray_char_data_get(&htbl->table_slot_is_used); + i = HTABLE_FUNC__(find_slot__)(htbl, key); + if(!used_pairs[i]) + return 0; /* No entry */ + + /* Remove the entry */ + used_pairs[i] = 0; + --htbl->table_size_in_use; + + for(j = (i + 1) & (tbl_size - 1); /* <=> j = (i+1) % size */ + used_pairs[j]; + j = (j + 1) & (tbl_size - 1) ) { /* <=> j = (j+1) % size */ + const size_t k = htbl->hash(&pairs[j].key) & (tbl_size- 1); + + if(i <= j ? (i < k && k <= j) : (i < k || k <= j)) + continue; + + pairs[i] = pairs[j]; + used_pairs[i] = 1; + used_pairs[j] = 0; + i = j; + } + return 1; +} + +static INLINE char +HTABLE_FUNC__(is_empty)(const struct HTABLE__* htbl) +{ + ASSERT(htbl); + return htbl->table_size_in_use == 0; +} + +static INLINE HTABLE_DATA* +HTABLE_FUNC__(find)(struct HTABLE__* htbl, HTABLE_KEY const* key) +{ + size_t i = 0; + ASSERT(htbl && key); + if(HTABLE_FUNC__(is_empty)(htbl)) + return NULL; + + i = HTABLE_FUNC__(find_slot__)(htbl, key); + if(darray_char_cdata_get(&htbl->table_slot_is_used)[i]) { + return &HTABLE_DATA_FUNC__(data_get)(&htbl->table)[i].data; + } else { + return NULL; + } +} + +static INLINE size_t +HTABLE_FUNC__(size_get)(const struct HTABLE__* htbl) +{ + ASSERT(htbl); + return htbl->table_size_in_use; +} + +static INLINE void +HTABLE_FUNC__(begin) + (struct HTABLE__* htbl, + struct HTABLE_INTERATOR__* it) +{ + const char* used_pairs = NULL; + size_t i = 0; + size_t tbl_size = 0; + ASSERT(htbl && it); + + tbl_size = HTABLE_DATA_FUNC__(size_get)(&htbl->table); + used_pairs = darray_char_cdata_get(&htbl->table_slot_is_used); + + it->slot = it->entries_scanned = 0; + for( i = 0; i < tbl_size && !used_pairs[i]; ++i) {} + it->slot = i; + it->entries_scanned = i < tbl_size; + it->hash_table = htbl; +} + +static INLINE void +HTABLE_FUNC__(end) + (struct HTABLE__* htbl, + struct HTABLE_INTERATOR__* it) +{ + ASSERT(htbl && it); + it->slot = HTABLE_DATA_FUNC__(size_get)(&htbl->table); + it->entries_scanned = htbl->table_size_in_use; + it->hash_table = htbl; +} + +static INLINE void +HTABLE_FUNC__(iterator_next)(struct HTABLE_INTERATOR__* it) +{ + size_t tbl_size = 0; + ASSERT(it); + + tbl_size = HTABLE_DATA_FUNC__(size_get)(&it->hash_table->table); + if(it->entries_scanned >= it->hash_table->table_size_in_use) { + it->slot = tbl_size; + } else { + size_t i = 0; + FOR_EACH(i, it->slot + 1, tbl_size) + if(darray_char_cdata_get(&it->hash_table->table_slot_is_used)[i]) break; + it->slot = i; + it->entries_scanned += i < tbl_size; + } +} + +static INLINE char +HTABLE_FUNC__(iterator_eq) + (const struct HTABLE_INTERATOR__* it0, + const struct HTABLE_INTERATOR__* it1) +{ + ASSERT(it0 && it1); + return + it0->slot == it1->slot + && it0->entries_scanned == it1->entries_scanned + && it0->hash_table == it1->hash_table; +} + +static INLINE HTABLE_KEY* +HTABLE_FUNC__(iterator_key_get)(const struct HTABLE_INTERATOR__* it) +{ + ASSERT(it && it->slot < HTABLE_DATA_FUNC__(size_get)(&it->hash_table->table)); + return &HTABLE_DATA_FUNC__(data_get)(&it->hash_table->table)[it->slot].key; +} + +static INLINE HTABLE_DATA* +HTABLE_FUNC__(iterator_data_get)(const struct HTABLE_INTERATOR__* it) +{ + ASSERT(it && it->slot < HTABLE_DATA_FUNC__(size_get)(&it->hash_table->table)); + return &HTABLE_DATA_FUNC__(data_get)(&it->hash_table->table)[it->slot].data; +} + +#undef HTABLE_KEY +#undef HTABLE_DATA +#undef HTABLE_NAME +#undef HTABLE +#undef HTABLE_PAIR__ +#undef HTABLE_INTERATOR__ +#undef HTABLE_FUNC__ + +#endif /* Pre inclusion */ + diff --git a/src/math.h b/src/math.h @@ -3,8 +3,8 @@ #include "rsys.h" -#define max(A, B) (A) > (B) ? (A) : (B) -#define min(A, B) (A) < (B) ? (A) : (B) +#define max(A, B) ((A) > (B) ? (A) : (B)) +#define min(A, B) ((A) < (B) ? (A) : (B)) static FINLINE char is_power_of_2(const size_t i) diff --git a/src/test_hash_table.c b/src/test_hash_table.c @@ -0,0 +1,73 @@ +#include "hash_table.h" +#include <string.h> + +#define HTABLE_KEY int +#define HTABLE_DATA float +#define HTABLE_NAME int_float +#include "hash_table.h" + +static INLINE char +eq_int( const int* a, const int* b ) +{ + return *a == *b; +} + +static void +test_htbl_int_float( void ) +{ + struct mem_allocator allocator_proxy; + struct htable_int_float htbl; + int i; + const int n = 678; + + htable_int_float_init(&htbl, NULL, NULL, eq_int); + htable_int_float_release(&htbl); + + mem_init_proxy_allocator(&allocator_proxy, &mem_default_allocator); + + htable_int_float_init(&htbl, &allocator_proxy, NULL, eq_int); + htable_int_float_reserve(&htbl, 30); + + FOR_EACH(i, 0, n) { + float f = (float)i; + CHECK(htable_int_float_set(&htbl, &i, &f), 0); + } + + FOR_EACH( i, 0, n ) { + float* p = htable_int_float_find(&htbl, &i); + NCHECK(p, NULL); + CHECK(*p, (float) i); + } + CHECK(htable_int_float_size_get(&htbl), (size_t)n); + FOR_EACH(i, 0, n / 2) { + CHECK(htable_int_float_erase(&htbl, &i), 1); + } + CHECK(htable_int_float_size_get(&htbl), (size_t)(n/2)); + FOR_EACH(i, 0, n/2) { + float* p = htable_int_float_find(&htbl, &i); + CHECK(p, NULL); + } + FOR_EACH(i, n/2, n) { + float* p = htable_int_float_find(&htbl, &i); + NCHECK(p, NULL); + CHECK(*p, (float) i); + } + htable_int_float_release(&htbl); + + if(MEM_ALLOCATED_SIZE(&allocator_proxy)) { + char dump[512]; + MEM_DUMP(&allocator_proxy, dump, sizeof(dump)/sizeof(char)); + fprintf(stderr, "%s\n", dump); + FATAL("Memory leaks\n"); + } + mem_shutdown_proxy_allocator(&allocator_proxy); +} + +int +main(int argc, char** argv) +{ + (void)argc, (void)argv; + test_htbl_int_float(); + return 0; +} +