1 /***************************************************************************
2 * Copyright 1995 Michael Veksler. mveksler@vnet.ibm.com
3 ***************************************************************************
5 * Purpose : dynamically growing hash, may use shared or local memory.
6 ***************************************************************************
10 #include "generic_hash.h"
12 #define ROUND_UP4(num) (( (num)+3) & ~3)
15 #define DELETED_ENTRY ((DWORD)-1)
17 #define NO_OF_PRIMES 512
18 #define GET_ITEM(items,size,i)\
20 ( ((char *)(items))+ \
23 static HASH_ITEM *locate_entry(HASH_CONTAINER* hash, DWORD key,
24 HASH_VAL *seeked_data, BOOL skip_deleted);
26 static void copy_hash_items(HASH_CONTAINER *hash, HASH_ITEM *old_items,
29 static BOOL arrays_initialized = FALSE;
30 static int primes[NO_OF_PRIMES];
31 static int best_primes[NO_OF_PRIMES];
32 static int no_of_primes;
33 static int no_of_best_primes;
36 /* binary search for `num' in the `primes' array */
37 static BOOL prime_binary_search_found(int num)
39 int min_idx, max_idx, idx;
42 max_idx=no_of_primes-1;
44 while (min_idx <= max_idx) {
45 idx = (max_idx + min_idx) >> 1;
46 if (num == primes[idx])
48 if (num < primes[idx])
56 static BOOL is_prime(int num)
59 if ((num & 0x1) == 0) /* can be divided by 2 */
65 if (num <= primes[no_of_primes-1])
66 return prime_binary_search_found(num);
68 for (i=0 ; i < no_of_primes ; i++) {
69 if (num % primes[i] == 0)
71 if (num < primes[i] * primes[i])
77 static void setup_primes()
85 /* count in modulo 6 to avoid numbers that divide by 2 or 3 */
86 for (num=5 ; ; num+=6) {
88 primes[no_of_primes++]=num;
89 if (no_of_primes >= NO_OF_PRIMES)
92 if (is_prime(num+2)) {
93 primes[no_of_primes++]=num+2;
94 if (no_of_primes >= NO_OF_PRIMES)
98 max_num= primes[no_of_primes-1] * primes[no_of_primes-1];
102 /* Find primes which are far "enough" from powers of two */
104 void setup_best_primes()
108 int pow2before, pow2after;
109 int min_range, max_range;
116 no_of_best_primes= 0;
117 for (i=0 ; i < no_of_primes ; i++){
120 if (num > pow2after) {
121 pow2before= pow2after;
123 min_range= pow2before+ (pow2before >> 3);
124 max_range= pow2after- (pow2before >> 2);
126 if (num > min_range && num < max_range)
127 best_primes[no_of_best_primes++]=num;
131 /* binary search for `num' in the `best_primes' array,
132 * Return smallest best_prime >= num.
134 static int best_prime_binary_search(int num)
136 int min_idx, max_idx, idx;
139 max_idx=no_of_best_primes-1;
142 idx = (max_idx + min_idx) >> 1;
143 if (num == best_primes[idx])
145 if (num < best_primes[idx]) {
147 if (max_idx <= min_idx)
148 return best_primes[idx];
152 if (min_idx >= max_idx)
153 return best_primes[max_idx];
159 /* Find the best prime, near `num' (which can be any number) */
160 static int best_prime(int num)
163 int pow2less, pow2more;
164 int min_range, max_range;
169 if (num <= best_primes[no_of_best_primes-1])
170 return best_prime_binary_search(num);
172 assert( num < max_num );
174 for (log2=0 ; num >> log2 ; log2++)
178 pow2more= 1 << (log2+1);
179 min_range= pow2less + (pow2less >> 3);
180 max_range= pow2more - (pow2more >> 3);
185 num |= 1; /* make sure num can't be divided by 2 */
188 if (num >= max_range) {
191 min_range= pow2less + (pow2less >> 3);
192 max_range= pow2more - (pow2more >> 3);
193 num= min_range | 1; /* make sure num can't be divided by 2 */
195 /* num should be here in the range: (min_range, max_range) */
202 /* FIXME: This can be done before compiling. (uning a script)*/
203 static void setup_arrays()
209 /* Discard all DELETED_ENTRYs moving the data to it's correct location.
210 * Done without a temporary buffer.
211 * May require some efficiency improvements ( currently it's o(N^2)
212 * or is it o(N^3) in the worst case ? In the avarege it seems to be
213 * something like o(N log (N)))
215 static void static_collect_garbge(HASH_CONTAINER *hash)
228 for (i=hash->shared->total_items-1 ; i >= 0 ; i--) {
229 item= &GET_ITEM(items,hash->bytes_per_item,i);
231 if (key != DELETED_ENTRY && key != FREE_ENTRY) {
232 /* try to place the entry in a deleted location */
233 located= locate_entry(hash, key, &item->data,
234 0 /* no skip_deleted */);
235 if (located->key == DELETED_ENTRY) {
237 memcpy(&located, &item,
238 hash->bytes_per_item);
239 item->key= DELETED_ENTRY;
245 /* No change means that there is no need to go through a DELETED_ENTRY
246 * in order to reach an item, so DELETED_ENTRY looses it's special
247 * meaning, and it is the same as FREE_ENTRY.
249 for (i=hash->shared->total_items-1 ; i >= 0 ; i--)
250 if (GET_ITEM(items,hash->bytes_per_item,i).key == DELETED_ENTRY)
251 GET_ITEM(items,hash->bytes_per_item,i).key = FREE_ENTRY;
252 hash->shared->deleted_items=0;
255 static void collect_garbge(HASH_CONTAINER *hash)
257 HASH_SHARED *shared= hash->shared;
258 HASH_ITEM *temp_items;
261 size= shared->total_items * hash->bytes_per_item;
262 temp_items= (HASH_ITEM*)malloc(size);
263 if (temp_items==NULL) {
264 static_collect_garbge(hash);
266 memcpy(temp_items, hash->items, size);
267 copy_hash_items(hash, temp_items, shared->total_items);
272 static void copy_hash_items(HASH_CONTAINER *hash, HASH_ITEM *old_items,
275 HASH_SHARED *shared= hash->shared;
279 shared->deleted_items = 0;
280 shared->free_items= shared->total_items;
282 /* make all items free */
283 for (i= shared->total_items-1 ; i>=0 ; i--)
284 GET_ITEM(hash->items, hash->bytes_per_item, i).key = FREE_ENTRY;
287 for (i=0 ; i <= old_n_items; i++) {
288 item= &GET_ITEM(old_items, hash->bytes_per_item,i);
289 if (item->key != FREE_ENTRY && item->key != DELETED_ENTRY)
290 hash_add_item(hash, item->key, &item->data);
295 static void reorder_hash(HASH_CONTAINER *hash)
297 HASH_SHARED *shared= hash->shared;
298 HASH_ITEM *items, *old_items;
299 HASH_PTR shared_items, old_shared_items;
300 int n_items, old_n_items;
303 if (shared->deleted_items > hash->min_free_items) {
304 collect_garbge(hash);
307 n_items= best_prime(shared->total_items * HASH_REALLOC_JUMPS);
310 (sizeof(items[0]) - sizeof(items[0].data) + hash->bytes_per_item);
312 shared_items= hash->allocate_mem(size);
313 items= hash->access_mem(shared_items);
316 collect_garbge(hash);
319 old_shared_items = shared->items;
320 old_n_items= shared->total_items;
321 old_items= hash->items;
323 /* setup a new clean hash based on the parameters of the original one */
325 shared->total_items = n_items;
326 shared->items= shared_items;
327 set_hash_parameters(hash, hash->maximum_load);
328 copy_hash_items(hash, old_items, old_n_items);
330 hash->free_mem(old_shared_items);
331 hash->last_ptr_update= ++shared->ptr_updates;
334 /* low level: attach hash existing hash items, no checks are performed
335 * No complex calculations done.
337 static HASH_CONTAINER *attach_no_check(HASH_ITEM *items, int bytes_per_datum)
339 HASH_CONTAINER *hash;
341 HASH_ITEM dummy_item;
343 hash= (HASH_CONTAINER*) malloc(sizeof(HASH_CONTAINER) );
347 bytes_per_item= bytes_per_datum+
348 sizeof(dummy_item)-sizeof(dummy_item.data);
349 hash->bytes_per_item= ROUND_UP4(bytes_per_item);
351 hash->is_correct_item= NULL;
352 hash->allocate_mem= HASH_MEM_ALLOC;
353 hash->access_mem= HASH_MEM_ACCESS;
354 hash->free_mem= HASH_MEM_FREE;
355 set_hash_parameters(hash, HASH_LOAD);
362 /* Attach existing & running remote (i.e. shared) hash.
363 * Attach the items using the data stored in "shared"
365 HASH_CONTAINER *attach_remote_hash(HASH_SHARED *shared, int bytes_per_datum,
366 HASH_ITEM *(*access_mem)(HASH_PTR))
368 HASH_CONTAINER *hash;
371 assert(access_mem != NULL);
372 if (! arrays_initialized)
375 items=access_mem(shared->items);
376 hash= attach_no_check(items, bytes_per_datum);
380 hash->shared_was_malloced = FALSE;
381 hash->shared= shared;
386 HASH_CONTAINER *create_remote_hash(HASH_SHARED *shared,
389 HASH_PTR (*allocate_mem)(int size),
390 HASH_ITEM *(*access_mem)(HASH_PTR))
392 HASH_CONTAINER *hash;
396 assert(total_items >= 1);
397 assert(bytes_per_datum >=1);
398 assert(access_mem != NULL);
399 assert(allocate_mem != NULL);
400 assert(shared != NULL);
401 if (! arrays_initialized)
404 if (total_items < MIN_HASH)
405 total_items= MIN_HASH;
407 total_items= best_prime(total_items);
409 hash= attach_no_check(NULL, bytes_per_datum);
416 shared->total_items= total_items;
417 hash->shared= shared;
418 hash->shared_was_malloced = FALSE;
420 size= total_items * hash->bytes_per_item;
422 shared->items = allocate_mem(size);
423 hash->items= access_mem(shared->items);
425 if (hash->items == NULL ) {
429 shared->items.ptr= hash->items;
431 /* make all items free */
432 for (i=0 ; i < total_items ; i++)
433 GET_ITEM(hash->items,hash->bytes_per_item,i).key = FREE_ENTRY;
435 shared->deleted_items= 0;
436 shared->free_items= total_items;
437 shared->ptr_updates= 0;
442 /* hash constructor: create brand new hash */
443 HASH_CONTAINER *create_hash(int bytes_per_datum, int total_items)
445 HASH_CONTAINER *hash;
449 shared= (HASH_SHARED*)malloc(sizeof(HASH_SHARED));
453 hash= create_remote_hash(shared, bytes_per_datum, total_items,
454 HASH_MEM_ALLOC, HASH_MEM_ACCESS);
461 hash->shared_was_malloced = TRUE;
465 /* set the extra handlers to non default values */
466 void set_hash_handlers(HASH_CONTAINER *hash,
467 HASH_ITEM_TEST *is_correct_item,
468 HASH_PTR (*allocate_mem)(int size),
469 void (*free_mem)(HASH_PTR),
470 HASH_ITEM *(*access_mem)(HASH_PTR))
473 assert(allocate_mem);
476 hash->free_mem = free_mem;
477 hash->allocate_mem = allocate_mem;
478 hash->access_mem = access_mem;
479 hash->is_correct_item = is_correct_item;
483 /* set extra parameters */
484 void set_hash_parameters(HASH_CONTAINER *hash, int load)
487 assert(load>30); /* no sence to realloc with less than */
488 /* 50% load, limiting to 30% to be on */
492 hash->maximum_load= load;
493 hash->min_free_items= (1.0 - load/100.0) * hash->shared->total_items + 1 ;
496 /* hash destructor: destroy anything related to the hash */
497 void destroy_hash(HASH_CONTAINER *hash)
500 hash->free_mem(hash->shared->items);
501 if (hash->shared_was_malloced)
506 /* hash destructor: just detach hash, without destroing it (makes */
507 /* sence in shared memory environment) */
508 void detach_hash(HASH_CONTAINER *hash)
515 /********** Hash usage *************/
516 static __inline__ BOOL
517 correct_entry(HASH_ITEM *item, int key, HASH_VAL *seeked_data,
518 HASH_ITEM_TEST *is_correct_item, BOOL skip_deleted)
525 return skip_deleted ? FALSE : TRUE;
528 if (item->key != key)
530 if (is_correct_item != NULL)
531 return is_correct_item(&item->data, seeked_data);
538 /* The algorithm of the hash (one of the 2 standard hash implementations):
539 * Iterate through the hash table until
540 * 1. The entry has been found.
541 * 2. A FREE entry has been found.
542 * 3. For insert operations only- A DELETED entry has been found.
543 * The difference between DELETED and FREE entires is that
544 * DELETED entry was one occupied, while FREE was never allocated.
545 * The idea behind this structure to keep other entries reachable.
548 static HASH_ITEM *locate_entry(HASH_CONTAINER* hash, DWORD key,
549 HASH_VAL *seeked_data, BOOL skip_deleted)
551 DWORD hash_idx, hash_leaps;
558 total_items= hash->shared->total_items;
559 hash_idx= key % total_items;
561 item= &GET_ITEM(hash->items, hash->bytes_per_item, hash_idx);
563 if ( correct_entry( item, key, seeked_data,
564 hash->is_correct_item, skip_deleted) )
567 /* get the WORDs in different order in this DWORD to avoid clustering */
568 hash_leaps=((DWORD)MAKELONG(HIWORD(key), LOWORD(key))
569 % (total_items-1)) +1;
571 /* interate through the hash table using hash_leaps */
572 for (i= total_items ; i ; i--) {
573 hash_idx+= hash_leaps;
574 if (hash_idx > total_items)
575 hash_idx -= total_items;
577 item= &GET_ITEM(hash->items,hash->bytes_per_item, hash_idx);
578 if ( correct_entry( item, key, seeked_data,
579 hash->is_correct_item, skip_deleted) )
586 static __inline__ void sync_shared_hash(HASH_CONTAINER *hash)
588 HASH_SHARED *shared= hash->shared;
590 if (shared->ptr_updates == hash->last_ptr_update)
593 assert(shared->ptr_updates >= hash->last_ptr_update);
594 hash->last_ptr_update= shared->ptr_updates;
595 hash->min_free_items= (1.0 - hash->maximum_load/100.0) *
596 shared->total_items + 1 ;
598 hash->items= hash->access_mem(shared->items);
601 HASH_VAL *hash_locate_item(HASH_CONTAINER* hash,
602 int key, HASH_VAL *seeked_data)
606 assert(hash != NULL);
607 sync_shared_hash(hash);
609 item= locate_entry(hash, key, seeked_data, 1 /* skip_deleted */);
612 if (item->key == FREE_ENTRY )
619 BOOL hash_add_item(HASH_CONTAINER* hash, int key, HASH_VAL *data)
624 assert(hash != NULL);
626 sync_shared_hash(hash);
627 shared= hash->shared;
629 item=locate_entry(hash, key, data, 0 /* no skip_deleted */);
630 assert(item != NULL);
631 if (item->key == key)
633 if (item->key == FREE_ENTRY)
634 shared->free_items--;
636 shared->deleted_items--;
639 memcpy(&item->data, data, hash->bytes_per_item-sizeof(key));
641 if (shared->free_items < hash->min_free_items ||
642 shared->deleted_items > hash->min_free_items)
649 BOOL hash_delete_item(HASH_CONTAINER* hash, int key, HASH_VAL *seeked_data)
653 assert(hash != NULL);
654 sync_shared_hash(hash);
656 item=locate_entry(hash, key, seeked_data, 1 /* skip_deleted */);
660 if (item->key == FREE_ENTRY)
663 item->key = DELETED_ENTRY;
664 hash->shared->deleted_items++;
675 HASH_ITEM *access_local_hash(HASH_PTR ptr)