5
5
#include "bin_io.h"
6
6
#include "log.h"
7
7
8
+ //
9
+ // hash table structures
10
+ //
11
/* Number of buckets in the weight -> nodes hash table. */
enum {
    // TODO: tune it. prime number or power of 2 ? see https://en.wikipedia.org/wiki/Hash_table
    HASH_SIZE = 256
};
14
+
15
+ typedef struct hash_entry {
16
+ adh_node_t * value ;
17
+ struct hash_entry * next ;
18
+ struct hash_entry * prev ;
19
+ } hash_entry_t ;
20
+
21
+ typedef struct {
22
+ int length ;
23
+ hash_entry_t * * buckets ;
24
+ } hash_table_t ;
25
+
26
+
8
27
//
9
28
// module variables
10
29
//
11
30
static adh_order_t adh_next_order ;
12
31
static adh_node_t * adh_root_node = NULL ;
13
32
static adh_node_t * adh_nyt_node = NULL ;
14
- static adh_node_t * adh_node_array [MAX_ORDER ];
15
- static adh_node_t * adh_symbol_node_array [MAX_CODE_BITS ];
16
- static int last_index_of_node_array ;
33
+ static adh_node_t * symbol_node_array [MAX_CODE_BITS ];
34
+ static hash_table_t map_weight_nodes ;
17
35
18
36
//
19
37
// private methods
@@ -23,8 +41,15 @@ adh_node_t* create_nyt();
23
41
adh_node_t * create_node (adh_symbol_t symbol );
24
42
void destroy_node (adh_node_t * node );
25
43
void update_node_encoding (adh_node_t * node );
26
- void sort_node_array ( );
44
+ void increase_weight ( adh_node_t * node );
27
45
46
+ void hash_init ();
47
+ void hash_release ();
48
+ void hash_add (adh_node_t * node );
49
+ void hash_remove (adh_node_t * node );
50
+ unsigned int hash_code (adh_weight_t weight );
51
+ adh_node_t * hash_get (adh_weight_t weight , adh_order_t order );
52
+ void hash_check_collision (adh_weight_t weight , int hash_index , const adh_node_t * node );
28
53
29
54
/*
30
55
* get NYT node
@@ -52,6 +77,7 @@ int adh_init(const char input_file_name[], const char output_file_name[],
52
77
}
53
78
54
79
adh_init_tree ();
80
+ hash_init ();
55
81
56
82
return rc ;
57
83
}
@@ -65,12 +91,7 @@ int adh_init_tree() {
65
91
#endif
66
92
67
93
for (int i = 0 ; i < MAX_CODE_BITS ; ++ i ) {
68
- adh_symbol_node_array [i ] = NULL ;
69
- }
70
-
71
- last_index_of_node_array = 0 ;
72
- for (int i = 0 ; i < MAX_ORDER ; ++ i ) {
73
- adh_node_array [i ] = NULL ;
94
+ symbol_node_array [i ] = NULL ;
74
95
}
75
96
76
97
adh_next_order = MAX_ORDER ;
@@ -92,6 +113,7 @@ void adh_destroy_tree() {
92
113
#endif
93
114
94
115
destroy_node (adh_root_node );
116
+ hash_release ();
95
117
adh_root_node = NULL ;
96
118
adh_nyt_node = NULL ;
97
119
}
@@ -135,7 +157,7 @@ adh_node_t * adh_create_node_and_append(adh_symbol_t symbol) {
135
157
// create right leaf node with passed symbol (and weight 1)
136
158
adh_node_t * newNode = create_node (symbol );
137
159
if (newNode ) {
138
- newNode -> weight = 1 ;
160
+ increase_weight ( newNode ) ;
139
161
newNode -> parent = adh_nyt_node ;
140
162
adh_nyt_node -> right = newNode ;
141
163
@@ -151,7 +173,6 @@ adh_node_t * adh_create_node_and_append(adh_symbol_t symbol) {
151
173
152
174
update_node_encoding (newNode ); // update bit_array
153
175
update_node_encoding (newNYT ); // update bit_array
154
- //sort_node_array();
155
176
}
156
177
return newNode ;
157
178
}
@@ -182,13 +203,10 @@ adh_node_t * create_node(adh_symbol_t symbol) {
182
203
183
204
adh_node_t * node = malloc (sizeof (adh_node_t ));
184
205
185
- // add node to node array and increase the last index
186
- adh_node_array [last_index_of_node_array ++ ] = node ;
187
-
188
206
// if the new node is a symbol node
189
- // save its reference in the adh_symbol_node_array to improve searches
207
+ // save its reference in the symbol_node_array to improve searches
190
208
if (symbol > ADH_NYT_CODE )
191
- adh_symbol_node_array [symbol ] = node ;
209
+ symbol_node_array [symbol ] = node ;
192
210
193
211
node -> left = NULL ;
194
212
node -> right = NULL ;
@@ -203,52 +221,13 @@ adh_node_t * create_node(adh_symbol_t symbol) {
203
221
return node ;
204
222
}
205
223
206
- void sort_node_array () {
207
- int holePos ;
208
- for (int i = 1 ; i < last_index_of_node_array ; i ++ ) {
209
- adh_node_t * nodeToInsert = adh_node_array [i ];
210
- holePos = i ;
211
-
212
- // sort by weight so the find_higher_order_same_weight method will be faster
213
- while (holePos > 0 && adh_node_array [holePos - 1 ]-> weight > nodeToInsert -> weight ) {
214
- adh_node_array [holePos ] = adh_node_array [holePos - 1 ];
215
- holePos -- ;
216
- }
217
-
218
- if (holePos != i )
219
- adh_node_array [holePos ] = nodeToInsert ;
220
- }
221
- }
222
-
223
224
/*
 * Look for a node having the given weight and an order greater than the given one.
 * The lookup itself is delegated to hash_get(); NULL is returned when weight is 0.
 */
adh_node_t * find_higher_order_same_weight(adh_weight_t weight, adh_order_t order) {
    // small optimization: only NYT and new nodes have weight 0,
    // so they are already ordered and must not be swapped
    return (weight == 0) ? NULL : hash_get(weight, order);
}
253
232
254
233
/*
@@ -258,7 +237,7 @@ adh_node_t * adh_search_symbol_in_tree(adh_symbol_t symbol) {
258
237
#ifdef _DEBUG
259
238
log_trace (" adh_search_symbol_in_tree" , "%s\n" , fmt_symbol (symbol ));
260
239
#endif
261
- return adh_symbol_node_array [symbol ];
240
+ return symbol_node_array [symbol ];
262
241
}
263
242
264
243
/*
@@ -339,15 +318,13 @@ void adh_update_tree(adh_node_t *node, bool is_new_node) {
339
318
swap_nodes (node_to_check , node_to_swap );
340
319
}
341
320
// now we can safely update the weight of the node
342
- node_to_check -> weight ++ ;
343
- //sort_node_array();
321
+ increase_weight (node_to_check );
344
322
345
323
// continue ascending the tree
346
324
node_to_check = node_to_check -> parent ;
347
325
}
348
- if (node_to_check != NULL ) {
349
- node_to_check -> weight ++ ;
350
- }
326
+
327
+ increase_weight (node_to_check );
351
328
352
329
#ifdef _DEBUG
353
330
log_tree ();
@@ -424,6 +401,16 @@ adh_node_t* adh_search_leaf_by_encoding(const bit_array_t *bit_array) {
424
401
return NULL ;
425
402
}
426
403
404
+ void increase_weight (adh_node_t * node ) {
405
+ if (node == NULL )
406
+ return ;
407
+
408
+ hash_remove (node );
409
+ node -> weight ++ ;
410
+ hash_add (node );
411
+ }
412
+
413
+
427
414
void print_sub_tree (const adh_node_t * node , int depth )
428
415
{
429
416
if (node == NULL )
@@ -454,9 +441,98 @@ void print_tree() {
454
441
fprintf (stdout , "\n" );
455
442
}
456
443
457
- void print_node_array () {
458
- log_debug ("print_node_array" , "\n" );
459
- for (int i = 0 ; i < last_index_of_node_array ;i ++ ){
460
- log_debug ("" , "%3i %s \n" , i , fmt_node (adh_node_array [i ]));
444
+ inline unsigned int hash_code (adh_weight_t weight ) {
445
+ return weight % HASH_SIZE ;
446
+ }
447
+
448
+ void hash_init () {
449
+ map_weight_nodes .length = HASH_SIZE ;
450
+ map_weight_nodes .buckets = calloc (HASH_SIZE , HASH_SIZE * sizeof (map_weight_nodes .buckets ));
451
+ }
452
+
453
+ void hash_release () {
454
+ for (int i = 0 ; i < map_weight_nodes .length ; ++ i ) {
455
+ hash_entry_t * entry = map_weight_nodes .buckets [i ];
456
+ while (entry ) {
457
+ hash_entry_t * next = entry -> next ;
458
+ free (entry );
459
+ entry = next ;
460
+ }
461
+ }
462
+ free (map_weight_nodes .buckets );
463
+ }
464
+
465
+ void hash_remove (adh_node_t * node ){
466
+ int hash_index = hash_code (node -> weight );
467
+
468
+ hash_entry_t * entry = map_weight_nodes .buckets [hash_index ];
469
+ while (entry ) {
470
+ if (entry -> value == node ) {
471
+ if (entry -> prev == NULL ) {
472
+ map_weight_nodes .buckets [hash_index ] = entry -> next ;
473
+ }
474
+ else {
475
+ entry -> prev -> next = entry -> next ;
476
+ }
477
+
478
+ if (entry -> next )
479
+ entry -> next -> prev = entry -> prev ;
480
+
481
+ free (entry );
482
+ break ;
483
+ }
484
+ entry = entry -> next ;
485
+ }
486
+ }
487
+
488
+ void hash_add (adh_node_t * node ){
489
+ int hash_index = hash_code (node -> weight );
490
+
491
+ hash_entry_t * new_entry = calloc (1 , sizeof (hash_entry_t ));
492
+ new_entry -> value = node ;
493
+
494
+ hash_entry_t * last = map_weight_nodes .buckets [hash_index ];
495
+ if (last == NULL ) {
496
+ map_weight_nodes .buckets [hash_index ] = new_entry ;
497
+ }
498
+ else {
499
+ while (last && last -> next ) {
500
+ last = last -> next ;
501
+ }
502
+
503
+ last -> next = new_entry ;
504
+ new_entry -> prev = last ;
505
+ }
506
+ }
507
+
508
+ adh_node_t * hash_get (adh_weight_t weight , adh_order_t order ) {
509
+ adh_node_t * node_result = NULL ;
510
+ int hash_index = hash_code (weight );
511
+ hash_entry_t * entry = map_weight_nodes .buckets [hash_index ];
512
+ while (entry ) {
513
+ adh_node_t * current_node = entry -> value ;
514
+
515
+ #ifdef _DEBUG
516
+ hash_check_collision (weight , hash_index , current_node );
517
+ #endif
518
+
519
+ if (current_node -> weight == weight && current_node -> order > order && current_node != adh_root_node ) {
520
+ node_result = current_node ;
521
+ order = node_result -> order ;
522
+ }
523
+ entry = entry -> next ;
524
+ }
525
+ return node_result ;
526
+ }
527
+
528
+ void hash_check_collision (adh_weight_t weight , int hash_index , const adh_node_t * node ) {
529
+ if (node -> weight != weight ) {
530
+ int size = 0 ;
531
+ hash_entry_t * he = map_weight_nodes .buckets [hash_index ];
532
+ while (he != NULL ) {
533
+ size ++ ;
534
+ he = he -> next ;
535
+ }
536
+ log_info ("hash_get" , "collision, size:%d w1:%d w2:%d\n" , size , weight , node -> weight );
461
537
}
462
538
}
0 commit comments