Skip to content

Commit 4490160

Browse files
committed
performance improvement: replace the node array with a hash table
compress immagine.tiff: 13" -> 4"; decompress immagine.tiff: 18" -> 8"
1 parent 2e6ef22 commit 4490160

File tree

5 files changed

+147
-71
lines changed

5 files changed

+147
-71
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# gcc -o adaptive_huffman log.c adhuff_decompress.c bin_io.c adhuff_compress.c main.c adhuff_common.c -std=c99 -O3 -lm
33

44
CC = gcc
5-
CFLAGS = -std=c99 -O3 -lm
5+
CFLAGS = -std=c99 -O3 -lm -Wall
66
OUTFILE = adaptive_huffman
77
DEPS = *.h
88
OBJ = *.c

adhuff_common.c

Lines changed: 143 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,33 @@
55
#include "bin_io.h"
66
#include "log.h"
77

8+
//
9+
// hash table structures
10+
//
11+
// Number of buckets in the weight hash table; hash_code() reduces a weight
// modulo this value and hash_init() allocates this many bucket slots.
enum {
12+
HASH_SIZE = 256 //TODO: tune it. prime number or power of 2 ? see https://en.wikipedia.org/wiki/Hash_table
13+
};
14+
15+
// One element of a bucket's doubly linked chain. The entry only refers to a
// tree node; hash_remove()/hash_release() free the entry, never the node.
typedef struct hash_entry {
16+
adh_node_t* value;          // tree node this entry refers to
17+
struct hash_entry* next;    // following entry in the chain, NULL at the tail
18+
struct hash_entry* prev;    // preceding entry in the chain, NULL at the head
19+
} hash_entry_t;
20+
21+
// Chained hash table: an array of bucket heads, each the start of a
// doubly linked list of hash_entry_t.
typedef struct {
22+
int length;                 // number of buckets; hash_init() sets it to HASH_SIZE
23+
hash_entry_t **buckets;     // array of chain heads, NULL for an empty bucket
24+
} hash_table_t;
25+
26+
827
//
928
// module variables
1029
//
1130
static adh_order_t adh_next_order;
1231
static adh_node_t * adh_root_node = NULL;
1332
static adh_node_t * adh_nyt_node = NULL;
14-
static adh_node_t * adh_node_array[MAX_ORDER];
15-
static adh_node_t * adh_symbol_node_array[MAX_CODE_BITS];
16-
static int last_index_of_node_array;
33+
static adh_node_t * symbol_node_array[MAX_CODE_BITS];
34+
static hash_table_t map_weight_nodes;
1735

1836
//
1937
// private methods
@@ -23,8 +41,15 @@ adh_node_t* create_nyt();
2341
adh_node_t* create_node(adh_symbol_t symbol);
2442
void destroy_node(adh_node_t *node);
2543
void update_node_encoding(adh_node_t *node);
26-
void sort_node_array();
44+
void increase_weight(adh_node_t *node);
2745

46+
void hash_init();
47+
void hash_release();
48+
void hash_add(adh_node_t* node);
49+
void hash_remove(adh_node_t* node);
50+
unsigned int hash_code(adh_weight_t weight);
51+
adh_node_t* hash_get(adh_weight_t weight, adh_order_t order);
52+
void hash_check_collision(adh_weight_t weight, int hash_index, const adh_node_t *node);
2853

2954
/*
3055
* get NYT node
@@ -52,6 +77,7 @@ int adh_init(const char input_file_name[], const char output_file_name[],
5277
}
5378

5479
adh_init_tree();
80+
hash_init();
5581

5682
return rc;
5783
}
@@ -65,12 +91,7 @@ int adh_init_tree() {
6591
#endif
6692

6793
for (int i = 0; i < MAX_CODE_BITS; ++i) {
68-
adh_symbol_node_array[i] = NULL;
69-
}
70-
71-
last_index_of_node_array = 0;
72-
for (int i = 0; i < MAX_ORDER; ++i) {
73-
adh_node_array[i] = NULL;
94+
symbol_node_array[i] = NULL;
7495
}
7596

7697
adh_next_order = MAX_ORDER;
@@ -92,6 +113,7 @@ void adh_destroy_tree() {
92113
#endif
93114

94115
destroy_node(adh_root_node);
116+
hash_release();
95117
adh_root_node = NULL;
96118
adh_nyt_node = NULL;
97119
}
@@ -135,7 +157,7 @@ adh_node_t * adh_create_node_and_append(adh_symbol_t symbol) {
135157
// create right leaf node with passed symbol (and weight 1)
136158
adh_node_t * newNode = create_node(symbol);
137159
if(newNode) {
138-
newNode->weight = 1;
160+
increase_weight(newNode);
139161
newNode->parent = adh_nyt_node;
140162
adh_nyt_node->right = newNode;
141163

@@ -151,7 +173,6 @@ adh_node_t * adh_create_node_and_append(adh_symbol_t symbol) {
151173

152174
update_node_encoding(newNode); // update bit_array
153175
update_node_encoding(newNYT); // update bit_array
154-
//sort_node_array();
155176
}
156177
return newNode;
157178
}
@@ -182,13 +203,10 @@ adh_node_t * create_node(adh_symbol_t symbol) {
182203

183204
adh_node_t* node = malloc (sizeof(adh_node_t));
184205

185-
// add node to node array and increase the last index
186-
adh_node_array[last_index_of_node_array++] = node;
187-
188206
// if the new node is a symbol node
189-
// save its reference in the adh_symbol_node_array to improve searches
207+
// save its reference in the symbol_node_array to improve searches
190208
if(symbol > ADH_NYT_CODE)
191-
adh_symbol_node_array[symbol] = node;
209+
symbol_node_array[symbol] = node;
192210

193211
node->left = NULL;
194212
node->right = NULL;
@@ -203,52 +221,13 @@ adh_node_t * create_node(adh_symbol_t symbol) {
203221
return node;
204222
}
205223

206-
void sort_node_array() {
207-
int holePos;
208-
for (int i = 1; i < last_index_of_node_array; i++) {
209-
adh_node_t* nodeToInsert = adh_node_array[i];
210-
holePos = i;
211-
212-
// sort by weight so the find_higher_order_same_weight method will be faster
213-
while (holePos > 0 && adh_node_array[holePos-1]->weight > nodeToInsert->weight) {
214-
adh_node_array[holePos] = adh_node_array[holePos-1];
215-
holePos--;
216-
}
217-
218-
if(holePos != i)
219-
adh_node_array[holePos] = nodeToInsert;
220-
}
221-
}
222-
223224
adh_node_t * find_higher_order_same_weight(adh_weight_t weight, adh_order_t order) {
224225
// small optimization: only NYT and new nodes have weight 0
225226
// so they are already ordered, don't swap
226227
if(weight == 0)
227228
return NULL;
228229

229-
adh_node_t *node_to_be_returned=NULL;
230-
adh_node_t *current_node;
231-
for (int i=0; i<last_index_of_node_array ;i++){
232-
current_node = adh_node_array[i];
233-
234-
//TODO: ordinando adh_node_array saremmo piu' veloci nella ricerca.
235-
// da valutare il costo dell'ordinamento rispettto a una ricerca completa
236-
// 83% del costo della compressione di immagine.tiff e' speso in questo metodo
237-
238-
// tested with an insertion sort. the cost of sorting is higher than the benefits of search
239-
// I've disabled the sort for the moment
240-
// if(current_node->weight > weight)
241-
// break;
242-
243-
if ((current_node->weight == weight) &&
244-
(current_node->order > order) &&
245-
(current_node != adh_root_node) &&
246-
(node_to_be_returned == NULL || current_node->order > node_to_be_returned->order)) {
247-
node_to_be_returned = current_node;
248-
}
249-
}
250-
251-
return node_to_be_returned;
230+
return hash_get(weight, order);
252231
}
253232

254233
/*
@@ -258,7 +237,7 @@ adh_node_t * adh_search_symbol_in_tree(adh_symbol_t symbol) {
258237
#ifdef _DEBUG
259238
log_trace(" adh_search_symbol_in_tree", "%s\n", fmt_symbol(symbol));
260239
#endif
261-
return adh_symbol_node_array[symbol];
240+
return symbol_node_array[symbol];
262241
}
263242

264243
/*
@@ -339,15 +318,13 @@ void adh_update_tree(adh_node_t *node, bool is_new_node) {
339318
swap_nodes(node_to_check, node_to_swap);
340319
}
341320
// now we can safely update the weight of the node
342-
node_to_check->weight++;
343-
//sort_node_array();
321+
increase_weight(node_to_check);
344322

345323
// continue ascending the tree
346324
node_to_check = node_to_check->parent;
347325
}
348-
if(node_to_check != NULL) {
349-
node_to_check->weight++;
350-
}
326+
327+
increase_weight(node_to_check);
351328

352329
#ifdef _DEBUG
353330
log_tree();
@@ -424,6 +401,16 @@ adh_node_t* adh_search_leaf_by_encoding(const bit_array_t *bit_array) {
424401
return NULL;
425402
}
426403

404+
/*
 * Add one to the weight of a node and keep the weight hash table in sync.
 * The node is unlinked from the bucket of its current weight before the
 * increment and linked again under the new weight afterwards.
 * A NULL node is ignored.
 */
void increase_weight(adh_node_t *node) {
    if (node != NULL) {
        hash_remove(node);      // must happen while the old weight is still stored
        node->weight += 1;
        hash_add(node);         // re-insert under the updated weight
    }
}
412+
413+
427414
void print_sub_tree(const adh_node_t *node, int depth)
428415
{
429416
if(node==NULL)
@@ -454,9 +441,98 @@ void print_tree() {
454441
fprintf(stdout, "\n");
455442
}
456443

457-
void print_node_array() {
458-
log_debug("print_node_array", "\n");
459-
for (int i=0; i<last_index_of_node_array;i++){
460-
log_debug("", "%3i %s \n", i, fmt_node(adh_node_array[i]));
444+
/*
 * Map a node weight to a bucket index of the weight hash table.
 * The index is the weight reduced modulo HASH_SIZE.
 */
inline unsigned int hash_code(adh_weight_t weight) {
    const unsigned int bucket_index = weight % HASH_SIZE;
    return bucket_index;
}
447+
448+
/*
 * Allocate the (empty) weight hash table with HASH_SIZE buckets.
 *
 * Every bucket head starts as NULL because the array comes from calloc.
 * If the allocation fails the table is left with length 0 and a NULL
 * bucket array, and an error is reported on stderr.
 */
void hash_init() {
    // exactly one chain-head pointer per bucket
    map_weight_nodes.buckets = calloc(HASH_SIZE, sizeof *map_weight_nodes.buckets);

    if (map_weight_nodes.buckets == NULL) {
        map_weight_nodes.length = 0;
        fprintf(stderr, "hash_init: unable to allocate %d buckets\n", (int)HASH_SIZE);
        return;
    }

    map_weight_nodes.length = HASH_SIZE;
}
452+
453+
void hash_release() {
454+
for (int i = 0; i < map_weight_nodes.length; ++i) {
455+
hash_entry_t *entry = map_weight_nodes.buckets[i];
456+
while(entry) {
457+
hash_entry_t *next = entry->next;
458+
free(entry);
459+
entry = next;
460+
}
461+
}
462+
free(map_weight_nodes.buckets);
463+
}
464+
465+
/*
 * Unlink and free the hash entry that refers to the given node.
 *
 * The bucket is chosen from the node's current weight, so this must be
 * called before the weight is modified. Only the first matching entry is
 * removed; if the node is not in the bucket nothing happens.
 */
void hash_remove(adh_node_t* node){
    int bucket = hash_code(node->weight);

    for (hash_entry_t *cur = map_weight_nodes.buckets[bucket]; cur != NULL; cur = cur->next) {
        if (cur->value != node)
            continue;

        hash_entry_t *before = cur->prev;
        hash_entry_t *after = cur->next;

        // detach from the predecessor, or from the bucket head when first
        if (before != NULL)
            before->next = after;
        else
            map_weight_nodes.buckets[bucket] = after;

        // detach from the successor, if any
        if (after != NULL)
            after->prev = before;

        free(cur);
        return;
    }
}
487+
488+
/*
 * Insert a node into the weight hash table, in the bucket selected by its
 * current weight.
 *
 * The new entry becomes the head of the bucket chain, so insertion takes
 * constant time. The position inside a chain carries no meaning: hash_get
 * scans the whole chain and hash_remove matches on the node pointer.
 *
 * Running out of memory is fatal: the problem is reported on stderr and
 * the process is aborted instead of dereferencing a NULL entry.
 */
void hash_add(adh_node_t* node){
    hash_entry_t *new_entry = calloc(1, sizeof *new_entry);
    if (new_entry == NULL) {
        fprintf(stderr, "hash_add: out of memory\n");
        abort();
    }
    new_entry->value = node;    // prev and next are already NULL thanks to calloc

    unsigned int hash_index = hash_code(node->weight);
    hash_entry_t *head = map_weight_nodes.buckets[hash_index];

    // link in front of the current head (if the bucket is not empty)
    new_entry->next = head;
    if (head != NULL)
        head->prev = new_entry;

    map_weight_nodes.buckets[hash_index] = new_entry;
}
507+
508+
/*
 * Look up, among the nodes stored under the given weight, the one with the
 * highest order that is strictly greater than the passed order.
 *
 * The root node is never returned. The whole bucket chain is scanned,
 * because a bucket may also hold nodes of other weights that share the
 * same hash code. Returns NULL when no node qualifies.
 */
adh_node_t* hash_get(adh_weight_t weight, adh_order_t order) {
    adh_node_t *best = NULL;
    int bucket = hash_code(weight);

    for (hash_entry_t *it = map_weight_nodes.buckets[bucket]; it != NULL; it = it->next) {
        adh_node_t *candidate = it->value;

#ifdef _DEBUG
        hash_check_collision(weight, bucket, candidate);
#endif

        // skip the root and nodes that merely collide on the hash code
        if (candidate == adh_root_node || candidate->weight != weight)
            continue;

        // keep raising the bar so only the highest order survives
        if (candidate->order > order) {
            best = candidate;
            order = candidate->order;
        }
    }

    return best;
}
527+
528+
/*
 * Debug helper: report a hash collision.
 *
 * When the node found in bucket hash_index has a weight different from the
 * one being looked up, the length of the bucket chain is counted and logged
 * together with both weights. Nothing is logged when the weights match.
 */
void hash_check_collision(adh_weight_t weight, int hash_index, const adh_node_t *node) {
    if (node->weight == weight)
        return;

    int size = 0;
    for (hash_entry_t *he = map_weight_nodes.buckets[hash_index]; he != NULL; he = he->next)
        size++;

    log_info("hash_get", "collision, size:%d w1:%d w2:%d\n", size, weight, node->weight);
}

adhuff_compress.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ int output_existing_symbol(byte_t symbol, adh_node_t *node, byte_t *output_buffe
124124

125125
int output_new_symbol(byte_t symbol, byte_t *output_buffer, FILE* output_file_ptr) {
126126
// write symbol code
127-
bit_array_t bit_array = { 0, 0 };
127+
bit_array_t bit_array = {0};
128128
symbol_to_bits(symbol, &bit_array);
129129

130130
#ifdef _DEBUG

adhuff_decompress.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ int decode_existing_symbol(const byte_t input_buffer[]) {
148148
unsigned int original_input_buffer_bit_idx = in_bit_idx;
149149

150150
adh_node_t* node = NULL;
151-
bit_array_t bit_array = { 0, 0 };
151+
bit_array_t bit_array = {0};
152152
byte_t sub_buffer[MAX_CODE_BYTES] = {0};
153153
int missing = last_bit_idx - in_bit_idx + 1;
154154

log.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ void log_trace_char_bin(byte_t symbol) {
5050
if(get_log_level() < LOG_TRACE)
5151
return;
5252

53-
bit_array_t bit_array = { 0, 0 };
53+
bit_array_t bit_array = {0};
5454
symbol_to_bits(symbol, &bit_array);
5555
fprintf(stdout, "%s\n", fmt_bit_array(&bit_array));
5656
}

0 commit comments

Comments
 (0)