哈夫曼编码

哈夫曼编码主要解决不定长编码问题,可以更加节省传输字节.由于每个字母都是叶结点,所以不存在一个编码是另一个编码的前缀的问题.越频繁的编码字母离根结点越近,即编码越短.
由传输的比特流01联想到二叉树的左右孩子,进而想出新的编码方式,这真是太伟大了.
priority_queue使用时需注意:
1.若存放实体,则直接在实体内重构比较运算符即可;
2.若存放指针,则需指定排序规则;
(c语言的编程精髓就是如何使用指针,不过时刻注意防止内存泄露问题.)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<stack>
#include<queue>
using namespace std;

typedef char DATA_TYPE;

typedef struct node
{
DATA_TYPE data;
float rate;
node *lch, *rch;
node()
{
lch = rch = 0;
}
bool operator < (const node &x) const //按rate从小到大排序
{
return rate > x.rate;
}
} Node;

struct cmp
{
bool operator ()(Node *x, Node *y)
{
return x->rate > y->rate;
}
};

void print(Node *r, int encodes[], int idx)
{
if(r)
{
if(!r->lch && !r->rch)
{
printf("%c: ", r->data);
for(int i=0; i<idx; i++)
{
printf("%d", encodes[i]);
}
printf("\n");
}
encodes[idx]=0;
print(r->lch, encodes, idx+1);
encodes[idx]=1;
print(r->rch, encodes, idx+1);
free(r);
}
}

int main()
{
DATA_TYPE dataArray[] = "abcdefgh";
float rateArray[] = {0.05,0.29,0.07,0.08,0.14,0.23,0.03,0.11};
priority_queue<Node*, vector<Node*>, cmp> pqueue; //队列中存放指针,故需指定比较器cmp

for(int i=0; i<8; i++)
{
Node *o = (Node*)malloc(sizeof(Node));
o->data=dataArray[i];
o->rate=rateArray[i];
o->lch=o->rch=NULL;
pqueue.push(o);
}

while(pqueue.size()>1)
{
Node *x, *y, *o = (Node*)malloc(sizeof(Node));
x = pqueue.top();
pqueue.pop();
y = pqueue.top();
pqueue.pop();
o->rate = x->rate + y->rate;
o->lch = x;
o->rch = y;
pqueue.push(o);
}
Node *root = pqueue.top();
pqueue.pop();
int encodes[100];
print(root, encodes, 0);
return 0;
}