In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters ‘a‘, ‘x‘, ‘u‘ and ‘z‘ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a‘=0, ‘x‘=10, ‘u‘=110, ‘z‘=111}, or in another way as {‘a‘=1, ‘x‘=01, ‘u‘=001, ‘z‘=000}, both compress the string into 14 bits. Another set of code can be given as {‘a‘=0, ‘x‘=11, ‘u‘=100, ‘z‘=101}, but {‘a‘=0, ‘x‘=01, ‘u‘=011, ‘z‘=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i]
is a character chosen from {‘0‘ - ‘9‘, ‘a‘ - ‘z‘, ‘A‘ - ‘Z‘, ‘_‘}, and f[i]
is the frequency of c[i]
and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i]
is the i
-th character and code[i]
is an non-empty string of no more than 63 ‘0‘s and ‘1‘s.
For each test case, print in each line either "Yes" if the student‘s submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Yes
Yes
No
No
根据题意,只要学生输入的WPL和通过哈夫曼树计算出来的WPL相同,并且满足前缀码的规则就输出Yes,否则输出No。于是我们先根据字符出现次数计算出最小带权路径长度,然后与学生输入比较,若相同则判断是否满足前缀码的规则。最小带权路径长度可以通过一个小顶堆计算出,而判断前缀码我使用了暴力法。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXSIZE 63
struct MinHeap {
int data[MAXSIZE + 1];
int size;
};
typedef struct MinHeap *MinHeap;
MinHeap createHeap() {
MinHeap minHeap = (MinHeap) malloc(sizeof(struct MinHeap));
minHeap->data[0] = 0; //哨兵值
minHeap->size = 0;
return minHeap;
}
void insertHeap(MinHeap minHeap, int data) {
if (minHeap->size == MAXSIZE) return;
minHeap->data[++minHeap->size] = data;
int i = minHeap->size;
for ( ; minHeap->data[i/2] > data; i /= 2) {
minHeap->data[i] = minHeap->data[i/2];
}
minHeap->data[i] = data;
}
int deleteHeap(MinHeap minHeap) {
if (minHeap->size == 0) return 0;
int parent, child;
int ret = minHeap->data[1];
minHeap->data[1] = minHeap->data[minHeap->size--]; //把最后一个元素放到堆顶
for (parent = 1; 2 * parent <= minHeap->size; parent = child) {
child = 2 * parent;
//存在右儿子并且右儿子更小
if (child < minHeap->size && minHeap->data[child+1] < minHeap->data[child]) {
child = child + 1;
}
//父结点更大
if (minHeap->data[parent] > minHeap->data[child]) {
int tmp = minHeap->data[parent];
minHeap->data[parent] = minHeap->data[child];
minHeap->data[child] = tmp;
} else {
break;
}
}
return ret;
}
//不需要构建哈夫曼树,直接算出WPL
int getWPL(MinHeap minHeap) {
int WPL = 0, size = minHeap->size;
int min1, min2;
for (int i = 1; i < size; i++) { //没有算哈夫曼树的根结点,其余结点的和刚好等于WPL,很巧妙
min1 = deleteHeap(minHeap);
min2 = deleteHeap(minHeap);
insertHeap(minHeap, min1 + min2);
WPL += min1 + min2;
}
return WPL;
}
//判断两个字符串是否有相同前缀
int samePrefix(char *str1, char *str2) {
while (str1 && str2 && *str1 == *str2) {
str1++; str2++;
}
if (*str1 == ‘\0‘ || *str2 == ‘\0‘) {
return 1;
} else {
return 0;
}
}
//判断一组字符串是否满足前缀码
int isPrefixCode(char strs[][MAXSIZE], int n) {
for (int i = 0; i < n; i++) {
for (int j = i + 1; j < n; j++) {
if (samePrefix(strs[i], strs[j]))
return 0;
}
}
return 1;
}
int main() {
int N, M, WPL, data[MAXSIZE];
char ch;
MinHeap minHeap = createHeap();
scanf("%d", &N);
for (int i = 0; i < N; i++) {
scanf(" %c %d", &ch, &data[i]);
insertHeap(minHeap, data[i]);
}
WPL = getWPL(minHeap);
scanf("%d", &M);
for (int i = 0; i < M; i++) {
int thisWPL = 0;
char strs[MAXSIZE][MAXSIZE];
for (int j = 0; j < N; j++) {
scanf(" %c %s", &ch, strs[j]);
thisWPL += data[j] * strlen(strs[j]);
}
if (thisWPL == WPL && isPrefixCode(strs, N)) {
printf("Yes\n");
} else {
printf("No\n");
}
}
return 0;
}
原文:https://www.cnblogs.com/AndyHY-Notes/p/12569086.html