加密压缩与解压

中英文文本文件的加密压缩与解密。

源文件

predefine.h 预定义模块。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include<bits/stdc++.h>
#define break_line cout << string(35, '-') << '\n';
using namespace std;
using uchar = unsigned char;

// FNV-1a 64位哈希算法的初始值
#define FNV1A_64_INIT 0xcbf29ce484222325ULL
// FNV-1a 64位哈希算法的质数
#define FNV1A_64_PRIME 0x100000001b3

uint64_t fnv1a_64(const void *data, size_t length)
{
uint64_t hash = FNV1A_64_INIT;
const uint8_t *byte_data = (const uint8_t *)data;
for (size_t i = 0; i < length; i++) {
hash ^= byte_data[i];
hash *= FNV1A_64_PRIME;
}
return hash;
}

class HuffmanNode{
public:
uchar ch;
int freq;
HuffmanNode *left, *right;

HuffmanNode(uchar c, int f, HuffmanNode *l = nullptr, HuffmanNode *r = nullptr) :
ch(c), freq(f), left(l), right(r) {}

bool operator<(const HuffmanNode& other) const {
// 词频小的节点放在前面;如果词频相同,则按字节值从小到大排序
return freq > other.freq || (freq == other.freq && ch > other.ch);
}
};

template <typename T>
class Heap_sort {
private:
vector<T> heap;
// 维护堆的性质,通过上浮操作
void siftUp(int index) {
while (index > 0) {
int parent = (index - 1) / 2;
if (heap[parent] < heap[index]) {
swap(heap[index], heap[parent]);
index = parent;
} else {
break;
}
}
}
// 维护堆的性质,通过下沉操作
void siftDown(int index) {
int size = heap.size();
while (2 * index + 1 < size) {
int left = 2 * index + 1;
int right = 2 * index + 2;
int largest = index;
if (heap[largest] < heap[left]) {
largest = left;
}
if (right < size && heap[largest] < heap[right]) {
largest = right;
}
if (largest == index) {
break;
}
swap(heap[index], heap[largest]);
index = largest;
}
}

public:
void emplace(const T& value) {
heap.push_back(value);
siftUp(heap.size() - 1);
}
T& top() {
return heap[0];
}
void pop() {
swap(heap[0], heap[heap.size() - 1]);
heap.pop_back();
if (!heap.empty()) {
siftDown(0);
}
}
bool empty() const {
return heap.empty();
}
size_t size() const {
return heap.size();
}
};

compress.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#include"predefine.h"

void solve(){
cout << "Enter the filename: ";
string filename; cin >> filename;
cout << "Sender's ID: ";
string sender_id; cin >> sender_id;
cout << "Sender's name: ";
string sender_name; cin >> sender_name;
cout << "Receiver's ID: ";
string receiver_id; cin >> receiver_id;
cout << "Receiver's name: ";
string receiver_name; cin >> receiver_name;
cout << '\n';

string info = sender_id + ", " + sender_name + "\n"
+ receiver_id + ", " + receiver_name + "\n";

// freqMap 统计字符频率信息
ifstream file(filename, ios::binary);
file.seekg(0, ios::end);
int length = file.tellg();
file.seekg(0, ios::beg);
unordered_map<uchar, int> freqMap;
char ch;
for(auto ch : info) freqMap[((uchar)ch + 0x55) % 0x100]++;
while(file.get(ch)) freqMap[((uchar)ch + 0x55) % 0x100]++;
file.close();

// 以字符频率信息构建小根堆
priority_queue<HuffmanNode> minHeap;
for(auto &[ch, freq] : freqMap){
minHeap.emplace(HuffmanNode(ch, freq));
}

// 展示经过排序的词频统计列表
auto temp = minHeap;
break_line;
cout << "Word frequency statistics list:" << '\n';
cout << "byte freq" << '\n';
while(!temp.empty()){
cout << "0x" << setw(2) << setfill('0') << hex << (int)temp.top().ch
<< setw(6) << setfill(' ') << dec << temp.top().freq << '\n';
temp.pop();
}
break_line;

// 构建哈夫曼树
while (minHeap.size() > 1) {
auto left = new HuffmanNode(minHeap.top()); // 动态分配左子节点
minHeap.pop();
auto right = new HuffmanNode(minHeap.top()); // 动态分配右子节点
minHeap.pop();

uchar merge_ch = max(left->ch, right->ch);
int merge_freq = left->freq + right->freq;

auto newNode = new HuffmanNode(merge_ch, merge_freq, left, right);
minHeap.emplace(*newNode); // 将新节点插入堆中
}
HuffmanNode Head = minHeap.top();

// 计算 WPL
auto cal_WPL = [](auto &&self, HuffmanNode *u, int dep) -> int{
if(u->left == nullptr && u->right == nullptr) {
return u->freq * dep;
}
return self(self, u->left, dep + 1) + self(self, u->right, dep + 1);
};
cout << "WPL : " << dec << cal_WPL(cal_WPL, &Head, 0) << '\n';

// 构建哈夫曼编码映射表
map<uchar, string>HuffmanCode;
auto generate = [&](auto &&self, HuffmanNode* node, const string& code) -> void{
if (node == nullptr) return;
if (node->left == nullptr && node->right == nullptr) {
// 如果是叶子节点,保存字符和编码
HuffmanCode[node->ch] = code;
}
// 递归生成左子树的编码('0'表示左子树)
self(self, node->left, code + "0");
// 递归生成右子树的编码('1'表示右子树)
self(self, node->right, code + "1");
};
generate(generate, &Head, "");

// 生成编码表
ofstream outfile("code.txt", ios::binary);
outfile << length << '\n';
for(auto &[ch, cd] : HuffmanCode){
outfile << "0x" << hex << setw(2) << setfill('0') << (int)ch;
int code_len = cd.size();
outfile << " 0x" << hex << setw(2) << setfill('0') << (int)code_len;
string encoded;
for(int i = 0; i < cd.size(); i += 8){
uchar byte = 0;
for(int j = 0; j < 8 && i + j < cd.size(); j++){
byte |= (cd[i + j] == '1') << (7 - j);
}
encoded += byte;
}
for(auto &byte : encoded){
outfile << " 0x" << hex << setw(2) << setfill('0') << (int)(uchar)byte;
}
outfile << '\n';
}
outfile.close();

// 输出编码表到交互界面
file.open("code.txt");
string line;
break_line;
cout << "Code table:" << '\n';
while (getline(file, line)) {
cout << line << endl;
}
file.close();

// 获取压缩文件 (先获取01串, 在每8位合成一个字节), 输出最后16字节信息, 压缩文件大小
string compressed_file = filename.substr(0, filename.length() - 3) + "hfm";
outfile.open(compressed_file, ios::binary);
file.open(filename, ios::binary);
string bitStream;
for(auto ch : info) bitStream += HuffmanCode[((uchar)ch + 0x55) % 0x100];
while(file.get(ch)) bitStream += HuffmanCode[((uchar)ch + 0x55) % 0x100];
file.close();
break_line;
cout << "Size of compressed file: " << bitStream.size() / 8 << " bytes" << '\n';
cout << "Last 16 bytes in " + compressed_file << ":\n";
for(int i = 0; i < bitStream.size(); i += 8){
uchar byte = 0;
for(int j = 0; j < 8 && i + j < bitStream.size(); j++){
byte |= (bitStream[i + j] == '1') << (7 - j);
}
outfile << byte;
if(bitStream.size() - i < 8 * 16){
cout << "0x" << setw(2) << setfill('0') << hex << (int)byte << " ";
}
}
cout << '\n';
break_line;
outfile.close();

// 计算原文本和加入info信息后的文本哈希值
file.open(filename, ios::binary);
char *buffer = new char[length];
file.read(buffer, length);
file.close();
char* buffer_with_info = new char[info.size() + length];
memcpy(buffer_with_info, info.c_str(), info.size());
memcpy(buffer_with_info + info.size(), buffer, length);
uint64_t hash_value = fnv1a_64(buffer, length);
cout << "Original file's FNV-1a hash: 0x" << hex << hash_value << '\n';
hash_value = fnv1a_64(buffer_with_info, info.size() + length);
cout << "File with info's FNV-1a hash: 0x" << hex << hash_value << '\n';

// 计算编码文件哈希值
file.open(compressed_file, ios::binary);
file.seekg(0, ios::end);
int compressed_length = file.tellg();
file.seekg(0, ios::beg);
buffer = new char[compressed_length];
file.read(buffer, compressed_length);
file.close();
hash_value = fnv1a_64(buffer, compressed_length);
cout << "Compressed file's FNV-1a hash: 0x" << hex << hash_value << '\n';
break_line;
}

signed main(){
// ios::sync_with_stdio(0);
// cin.tie(0); cout.tie(0);
solve();
system("pause");
return 0;
}

decompress.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#include"predefine.h"

void solve(){
// 根据编码表建立哈夫曼树
auto *Head = new HuffmanNode(0, -1);
ifstream file("code.txt");
string whole_len_str;
getline(file, whole_len_str);
int whole_len = stoi(whole_len_str);
string line;
while(getline(file, line)){
istringstream iss(line);
string byte_str, code_len_str, code;

iss >> byte_str;
uchar ch = (stoi(byte_str, nullptr, 16) + 0x100 - 0x55) % 0x100;

iss >> code_len_str;
int code_len = stoi(code_len_str, nullptr, 16);

vector<uchar>encoded_bytes;
while(iss >> byte_str){
uchar byte = (uchar)stoi(byte_str, nullptr, 16);
encoded_bytes.emplace_back(byte);
}
string encoded;
for(auto &byte : encoded_bytes){
bitset<8>bits(byte);
encoded += bits.to_string();
}
encoded = encoded.substr(0, code_len);

auto p = Head;
for(auto &u : encoded){
if(u == '0'){
if(p->left == nullptr){
auto *left = new HuffmanNode(0, -1);
p->left = left;
}
p = p->left;
}
else{
if(p->right == nullptr){
auto *right = new HuffmanNode(0, -1);
p->right = right;
}
p = p->right;
}
}
p->ch = ch;
}
file.close();

// 解码文件信息
cout << "Enter the compressed filename: ";
string compressed_filename; cin >> compressed_filename;
cout << "Receiver's ID: ";
string receiver_id; cin >> receiver_id;
cout << "Receiver's name: ";
string receiver_name; cin >> receiver_name;
cout << '\n';

string decompressed_filename = compressed_filename.substr(0, compressed_filename.length() - 4) + "_j.txt";
ifstream encoded_file(compressed_filename, ios::binary);
ofstream decoded_file(decompressed_filename, ios::binary);

// 解码, 开始计时
auto start = chrono::high_resolution_clock::now();
char ch;
string binary_string;
auto p = Head;
while (encoded_file.get(ch)) {
binary_string += bitset<8>((uchar)ch).to_string();
}

int cur_len = 0, lfcnt = 0;
string info; bool check = false;
for(auto &bit : binary_string){
if(bit == '0'){
p = p->left;
}
else{
p = p->right;
}
if(p->ch != 0){
if(!check) info += p->ch;
else {
decoded_file << p->ch;
cur_len ++;
}
if(p->ch == '\n') {
lfcnt++;
if(lfcnt == 2) {
stringstream Info(info);
getline(Info, line);
getline(Info, line);
if(line == receiver_id + ", " + receiver_name) {
cout << "Info of sender and receiver: \n" << info << '\n';
check = true;
}
else{
cout << "Receiver not match!" << "\n\n";
break;
}
}
}
if(cur_len == whole_len) break;
p = Head;
}
}

encoded_file.close();
decoded_file.close();

if(!check) return;

// 结束计时, 计算解码时间
auto end = chrono::high_resolution_clock::now();
chrono::duration<double> duration = end - start;
cout << "Decoding time: " << duration.count() << " s" << '\n';

// 计算解码文本哈希值
file.open(decompressed_filename, ios::binary);
char *buffer = new char[whole_len];
file.read(buffer, whole_len);
file.close();
uint64_t hash_value = fnv1a_64(buffer, whole_len);
cout << "Decompressed file's FNV-1a hash: 0x" << hex << hash_value << '\n';

// 计算压缩率
double ratio = 1.0 * binary_string.size() / 8 / whole_len;
cout << "Compression Ratio: " << ratio * 100 << "%" << '\n';
}

signed main(){
// ios::sync_with_stdio(0);
// cin.tie(0); cout.tie(0);
solve();
system("pause");
return 0;
}

功能说明

压缩方:

  • 使用程序:compress.exe可执行文件,predefine.h头文件
  • 准备文件:xxx.txt(需要进行压缩的文本文件)
  • 使用方式:打开compress.exe 可执行文件,输入需要进行压缩的文本文件路径,并依次输入发送人学号、姓名,接收人学号、姓名信息,等待文件压缩。
  • 终端显示:字符频率信息,哈夫曼树WPL值,编码表信息,压缩后文件大小,压缩后文件最后16字节hex值,原文本文件、带信息文本文件、压缩文件的Hash值。
  • 生成文件:code.txt 编码表 xxx.hfm 压缩文件

解压缩方:

  • 使用程序:decompress.exe可执行文件,predefine.h头文件
  • 准备文件:code.txt(编码表文件)xxx.hfm(需要进行解压缩的压缩文件)
  • 使用方式:打开decompress.exe 可执行文件,输入需要进行解压缩的压缩文件路径,并输入接收人学号、姓名信息,等待文件解压缩。
  • 终端显示:发送人接收人信息,解压缩时间,解压缩文本文件Hash值,压缩率。
  • 生成文件:xxx_j.txt 解压缩文本文件

(注意,如果输入的接收人信息不正确,程序将拒绝继续解压缩)

简单的说,就是一个人可以加密并压缩文件,另一个人可以解密并解压文件。
两个人可以互相传着玩,但别人看不懂俩人在干什么。