C++数据结构之文件压缩(哈夫曼树)实例详解

C++数据结构之文件压缩(哈夫曼树)实例详解

成都创新互联公司坚持“要么做到,要么别承诺”的工作理念,服务领域包括:成都网站建设、网站设计、企业官网、英文网站、手机端网站、网站推广等服务,满足客户于互联网时代的黎平网站设计、移动媒体设计的需求,帮助企业找到有效的互联网解决方案。努力成为您成熟可靠的网络建设合作伙伴!

概要:

项目简介:利用哈夫曼编码的方式对文件进行压缩,并且对压缩文件可以解压

开发环境:windows vs2013

项目概述:

        1.压缩

            a.读取文件,将每个字符,该字符出现的次数和权值构成哈夫曼树

            b.哈夫曼树是利用小堆构成,字符出现次数少的节点指针存在堆顶,出现次数多的在堆底

            c.每次取堆顶的两个数,再将两个数相加进堆,直到堆被取完,这时哈夫曼树也建成

            d.从哈夫曼树中获取哈夫曼编码,然后再根据整个字符数组来获取出现了得字符的编码

            e.获取编码后每次凑满8位就将编码串写入到压缩文件(value处理编码1与它即可,0只移动位)

             f.写好配置文件,统计每个字符及其出现次数,并以“字符+','+次数”的形式保存到配置文件中

         2.解压

             a.读取配置文件,统计所有字符的个数

             b.构建哈夫曼树,读解压缩文件,将所读到的编码字符的这个节点所所含的字符写入到解压缩文件中,知道将压缩文件读完

             c.压缩解压缩完全完成,进行小文件大文件的测试

实例代码:

#pragma once 
#include 
 
template 
struct Less 
{ 
  bool operator()(const T& l, const T& r) const 
  { 
    return l < r; 
  } 
}; 
 
template 
struct Greater 
{ 
  bool operator()(const T& l, const T& r) const 
  { 
    return l > r; 
  } 
}; 
 
template 
class Heap 
{ 
public: 
  Heap() 
  {} 
 
  Heap(T* array, size_t n)   //建堆 
  { 
    _array.reserve(n); 
 
    for (size_t i = 0; i < n; i++) 
    { 
      _array.push_back(array[i]); 
    } 
 
    for (int i = (_array.size() - 2) >> 1; i >= 0; --i) 
    { 
      _AdjustDown(i); 
    } 
  } 
 
  const T& Top()const 
  { 
    return _array[0]; 
  } 
 
  void Push(const T& x) 
  { 
    _array.push_back(x); 
    _AdjustUp(_array.size() - 1); 
  } 
 
  size_t Size() 
  { 
    return _array.size(); 
  } 
 
  void Pop() 
  { 
    assert(_array.size() > 0); 
    swap(_array[0], _array[_array.size() - 1]); 
    _array.pop_back(); 
    _AdjustDown(0); 
  } 
 
  bool Empty() 
  { 
    return _array.size() == 0; 
  } 
 
  void Print() 
  { 
    for (size_t i = 0; i < _array.size(); ++i) 
    { 
      cout << _array[i] << " "; 
    } 
    cout << endl; 
  } 
 
protected: 
  void _AdjustUp(int child)  //上调 
  { 
    Compare ComFunc; 
    int parent = (child - 1) >> 1; 
    while (child) 
    { 
      if (ComFunc(_array[child], _array[parent])) 
      { 
        swap(_array[child], _array[parent]); 
        child = parent; 
        parent = (child - 1) >> 1; 
      } 
      else 
      { 
        break; 
      } 
    } 
  } 
 
 
 
 
  void _AdjustDown(int root)   //下调 
  { 
    Compare ComFunc; 
    int parent = root; 
    int child = root * 2 + 1; 
    while (child < _array.size()) 
    { 
      if (child + 1 < _array.size() && ComFunc(_array[child + 1], _array[child])) 
      { 
        ++child; 
      } 
 
      if (ComFunc(_array[child], _array[parent])) 
      { 
        swap(_array[child], _array[parent]); 
        parent = child; 
        child = parent * 2 + 1; 
      } 
      else 
      { 
        break; 
      } 
    } 
  } 
 
 
 
protected: 
  vector _array; 
 
 
}; 
 
 
 
 
void TestHeap() 
{ 
  int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 }; 
  //Heap heap(a, sizeof(a) / sizeof(a[0])); 
  //Heap> heap(a, sizeof(a) / sizeof(a[0])); 
  Heap> heap(a, sizeof(a) / sizeof(a[0])); 
  heap.Print(); 
  heap.Push(25); 
  heap.Print(); 
  heap.Pop(); 
  heap.Print(); 
} 



#pragma once 
#include"Heap.h" 
 
template 
struct HuffmanTreeNode 
{ 
  HuffmanTreeNode* _left; 
  HuffmanTreeNode* _right; 
  HuffmanTreeNode* _parent; 
  T _w;       //权值 
 
 
  HuffmanTreeNode(const T& x) 
    :_w(x) 
    , _left(NULL) 
    , _right(NULL) 
    , _parent(NULL) 
  {} 
 
}; 
 
template 
class HuffmanTree 
{ 
  typedef HuffmanTreeNode Node; 
public: 
  HuffmanTree() 
    :_root(NULL) 
  {} 
 
  HuffmanTree(T* a, size_t n, const T& invalid = T())  //构建哈夫曼树 
  { 
    struct Compare 
    { 
      bool operator()(Node* l, Node* r) const 
      { 
        return l->_w < r->_w; 
      } 
    }; 
 
    Heap minHeap; 
    for (size_t i = 0; i < n; ++i) 
    { 
      if (a[i] != invalid) 
      { 
        minHeap.Push(new Node(a[i])); 
      } 
    } 
 
    while (minHeap.Size() > 1) 
    { 
      Node* left = minHeap.Top(); 
      minHeap.Pop(); 
      Node* right = minHeap.Top(); 
      minHeap.Pop(); 
      Node* parent = new Node(left->_w + right->_w); 
      parent->_left = left; 
      parent->_right = right; 
      left->_parent = parent; 
      right->_parent = parent; 
      minHeap.Push(parent); 
    } 
    _root = minHeap.Top(); 
  } 
 
 
  Node* GetRoot() 
  { 
    return _root; 
  } 
 
  ~HuffmanTree() 
  { 
    _Destory(_root); 
  } 
 
 
 
 
 
protected: 
  void _Destory(Node* root) 
  { 
    if (root == NULL) 
    { 
      return; 
    } 
 
    _Destory(root->_left); 
    _Destory(root->_right); 
    delete root; 
  } 
 
  HuffmanTree(const HuffmanTree& tree); 
  HuffmanTree& operator=(const HuffmanTree& tree); 
 
protected: 
  Node* _root; 
 
}; 
 
 
void TestHuffmanTree() 
{
#pragma once 
 
#include 
using namespace std; 
#include 
#include"HuffmanTree.h" 
 
 
typedef long long LongType; 
struct CharInfo    
{ 
  unsigned char _ch;     //字符 
  LongType _count;     //字符出现的次数 
  string _code;      //huffman编码 
 
  CharInfo(LongType count = 0) 
    :_count(count) 
    , _ch(0) 
    , _code("") 
  {} 
 
 
  bool operator<(const CharInfo& info) const 
  { 
    return _count < info._count; 
  } 
 
  CharInfo operator+(const CharInfo& info) 
  { 
    return CharInfo(_count + info._count); 
  } 
 
  bool operator!=(const CharInfo& info)const 
  { 
    return _count != info._count; 
  } 
}; 
 
 
struct CountInfo 
{ 
  unsigned char _ch;    //字符 
  LongType _count;     //字符出现的次数 
}; 
 
 
 
 
class FileCompress 
{ 
public: 
  FileCompress() 
  { 
    for (size_t i = 0; i < 256; ++i) 
    { 
      _info[i]._ch = i; 
    } 
  } 
 
  void Compress(const char* filename) 
  { 
    assert(filename); 
     
    //统计字符出现的次数,均已二进制方式读写 
    FILE* fout = fopen(filename, "rb"); 
    assert(fout); 
    int ch = fgetc(fout); 
 
    string CompressFile = filename; 
    CompressFile += ".huffman"; 
    FILE* fin = fopen(CompressFile.c_str(), "wb"); 
    assert(fin); 
    CountInfo info; 
 
 
    while (ch != EOF) 
    { 
      _info[(unsigned char)ch]._count++; 
      ch = fgetc(fout); 
    } 
 
    //构建huffman tree 
    CharInfo invalid; 
    invalid._count = 0; 
    HuffmanTree tree(_info, 256, invalid); 
 
    //生成huffman code 
    GetHuffmanCode(tree.GetRoot()); 
 
    //压缩 
    //写配置文件 
    for (size_t i = 0; i < 256; ++i) 
    { 
      if (_info[i]._count) 
      { 
        info._ch = _info[i]._ch; 
        info._count = _info[i]._count; 
        fwrite(&info, sizeof(info), 1, fin); 
      } 
    } 
 
    info._count = -1; 
    fwrite(&info, sizeof(info), 1, fin); 
 
    fseek(fout, 0, SEEK_SET);   //返回到文件开始 
    ch = fgetc(fout); 
    char value = 0;     //二进制 
    int pos = 0;    //左移位数 
    while (ch != EOF) 
    { 
      string& code = _info[(unsigned char)ch]._code; 
      size_t i = 0; 
      for (i = 0; i < code.size(); ++i) 
      { 
        value <<= 1; 
        ++pos; 
        if (code[i] == '1') 
        { 
          value |= 1; 
        } 
        if (pos == 8)       //满8位写进文件中 
        { 
          fputc(value, fin); 
          value = 0; 
          pos = 0; 
        } 
      } 
      ch = fgetc(fout); 
    } 
    if (pos) 
    { 
      value <<= (8 - pos); 
      fputc(value, fin); 
    } 
    fclose(fin); 
    fclose(fout); 
  } 
 
  void GetHuffmanCode(HuffmanTreeNode* root)    //huffman编码 
  { 
    if (root == NULL) 
    { 
      return; 
    } 
 
    if (root->_left == NULL && root->_right == NULL) 
    { 
      HuffmanTreeNode *parent, *cur; 
      cur = root; 
      parent = cur->_parent; 
      string& code = _info[(unsigned char)root->_w._ch]._code; 
      while (parent) 
      { 
        if (cur == parent->_left) 
        { 
          code += '0'; 
        } 
        else 
        { 
          code += '1'; 
        } 
        cur = parent; 
        parent = cur->_parent; 
      } 
      reverse(code.begin(), code.end()); 
    } 
    GetHuffmanCode(root->_left); 
    GetHuffmanCode(root->_right); 
  } 
 
  //解压 
  void UnCompress(const char* filename) 
  { 
    assert(filename); 
    string UnCompressFile = filename; 
    size_t index = UnCompressFile.rfind('.'); 
    assert(index != string::npos); 
    UnCompressFile = UnCompressFile.substr(0, index); 
    UnCompressFile += ".unhuffman"; 
    FILE* fout = fopen(filename, "rb"); 
    assert(fout); 
    FILE* fin = fopen(UnCompressFile.c_str(), "wb"); 
    assert(fin); 
    CountInfo info; 
    fread(&info, sizeof(CountInfo), 1, fout); 
 
    //读配置信息 
    while (1) 
    { 
      fread(&info, sizeof(CountInfo), 1, fout); 
      if (info._count == -1) 
      { 
        break; 
      } 
      _info[(unsigned char)info._ch]._ch = info._ch; 
      _info[(unsigned char)info._ch]._count = info._count; 
    } 
 
    //重建huffman树 
    CharInfo invalid; 
    invalid._count = 0; 
    HuffmanTree tree(_info, 256, invalid); 
    HuffmanTreeNode* root = tree.GetRoot(); 
    HuffmanTreeNode* cur = root; 
    LongType count = root->_w._count; 
    int value = fgetc(fout); 
 
    if (value == EOF)       //只有一种字符 
    { 
      if (info._ch != 0) 
      { 
        while (cur->_w._count--) 
        { 
          fputc(cur->_w._ch, fin); 
        } 
      } 
    } 
    else 
    { 
      while (value != EOF) 
      { 
        int pos = 7; 
        char test = 1; 
        while (pos >= 0) 
        { 
          if (value & (test << pos)) 
          { 
            cur = cur->_right; 
          } 
          else 
          { 
            cur = cur->_left; 
          } 
          if (cur->_left == NULL && cur->_right == NULL) 
          { 
            fputc(cur->_w._ch, fin); 
            cur = root; 
            if (--count == 0) 
            { 
              break; 
            } 
          } 
          --pos; 
        } 
        value = fgetc(fout); 
      } 
    } 
 
    fclose(fout); 
    fclose(fin); 
  } 
 
protected: 
  CharInfo _info[256];   //所有字符信息 
}; 
 
void TestFileCompress() 
{ 
  FileCompress fc; 
  //fc.Compress("实验.doc"); 
  //fc.UnCompress("实验.doc.huffman"); 
  //fc.Compress("qq.JPG"); 
  //fc.UnCompress("qq.JPG.huffman"); 
  //fc.Compress("www.MP3"); 
  //fc.UnCompress("www.MP3.huffman"); 
 
  fc.Compress("yppppp.txt"); 
  fc.UnCompress("yppppp.txt.huffman"); 
}

int array[10] = { 2, 4, 6, 9, 7, 8, 5, 0, 1, 3 };HuffmanTree t(array, 10);}
 

 

#include  
#include  
#include  
using namespace std; 
 
#include"Heap.h" 
#include"HuffmanTree.h" 
#include"FileCompress.h" 
 
int main() 
{ 
  //TestHeap();   
  TestHuffmanTree(); 
  TestFileCompress(); 
 
 
  system("pause"); 
  return 0; 
}




 以上就是哈夫曼树的实例详解,如有疑问请留言或者到本站社区交流,感谢阅读,希望能帮助到大家,谢谢大家对本站的支持!


当前标题:C++数据结构之文件压缩(哈夫曼树)实例详解
地址分享:http://hbruida.cn/article/jdgesh.html