赫夫曼編碼
資料解壓
byte[] --> String
package tree.huffmancode;
import java.util.*;
public class HuffmanCodeDemo {
public static void main(String[] args) {
String content = "i like like like java do you like a java";
byte[] contentBytes = content.getBytes();
byte[] huffmanCodesBytes = huffmanZip(contentBytes);
System.out.println("after compression:"+Arrays.toString(huffmanCodesBytes));
byte[] sourceBytes = decode(huffmanCodes, huffmanCodesBytes);
System.out.println("original string" +new String(sourceBytes));
/*
List<Node> nodes = getNode(contentBytes);
System.out.println(nodes);
// [Node{data=32, weight=9}, Node{data=97, weight=5}, Node{data=100, weight=1},
Node huffmanTreeRoot = CreateHuffmanTree(nodes);
huffmanTreeRoot.preOrder();
System.out.println("huffman code: ");
getCodes(huffmanTreeRoot);
// System.out.println(huffmanCodes);
byte[] huffmanCodeBytes = zip(contentBytes,huffmanCodes);
System.out.println(Arrays.toString(huffmanCodeBytes)); // length:17
*/
}
// decompression
/**
*
* @param huffmanCodes huffman coding table -- map
* @param huffmanBytes huffman code -> byte array [-88....]
* @return original string corresponding array
*/
private static byte[] decode(Map<Byte,String> huffmanCodes,byte[] huffmanBytes){
// 1. get binary string corresponding to huffmanBytes, eg:10101000..
StringBuilder stringBuilder = new StringBuilder();
// byte --> binary string
for(int i = 0; i<huffmanBytes.length;i++){
byte b = huffmanBytes[i];
// 判斷是不是最後一個位元組
boolean flag = (i==huffmanBytes.length-1);
stringBuilder.append(byteToBitString(!flag,b));
}
// 把字元串安裝指定的赫夫曼編碼進行解碼
// 把赫夫曼編碼表進行調換,因為要反向查詢 a->100 100->a
Map<String,Byte> map = new HashMap<String,Byte>();
for(Map.Entry<Byte,String> entry:huffmanCodes.entrySet()){
map.put(entry.getValue(),entry.getKey());
}
// build a collection, store byte
List<Byte> list = new ArrayList<>();
// i 可以了解成是索引,在掃描 stringBuilder
for(int i = 0;i<stringBuilder.length();){
int count = 1; // 計數器
boolean flag = true;
Byte b = null;
while(flag){
// 取出一個 "1" "0"
// 遞增的取出key
String key = stringBuilder.substring(i,i+count);
// i 不動,讓 count 移動, 指定比對到一個字元
b = map.get(key);
if(b==null){
count++;
} else{
// 比對到
flag = false;
}
}
list.add(b);
i+=count; // i直接移動到count
}
// for循環結束後,list中就存放了所有的字元
// list中的資料放入byte[] 并傳回
byte b[] = new byte[list.size()];
for(int i=0;i<b.length;i++){
b[i] = list.get(i);
}
return b;
}
// date decompression
// 1. huffman code bytes [-88,-65,-56...]
// ---> binary corresponding huffman code: 1001010...
// ---> "i like like...."
/**
* transfer a byte to a binary string
* @param b 傳入的byte
* @param flag 辨別是否需要補高位,如果是true,表示需要補高位,如果是false表示不補,如果是最後一個位元組,無需補高位
* @return 是該b 對應的二進制的字元串(注意是按補碼傳回)
*/
private static String byteToBitString(boolean flag,byte b){
// use a variable save b
int temp = b; // transfer b to int
// 如果是正數,需要補高位
if(flag){
temp |= 256; // 按位與 256:1 0000 0000
// 1:0000 0001
// => 1 0000 0001
}
String str = Integer.toBinaryString(temp); // 傳回的是temp對應的二進制的補碼
if(flag){
return str.substring(str.length()-8); // 取後面的8位
} else{
return str;
}
}
// use a method packaging methods' calls
/**
*
* @param bytes byte array corresponding to the original string (content bytes)
* @return byte array after processing by huffman code (compression array)
*/
private static byte[] huffmanZip(byte[] bytes){
List<Node> nodes = getNode(bytes);
Node huffmanTreeRoot = CreateHuffmanTree(nodes);
Map<Byte, String> huffmanCodes = getCodes(huffmanTreeRoot);
byte[] huffmanCodeBytes = zip(bytes, HuffmanCodeDemo.huffmanCodes);
return huffmanCodeBytes;
}
// byte[] of string ---> huffman code table ---> huffman code(byte[])
/**
*
* @param bytes The byte array corresponding to the original string
* @param huffmanCodes Huffman codes map
* @return byte[] processed by huffman code, byte[] corresponding to "10101000..."
* 8 bit --> 1 byte, put into huffmanCodeBytes
* huffmanCodeBytes[0] = 10101000(complement補碼) => byte[10101000=>10101000-1=>10100111(Inverted code 補碼對應的反碼)
* => 11011000 = -88(original code 反碼對應的原碼:符号位/第一位不變,其他位取反))
* huffmanCodeBytes[1] = -88
*/
private static byte[] zip(byte[] bytes,Map<Byte,String> huffmanCodes){
// 1. transfer bytes to string corresponding to huffmanCode using huffmanCodes
StringBuilder stringBuilder = new StringBuilder();
// traverse bytes
for(byte b:bytes){
stringBuilder.append(huffmanCodes.get(b));
}
// stringBuilder -> byte[]
// count length of byte[] huffmanCodeBytes
int len;
if(stringBuilder.length() % 8 == 0){
len = stringBuilder.length()/8;
} else{
len = stringBuilder.length() / 8 + 1;
}
// one line: int len = (stringBuilder.length()+7)/8;
// create byte[] store data after compression
byte[] huffmanCodeBytes = new byte[len];
int index = 0; // record number of byte
for(int i = 0;i < stringBuilder.length();i+=8){ // 8 bit->1 byte
String strByte;
if(i+8 > stringBuilder.length()){ // not enough 8 bit
strByte = stringBuilder.substring(i);
}else{
strByte = stringBuilder.substring(i,i+8);
// strByte --> byte array --> put into huffmanCodeBytes
}
huffmanCodeBytes[index] = (byte)Integer.parseInt(strByte,2); // string -> binary
index++;
}
return huffmanCodeBytes;
}
// huffman coding based on huffman tree
// 1. huffman coding is stored in Map<Byte,String>, eg: o:1000 u:10010
static Map<Byte,String> huffmanCodes = new HashMap<Byte,String>();
// 2. define a StringBuilder storing path of leaf node when creating huffman code
static StringBuilder stringBuilder = new StringBuilder();
// for convenience, override getCodes()
private static Map<Byte,String> getCodes(Node root){
if(root==null){
return null;
}
getCodes(root.left,"0",stringBuilder);
getCodes(root.right,"1",stringBuilder);
return huffmanCodes;
}
/**
* get all leaf nodes' huffman code of given node, putting into huffmanCodes collection
* @param node given node, root
* @param code path: left node:0; right node:1;
* @param stringBuilder splice path
*/
private static void getCodes(Node node,String code,StringBuilder stringBuilder){
StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
// add code into stringBuilder2
stringBuilder2.append(code);
if(node!=null){ // if node==null, pass
// determine current node is leaf node or non-leaf node
if(node.data==null){
// recursion
// left
getCodes(node.left,"0",stringBuilder2);
// right
getCodes(node.right,"1",stringBuilder2);
} else{ // leaf node
huffmanCodes.put(node.data,stringBuilder2.toString());
}
}
}
private static void preOrder(Node root){
if(root!=null){
root.preOrder();
}else{
System.out.println("empty");
}
}
/**
*
* @param bytes receive byte[]
* @return List like: [Node[data=97 (ascii) ,weight = 5], Node[data = 32, weight = 9]]
*/
private static List<Node> getNode(byte[] bytes){
// 1. create an arraylist
ArrayList<Node> nodes = new ArrayList<>();
// 2. traverse bytes, count each byte frequency --> map[key,value]
HashMap<Byte, Integer> counts = new HashMap<>();
for(byte b:bytes){
Integer count = counts.get(b);
if(count==null){ // no data in map
counts.put(b,1);
}else{
counts.put(b,count+1);
}
}
// 3. transfer each key-value pair to a Node object, add to nodes collection
for(Map.Entry<Byte,Integer> entry:counts.entrySet()){
nodes.add(new Node(entry.getKey(),entry.getValue()));
}
return nodes;
}
// build huffman tree
private static Node CreateHuffmanTree(List<Node> nodes){
while(nodes.size()>1){
Collections.sort(nodes);
Node leftNode = nodes.get(0);
Node rightNode = nodes.get(1);
Node parent = new Node(null,leftNode.weight+rightNode.weight);
parent.left = leftNode;
parent.right = rightNode;
nodes.remove(leftNode);
nodes.remove(rightNode);
nodes.add(parent);
}
return nodes.get(0);
}
}
// create Node class, with data and weight
class Node implements Comparable<Node>{
Byte data; // store data, eg: 'a' --> 97 ' ' --> 32
int weight; // weight: number of characters|frequency
Node left;
Node right;
public Node(Byte data, int weight) {
this.data = data;
this.weight = weight;
}
@Override
public int compareTo(Node o) {
return this.weight-o.weight; // ascending
}
@Override
public String toString() {
return "Node{" +
"data=" + data +
", weight=" + weight +
'}';
}
// preOrder
public void preOrder(){
System.out.println(this);
if(this.left!=null){
this.left.preOrder();
}
if(this.right!=null){
this.right.preOrder();
}
}
}
檔案壓縮
讀取檔案 – 得到赫夫曼編碼表 – 完成壓縮 – 得到.zip檔案
// compress file
/**
*
* @param srcFile path of file need to be compressed
* @param dstFile path that stores the file after decompressing
*/
public static void zipFile(String srcFile,String dstFile){
// output stream
// input stream read file
FileInputStream fis = null;
ObjectOutputStream oos = null;
FileOutputStream fos = null;
try {
fis = new FileInputStream(srcFile);
byte[] b = new byte[fis.available()]; // length of original file
// read file
fis.read(b);
// get huffman code corresponding to file
// compress original file
byte[] huffmanBytes = huffmanZip(b);
// create output stream storing compress file
fos = new FileOutputStream(dstFile);
// create ObjectOutputStream corresponding to file output stream
oos = new ObjectOutputStream(fos);
// 把赫夫曼編碼後的位元組數組寫入壓縮檔案
oos.writeObject(huffmanBytes);
// 以對象流的方式寫入赫夫曼編碼,為了以後恢複源檔案時使用
oos.writeObject(huffmanCodes);
} catch (Exception e) {
System.out.println(e.getMessage());
} finally {
try {
fis.close();
fos.close();
oos.close();
} catch (IOException e) {
System.out.println(e.getMessage());
}
}
}
// test compress file
String srcFile = "/Users/pz/Desktop/img4.jpg";
String dstFile = "/Users/pz/Desktop/dst.dmg";
zipFile(srcFile,dstFile);
System.out.println("compress done");
檔案解壓
讀取解壓檔案(資料和赫夫曼編碼表)-- 俄暗沉解壓
// unzip file
/**
*
* @param zipFile file for unzip
* @param dstFile path for saving
*/
public static void unzipFile(String zipFile,String dstFile){
// define file input stream
InputStream is = null;
// define a object input stream
ObjectInputStream ois = null;
// define file output stream
OutputStream os = null;
try{
// create file input stream
is = new FileInputStream(zipFile);
// create a object input stream corresponding to is
ois = new ObjectInputStream(is);
// read byte[] huffmanBytes
byte[] huffmanBytes = (byte[])ois.readObject();
// read huffman codes
Map<Byte,String> huffmanCodes = (Map<Byte, String>) ois.readObject();
// decode
byte[] bytes = decode(huffmanCodes,huffmanBytes);
// write file to dsfFile
os = new FileOutputStream(dstFile);
// write data to file
os.write(bytes);
}catch(Exception e){
System.out.println(e.getMessage());
}finally {
try {
os.close();
ois.close();
is.close();
} catch (IOException e) {
System.out.println(e.getMessage());
}
}
}
// test decode file
String zipFile = "/Users/pz/Desktop/dst.dmg";
String dstFile = "/Users/pz/Desktop/su.jpg";
unzipFile(zipFile,dstFile);
System.out.println("success");