天天看點

資料結構與算法 17 赫夫曼編碼 資料解壓 檔案壓縮 檔案解壓赫夫曼編碼

赫夫曼編碼

資料解壓

byte[] --> String

package tree.huffmancode;

import java.util.*;

public class HuffmanCodeDemo {
    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        byte[] contentBytes = content.getBytes();
        byte[] huffmanCodesBytes = huffmanZip(contentBytes);
        System.out.println("after compression:"+Arrays.toString(huffmanCodesBytes));
        byte[] sourceBytes = decode(huffmanCodes, huffmanCodesBytes);
        System.out.println("original string" +new String(sourceBytes));

        /*
        List<Node> nodes = getNode(contentBytes);
        System.out.println(nodes);
        // [Node{data=32, weight=9}, Node{data=97, weight=5}, Node{data=100, weight=1},
        Node huffmanTreeRoot = CreateHuffmanTree(nodes);
        huffmanTreeRoot.preOrder();
        System.out.println("huffman code: ");
        getCodes(huffmanTreeRoot);
        // System.out.println(huffmanCodes);
        byte[] huffmanCodeBytes =  zip(contentBytes,huffmanCodes);
        System.out.println(Arrays.toString(huffmanCodeBytes)); // length:17
         */
    }


    // decompression

    /**
     *
     * @param huffmanCodes huffman coding table -- map
     * @param huffmanBytes huffman code -> byte array [-88....]
     * @return original string corresponding array
     */
    private static byte[] decode(Map<Byte,String> huffmanCodes,byte[] huffmanBytes){
        // 1. get binary string corresponding to huffmanBytes, eg:10101000..
        StringBuilder stringBuilder = new StringBuilder();
        // byte --> binary string
        for(int i = 0; i<huffmanBytes.length;i++){
            byte b = huffmanBytes[i];
            // 判斷是不是最後一個位元組
            boolean flag = (i==huffmanBytes.length-1);
            stringBuilder.append(byteToBitString(!flag,b));
        }
        // 把字元串安裝指定的赫夫曼編碼進行解碼
        // 把赫夫曼編碼表進行調換,因為要反向查詢 a->100 100->a
        Map<String,Byte> map = new HashMap<String,Byte>();
        for(Map.Entry<Byte,String> entry:huffmanCodes.entrySet()){
            map.put(entry.getValue(),entry.getKey());
        }
        // build a collection, store byte
        List<Byte> list = new ArrayList<>();
        // i 可以了解成是索引,在掃描 stringBuilder
        for(int i = 0;i<stringBuilder.length();){
            int count = 1; // 計數器
            boolean flag = true;
            Byte b = null;
            while(flag){
                // 取出一個 "1" "0"
                // 遞增的取出key
                String key = stringBuilder.substring(i,i+count);
                // i 不動,讓 count 移動, 指定比對到一個字元
                b = map.get(key);
                if(b==null){
                    count++;
                } else{
                    // 比對到
                    flag = false;
                }
            }
            list.add(b);
            i+=count; // i直接移動到count
        }
        // for循環結束後,list中就存放了所有的字元
        // list中的資料放入byte[] 并傳回
        byte b[] = new byte[list.size()];
        for(int i=0;i<b.length;i++){
            b[i] = list.get(i);
        }
        return b;


    }


    // date decompression
    // 1. huffman code bytes [-88,-65,-56...]
    //      ---> binary corresponding huffman code: 1001010...
    //      ---> "i like like...."

    /**
     * transfer a byte to a binary string
     * @param b 傳入的byte
     * @param flag 辨別是否需要補高位,如果是true,表示需要補高位,如果是false表示不補,如果是最後一個位元組,無需補高位
     * @return 是該b 對應的二進制的字元串(注意是按補碼傳回)
     */
    private static String byteToBitString(boolean flag,byte b){
        // use a variable save b
        int temp = b; // transfer b to int
        // 如果是正數,需要補高位
        if(flag){
            temp |= 256; // 按位與 256:1 0000 0000
            //                      1:0000 0001
            //                      => 1 0000 0001
        }
        String str = Integer.toBinaryString(temp); // 傳回的是temp對應的二進制的補碼
        if(flag){
            return str.substring(str.length()-8); // 取後面的8位
        } else{
            return str;
        }
    }





    // use a method packaging methods' calls

    /**
     *
     * @param bytes byte array corresponding to the original string (content bytes)
     * @return byte array after processing by huffman code (compression array)
     */
    private static byte[] huffmanZip(byte[] bytes){
        List<Node> nodes = getNode(bytes);
        Node huffmanTreeRoot = CreateHuffmanTree(nodes);
        Map<Byte, String> huffmanCodes = getCodes(huffmanTreeRoot);
        byte[] huffmanCodeBytes =  zip(bytes, HuffmanCodeDemo.huffmanCodes);
        return huffmanCodeBytes;
    }

    // byte[] of string ---> huffman code table ---> huffman code(byte[])

    /**
     *
     * @param bytes The byte array corresponding to the original string
     * @param huffmanCodes Huffman codes map
     * @return byte[] processed by huffman code, byte[] corresponding to "10101000..."
     * 8 bit --> 1 byte, put into huffmanCodeBytes
     * huffmanCodeBytes[0] = 10101000(complement補碼) => byte[10101000=>10101000-1=>10100111(Inverted code 補碼對應的反碼)
     * => 11011000 = -88(original code 反碼對應的原碼:符号位/第一位不變,其他位取反))
     * huffmanCodeBytes[1] = -88
     */
    private static byte[] zip(byte[] bytes,Map<Byte,String> huffmanCodes){
        // 1. transfer bytes to string corresponding to huffmanCode using huffmanCodes
        StringBuilder stringBuilder = new StringBuilder();
        // traverse bytes
        for(byte b:bytes){
            stringBuilder.append(huffmanCodes.get(b));
        }

        // stringBuilder -> byte[]
        // count length of byte[] huffmanCodeBytes
        int len;
        if(stringBuilder.length() % 8 == 0){
            len = stringBuilder.length()/8;
        } else{
            len = stringBuilder.length() / 8 + 1;
        }
        // one line: int len = (stringBuilder.length()+7)/8;

        // create byte[] store data after compression
        byte[] huffmanCodeBytes = new byte[len];
        int index = 0; // record number of byte
        for(int i = 0;i < stringBuilder.length();i+=8){ // 8 bit->1 byte
            String strByte;
            if(i+8 > stringBuilder.length()){ // not enough 8 bit
                strByte = stringBuilder.substring(i);
            }else{
                strByte = stringBuilder.substring(i,i+8);
                // strByte --> byte array --> put into huffmanCodeBytes
            }
            huffmanCodeBytes[index] = (byte)Integer.parseInt(strByte,2); // string -> binary
            index++;
        }
        return huffmanCodeBytes;
    }



    // huffman coding based on huffman tree
    // 1. huffman coding is stored in Map<Byte,String>, eg: o:1000 u:10010
    static Map<Byte,String> huffmanCodes = new HashMap<Byte,String>();
    // 2. define a StringBuilder storing path of leaf node when creating huffman code
    static StringBuilder stringBuilder = new StringBuilder();

    // for convenience, override getCodes()
    private static Map<Byte,String> getCodes(Node root){
        if(root==null){
            return null;
        }
        getCodes(root.left,"0",stringBuilder);
        getCodes(root.right,"1",stringBuilder);
        return huffmanCodes;
    }

    /**
     * get all leaf nodes' huffman code of given node, putting into huffmanCodes collection
     * @param node given node, root
     * @param code path: left node:0; right node:1;
     * @param stringBuilder splice path
     */
    private static void getCodes(Node node,String code,StringBuilder stringBuilder){
        StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
        // add code into stringBuilder2
        stringBuilder2.append(code);
        if(node!=null){ // if node==null, pass
            // determine current node is leaf node or non-leaf node
            if(node.data==null){
                // recursion
                // left
                getCodes(node.left,"0",stringBuilder2);
                // right
                getCodes(node.right,"1",stringBuilder2);
            } else{ // leaf node
                huffmanCodes.put(node.data,stringBuilder2.toString());
            }
        }
    }

    private static void preOrder(Node root){
        if(root!=null){
            root.preOrder();
        }else{
            System.out.println("empty");
        }
    }


    /**
     *
     * @param bytes receive byte[]
     * @return List like: [Node[data=97 (ascii) ,weight = 5], Node[data = 32, weight = 9]]
     */
    private static List<Node> getNode(byte[] bytes){
        // 1. create an arraylist
        ArrayList<Node> nodes = new ArrayList<>();
        // 2. traverse bytes, count each byte frequency --> map[key,value]
        HashMap<Byte, Integer> counts = new HashMap<>();
        for(byte b:bytes){
            Integer count = counts.get(b);
            if(count==null){ // no data in map
                counts.put(b,1);
            }else{
                counts.put(b,count+1);
            }
        }
        // 3. transfer each key-value pair to a Node object, add to nodes collection
        for(Map.Entry<Byte,Integer> entry:counts.entrySet()){
            nodes.add(new Node(entry.getKey(),entry.getValue()));
        }
        return nodes;
    }

    // build huffman tree
    private static Node CreateHuffmanTree(List<Node> nodes){
        while(nodes.size()>1){
            Collections.sort(nodes);
            Node leftNode = nodes.get(0);
            Node rightNode = nodes.get(1);
            Node parent = new Node(null,leftNode.weight+rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.remove(leftNode);
            nodes.remove(rightNode);
            nodes.add(parent);
        }
        return nodes.get(0);
    }
}

// create Node class, with data and weight
class Node implements Comparable<Node>{
    Byte data; // store data, eg: 'a' --> 97 ' ' --> 32
    int weight; // weight: number of characters|frequency
    Node left;
    Node right;

    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    @Override
    public int compareTo(Node o) {
        return this.weight-o.weight; // ascending
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }

    // preOrder
    public void preOrder(){
        System.out.println(this);
        if(this.left!=null){
            this.left.preOrder();
        }
        if(this.right!=null){
            this.right.preOrder();
        }
    }
}
           

檔案壓縮

讀取檔案 – 得到赫夫曼編碼表 – 完成壓縮 – 得到.zip檔案

// compress file

    /**
     *
     * @param srcFile path of file need to be compressed
     * @param dstFile path that stores the file after decompressing
     */
    public static void zipFile(String srcFile,String dstFile){
        // output stream

        // input stream read file
        FileInputStream fis = null;
        ObjectOutputStream oos = null;
        FileOutputStream fos = null;
        try {
            fis = new FileInputStream(srcFile);
            byte[] b = new byte[fis.available()]; // length of original file
            // read file
            fis.read(b);
            // get huffman code corresponding to file
            // compress original file
            byte[] huffmanBytes = huffmanZip(b);
            // create output stream storing compress file
            fos = new FileOutputStream(dstFile);
            // create ObjectOutputStream corresponding to file output stream
            oos = new ObjectOutputStream(fos);
            // 把赫夫曼編碼後的位元組數組寫入壓縮檔案
            oos.writeObject(huffmanBytes);
            // 以對象流的方式寫入赫夫曼編碼,為了以後恢複源檔案時使用
            oos.writeObject(huffmanCodes);

        } catch (Exception e) {
            System.out.println(e.getMessage());
        } finally {
            try {
                fis.close();
                fos.close();
                oos.close();
            } catch (IOException e) {
                System.out.println(e.getMessage());
            }
        }
    }
           
// test compress file
        String srcFile = "/Users/pz/Desktop/img4.jpg";
        String dstFile = "/Users/pz/Desktop/dst.dmg";
        zipFile(srcFile,dstFile);
        System.out.println("compress done");
           

檔案解壓

讀取解壓檔案(資料和赫夫曼編碼表)-- 俄暗沉解壓

// unzip file

    /**
     *
     * @param zipFile file for unzip
     * @param dstFile path for saving
     */
    public static void unzipFile(String zipFile,String dstFile){
        // define file input stream
        InputStream is = null;
        // define a object input stream
        ObjectInputStream ois = null;
        // define file output stream
        OutputStream os = null;
        try{
            // create file input stream
            is = new FileInputStream(zipFile);
            // create a object input stream corresponding to is
            ois = new ObjectInputStream(is);
            // read byte[] huffmanBytes
            byte[] huffmanBytes = (byte[])ois.readObject();
            // read huffman codes
            Map<Byte,String> huffmanCodes = (Map<Byte, String>) ois.readObject();
            // decode
            byte[] bytes = decode(huffmanCodes,huffmanBytes);
            // write file to dsfFile
            os = new FileOutputStream(dstFile);
            // write data to file
            os.write(bytes);

        }catch(Exception e){
            System.out.println(e.getMessage());
        }finally {
            try {
                os.close();
                ois.close();
                is.close();
            } catch (IOException e) {
                System.out.println(e.getMessage());
            }
        }
    }
           
// test decode file
        String zipFile = "/Users/pz/Desktop/dst.dmg";
        String dstFile = "/Users/pz/Desktop/su.jpg";
        unzipFile(zipFile,dstFile);
        System.out.println("success");
           

繼續閱讀