天天看點

算法系列之赫夫曼編碼實戰一【資料壓縮、資料解壓】

  • ​​1.何謂赫夫曼編碼?​​
  • ​​2.赫夫曼資料壓縮​​
  • ​​3.赫夫曼資料解壓​​
  • ​​4.全代碼​​

1.何謂赫夫曼編碼?

哈夫曼編碼是一種編碼方式,哈夫曼編碼是可變字長編碼的一種。Huffman于1952年提出一種編碼方法,該方法完全依據字元出現機率來構造異字頭的平均長度最短的碼字,有時稱之為最佳編碼,一般就叫做Huffman編碼(有時也稱為霍夫曼編碼)。

有關哈夫曼樹的相關文章

2.赫夫曼資料壓縮

給定一串字元串,将其進行赫夫曼壓縮

1.周遊字元串,将字元作為key,出現的頻率作為value存儲到map集合中;然後周遊該map集合,将其封裝到EncryNode對象中,并将其加入到list集合中。
2.根據List集合建構哈夫曼樹
3.根據哈夫曼樹生成哈夫曼編碼表
4.将十進制字元串,轉換為二進制整數
      
package com.zsh.algorithm.tree;

import java.io.*;
import java.util.*;

/**
 * @author:Ronin
 * @since:2021/9/29

 */

public class HuffmanEncrypt {

    //用來存儲轉換二進制字元串集合
    private static StringBuilder stringBuilder = new StringBuilder();
    //哈夫曼編碼表
    private static Map<Byte, String> huffmanCodesMap = new HashMap<>();

//---------------------------------------以下是壓縮相關函數------------------------------------------//
    /**
     * 赫夫曼編碼封裝方法
     *
     * @param bytes
     * @return
     */
    public static byte[] huffmanZip(byte[] bytes) {
        //1.計算字元串中每個字元出現的次數,并字元、次數封裝到EncryptNode結點中,将其加入到list集合中
        List<EncryptNode> encryptNodeList = calByteNums(bytes);
        //outputList(encryptNodeList);
        //2.建構哈夫曼樹
        EncryptNode root = buildHuffmanTree(encryptNodeList);
        // preErgodic(root);
        //3.生成赫夫曼編碼表
        Map<Byte, String> huffmanCodes = getHuffmanCodes(root);
        //System.out.println(huffmanCodes);
        //4.轉換為赫夫曼編碼位元組表
        return zip(bytes, huffmanCodes);
    }

    /**
     * 計算字元串中的每個字元的長度
     *
     * @param bytes 待計算的字元串
     * @return 傳回list集合
     */
    public static List<EncryptNode> calByteNums(byte[] bytes) {
        Map<Byte, Integer> map = new HashMap<>();
        byte[] charArray = bytes;
        for (byte c : charArray) {
            if (map.containsKey(c)) {
                int num = map.get(c);
                map.replace(c, num, num + 1);
            } else {
                map.put(c, 1);
            }
        }
        List<EncryptNode> list = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : map.entrySet()) {
            Byte key = entry.getKey();
            Integer value = entry.getValue();
            list.add(new EncryptNode(key, value));
        }
        return list;
    }

    /**
     * 建構赫夫曼樹
     *
     * @param list 權重集合
     * @return 傳回root節點
     */
    public static EncryptNode buildHuffmanTree(List<EncryptNode> list) {
        while (list.size() > 1) {
            Collections.sort(list);
            EncryptNode leftNode = list.remove(0);
            EncryptNode rightNode = list.remove(0);
            EncryptNode parentNode = new EncryptNode(null, leftNode.value + rightNode.value);
            parentNode.left = leftNode;
            parentNode.right = rightNode;
            list.add(parentNode);
        }
        return list.remove(0);
    }

    /**
     * 重載getHuffmanCodes(EncryptNode node, String path,  StringBuilder stringBuilder)
     *
     * @param root 根節點
     * @return 哈夫曼編碼map集合
     */
    public static Map<Byte, String> getHuffmanCodes(EncryptNode root) {
        if (root == null) {
            return null;
        } else {
            getHuffmanCodes(root.left, "0", stringBuilder);
            getHuffmanCodes(root.right, "1", stringBuilder);
        }
        return huffmanCodesMap;
    }

    /**
     * 赫夫曼編碼
     *
     * @param node          結點
     * @param path          1代表右結點,0代表左結點路徑
     * @param stringBuilder 用于path拼接
     */
    public static void getHuffmanCodes(EncryptNode node, String path, StringBuilder stringBuilder) {
        StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
        stringBuilder2.append(path);
        if (node != null) {
            if (node.c == null) {
                getHuffmanCodes(node.left, "0", stringBuilder2);
                getHuffmanCodes(node.right, "1", stringBuilder2);
            } else {
                huffmanCodesMap.put(node.c, stringBuilder2.toString());
            }
        }
    }

    /**
     * @param bytes 待編碼位元組數組
     * @param entry 赫夫曼編碼集合
     * @return 傳回編碼後的byte數組,将十進制字元串111001100101轉換為二進制byte數組
     */
    public static byte[] zip(byte[] bytes, Map<Byte, String> entry) {
        StringBuilder stringBuilder = new StringBuilder();
        for (byte ch : bytes) {
            stringBuilder.append(entry.get(ch));
        }
        //System.out.println("stringBuilder=" + stringBuilder.toString());
        int length = stringBuilder.length();
        int len = (length + 7) / 8;
        byte[] huffmanCodeBytes = new byte[len];
        int index = 0;
        for (int k = 0; k < length; k += 8) {
            String str;

            if ((k + 8) > length) {
                str = stringBuilder.substring(k);
            } else {
                str = stringBuilder.substring(k, k + 8);
            }
            huffmanCodeBytes[index] = (byte) Integer.parseInt(str, 2);
            index++;
        }
        return huffmanCodeBytes;
    }

    /**
     * 計算赫夫曼壓縮率
     *
     * @param original 未壓縮前的位元組數組
     * @param newByte  壓縮後的位元組數組
     * @return 壓縮率
     */
    public static String calHuffmanZipRatio(byte[] original, byte[] newByte) {
        double zipRatio = (double) (original.length - newByte.length) / original.length * 100;
        String s = String.format("%.2f", zipRatio);
        return s + "%";
    }

    //---------------------------------以上是壓縮相關函數--------------------------------------------------------//


    //先序周遊赫夫曼樹
    public static void preErgodic(EncryptNode cur) {
        if (cur == null) {
            return;
        }
        System.out.println(cur);
        preErgodic(cur.left);
        preErgodic(cur.right);
    }

    //列印list集合
    public static <T> void outputList(List<T> list) {
        for (T t : list) {
            System.out.println(t);
        }
    }
}

/**
 * 實作Comparable接口,便于使用Collections.sort排序
 */
class EncryptNode implements Comparable<EncryptNode> {
    public int value;   //出現的次數
    public Byte c;      //字元
    public EncryptNode left;
    public EncryptNode right;

    public EncryptNode(int value) {
        this.value = value;
    }

    public EncryptNode(Byte c, int value) {
        this.value = value;
        this.c = c;
    }

    @Override
    public int compareTo(EncryptNode o) {
        return this.value - o.value;   //升序
    }

    @Override
    public String toString() {
        return "EncryptNode{" +
                "value=" + value +
                ", c=" + c +
                '}';
    }
}      

3.赫夫曼資料解壓

private static byte[] decode(Map<Byte, String> huffmanCodesMap, byte[] bytes) {
        StringBuilder stringBuilder = new StringBuilder();
        for (int k = 0; k < bytes.length; k++) {
            boolean flag = (k == bytes.length - 1);
            stringBuilder.append(byteToBinaryString(!flag, bytes[k]));
        }
        //将map中的key和value置換
        Map<String, Byte> reverseMap = new HashMap<>();
        for (Map.Entry<Byte, String> map : huffmanCodesMap.entrySet()) {
            reverseMap.put(map.getValue(), map.getKey());
        }
        //根據赫夫曼編碼進行字元串的比對
        /**
         *例如以1001010101100000111為例
         *将變量i指向第一位1,然後變量count進行移動
         */
        List<Byte> list = new ArrayList<>();
        for (int i = 0; i < stringBuilder.length(); ) {
            int count = 1;
            Byte b = null;
            boolean flag = true;
            while (flag) {
                String key = stringBuilder.substring(i, i + count);
                b = reverseMap.get(key);
                if (b == null) {
                    count++;
                } else {
                    flag = false;
                }
            }
            list.add(b);
            i += count;
        }
        //将list集合中的結果資料封裝到位元組數組中
        byte[] result = new byte[list.size()];
        for (int k = 0; k < result.length; k++) {
            result[k] = list.get(k);
        }
        return result;
    }

    /**
     * @param flag 如果為true, 表示需要進行高位補齊;如果是false,表示不需要進行高位補齊-->因為最後一位不一定剛好是滿足八位
     * @param be   這是将轉換為二進制字元串的位元組
     * @return 傳回二進制字元串
     */
    private static String byteToBinaryString(boolean flag, byte be) {
        int beInt = be;
        //進行if判斷是因為如果是最為一個字元的二進制不滿足8位,這是和壓縮時對應的,是以不需要進行高位補齊,低位截取。
        if (flag) {
            beInt |= 256;                                   //按位與  , 解決正數的高位補齊的問題
        }
        String s = Integer.toBinaryString(beInt);           //補碼,正數需要高位補齊,負數需要截取
        if (flag) {
            return s.substring(s.length() - 8);
        } else {
            return s;
        }
    }      

4.全代碼

package com.zsh.algorithm.tree;

import java.io.*;
import java.util.*;

/**
 * @author:Ronin
 * @since:2021/9/29

 */

public class HuffmanEncrypt {
    public static void main(String[] args) {

    }

    //用來存儲轉換二進制字元串集合
    private static StringBuilder stringBuilder = new StringBuilder();
    //哈夫曼編碼表
    private static Map<Byte, String> huffmanCodesMap = new HashMap<>();

//---------------------------------------以下是壓縮相關函數------------------------------------------//
    /**
     * 赫夫曼編碼封裝方法
     *
     * @param bytes
     * @return
     */
    public static byte[] huffmanZip(byte[] bytes) {
        //1.計算字元串中每個字元出現的次數,并字元、次數封裝到EncryptNode結點中,将其加入到list集合中
        List<EncryptNode> encryptNodeList = calByteNums(bytes);
        //outputList(encryptNodeList);
        //2.建構哈夫曼樹
        EncryptNode root = buildHuffmanTree(encryptNodeList);
        // preErgodic(root);
        //3.生成赫夫曼編碼表
        Map<Byte, String> huffmanCodes = getHuffmanCodes(root);
        //System.out.println(huffmanCodes);
        //4.轉換為赫夫曼編碼位元組表
        return zip(bytes, huffmanCodes);
    }

    /**
     * 計算字元串中的每個字元的長度
     *
     * @param bytes 待計算的字元串
     * @return 傳回list集合
     */
    public static List<EncryptNode> calByteNums(byte[] bytes) {
        Map<Byte, Integer> map = new HashMap<>();
        byte[] charArray = bytes;
        for (byte c : charArray) {
            if (map.containsKey(c)) {
                int num = map.get(c);
                map.replace(c, num, num + 1);
            } else {
                map.put(c, 1);
            }
        }
        List<EncryptNode> list = new ArrayList<>();
        for (Map.Entry<Byte, Integer> entry : map.entrySet()) {
            Byte key = entry.getKey();
            Integer value = entry.getValue();
            list.add(new EncryptNode(key, value));
        }
        return list;
    }

    /**
     * 建構赫夫曼樹
     *
     * @param list 權重集合
     * @return 傳回root節點
     */
    public static EncryptNode buildHuffmanTree(List<EncryptNode> list) {
        while (list.size() > 1) {
            Collections.sort(list);
            EncryptNode leftNode = list.remove(0);
            EncryptNode rightNode = list.remove(0);
            EncryptNode parentNode = new EncryptNode(null, leftNode.value + rightNode.value);
            parentNode.left = leftNode;
            parentNode.right = rightNode;
            list.add(parentNode);
        }
        return list.remove(0);
    }

    /**
     * 重載getHuffmanCodes(EncryptNode node, String path,  StringBuilder stringBuilder)
     *
     * @param root 根節點
     * @return 哈夫曼編碼map集合
     */
    public static Map<Byte, String> getHuffmanCodes(EncryptNode root) {
        if (root == null) {
            return null;
        } else {
            getHuffmanCodes(root.left, "0", stringBuilder);
            getHuffmanCodes(root.right, "1", stringBuilder);
        }
        return huffmanCodesMap;
    }

    /**
     * 赫夫曼編碼
     *
     * @param node          結點
     * @param path          1代表右結點,0代表左結點路徑
     * @param stringBuilder 用于path拼接
     */
    public static void getHuffmanCodes(EncryptNode node, String path, StringBuilder stringBuilder) {
        StringBuilder stringBuilder2 = new StringBuilder(stringBuilder);
        stringBuilder2.append(path);
        if (node != null) {
            if (node.c == null) {
                getHuffmanCodes(node.left, "0", stringBuilder2);
                getHuffmanCodes(node.right, "1", stringBuilder2);
            } else {
                huffmanCodesMap.put(node.c, stringBuilder2.toString());
            }
        }
    }

    /**
     * @param bytes 待編碼位元組數組
     * @param entry 赫夫曼編碼集合
     * @return 傳回編碼後的byte數組
     */
    public static byte[] zip(byte[] bytes, Map<Byte, String> entry) {
        StringBuilder stringBuilder = new StringBuilder();
        for (byte ch : bytes) {
            stringBuilder.append(entry.get(ch));
        }
        //System.out.println("stringBuilder=" + stringBuilder.toString());
        int length = stringBuilder.length();
        int len = (length + 7) / 8;
        byte[] huffmanCodeBytes = new byte[len];
        int index = 0;
        for (int k = 0; k < length; k += 8) {
            String str;

            if ((k + 8) > length) {
                str = stringBuilder.substring(k);
            } else {
                str = stringBuilder.substring(k, k + 8);
            }
            huffmanCodeBytes[index] = (byte) Integer.parseInt(str, 2);
            index++;
        }
        return huffmanCodeBytes;
    }

    /**
     * 計算赫夫曼壓縮率
     *
     * @param original 未壓縮前的位元組數組
     * @param newByte  壓縮後的位元組數組
     * @return 壓縮率
     */
    public static String calHuffmanZipRatio(byte[] original, byte[] newByte) {
        double zipRatio = (double) (original.length - newByte.length) / original.length * 100;
        String s = String.format("%.2f", zipRatio);
        return s + "%";
    }

    //---------------------------------以上是壓縮相關函數--------------------------------------------------------//
    //---------------------------------以下是解壓縮相關函數-----------------------------------------------------//
    private static byte[] decode(Map<Byte, String> huffmanCodesMap, byte[] bytes) {
        StringBuilder stringBuilder = new StringBuilder();
        for (int k = 0; k < bytes.length; k++) {
            boolean flag = (k == bytes.length - 1);
            stringBuilder.append(byteToBinaryString(!flag, bytes[k]));
        }
        Map<String, Byte> reverseMap = new HashMap<>();
        for (Map.Entry<Byte, String> map : huffmanCodesMap.entrySet()) {
            reverseMap.put(map.getValue(), map.getKey());
        }
        List<Byte> list = new ArrayList<>();
        for (int i = 0; i < stringBuilder.length(); ) {
            int count = 1;
            Byte b = null;
            boolean flag = true;
            while (flag) {
                String key = stringBuilder.substring(i, i + count);
                b = reverseMap.get(key);
                if (b == null) {
                    count++;
                } else {
                    flag = false;
                }
            }
            list.add(b);
            i += count;
        }
        byte[] result = new byte[list.size()];
        for (int k = 0; k < result.length; k++) {
            result[k] = list.get(k);
        }
        return result;
    }

    /**
     * @param flag 如果為true, 表示需要進行高位補齊;如果是false,表示不需要進行高位補齊-->因為最後一位不一定剛好是滿足八位
     * @param be   這是将轉換為二進制字元串的位元組
     * @return 傳回二進制字元串
     */
    private static String byteToBinaryString(boolean flag, byte be) {
        int beInt = be;
        if (flag) {
            beInt |= 256;                                   //按位與  , 解決正數的高位補齊的問題
        }
        String s = Integer.toBinaryString(beInt);           //補碼,正數需要高位補齊,負數需要截取
        if (flag) {
            return s.substring(s.length() - 8);
        } else {
            return s;
        }
    }

    //---------------------------------以上是解壓縮相關函數-----------------------------------------------------//

    //---------------------------------以下是檔案壓縮解壓相關函數-----------------------------------------------------//

    /**
     * 檔案壓縮方法  --- 赫夫曼編碼壓縮
     * @param readFile  壓縮檔案路徑      檔案 + 檔案名
     * @param storeFile 壓縮之後存儲的路徑  檔案 + 檔案名
     */
    private static void fileEncode(String readFile, String storeFile) {
        FileInputStream is = null;
        ObjectOutputStream oos = null;
        FileOutputStream os = null;
        try {
            is = new FileInputStream(readFile);
            byte[] bytes = new byte[is.available()];
            is.read(bytes);
            byte[] huffmanBytes = huffmanZip(bytes);
            os = new FileOutputStream(storeFile);
            oos = new ObjectOutputStream(os);
            oos.writeObject(huffmanBytes);
            oos.writeObject(huffmanCodesMap);
            os.write(huffmanBytes);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                oos.close();
                os.close();
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }

        }
    }
    public static void fileDecode(String zipFile, String dstFile) {
        System.out.println("檔案解壓開始----------------------");
        FileInputStream is = null;
        ObjectInputStream ois = null;
        FileOutputStream os = null;
        try {
            is = new FileInputStream(zipFile);
            ois = new ObjectInputStream(is);
            byte[] huffmanBytes = (byte[])ois.readObject();
            Map<Byte, String> huffmanCodes = (Map<Byte, String>) ois.readObject();
            byte[] bytes = decode(huffmanCodes, huffmanBytes);
            os = new FileOutputStream(dstFile);
            os.write(bytes);

        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            try {
                os.close();
                ois.close();
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }

        }
    }
    //---------------------------------以上是檔案壓縮解壓相關函數-----------------------------------------------------//

    //先序周遊赫夫曼樹
    public static void preErgodic(EncryptNode cur) {
        if (cur == null) {
            return;
        }
        System.out.println(cur);
        preErgodic(cur.left);
        preErgodic(cur.right);
    }

    //列印list集合
    public static <T> void outputList(List<T> list) {
        for (T t : list) {
            System.out.println(t);
        }
    }
}

/**
 * 實作Comparable接口,便于使用Collections.sort排序
 */
class EncryptNode implements Comparable<EncryptNode> {
    public int value;   //出現的次數
    public Byte c;      //字元
    public EncryptNode left;
    public EncryptNode right;

    public EncryptNode(int value) {
        this.value = value;
    }

    public EncryptNode(Byte c, int value) {
        this.value = value;
        this.c = c;
    }

    @Override
    public int compareTo(EncryptNode o) {
        return this.value - o.value;   //升序
    }

    @Override
    public String toString() {
        return "EncryptNode{" +
                "value=" + value +
                ", c=" + c +
                '}';
    }
}      

繼續閱讀