實驗三 LZW

LZW編解碼算法

文章目錄

LZW編解碼算法

編碼原理
解碼原理
代碼實作

LZW編碼（LZW Encoding）又稱“串表壓縮算法”，由J.Ziv和A.Lempel在1978年首次介紹，并由Terry A.Welch在1984年予以改進，最終該編碼方法由三人的名字命名。

該編碼方法屬于詞典壓縮編碼方法。詞典編碼是一種通用編碼方法，适用于無法觀察新源統計特性，或雖然可觀察但統計特性不固定的情形。

LZW編碼可應用于通用檔案壓縮（如WinZip）、動畫圖像壓縮（如GIF、TIFF）等領域。

編碼原理

LZW編碼的核心思想是從輸入資料中建立一個“短語詞典（Dictionary of Phrases）”，這種短語可以是任意字元的組合。編碼資料過程中當遇到已經在詞典中出現的“短語”時，編碼器就輸出該詞條的“索引号”，而不是短語本身，以實作壓縮。

編碼流程如下：

例如：對“abbababac”進行LZW編碼。

初始化詞典：a = 97，b = 98，c = 99（ASCII碼）

步驟	P	C	PC在詞典中？	若不在，輸出P	新增詞條	說明
1	NULL	a	初始化，不處理
2	a	b	否	97（a）	ab：256	ab不在詞典中，擴充，P = C
3	b	b	否	98（b）	bb：257	bb不在詞典中，擴充，P = C
4	b	a	否	98（b）	ba：258	ba不在詞典中，擴充，P = C
5	a	b	是	ab在詞典中，P = PC
6	ab	a	否	256（ab）	aba：259	aba不在詞典中，擴充，P = C
7	a	b	是	ab在詞典中，P = PC
8	ab	a	是	aba在詞典中，P = PC
9	aba	c	否	259（aba）	abac：260	abac不在詞典中，擴充，P = C
10	c	NULL	99（c）	結束，輸出未編碼的字元

即編碼結果為：

“97 98 98 256 259 99”

解碼原理

首先我們要明确一點，編碼過程中建立的詞典，實際上并不與碼流一起傳送。這主要考慮到以下兩點：一是由于碼表可能占用的空間很大，不傳送碼表可以将壓縮比最大化；二是如果需要等收到編碼端的詞典後再進行解碼，便不可能實作編解碼兩端的同步操作。

那麼，我們就要在解碼端按照與編碼端相同的規則同步地建立詞典（Trie樹），這便是LZW解碼的核心思想。

解碼流程如下圖所示：

例如将上述輸出的編碼碼流“97 98 98 256 259 99”進行解碼：

步驟	pW	cW	cW在詞典中？	輸出	新增詞條	說明
1	97	是	a	在詞典中，輸出cW；新增pW + cW首字元
2	97	98	是	b	ab：256	在詞典中，輸出cW；新增pW + cW首字元
3	98	98	是	b	bb：257	在詞典中，輸出cW；新增pW + cW首字元
4	98	256	是	ab	ba：258	在詞典中，輸出cW；新增pW + cW首字元
5	256	259	否	aba	aba：259	不在詞典中，輸出pW + pW首字元，并添加至詞典
6	259	99	是	c	abac：260	在詞典中，輸出cW；新增pW + cW首字元

按上述流程解碼得“abbababac”，即成功複原原始資料。

代碼實作

declarations.h

#pragma once
#include "BitIO.h"
#define DICT_CAPACITY 65535// Capacity of the dictionary
/* Global variables */
struct {
    int suffix;
    int parent;
    int firstChild;
    int nextSibling;
} dictionary[DICT_CAPACITY + 1];
extern int nextNodeIdx;    // Index of next node (i.e. next dictionary entry)
extern int decStack[DICT_CAPACITY]; // Stack for decoding a phrase

/* Functions */
void LzwEncoding(FILE* inFilePtr, BITFILE* outBitFilePtr);
void LzwDecoding(BITFILE* inBitFilePtr, FILE* outFilePtr);

BitIO.h

#pragma once
#include <iostream>

typedef struct {
  FILE* fp;
  unsigned char mask;
  int rack;
}BITFILE;

BITFILE* OpenBitFileInput(char* fileName);
BITFILE* OpenBitFileOutput(char* fileName);
void CloseBitFileInput(BITFILE* bf);
void CloseBitFileOutput(BITFILE* bf);
int BitInput(BITFILE* bf);
unsigned long BitsInput(BITFILE* bf, int count);
void BitOutput(BITFILE* bf, int bit);
void BitsOutput(BITFILE* bf, unsigned long code, int count);

BitIO.cpp

/* Definitions for bitwise IO */

#include <iostream>
#include <stdlib.h>
#include "BitIO.h"

BITFILE* OpenBitFileInput(char* fileName) {
  //BITFILE* bf = (BITFILE*)malloc(sizeof(BITFILE));
  BITFILE* bf = new BITFILE;
  if (bf == NULL) {
    return NULL;
  }
  if (fileName == NULL) {
    bf->fp = stdin;
  } else {
    fopen_s(&bf->fp, fileName, "rb");
  }
  if (bf->fp == NULL) {
    return NULL;
  }
  bf->mask = 0x80;
  bf->rack = 0;
  return bf;
}

BITFILE* OpenBitFileOutput(char* fileName) {
  //BITFILE* bf = (BITFILE*)malloc(sizeof(BITFILE));
  BITFILE* bf = new BITFILE;
  if (bf == NULL) {
    return NULL;
  }
  if (fileName == NULL) {
    bf->fp = stdout;
  } else {
    fopen_s(&bf->fp, fileName, "wb");
  }
  if (bf->fp == NULL) {
    return NULL;
  }
  bf->mask = 0x80;
  bf->rack = 0;
  return bf;
}

void CloseBitFileInput(BITFILE* bf) {
  fclose(bf->fp);
  //free(bf);
  delete bf;
}

void CloseBitFileOutput(BITFILE* bf) {
  /* Output the remaining bits */
  if (bf->mask != 0x80) {
    fputc(bf->rack, bf->fp);
  }
  fclose(bf->fp);
  //free(bf);
  delete bf;
}

int BitInput(BITFILE* bf) {
  int value;

  if (bf->mask == 0x80) {
    bf->rack = fgetc(bf->fp);
    if (bf->rack == EOF) {
      fprintf(stderr, "Reached the end of file.\n");
      exit(-1);
    }
  }
  value = bf->mask & bf->rack;
  bf->mask >>= 1;
  if (0 == bf->mask) {
    bf->mask = 0x80;
  }
  return((value == 0) ? 0 : 1);
}

unsigned long BitsInput(BITFILE* bf, int count) {
  unsigned long mask;
  unsigned long value;
  mask = 1L << (count - 1);
  value = 0L;
  while (mask != 0) {
    if (BitInput(bf) == 1)
      value |= mask;
    mask >>= 1;
  }
  return value;
}

void BitOutput(BITFILE* bf, int bit) {
  if (bit != 0) {
    bf->rack |= bf->mask;
  }
  bf->mask >>= 1;
  if (bf->mask == 0) {  // 8 bits in rack
    fputc(bf->rack, bf->fp);
    bf->rack = 0;
    bf->mask = 0x80;
  }
}

void BitsOutput(BITFILE* bf, unsigned long code, int count) {
  unsigned long mask;

  mask = 1L << (count - 1);
  while (mask != 0) {
    BitOutput(bf, (int)((code & mask) == 0 ? 0 : 1));
    mask >>= 1;
  }
}

LzwED.cpp

#include <iostream>
#include "declarations.h"
#include "BitIO.h"
using namespace std;

/* Global variables */
int nextNodeIdx;
int decStack[DICT_CAPACITY];

/* Macros */
#define Input(f) ((int)BitsInput(f, 16))
#define Output(f, x) BitsOutput(f, (unsigned long)(x), 16)

void InitialiseDict() {  // Dictionary initialisation (initialise root node 0-255)
  for (int i = 0; i < 256; i++) {
    dictionary[i].suffix = i;  // The suffix of each node is the corresponding ASCII code
    dictionary[i].parent = -1;  // Temporarily doesn't have a parent node (i.e. prefix)
    dictionary[i].firstChild = -1;  // Temporarily doesn't have any child nodes
    dictionary[i].nextSibling = i + 1;  // The index of the next sibling root node is the next ASCII code
  }

  dictionary[255].nextSibling = -1;  // No next sibling for the last root node
  nextNodeIdx = 256;  // The index of next dictionary entry
}

int InDict(int P, int C) {
  if (P == -1) {
    /* In this case, the current character is the start of the file,
    and it's evidently in the dictionary,
    thus return the corresponding ASCII code (let P = this character). */
    return C;
  }

  /* Traverse all child node(s) of node P from left to right (i.e. all sibling nodes of the first child node) */
  int sibling = dictionary[P].firstChild;  // Start from the first child of P
  while (sibling > -1) {  // sibling == -1 indicates the end of sibling traversal
    /* If a C-suffixed sibling is found, then return the code of PC (i.e. the index of this sibling) */
    if (C == dictionary[sibling].suffix) {
      return sibling;
    }
    /* If the suffixes don't match, then look for the next */
    sibling = dictionary[sibling].nextSibling;
  }

  /* The suffix of all siblings don't match PC, thus PC isn't in the dictionary */
  return -1;
}

void NewDictEntry(int P, int C) {
  if (P < 0) {
    return;
  }

  dictionary[nextNodeIdx].suffix = C;
  dictionary[nextNodeIdx].parent = P;
  dictionary[nextNodeIdx].nextSibling = -1;  // Indicates that the node is the last sibling
  dictionary[nextNodeIdx].firstChild = -1;  // Temporarily this node doesn't have a child
  int pFirstChild = dictionary[P].firstChild;  // The first child of P
  int pChild;

  /* Set up the new sibling-relation */
  if (pFirstChild > -1) {  /* Parent of the new node originally have a child node */
    pChild = pFirstChild;  // Start from the first child of P

    /* Look for the youngest child of P (i.e. the last sibling) */
    while (dictionary[pChild].nextSibling > -1) {
      pChild = dictionary[pChild].nextSibling;
    }

    dictionary[pChild].nextSibling = nextNodeIdx;  // Set the new node as the next sibling of the current last sibling
  } else {  /* Parent of the new node originally doesn't have a child */
    dictionary[P].firstChild = nextNodeIdx;  // Set the new node as PC (i.e. the first child of P)
  }

  nextNodeIdx++;  //Index of the next entry + 1
}

void LzwEncoding(FILE* inFilePtr, BITFILE* outBitFilePtr) {
  int previousStr;  // P
  int currentChar;  // C
  int PC;  // P & C combined as 1 character

  /* Compute the size of file */
  fseek(inFilePtr, 0, SEEK_END);
  int inFileSize = ftell(inFilePtr);
  fseek(inFilePtr, 0, SEEK_SET);
  BitsOutput(outBitFilePtr, inFileSize, 4 * 8);

  /* Initialise the dictionary and P */
  InitialiseDict();
  previousStr = -1;  // Initialise P

  //fprintf(outFilePtr, "LZW-encoded string: ");
  while ( (currentChar = fgetc(inFilePtr)) != EOF ) {
    /* Check if PC is in the dictionary */
    PC = InDict(previousStr, currentChar);
    if (PC >= 0) {  /* PC is in the dictionary */
      previousStr = PC;  // Set P = PC
    } else {  /* PC isn't in the dictionary */
      Output(outBitFilePtr, previousStr);  // Output P
      if (nextNodeIdx < DICT_CAPACITY) {  /* Enough space to add PC into the dictionary */
        NewDictEntry(previousStr, currentChar);
      }
      previousStr = currentChar;  // Set P = C
    }
  }

  Output(outBitFilePtr, previousStr);  // Output the last unencoded character(s)
}

int DecodeString(int start, int code) {
  int count = start;
  while (code >= 0) {
    /* Look for the root node */
    decStack[count] = dictionary[code].suffix;  // Store the original string in inverted order
    code = dictionary[code].parent;  // Set the parent of the current node as the next node
    count++;  // Points to the next node
  }
  return count;  // The distance between the current node and the root
}

void LzwDecoding(BITFILE* inBitFilePtr, FILE* outFilePtr) {
  int character;
  int previousCode;  // pW
  int currentCode;  // cW
  int phraseLen;  // Length of phrase

  unsigned long inFileSize = BitsInput(inBitFilePtr, 4 * 8);
  if (inFileSize == -1) {
    inFileSize = 0;
  }

  /* Initialise dictionary and pW*/
  InitialiseDict();
  previousCode = -1;

  while (inFileSize > 0) {
    currentCode = Input(inBitFilePtr);
    if (currentCode < nextNodeIdx) {  /* cW is in dictionary */
      phraseLen = DecodeString(0, currentCode);  // The length of cW
    } else {  /* When cW ¡Ý next node index, which means cW > current node index, cW isn't in dictionary */
      decStack[0] = character;  // The last character in stack of the last loop, i.e. 1st character of pW
      phraseLen = DecodeString(1, previousCode);  // The length of pW + 1
    }
    character = decStack[phraseLen - 1];  // The last character in the stack, i.e. the 1st character of pW or cW

    while (phraseLen > 0) {
      phraseLen--;
      fputc(decStack[phraseLen], outFilePtr);  // Output the decoded string (in inverted order of decStack)
      inFileSize--;
    }
    if (nextNodeIdx < DICT_CAPACITY) {  /* Add the new phrase into dictionary */
      NewDictEntry(previousCode, character);  // Add "pW + 1st character of cW" or "pW + 1st character of pW" into dictionary
    }
    previousCode = currentCode;  // Set pW = cW
  }
}

main.cpp

#include <iostream>
#include "declarations.h"
#include "BitIO.h"
using namespace std;

int main(int argc, char* argv[]) {
  FILE* fp;
  BITFILE* bf;

  if (argc < 4) {
    fprintf(stdout, "Usage: \n%s <options> <inFile> <outFile>\n", argv[0]);
    fprintf(stdout, "\t<options>: E for LZW encoding or D for LZW decoding.\n");
    fprintf(stdout, "\t<inFile>: name of input file.\n");
    fprintf(stdout, "\t<outFile>: name of output file.\n");
    return -1;
  }

  if ('E' == argv[1][0]) {  /* Do LZW encoding */
    /* Open the files */
    if (fopen_s(&fp, argv[2], "rb") != 0) {
      cout << "Failed to open \"" << argv[2] << "\"." << endl;
      exit(-1);
    }
    bf = OpenBitFileOutput(argv[3]);

    if ((fp != NULL) && (bf != NULL)) {
      LzwEncoding(fp, bf);
      //LZWEncode(fp, bf);
      fclose(fp);
      CloseBitFileOutput(bf);
      fprintf(stdout, "LZW encoding done.\n");
    }
  } else if ('D' == argv[1][0]) {  /* Do LZW decoding */
    /* Open the files */
    bf = OpenBitFileInput(argv[2]);
    if (fopen_s(&fp, argv[3], "wb") != 0) {
      cout << "Failed to open \"" << argv[2] << "\"." << endl;
      exit(-1);
    }

    if ((fp != NULL) && (bf != NULL)) {
      LzwDecoding(bf, fp);
      //LZWDecode(bf, fp);
      fclose(fp);
      CloseBitFileInput(bf);
      fprintf(stdout, "LZW decoding done.\n");
    }
  } else {
    fprintf(stderr, "Unsupported operation.\n");
  }
}

原始檔案為string.txt，編碼後的檔案寫為了二進制檔案string.dat，解碼後的檔案為string_D.txt：

打開編解碼後的檔案：

實驗三 LZW

LZW編解碼算法

文章目錄

編碼原理

解碼原理

代碼實作

繼續閱讀

FZU 1978 Repair the brackets

UVA 10344- 23 out of 5

ZOJ 3935 2016

POJ 2115 C Looooops

HDU 5381 The sum of gcd

ZOJ 1104 Leaps Tall Buildings

ZOJ 3700 Ever Dream

HDU 2821 Pusher

ZOJ 1199 Point of Intersection

UVA 1401 Remember the Word

UVA 620 Cellular Structure

ZOJ 2748 Free Kick

CSU 1567 Reverse Rot

UVA 519 Puzzle (II)

swift資料合集

蘋果稽核 Guideline 2.1 - Information Needed