实验三 LZW

LZW编解码算法

文章目录

LZW编解码算法

编码原理
解码原理
代码实现

LZW编码（LZW Encoding）又称“串表压缩算法”，由J.Ziv和A.Lempel在1978年首次介绍，并由Terry A.Welch在1984年予以改进，最终该编码方法由三人的名字命名。

该编码方法属于词典压缩编码方法。词典编码是一种通用编码方法，适用于无法观察新源统计特性，或虽然可观察但统计特性不固定的情形。

LZW编码可应用于通用文件压缩（如WinZip）、动画图像压缩（如GIF、TIFF）等领域。

编码原理

LZW编码的核心思想是从输入数据中创建一个“短语词典（Dictionary of Phrases）”，这种短语可以是任意字符的组合。编码数据过程中当遇到已经在词典中出现的“短语”时，编码器就输出该词条的“索引号”，而不是短语本身，以实现压缩。

编码流程如下：

例如：对“abbababac”进行LZW编码。

初始化词典：a = 97，b = 98，c = 99（ASCII码）

步骤	P	C	PC在词典中？	若不在，输出P	新增词条	说明
1	NULL	a	初始化，不处理
2	a	b	否	97（a）	ab：256	ab不在词典中，扩充，P = C
3	b	b	否	98（b）	bb：257	bb不在词典中，扩充，P = C
4	b	a	否	98（b）	ba：258	ba不在词典中，扩充，P = C
5	a	b	是	ab在词典中，P = PC
6	ab	a	否	256（ab）	aba：259	aba不在词典中，扩充，P = C
7	a	b	是	ab在词典中，P = PC
8	ab	a	是	aba在词典中，P = PC
9	aba	c	否	259（aba）	abac：260	abac不在词典中，扩充，P = C
10	c	NULL	99（c）	结束，输出未编码的字符

即编码结果为：

“97 98 98 256 259 99”

解码原理

首先我们要明确一点，编码过程中建立的词典，实际上并不与码流一起传送。这主要考虑到以下两点：一是由于码表可能占用的空间很大，不传送码表可以将压缩比最大化；二是如果需要等收到编码端的词典后再进行解码，便不可能实现编解码两端的同步操作。

那么，我们就要在解码端按照与编码端相同的规则同步地建立词典（Trie树），这便是LZW解码的核心思想。

解码流程如下图所示：

例如将上述输出的编码码流“97 98 98 256 259 99”进行解码：

步骤	pW	cW	cW在词典中？	输出	新增词条	说明
1	97	是	a	在词典中，输出cW；新增pW + cW首字符
2	97	98	是	b	ab：256	在词典中，输出cW；新增pW + cW首字符
3	98	98	是	b	bb：257	在词典中，输出cW；新增pW + cW首字符
4	98	256	是	ab	ba：258	在词典中，输出cW；新增pW + cW首字符
5	256	259	否	aba	aba：259	不在词典中，输出pW + pW首字符，并添加至词典
6	259	99	是	c	abac：260	在词典中，输出cW；新增pW + cW首字符

按上述流程解码得“abbababac”，即成功复原原始数据。

代码实现

declarations.h

#pragma once
#include "BitIO.h"
#define DICT_CAPACITY 65535// Capacity of the dictionary
/* Global variables */
struct {
    int suffix;
    int parent;
    int firstChild;
    int nextSibling;
} dictionary[DICT_CAPACITY + 1];
extern int nextNodeIdx;    // Index of next node (i.e. next dictionary entry)
extern int decStack[DICT_CAPACITY]; // Stack for decoding a phrase

/* Functions */
void LzwEncoding(FILE* inFilePtr, BITFILE* outBitFilePtr);
void LzwDecoding(BITFILE* inBitFilePtr, FILE* outFilePtr);

BitIO.h

#pragma once
#include <iostream>

typedef struct {
  FILE* fp;
  unsigned char mask;
  int rack;
}BITFILE;

BITFILE* OpenBitFileInput(char* fileName);
BITFILE* OpenBitFileOutput(char* fileName);
void CloseBitFileInput(BITFILE* bf);
void CloseBitFileOutput(BITFILE* bf);
int BitInput(BITFILE* bf);
unsigned long BitsInput(BITFILE* bf, int count);
void BitOutput(BITFILE* bf, int bit);
void BitsOutput(BITFILE* bf, unsigned long code, int count);

BitIO.cpp

/* Definitions for bitwise IO */

#include <iostream>
#include <stdlib.h>
#include "BitIO.h"

BITFILE* OpenBitFileInput(char* fileName) {
  //BITFILE* bf = (BITFILE*)malloc(sizeof(BITFILE));
  BITFILE* bf = new BITFILE;
  if (bf == NULL) {
    return NULL;
  }
  if (fileName == NULL) {
    bf->fp = stdin;
  } else {
    fopen_s(&bf->fp, fileName, "rb");
  }
  if (bf->fp == NULL) {
    return NULL;
  }
  bf->mask = 0x80;
  bf->rack = 0;
  return bf;
}

BITFILE* OpenBitFileOutput(char* fileName) {
  //BITFILE* bf = (BITFILE*)malloc(sizeof(BITFILE));
  BITFILE* bf = new BITFILE;
  if (bf == NULL) {
    return NULL;
  }
  if (fileName == NULL) {
    bf->fp = stdout;
  } else {
    fopen_s(&bf->fp, fileName, "wb");
  }
  if (bf->fp == NULL) {
    return NULL;
  }
  bf->mask = 0x80;
  bf->rack = 0;
  return bf;
}

void CloseBitFileInput(BITFILE* bf) {
  fclose(bf->fp);
  //free(bf);
  delete bf;
}

void CloseBitFileOutput(BITFILE* bf) {
  /* Output the remaining bits */
  if (bf->mask != 0x80) {
    fputc(bf->rack, bf->fp);
  }
  fclose(bf->fp);
  //free(bf);
  delete bf;
}

int BitInput(BITFILE* bf) {
  int value;

  if (bf->mask == 0x80) {
    bf->rack = fgetc(bf->fp);
    if (bf->rack == EOF) {
      fprintf(stderr, "Reached the end of file.\n");
      exit(-1);
    }
  }
  value = bf->mask & bf->rack;
  bf->mask >>= 1;
  if (0 == bf->mask) {
    bf->mask = 0x80;
  }
  return((value == 0) ? 0 : 1);
}

unsigned long BitsInput(BITFILE* bf, int count) {
  unsigned long mask;
  unsigned long value;
  mask = 1L << (count - 1);
  value = 0L;
  while (mask != 0) {
    if (BitInput(bf) == 1)
      value |= mask;
    mask >>= 1;
  }
  return value;
}

void BitOutput(BITFILE* bf, int bit) {
  if (bit != 0) {
    bf->rack |= bf->mask;
  }
  bf->mask >>= 1;
  if (bf->mask == 0) {  // 8 bits in rack
    fputc(bf->rack, bf->fp);
    bf->rack = 0;
    bf->mask = 0x80;
  }
}

void BitsOutput(BITFILE* bf, unsigned long code, int count) {
  unsigned long mask;

  mask = 1L << (count - 1);
  while (mask != 0) {
    BitOutput(bf, (int)((code & mask) == 0 ? 0 : 1));
    mask >>= 1;
  }
}

LzwED.cpp

#include <iostream>
#include "declarations.h"
#include "BitIO.h"
using namespace std;

/* Global variables */
int nextNodeIdx;
int decStack[DICT_CAPACITY];

/* Macros */
#define Input(f) ((int)BitsInput(f, 16))
#define Output(f, x) BitsOutput(f, (unsigned long)(x), 16)

void InitialiseDict() {  // Dictionary initialisation (initialise root node 0-255)
  for (int i = 0; i < 256; i++) {
    dictionary[i].suffix = i;  // The suffix of each node is the corresponding ASCII code
    dictionary[i].parent = -1;  // Temporarily doesn't have a parent node (i.e. prefix)
    dictionary[i].firstChild = -1;  // Temporarily doesn't have any child nodes
    dictionary[i].nextSibling = i + 1;  // The index of the next sibling root node is the next ASCII code
  }

  dictionary[255].nextSibling = -1;  // No next sibling for the last root node
  nextNodeIdx = 256;  // The index of next dictionary entry
}

int InDict(int P, int C) {
  if (P == -1) {
    /* In this case, the current character is the start of the file,
    and it's evidently in the dictionary,
    thus return the corresponding ASCII code (let P = this character). */
    return C;
  }

  /* Traverse all child node(s) of node P from left to right (i.e. all sibling nodes of the first child node) */
  int sibling = dictionary[P].firstChild;  // Start from the first child of P
  while (sibling > -1) {  // sibling == -1 indicates the end of sibling traversal
    /* If a C-suffixed sibling is found, then return the code of PC (i.e. the index of this sibling) */
    if (C == dictionary[sibling].suffix) {
      return sibling;
    }
    /* If the suffixes don't match, then look for the next */
    sibling = dictionary[sibling].nextSibling;
  }

  /* The suffix of all siblings don't match PC, thus PC isn't in the dictionary */
  return -1;
}

void NewDictEntry(int P, int C) {
  if (P < 0) {
    return;
  }

  dictionary[nextNodeIdx].suffix = C;
  dictionary[nextNodeIdx].parent = P;
  dictionary[nextNodeIdx].nextSibling = -1;  // Indicates that the node is the last sibling
  dictionary[nextNodeIdx].firstChild = -1;  // Temporarily this node doesn't have a child
  int pFirstChild = dictionary[P].firstChild;  // The first child of P
  int pChild;

  /* Set up the new sibling-relation */
  if (pFirstChild > -1) {  /* Parent of the new node originally have a child node */
    pChild = pFirstChild;  // Start from the first child of P

    /* Look for the youngest child of P (i.e. the last sibling) */
    while (dictionary[pChild].nextSibling > -1) {
      pChild = dictionary[pChild].nextSibling;
    }

    dictionary[pChild].nextSibling = nextNodeIdx;  // Set the new node as the next sibling of the current last sibling
  } else {  /* Parent of the new node originally doesn't have a child */
    dictionary[P].firstChild = nextNodeIdx;  // Set the new node as PC (i.e. the first child of P)
  }

  nextNodeIdx++;  //Index of the next entry + 1
}

void LzwEncoding(FILE* inFilePtr, BITFILE* outBitFilePtr) {
  int previousStr;  // P
  int currentChar;  // C
  int PC;  // P & C combined as 1 character

  /* Compute the size of file */
  fseek(inFilePtr, 0, SEEK_END);
  int inFileSize = ftell(inFilePtr);
  fseek(inFilePtr, 0, SEEK_SET);
  BitsOutput(outBitFilePtr, inFileSize, 4 * 8);

  /* Initialise the dictionary and P */
  InitialiseDict();
  previousStr = -1;  // Initialise P

  //fprintf(outFilePtr, "LZW-encoded string: ");
  while ( (currentChar = fgetc(inFilePtr)) != EOF ) {
    /* Check if PC is in the dictionary */
    PC = InDict(previousStr, currentChar);
    if (PC >= 0) {  /* PC is in the dictionary */
      previousStr = PC;  // Set P = PC
    } else {  /* PC isn't in the dictionary */
      Output(outBitFilePtr, previousStr);  // Output P
      if (nextNodeIdx < DICT_CAPACITY) {  /* Enough space to add PC into the dictionary */
        NewDictEntry(previousStr, currentChar);
      }
      previousStr = currentChar;  // Set P = C
    }
  }

  Output(outBitFilePtr, previousStr);  // Output the last unencoded character(s)
}

int DecodeString(int start, int code) {
  int count = start;
  while (code >= 0) {
    /* Look for the root node */
    decStack[count] = dictionary[code].suffix;  // Store the original string in inverted order
    code = dictionary[code].parent;  // Set the parent of the current node as the next node
    count++;  // Points to the next node
  }
  return count;  // The distance between the current node and the root
}

void LzwDecoding(BITFILE* inBitFilePtr, FILE* outFilePtr) {
  int character;
  int previousCode;  // pW
  int currentCode;  // cW
  int phraseLen;  // Length of phrase

  unsigned long inFileSize = BitsInput(inBitFilePtr, 4 * 8);
  if (inFileSize == -1) {
    inFileSize = 0;
  }

  /* Initialise dictionary and pW*/
  InitialiseDict();
  previousCode = -1;

  while (inFileSize > 0) {
    currentCode = Input(inBitFilePtr);
    if (currentCode < nextNodeIdx) {  /* cW is in dictionary */
      phraseLen = DecodeString(0, currentCode);  // The length of cW
    } else {  /* When cW ¡Ý next node index, which means cW > current node index, cW isn't in dictionary */
      decStack[0] = character;  // The last character in stack of the last loop, i.e. 1st character of pW
      phraseLen = DecodeString(1, previousCode);  // The length of pW + 1
    }
    character = decStack[phraseLen - 1];  // The last character in the stack, i.e. the 1st character of pW or cW

    while (phraseLen > 0) {
      phraseLen--;
      fputc(decStack[phraseLen], outFilePtr);  // Output the decoded string (in inverted order of decStack)
      inFileSize--;
    }
    if (nextNodeIdx < DICT_CAPACITY) {  /* Add the new phrase into dictionary */
      NewDictEntry(previousCode, character);  // Add "pW + 1st character of cW" or "pW + 1st character of pW" into dictionary
    }
    previousCode = currentCode;  // Set pW = cW
  }
}

main.cpp

#include <iostream>
#include "declarations.h"
#include "BitIO.h"
using namespace std;

int main(int argc, char* argv[]) {
  FILE* fp;
  BITFILE* bf;

  if (argc < 4) {
    fprintf(stdout, "Usage: \n%s <options> <inFile> <outFile>\n", argv[0]);
    fprintf(stdout, "\t<options>: E for LZW encoding or D for LZW decoding.\n");
    fprintf(stdout, "\t<inFile>: name of input file.\n");
    fprintf(stdout, "\t<outFile>: name of output file.\n");
    return -1;
  }

  if ('E' == argv[1][0]) {  /* Do LZW encoding */
    /* Open the files */
    if (fopen_s(&fp, argv[2], "rb") != 0) {
      cout << "Failed to open \"" << argv[2] << "\"." << endl;
      exit(-1);
    }
    bf = OpenBitFileOutput(argv[3]);

    if ((fp != NULL) && (bf != NULL)) {
      LzwEncoding(fp, bf);
      //LZWEncode(fp, bf);
      fclose(fp);
      CloseBitFileOutput(bf);
      fprintf(stdout, "LZW encoding done.\n");
    }
  } else if ('D' == argv[1][0]) {  /* Do LZW decoding */
    /* Open the files */
    bf = OpenBitFileInput(argv[2]);
    if (fopen_s(&fp, argv[3], "wb") != 0) {
      cout << "Failed to open \"" << argv[2] << "\"." << endl;
      exit(-1);
    }

    if ((fp != NULL) && (bf != NULL)) {
      LzwDecoding(bf, fp);
      //LZWDecode(bf, fp);
      fclose(fp);
      CloseBitFileInput(bf);
      fprintf(stdout, "LZW decoding done.\n");
    }
  } else {
    fprintf(stderr, "Unsupported operation.\n");
  }
}

原始文件为string.txt，编码后的文件写为了二进制文件string.dat，解码后的文件为string_D.txt：

打开编解码后的文件：

实验三 LZW

LZW编解码算法

文章目录

编码原理

解码原理

代码实现

继续阅读

FZU 1978 Repair the brackets

UVA 10344- 23 out of 5

ZOJ 3935 2016

POJ 2115 C Looooops

HDU 5381 The sum of gcd

ZOJ 1104 Leaps Tall Buildings

ZOJ 3700 Ever Dream

HDU 2821 Pusher

ZOJ 1199 Point of Intersection

UVA 1401 Remember the Word

UVA 620 Cellular Structure

ZOJ 2748 Free Kick

CSU 1567 Reverse Rot

UVA 519 Puzzle (II)

swift资料合集

苹果审核 Guideline 2.1 - Information Needed