這是我在學習字典壓縮算法時自己設計的,采用64K固定視窗對資料進行分塊壓縮,編碼時使用了變長偏移長度,變長比對長度,哈希字典等對算法進行了優化,壓縮率和壓縮速度均比較理想,很适合用于網絡傳輸中的資料實時壓縮。
算法提供的兩個外部調用函數:
// ---------------------------------------------------------------------------
// 使用LZ77壓縮算法對資料進行壓縮
// dst:壓縮輸出緩沖區(輸出緩沖區長度至少應保留原始資料長度+128K)
// src:原始資料
// len:原始資料長度
// level:壓縮等級(可選0 - 4,一般使用2)
// return 傳回壓縮後資料長度
// ---------------------------------------------------------------------------
int Lz77Compress(void *dst, void *src, int len, int level);
// ---------------------------------------------------------------------------
// 解壓LZ7算法壓縮過的資料
// dst:解壓輸出緩沖區
// src:壓縮資料
// len:壓縮資料長度
// return 傳回解壓後資料長度
// ---------------------------------------------------------------------------
int Lz77Decompress(void *dst, void *src, int len);
lz77.c
#ifdef WIN32
#include <windows.h>
#define malloc(s) HeapAlloc(GetProcessHeap(), 0, s)
#define free(p) HeapFree(GetProcessHeap(), 0, p)
#else
#include <stdlib.h>
#include <string.h>
#endif
#define MAXBITS 15
#define MINOFFSET 0x01
#define MINMATCH 0x03
#define MAXMATCH ((1 << 24) + MINMATCH)
#define MAXWND (1 << MAXBITS)
#define NIL 0xffff
#define M 3
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
typedef unsigned char UCHAR;
typedef unsigned short USHORT;
typedef unsigned long ULONG;
typedef struct _LZ77_MATCHINFO
{
ULONG len;
ULONG off;
} LZ77_MATCHINFO;
typedef struct _LZ77_RUNSTATE
{
ULONG wsize;
UCHAR *pwnd;
ULONG confine;
USHORT *head;
USHORT *prev;
ULONG nice;
} LZ77_RUNSTATE;
typedef struct _LZ77_IOSTATE
{
UCHAR *pbuf;
ULONG bytenum;
UCHAR bitnum;
UCHAR codelen;
} LZ77_INPUTS, LZ77_OUTPUTS;
// ---------------------------------------------------------------------------
// 計算用二進制表示指定數值至少需要多少位
// ---------------------------------------------------------------------------
static UCHAR log2(ULONG n)
{
UCHAR c, i;
if (n > 0xffff)
{
for (i = 16; n > ((ULONG)-1 >> (sizeof(ULONG) * 8 - i)); i++);
return i;
}
if (n & 0xff00)
{
if (n & 0xf000)
{
if (n & 0xc000)
{
if (n & 0x8000)
{
c = 16;
}
else
{
c = 15;
}
}
else
{
if (n & 0x2000)
{
c = 14;
}
else
{
c = 13;
}
}
}
else
{
if (n & 0x0c00)
{
if (n & 0x0800)
{
c = 12;
}
else
{
c = 11;
}
}
else
{
if (n & 0x0200)
{
c = 10;
}
else
{
c = 9;
}
}
}
}
else
{
if (n & 0x00f0)
{
if (n & 0x00c0)
{
if (n & 0x0080)
{
c = 8;
}
else
{
c = 7;
}
}
else
{
if (n & 0x0020)
{
c = 6;
}
else
{
c = 5;
}
}
}
else
{
if (n & 0x000c)
{
if (n & 0x0008)
{
c = 4;
}
else
{
c = 3;
}
}
else
{
if (n & 0x0002)
{
c = 2;
}
else
{
c = 1;
}
}
}
}
return c;
}
// ---------------------------------------------------------------------------
// 輸出指定長度的二進制位,最大長度為sizeof(ULONG)
// ---------------------------------------------------------------------------
static void PutBits(LZ77_OUTPUTS *out, ULONG v, int num)
{
UCHAR *s = out->pbuf + out->bytenum;
ULONG i = 0;
ULONG temp = v & ~(-1 << num);
do
{
s[i] &= ~(-1 << out->bitnum);
s[i] |= (UCHAR)(temp << out->bitnum);
if (8 - out->bitnum >= num)
break;
s[i + 1] = (UCHAR)(temp >> (8 - out->bitnum));
temp >>= 8;
} while ((++i << 3) < (ULONG)num);
out->bitnum += (UCHAR)num;
out->bytenum += out->bitnum >> 3;
out->bitnum &= 7;
}
// ---------------------------------------------------------------------------
// 擷取指定長度的二進制位,最大長度為sizeof(ULONG)
// ---------------------------------------------------------------------------
static ULONG GetBits(LZ77_INPUTS *in, int num)
{
UCHAR *s = in->pbuf + in->bytenum;
ULONG i = 0, v = 0;
do
{
v |= (s[i] >> in->bitnum) << (i << 3);
if (8 - in->bitnum >= num)
break;
v |= (s[i + 1] << (8 - in->bitnum)) << (i << 3);
} while ((++i << 3) < (ULONG)num);
in->bitnum += (UCHAR)num;
in->bytenum += in->bitnum >> 3;
in->bitnum &= 7;
return v & ~(-1 << num);
}
// ---------------------------------------------------------------------------
// 将指定位置開始的位元組串添加到字典中
// ---------------------------------------------------------------------------
static void insert(LZ77_RUNSTATE *rs, ULONG at, ULONG len)
{
ULONG ins_h, ins_t;
if (len == 1)
{
ins_h = *(USHORT *)(rs->pwnd + at);
rs->prev[at] = rs->head[ins_h];
rs->head[ins_h] = (USHORT)at;
return;
}
if ((at + len) < MAXWND)
{
ins_t = -1;
len += at--;
while (++at != len)
{
ins_h = *(USHORT *)(rs->pwnd + at);
if ((ins_t - rs->head[ins_h]) <= 2)
continue;
ins_t = at;
rs->prev[at] = rs->head[ins_h];
rs->head[ins_h] = (USHORT)at;
}
}
}
// ---------------------------------------------------------------------------
// 标志位定義:
// 長度:1,值:0,表示後面有一位元組未壓縮資料
// 長度:2,值:10,表示後面有一個比對(變長偏移+變長長度)
// 長度:3,值:110,表示後面有一個比對(7位偏移+1位長度,偏移為128時表示壓縮流結束)
// 長度:3,值:111,表示後面有多個位元組未壓縮資料
// ---------------------------------------------------------------------------
#define CHARBITS1 4
#define CHARBITS2 7
#define CHARBITS3 16
#define CHARNUMS0 7
#define CHARNUMS1 ((1 << CHARBITS1) - 1 + (CHARNUMS0 + 1) - 2)
#define CHARNUMS2 ((1 << CHARBITS2) - 1 + (CHARNUMS1 + 1))
#define CHARNUMS3 ((1 << CHARBITS3) - 1 + (CHARNUMS2 + 1))
// ---------------------------------------------------------------------------
// 以壓縮格式輸出指定長度的位元組串并針對格式長度進行優化
// ---------------------------------------------------------------------------
static void outcodec(LZ77_OUTPUTS *out, UCHAR *buffer, ULONG length)
{
ULONG i, temp;
if (length <= CHARNUMS0)
{
for (i = 0; i < length; i++)
{
// 逐位元組輸出,額外輸出位(length)
temp = 0x00 | (buffer[i] << 1);
PutBits(out, temp, 1 + 8); // 标志位(1),資料位(8)
}
}
else
{
if (length <= CHARNUMS1)
{
// 輸出(0-13)表示有(0-13)+8個連續位元組未壓縮,額外輸出位(7)
temp = 0x07 | ((length - CHARNUMS0 - 1) << 3);
PutBits(out, temp, 3 + CHARBITS1); // 标志位(3),資料位(4)
}
else if (length <= CHARNUMS1 * 2)
{
// 優化輸出,最大額外輸出位(14)
outcodec(out, buffer, CHARNUMS1);
outcodec(out, buffer + CHARNUMS1, length - CHARNUMS1);
return;
}
else if (length <= CHARNUMS2)
{
// 輸出(14)表示未壓縮位元組數由後面7位決定,額外輸出位(14)
temp = 0x07 | (14 << 3);
PutBits(out, temp, 3 + CHARBITS1); // 标志位(3),資料位(4)
temp = length - CHARNUMS1 - 1;
PutBits(out, temp, CHARBITS2); // 資料位(7)
}
else if (length <= CHARNUMS2 + CHARNUMS1)
{
// 優化輸出,最大額外輸出位(21)
outcodec(out, buffer, CHARNUMS2);
outcodec(out, buffer + CHARNUMS2, length - CHARNUMS2);
return;
}
else
{
// 輸出(15)表示未壓縮位元組數由後面兩位元組決定,額外輸出位(23)
temp = 0x07 | (15 << 3);
PutBits(out, temp, 3 + CHARBITS1); // 标志位(3),資料位(4)
// 輸出(0-65535)+18個連續位元組未壓縮
temp = length - CHARNUMS2 - 1;
PutBits(out, temp, CHARBITS3); // 資料位(16)
}
{
UCHAR *s = out->pbuf + out->bytenum;
UCHAR x = out->bitnum;
temp = buffer[0];
PutBits(out, temp, 8 - x);
// 拷貝連續的未壓縮位元組
for (i = 1; i < length; i++)
{
s[i] = buffer[i];
}
temp >>= 8 - x;
out->bytenum += length - 1;
PutBits(out, temp, x);
}
}
}
// ---------------------------------------------------------------------------
// 以壓縮格式輸出位元組串比對資訊并針對資訊長度進行優化
// ---------------------------------------------------------------------------
static void outcodex(LZ77_OUTPUTS *out, ULONG offset, ULONG length)
{
UCHAR i = 0;
ULONG temp, m, n;
switch (length)
{
// case 1:
// temp = 0x03 | ((offset - MINOFFSET) << 3);
// PutBits(out, temp, 3 + 4); // 标志位(3),資料位(4)
// return;
case 3:
if (offset > 127)
break;
case 2:
temp = 0x03 | ((offset - MINOFFSET) << 3); // 短比對優化
temp |= (length - 2) << (3 + 7);
PutBits(out, temp, 3 + 7 + 1); // 标志位(3),資料位(1+7)
return;
}
// 寫入變長比對偏移
temp = 0x01 | ((offset - MINOFFSET) << 2);
PutBits(out, temp, 2 + out->codelen); // 标志位(2),資料位(log2(資料))
length -= MINMATCH;
m = 1 << (M - 1);
// 計算比對長度最少占用多少位
do
{
n = ~(-1 << i++) << M;
m <<= 1;
} while ((m + n) <= length);
// 寫入比對長度位數
temp = ~(-1 << (i - 1));
PutBits(out, temp, i);
// 寫入變長比對長度
temp = length - n;
PutBits(out, temp, i + 3 - 1);
}
// ---------------------------------------------------------------------------
// 從目前字典中查找一個比對位元組串,成功傳回1同時設定比對位元組串所在位置和長度
// ---------------------------------------------------------------------------
static int match(LZ77_RUNSTATE *rs, ULONG strat, LZ77_MATCHINFO *mi)
{
UCHAR *src, *s, *d, *c, *t;
USHORT index, *prev;
ULONG i, m = 0, n, nice, flag = 0;
src = rs->pwnd;
index = rs->head[*(USHORT *)(src + strat)]; // 從字典中取出比對資訊
if (NIL != index)
{
c = src + MIN(rs->confine, MAXMATCH); // 限制最大比對長度
t = src + strat;
m = MINMATCH - 1;
prev = rs->prev;
nice = rs->nice;
do
{
// 開始尋找相同的位元組串
s = t;
d = src + index;
// 優化速度,一次循環比較8個位元組
while (s < (c - 8)
&& *(USHORT *)(s += 2) == *(USHORT *)(d += 2)
&& *(USHORT *)(s += 2) == *(USHORT *)(d += 2)
&& *(USHORT *)(s += 2) == *(USHORT *)(d += 2)
&& *(USHORT *)(s += 2) == *(USHORT *)(d += 2));
while (s < c && *s == *d)
{
s++;
d++;
}
if (s >= c) // 達到限制長度了?
{
m = c - t; // 如果是便是能找到的最好比對了
n = index;
break;
}
i = s - t;
if (m < i) // 是否達到最小長度要求?
{
m = i;
n = index; // 記錄下找到的資訊
if (m > nice)
flag = 1; // 如果達到預設的最優比對長度則設定該标志,然後再次查找最優比對
}
else if (flag) // 如果已達到過預設的最優比對長度則可以退出查找了
break;
index = prev[index]; // 取出字典中的下一次比對記錄
} while (NIL != index);
}
if (MINMATCH <= m) // 是否找到合适的比對資訊?
{
mi->len = m;
mi->off = strat - n;
return 1;
}
else
{
if (strat + 2 <= rs->confine) // 檢查所剩位元組數是否足夠2個位元組
{
index = rs->head[*(USHORT *)(src + strat)]; // 找不到合适的比對時嘗試查找2個位元組的短比對
if (strat - index <= 127) // 短比對要求所在位置與目前位置之間不能超過127個位元組
{
mi->len = 2;
mi->off = strat - index;
return 1;
}
}
// 從前面16位元組中查找1位元組比對
}
return 0;
}
// ---------------------------------------------------------------------------
// 壓縮算法主循環,将輸入資料壓縮成一個獨立的塊
// ---------------------------------------------------------------------------
static ULONG deflate(LZ77_RUNSTATE rs, UCHAR *dst, ULONG *inbytes)
{
LZ77_OUTPUTS out, prev_out;
ULONG strstart = 0, prev_start = 0, count = 0, prev_count = 0;
LZ77_MATCHINFO mi;
out.pbuf = dst;
out.bytenum = 0;
out.bitnum = 0;
out.codelen = 1;
prev_out = out;
memset(rs.head, NIL, sizeof(USHORT) * 65536);
do
{
if (match(&rs, strstart, &mi))
{
if (count > 0)
{
outcodec(&out, rs.pwnd + strstart - count, count); // 輸出無比對位元組
count = 0;
}
if ((ULONG)(1 << out.codelen) <= (ULONG)(strstart - MINOFFSET))
out.codelen = log2(strstart - MINOFFSET);
insert(&rs, strstart, mi.len); // 更新字典記錄
outcodex(&out, mi.off, mi.len); // 輸出壓縮代碼
strstart += mi.len;
}
else
{
insert(&rs, strstart, 1); // 更新字典記錄
count++; // 增加無比對位元組數量
strstart += 1;
}
if (strstart - prev_start >= 0x1000) // 跟蹤壓縮率變化
{
// 壓縮後的資料是否比原始資料大?
if (strstart - prev_start + 4 < out.bytenum - prev_out.bytenum)
{
// 不壓縮直接輸出原始資料
out = prev_out;
outcodec(&out, rs.pwnd + prev_start - prev_count, strstart - prev_start + prev_count);
count = 0;
}
prev_out = out;
prev_start = strstart;
prev_count = count;
}
} while (strstart < rs.wsize);
if (count > 0)
{
outcodec(&out, rs.pwnd + strstart - count, count); // 輸出無比對位元組
}
// 壓縮後的資料是否比原始資料大?
if (strstart - prev_start + 4 < out.bytenum - prev_out.bytenum)
{
// 不壓縮直接輸出原始資料
out = prev_out;
outcodec(&out, rs.pwnd + prev_start - prev_count, strstart - prev_start + prev_count);
}
// 壓縮後的全部資料是否比原始資料大?
if (strstart + 4 < out.bytenum)
{
// 直接輸出全部資料
out.pbuf = dst;
out.bytenum = 0;
out.bitnum = 0;
out.codelen = 1;
outcodec(&out, rs.pwnd, strstart);
}
outcodex(&out, 128, 2); // 輸出壓縮流結束标記
if (out.bitnum)
out.bytenum++;
*inbytes = strstart;
return out.bytenum;
}
// ---------------------------------------------------------------------------
// 解壓算法,每次處理一個壓縮塊
// ---------------------------------------------------------------------------
static ULONG inflate(UCHAR *src, UCHAR *dst, ULONG len, ULONG *inbytes)
{
ULONG offset, length;
UCHAR i, t;
UCHAR *out, *s;
LZ77_INPUTS in;
in.pbuf = src;
in.bytenum = 0;
in.bitnum = 0;
in.codelen = 1;
out = dst;
while (in.bytenum < len)
{
if (!GetBits(&in, 1)) // 0表示有一個未壓縮的位元組
{
*out++ = (UCHAR)GetBits(&in, 8); // 輸出一個未壓縮位元組
}
else
{
if (!GetBits(&in, 1)) // 10表示有一個長比對
{
if ((ULONG)(1 << in.codelen) <= (ULONG)(out - dst - MINOFFSET))
in.codelen = log2(out - dst - MINOFFSET);
offset = GetBits(&in, in.codelen) + MINOFFSET;
for (i = 0; GetBits(&in, 1); i++); // 計算比對長度位數
length = GetBits(&in, i + M);
length += (~(-1 << i) << M) + MINMATCH; // 計算比對長度
do
{
*out++ = *(out - offset);
} while (--length);
}
else
{
if (!GetBits(&in, 1)) // 110表示有一個短比對
{
offset = GetBits(&in, 7) + MINOFFSET;
length = GetBits(&in, 1) + 2;
if (offset == 128) // offset值128為壓縮流結束标志
break;
do
{
*out++ = *(out - offset); // 解壓短比對
} while (--length);
}
else // 111表示有一個未壓縮的位元組串
{
length = GetBits(&in, CHARBITS1);
switch (length)
{
case 14:
length = GetBits(&in, CHARBITS2); // 長度為14時表示實際長度用後面的7位記錄
length += CHARNUMS1 + 1;
break;
case 15:
length = GetBits(&in, CHARBITS3); // 長度為15時表示實際長度用後面的16位記錄
length += CHARNUMS2 + 1;
break;
default:
length += CHARNUMS0 + 1;
break;
}
s = in.pbuf + in.bytenum;
offset = 1;
i = in.bitnum;
do
{
out[offset] = s[offset]; // 還原位元組串
} while (++offset < length);
t = (UCHAR)GetBits(&in, 8 - i);
in.bytenum += length - 1;
t |= (UCHAR)(GetBits(&in, i) << (8 - i));
*out = t;
out += length;
}
}
}
}
*inbytes = in.bytenum;
*inbytes += in.bitnum == 0 ? 0 : 1;
return out - dst;
}
// ---------------------------------------------------------------------------
// LZ77壓縮算法
// dst:壓縮輸出緩沖區
// src:原始資料
// len:原始資料長度
// level:壓縮等級(0 - 4)
// return 壓縮後資料大小
// ---------------------------------------------------------------------------
int Lz77Compress(void *dst, void *src, int len, int level)
{
LZ77_RUNSTATE rs;
int m, n, count = 0;
if (len <= 0)
return 0;
if (!src || !dst)
return -1;
// 設定壓縮等級,等級越高引擎會花越多時間去查找一個更長的比對
switch (level)
{
case 0:
rs.nice = 3;
break;
case 1:
rs.nice = 30;
break;
case 2:
rs.nice = 70;
break;
case 3:
rs.nice = 150;
break;
case 4:
rs.nice = -1;
break;
}
// 為壓縮字典配置設定記憶體空間
rs.prev = (USHORT *)malloc(sizeof(USHORT) * 65536);
rs.head = (USHORT *)malloc(sizeof(USHORT) * 65536);
if (!rs.prev || !rs.head)
{
free(rs.head);
free(rs.prev);
return -1;
}
do
{
rs.wsize = MIN(len, MAXWND);
rs.pwnd = src;
// rs.confine = len;
rs.confine = MIN(len, MAXWND);
n = deflate(rs, dst, &m);
len -= m;
(UCHAR *)src += m;
(UCHAR *)dst += n;
count += n;
} while (len > 0);
free(rs.head);
free(rs.prev);
return count;
}
// ---------------------------------------------------------------------------
// LZ77解壓算法
// dst:解壓輸出緩沖區
// src:壓縮資料
// len:壓縮資料長度
// return 解壓後資料大小
// ---------------------------------------------------------------------------
int Lz77Decompress(void *dst, void *src, int len)
{
int c = 0, i, o, a = 0;
if (len <= 0)
return 0;
if (!src || !dst)
return -1;
do
{
o = inflate(src, dst, len, &i);
(UCHAR *)src += i;
(UCHAR *)dst += o;
len -= i;
c += o;
} while (len > 0);
return c;
}
測試代碼:
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
//---------------------------------------------------------------------------
extern int Lz77Compress(void *dst, void *src, int len, int level);
extern int Lz77Decompress(void *dst, void *src, int len);
//---------------------------------------------------------------------------
int main(int argc, char *argv[])
{
FILE *input, *output;
BYTE *inbuf, *outbuf;
DWORD insize, outsize, len, a = 0;
SYSTEMTIME t1, t2;
DWORD milliS;
if (argc < 4)
return 0;
input = fopen(argv[2], "rb");
output = fopen(argv[3], "wb");
if (!input || !output)
return 0;
fseek(input, 0, SEEK_END);
insize = ftell(input);
fseek(input, 0, SEEK_SET);
inbuf = (BYTE *)malloc(insize);
GetSystemTime(&t1);
if (*argv[1] == 'c')
{
outbuf = (BYTE *)malloc(insize * 2);
fread(inbuf, insize, 1, input);
fwrite(&insize, sizeof(DWORD), 1, output);
len = Lz77Compress(outbuf, inbuf, insize, 2);
}
else if (*argv[1] == 'd')
{
fread(&outsize, sizeof(DWORD), 1, input);
insize -= sizeof(DWORD);
outbuf = (BYTE *)malloc(outsize * 2);
//fseek(input, sizeof(DWORD), SEEK_SET);
fread(inbuf, insize, 1, input);
Lz77Decompress(outbuf, inbuf, insize);
len = outsize;
}
else
return 0;
GetSystemTime(&t2);
milliS = ((t2.wHour - t1.wHour)*3600 + (t2.wMinute-t1.wMinute)*60 + (t2.wSecond-t1.wSecond)) * 1000 + (t2.wMilliseconds-t1.wMilliseconds);
printf("Totally %ld milliseconds elapsed!/n/n", milliS);
fwrite(outbuf, len, 1, output);
fclose(output);
fclose(input);
free(inbuf);
free(outbuf);
return 0;
}
//---------------------------------------------------------------------------