關于bwt算法的原理可以看看這個部落格,講得很好 https://blog.csdn.net/blackjack_/article/details/73801003
本文是利用python寫的bwt算法的編碼和解碼,有興趣的同學可以看看。例子是banana這個字元串
def bwtencode():
global L, F
L = ''
F = ''
text = "banana#"
last_str = text
len_text = len(text)
rotate_list = []
rotate_list.append(last_str)
for i in range(len_text - 1):
last_str = last_str[-1] + last_str[-len_text:-1]
rotate_list.append(last_str)
sorted_list = sorted(rotate_list)
for eachstr in sorted_list:
L = L + eachstr[-1]
F = F + eachstr[0]
print(L)
def C(c):
#統計T中字典序小于“c”的所有字元個數
global L
num_Tc = 0
for eachchr in L:
if c > eachchr:
num_Tc += 1
return num_Tc - 1
def Occ(c,r):
#統計L中第r行之前出現字元c的數量
global L
row = -1
num_Lc = 0
if r == 0:
return 0
for eachchr in L:
row += 1
if eachchr == c:
num_Lc += 1
if row == r - 1:
break
return num_Lc
def bwtdecode():
#因為#的存在,就能确定原文中的最後一個字元肯定在L的首行(首字元)
global L
r = 0
T = ''
while L[r] != "#":
T = L[r] + T
c = L[r]
r = C(c) + Occ(c, r) + 1
print(T)
bwtencode()
bwtdecode()