HMM這一章比較簡單,把原理弄熟,看著公式,用numpy實作很簡單。
習題10.1
懶得手算,直接代碼:
import numpy as np
import pandas as pd
class MyHMM:
    """HMM evaluation (forward / backward algorithm) and Viterbi decoding.

    Parameters
    ----------
    p : np.ndarray, shape (N,)
        Initial state probability distribution.
    A : np.ndarray, shape (N, N)
        State transition probability matrix; A[i][j] = P(state j | state i).
    B : pd.DataFrame, shape (N, M)
        Emission probabilities; rows are states, columns are labelled by the
        observation symbols, so B[symbol] is the emission column for `symbol`.
    Ob : np.ndarray
        Observation sequence, given as column labels of B.
    method : str
        Which algorithm fit() runs: contains "forward" or "back".
    """

    def __init__(self, p, A, B, Ob, method="forward"):
        self.p = p            # initial state distribution
        self.A = A            # state transition matrix
        self.B = B            # emission probability DataFrame
        self.Ob = Ob          # observation sequence (labels into B's columns)
        self.method = method  # algorithm selector used by fit()

    def forward(self):
        """Forward algorithm: return P(O | lambda)."""
        # alpha_1(i) = pi_i * b_i(o_1); kept as a pandas Series.
        alpha = self.p * self.B[self.Ob[0]]
        for t in range(1, self.Ob.size):
            # alpha_{t+1}(i) = (sum_j alpha_t(j) a_ji) * b_i(o_{t+1})
            alpha = alpha.dot(self.A) * self.B[self.Ob[t]]
        return alpha.sum()

    def back(self):
        """Backward algorithm: return P(O | lambda)."""
        # beta_T(i) = 1 for every state i.
        beta = np.ones(self.p.size)
        for t in range(1, self.Ob.size):
            # beta_t(i) = sum_j a_ij * b_j(o_{t+1}) * beta_{t+1}(j)
            beta = self.A.dot(self.B[self.Ob[self.Ob.size - t]] * beta)
        return self.p.dot(self.B[self.Ob[0]] * beta)

    def fit(self):
        """Dispatch to the algorithm named by self.method and return P(O | lambda)."""
        if "forward" in self.method:
            print("前向算法")
            return self.forward()
        if "back" in self.method:
            print("後向算法")
            return self.back()
        # Original raised a bare ValueError; include the offending value.
        raise ValueError(f"unknown method: {self.method!r}")

    def best_way(self):
        """Viterbi decoding: return the most probable state path (1-based)."""
        index = []  # per-step backpointers: index[t][i] = best predecessor of i
        sigema = self.p * self.B[self.Ob[0]]  # delta_1(i)
        for t in range(1, self.Ob.size):
            # _sigema[i, j] = delta_{t-1}(j) * a_ji : score of reaching i from j
            _sigema = sigema.values * (self.A.T)
            col_index = np.argmax(_sigema, axis=1)
            row_index = np.arange(self.p.size)
            # delta_t(i) = max_j delta_{t-1}(j) a_ji * b_i(o_t)
            sigema = _sigema[row_index, col_index] * self.B[self.Ob[t]]
            index.append(dict(enumerate(col_index)))
        # Backtrack from the best final state through the stored pointers.
        best_index = []
        k = np.argmax(sigema)
        best_index.append(k)
        for i in range(len(index)):
            k = index[len(index) - 1 - i][k]
            best_index.append(k)
        # +1 because indices are 0-based but the book numbers states from 1.
        return np.array(best_index[::-1]) + 1
def main():
    """Run Exercise 10.1: evaluation via the backward algorithm, then Viterbi."""
    # Model parameters of the exercise (3 states, observations 紅/白).
    transition = np.array([
        [0.5, 0.2, 0.3],
        [0.3, 0.5, 0.2],
        [0.2, 0.3, 0.5],
    ])
    emission = pd.DataFrame(
        np.array([[0.5, 0.5],
                  [0.4, 0.6],
                  [0.7, 0.3]]),
        columns=["紅", "白"],
    )
    initial = np.array([0.2, 0.4, 0.4])
    observations = np.array(["紅", "白", "紅", "白"])

    model = MyHMM(initial, transition, emission, observations, method="back")
    prob = model.fit()
    print('P(O|入):', round(prob, 2))
    path = model.best_way()
    print('維特比求得的最優路徑為:', path)


if __name__ == "__main__":
    main()
習題10.2
import numpy as np
def fore_algorithm(A, B, p_i, o, T, N):
    """Forward algorithm.

    A: N x N transition matrix, B: N x M emission matrix, p_i: initial
    distribution, o: length-T integer observation sequence.
    Returns (P(O|lambda), alpha) where alpha is the full T x N table.
    """
    alpha = np.zeros((T, N))
    # Initialisation: alpha_1(i) = pi_i * b_i(o_1)
    first = o[0]
    for state in range(N):
        alpha[0][state] = p_i[state] * B[state][first]
    # Recursion: alpha_{t}(i) = (sum_j alpha_{t-1}(j) a_ji) * b_i(o_t)
    for t in range(1, T):
        obs = o[t]
        for i in range(N):
            total = sum(alpha[t - 1][j] * A[j][i] for j in range(N))
            alpha[t][i] = total * B[i][obs]
    # Termination: P(O|lambda) = sum_i alpha_T(i)
    return sum(alpha[T - 1][i] for i in range(N)), alpha
def back_algorithm(A, B, p_i, o, T, N):
    """Backward algorithm.

    Same arguments as fore_algorithm.
    Returns (P(O|lambda), beta) where beta is the full T x N table
    (beta_T(i) is initialised to 1).
    """
    beta = np.ones((T, N))
    # Recursion, walking t from T-2 down to 0:
    # beta_t(i) = sum_j a_ij * b_j(o_{t+1}) * beta_{t+1}(j)
    for t in range(T - 2, -1, -1):
        obs = int(o[t + 1])
        for i in range(N):
            beta[t][i] = sum(
                A[i][j] * B[j][obs] * beta[t + 1][j] for j in range(N)
            )
    # Termination: P(O|lambda) = sum_i pi_i * b_i(o_1) * beta_1(i)
    first = int(o[0])
    P = sum(p_i[i] * B[i][first] * beta[0][i] for i in range(N))
    return P, beta
if __name__ == "__main__":
T = 8
N = 3
A = [[0.5, 0.1, 0.4], [0.3, 0.5, 0.2], [0.2, 0.2, 0.6]]
B = [[0.5, 0.5], [0.4, 0.6], [0.7, 0.3]]
pi = [0.2, 0.3, 0.5]
O = ['紅', '白', '紅', '紅', '白', '紅', '白', '白']
o = np.zeros(T, np.int)
for i in range(T):
if O[i] == '白':
o[i] = 1
else:
o[i] = 0
PF, alpha = fore_algorithm(A, B, pi, o, T, N)
PB, beta = back_algorithm(A, B, pi, o, T, N)
print("PF:", PF, "PB:", PB)
#P(i_4=q_3|O,\lambda) = alpah_4(3)* beta_4(3)
P = alpha[4-1][3-1] * beta[4-1][3-1]
print("前向後向機率計算可得 P(i4=q3|O,lambda)=", P / PF)
習題10.3
解:習題10.1中有計算
習題10.4
習題10.5
。
。
。
。
。
。
。
HMM的應用請看:
李航《統計學習方法》第2版 第10章 HMM實作分詞(代碼實作)