天天看點

利用viterbi算法計算出現機率最大字串

利用viterbi算法計算出現機率最大字串

很多人寫的viterbi算法過于依賴HMM,進行分詞、命名實體識别什麼的,因為反而對于這種單純計算字元串機率最大組合的程式較少:

MIN_FLOAT = -3.14E100

"""每一步包含的詞彙清單"""
step1 = ["two"]
step2 = ["of", "off", "on"]
step3 = ["the", "thew"]
step4 = ["people"]
status = [step1, step2, step3, step4]  #狀态集


"""狀态轉移機率"""
trans_probability = {"two": {"on": 0.1, "off": 0.2, "of": 0.6},
                    "of": {"the": 0.6, "thew": 0.8},
                    "off": {"the": 0.3, "thew": 0.5},
                    "on": {"the": 0.2, "thew": 0.52},
                    "the": {"people": 0.5},
                    "thew": {"people": 0.4}
                    }

init_state_p = [("two", 1.0)]


def viterbi(status, init_state_p, trans_p):
    V = [{}]
    path = {}
    for y, prob in init_state_p:
        V[0][y] = prob
        path[y] = [y]

    for t in range(1, len(status)):
        V.append({})
        newpath = {}
        for y in status[t]:
            (prob, state) = max([(V[t-1][y0] + trans_p[y0].get(y, MIN_FLOAT), y0) for y0 in status[t-1]])
            V[t][y] = prob
            newpath[y] = path[state] + [y]
        path = newpath

    (prob, state) = max((V[len(status)-1][y], y) for y in status[-1])
    for y in status[-1]:
        print("last candidate: ", (V[len(status)-1][y], path[y]))
    return (prob, path[state])

print(viterbi(status=status, init_state_p=init_state_p, trans_p=trans_probability))
           

繼續閱讀