""" A program to demonstrate Hidden Markov Models in Natural Language Processing Use of dictionary of probabilities from Wikipedia article on HMMs. Elias Zeidan | October 2011 | GPL """ #List tuples of States and Outputs states = ('1', '2', '3') outputs = ('a', 'b', 'c', 'd', 'e') #Probabilities for first 'visible' node start_prob = {'1' : 0.4, '2' : 0.5, '3' : 0.1} #Probabilities for node transitions transition_prob = { '1' : {'1' : 0.2, '2' : 0.5, '3' : 0.3}, '2' : {'1' : 0.4, '2' : 0, '3': 0.6}, '3' : {'1' : 0.5, '2' : 0.2, '3' : 0.3} } #Output probabilities from each node emission_prob = { '1' : {'a': 0.5, 'b' : 0.1, 'c' : 0.3, 'd' : 0.1, 'e' : 0}, '2' : {'a': 0.1, 'b' : 0.2, 'c' : 0.3, 'd' : 0.2, 'e' : 0.2}, '3' : {'a': 0.2, 'b' : 0.3, 'c' : 0.1, 'd' : 0, 'e' : 0.3}, } # Helps visualize the steps of Viterbi. # code adapted from [url] def print_dptable(ProbabilityTable, outputs): print " ", for i in range(len(ProbabilityTable)): print "%7s" % ("%s" % outputs[i]), print for y in ProbabilityTable[0].keys(): print "%.5s: " % y, for t in range(len(ProbabilityTable)): print "%.7s" % ("%f" % ProbabilityTable[t][y]), print def viterbi(obs=outputs, states=states, start_p=start_prob, trans_p=transition_prob, emit_p=emission_prob): # Default arguments for testing purposes ProbabilityTable = [{}] path = {} # Initialize base cases (t == 0) for y in states: ProbabilityTable[0][y] = start_prob[y] * emit_p[y][obs[0]] path[y] = [y] # Run Viterbi for t > 0 for t in range(1,len(obs)): ProbabilityTable.append({}) newpath = {} for y in states: (prob, state) = max([(ProbabilityTable[t-1][y0] * trans_p[y0][y] * emit_p[y][obs[t]], y0) for y0 in states]) ProbabilityTable[t][y] = prob newpath[y] = path[state] + [y] # print path #For me to see what was going on here # Don't need to remember the old paths path = newpath print_dptable(ProbabilityTable, outputs) (prob, state) = max([(ProbabilityTable[len(obs) - 1][y], y) for y in states]) print (prob, path[state]) def main(): viterbi() if __name__ == "__main__": main()