"""
A program to demonstrate Hidden Markov Models in Natural Language Processing

Use of dictionary of probabilities from Wikipedia article on HMMs.

Elias Zeidan | October 2011 | GPL
"""

#List tuples of States and Outputs
states = ('1', '2', '3')
outputs = ('a', 'b', 'c', 'd', 'e')

#Probabilities for first 'visible' node
start_prob = {'1' : 0.4, '2' : 0.5, '3' : 0.1}

#Probabilities for node transitions
transition_prob = {
   '1' : {'1' : 0.2, '2' : 0.5, '3' : 0.3},
   '2' : {'1' : 0.4, '2' : 0, '3': 0.6},
   '3' : {'1' : 0.5, '2' : 0.2, '3' : 0.3}
}

#Output probabilities from each node
emission_prob = {
   '1' : {'a': 0.5, 'b' : 0.1, 'c' : 0.3, 'd' : 0.1, 'e' : 0},
   '2' : {'a': 0.1, 'b' : 0.2, 'c' : 0.3, 'd' : 0.2, 'e' : 0.2},
   '3' : {'a': 0.2, 'b' : 0.3, 'c' : 0.1, 'd' : 0, 'e' : 0.3},
}

# Helps visualize the steps of Viterbi.
# code adapted from [url]
def print_dptable(ProbabilityTable, outputs):
    print "    ",
    for i in range(len(ProbabilityTable)): print "%7s" % ("%s" % outputs[i]),
    print
 
    for y in ProbabilityTable[0].keys():
        print "%.5s: " % y,
        for t in range(len(ProbabilityTable)):
            print "%.7s" % ("%f" % ProbabilityTable[t][y]),
        print
 
def viterbi(obs=outputs, states=states, start_p=start_prob, trans_p=transition_prob, emit_p=emission_prob):
   
   # Default arguments for testing purposes
    
    ProbabilityTable = [{}]
    path = {}
 
    # Initialize base cases (t == 0)
    for y in states:
        ProbabilityTable[0][y] = start_prob[y] * emit_p[y][obs[0]]
        path[y] = [y]
 
    # Run Viterbi for t > 0
    for t in range(1,len(obs)):
        ProbabilityTable.append({})
        newpath = {}
 
        for y in states:
            (prob, state) = max([(ProbabilityTable[t-1][y0] * trans_p[y0][y] * emit_p[y][obs[t]], y0) for y0 in states])
            ProbabilityTable[t][y] = prob
            newpath[y] = path[state] + [y]
#  print path     #For me to see what was going on here
        # Don't need to remember the old paths
        path = newpath
 
    print_dptable(ProbabilityTable, outputs)
    (prob, state) = max([(ProbabilityTable[len(obs) - 1][y], y) for y in states])
    print (prob, path[state])
    
def main():
   viterbi()
   
if __name__ == "__main__":
   main()

syntax highlighted by Code2HTML, v. 0.93pm6