#! /usr/bin/env python """ """ # Some very simple test data: two words in English and French example_model1_align = {"older": {"aine": 0.5, "frere": 0.5}, "brother": {"aine": 0.5, "frere": 0.5} } correct = {} # Step 1 def assign_correct(conds): """ Picks a correct translation for the Expectation-Maximization (EM) algorithm For the toy example, I'm going to tell it which one it is. >>> assign_correct(example_model1_align) {'brother': {'frere': 0.5}} """ # initialize dict for "correct" alignment correct[conds.keys()[0]] = {} correct[conds.keys()[0]][conds.values()[0].keys()[1]] = conds.values()[0].values()[0] return correct # Step 2 def make_order_conds(conds): """ Returns a dict of probability p(a, f|e) >>> make_order_conds(example_model1_align) {'brother': {'aine': 0.25, 'frere': 0.25}, 'older': {'aine': 0.25, 'frere': 0.25}} """ oc1 = {} for cond in conds: oc1[cond] = {} for key in conds[cond]: for i in range(len(conds[cond].values())-1): oc1[cond][key] = oc1.get(key, conds[cond].values()[i]*conds[cond].values()[i+1]) return oc1 # Step 3 def normalize(ordered_conds, correct=correct): """ Normalizes the dict of p(a, f|e) >>> normalize(make_order_conds(example_model1_align)) {'brother': {'aine': 0.5, 'frere': 0.5}, 'older': {'aine': 0.5, 'frere': 0.5}} """ denom = 0 for i in range(len(ordered_conds.values()[0].values())): denom += ordered_conds.values()[0].values()[i] #print denom # test case should = 0.5 norm = {} for eng in ordered_conds: norm[eng] = {} for fre in ordered_conds[eng]: for i in range(len(ordered_conds[eng].values())): norm[eng][fre] = ordered_conds[eng].values()[i]/denom # print norm for eng in correct: for fre in correct[eng]: correct[eng][fre] = correct[eng][fre]/denom #print correct return norm # Step 4 def collect_fracts(norm, correct=correct): """ Collects fractional counts. >>> collect_fracts(normalize(make_order_conds(example_model1_align))) P("aine" | "brother") = 0.50 P("frere" | "brother") = 0.50 P("aine" | "older") = 0.50 P("frere" | "older") = 0.50 {'brother': {'aine': 0.5, 'frere': 1.5}, 'older': {'aine': 0.5, 'frere': 0.5}} """ fracts = {} for eng in norm.keys(): fracts[eng] = {} for fre in norm.values()[0].keys(): for i in range(len(norm.values()[0].values())-1): fracts[eng][fre] = norm.values()[0].values()[i] print "P(\"%s\" | \"%s\") = %.2f" %(fre, eng, norm[eng].values()[i]) cor_eng = correct.keys()[0] for i in range(len(correct.values()[0].values())): cor_fre = correct.values()[0].values()[i] # print cor_fre fracts[cor_eng][correct.values()[0].keys()[0]] += cor_fre return fracts # Step 5 def renormalize(fracts): """ Normalize fractional counts to get revised parameter values. """ denom1 = 0 denom2 = 0 norm = {} for i in range(len(fracts.values()[0].values())): denom1 += fracts.values()[0].values()[i] print denom1 for j in range(len(fracts.values()[1].values())): denom2 += fracts.values()[1].values()[j] print denom2 for eng in fracts: norm[eng] = {} for fre in fracts[fracts.keys()[0]]: for i in range(len(fracts[fracts.keys()[0]].values())): norm[fracts.keys()[0]][fre] = fracts[fracts.keys()[0]].values()[i]/denom1 for fre in fracts[fracts.keys()[1]]: for j in range(len(fracts[fracts.keys()[1]].values())): norm[fracts.keys()[1]][fre] = fracts[fracts.keys()[1]].values()[j]/denom2 return norm def test(): correct = assign_correct(example_model1_align) conds = example_model1_align ordered = make_order_conds(conds) for i in range(10): norm = normalize(ordered) fracts = collect_fracts(norm) norm1 = renormalize(fracts) ordered = norm1 return conds if __name__ == "__main__": from doctest import testmod testmod() test()