#!/usr/bin/env python
"""
Finite State Transducer for the Soundex algorithm
Elias Zeidan | Sept 2011 | GPL

Prompts for a name, builds an FST whose arcs are derived from the letters
of that name, runs the name through the FST and prints the transducer's
output symbols joined with single spaces.
"""

# import needed modules
from nltk_contrib.fst import fst
import string
import sys

# Name to be converted using Soundex, read interactively from the user.
# NOTE(review): an earlier comment here said the name is "sliced to 4
# characters", but no slicing is applied anywhere in this script.
name = raw_input("Input name: ")

# lists of conversions
# Letters that are consumed without producing any output symbol.
vlhwy = ['a', 'e', 'h', 'i', 'o', 'u', 'w', 'y']
# Consonant groups; group qN is emitted as the digit 'N'.
q1 = ['b', 'f', 'p', 'v']
q2 = ['c', 'g', 'j', 'k', 'q', 's', 'x', 'z']
q3 = ['d', 't']
q4 = ['l']
q5 = ['m', 'n']
q6 = ['r']
# Separator placed between output symbols when printing.
out = ' '

# (letters, destination state, emitted digit) for the arcs leaving state '1'.
digit_arcs = [
    (q1, '2', '1'),
    (q2, '3', '2'),
    (q3, '4', '3'),
    (q4, '5', '4'),
    (q5, '6', '5'),
    (q6, '7', '6'),
]

# initialize FST
f = fst.FST('soundex')

# FST states: '0' is the start state and '8' the accepting state.
# NOTE(review): states '2'-'7' only ever receive arcs in this script; no
# arc leaves them -- confirm whether further arcs were intended.
for state_label in [str(number) for number in range(9)]:
    f.add_state(state_label)
f.initial_state = '0'
# set_final is a method; it has to be called (not assigned to) for state
# '8' to actually be marked as final.
f.set_final('8')

# Arcs leaving the start state.  Soundex keeps the first letter of the
# name, so a letter equal to the name's first letter always takes the
# self-loop that echoes it; other vowel-like letters move to state '1'
# and produce no output.
# NOTE(review): assumes the intent was to exempt the first letter of the
# entered name (rather than a command-line argument) -- confirm.
first_letter = name[:1]
for letter in name:
    if letter in vlhwy and letter != first_letter:
        f.add_arc('0', '1', letter, ())
    else:
        f.add_arc('0', '0', letter, letter)

# Arcs leaving state '1': consonants emit their group digit, every other
# letter goes to the final state '8' without output.
for letter in name:
    for letters, destination, digit in digit_arcs:
        if letter in letters:
            f.add_arc('1', destination, letter, digit)
            break
    else:
        f.add_arc('1', '8', letter, ())

# Parenthesised form prints the same text under Python 2 and is also valid
# syntax for Python 3.
print(out.join(f.transduce(name)))