""" python program to count words Jim's office, Oct 4 2011 # NOT DEBUGGED YET """ import string def only_lower(word): """ Return copy of word with only a - z characters. >>> only_lower('hello.') 'hello' """ lower_only = filter(lambda x: 'a' <= x 'z', list(word)) return string.join(lower_only, '') def count_words(words): counts = {} for word in words: counts[word] = counts.get(word, 0) + 1 return counts def count_pairs(words): count2 = {} for i in range(len(words)-1): pair = (words[i], words[i+1]) count2[pair] = count2.get(pair, 0) + 1 return count2 def count_n(words, n): ncount = {} for i in range(len(words-n): for j in range(words[i], words[i+n]): group = [] group.append(j) ncount[group] = ncount.get(group, 0) +1 return ncount def output(): print "Counted N-grams in " + filename + "." print "-----------------------------------" print "| N-gram | Count |" print "-----------------------------------" for key, value in counts.items(): print key + "|" + value + "\n" for key, value in count2.items(): print key + "|" + value + "\n" for key, value in ncount.items(): print key + "|" + value + "\n" def file_to_words(filename=None, file_content=None): """Read in a file, return a list of the words in it. >>> file_to_words(file_content="One two three..") ['one', 'two', 'three'] """ if filename: file = open(filename) text = file.read() # slurp all of text into one big string. elif file_content: text = file_content words = string.split(text) # split words on whitespace print words words = map(lambda x: x.lower(), words) print words words = map(only_lower, words) print words return words def main(): # put your top level "do stuff" here if __name__ == "__main__": import doctest doctest.testmod() #main() # uncomment me once things are working