#!/usr/bin/env python
"""
Programming Styles
Spreadsheet (style)
Barbora Houfkova

Term-frequency counter written like a tiny spreadsheet: every column is a
two-element list ``[data, formula]``.  ``update()`` re-evaluates each formula
in column order, and the script prints the 25 most frequent non-stop words
of the text file named on the command line.
"""
import sys
import re
import itertools
import operator

#
# The columns. Each column is a data element and a formula.
# The first 2 columns are the input data, so no formulas.
#
all_words = [(), None]   # input column: every word of the text, no formula
stop_words = [(), None]  # input column: the words to ignore, no formula

# Same length as all_words: a word is kept unchanged unless it is a stop
# word, in which case it is replaced by the empty string.
# Built as a real list (not a lazy ``map``) because two later columns read
# it, and a Python 3 ``map`` object can be consumed only once.
non_stop_words = [(), lambda: [w if w not in stop_words[0] else ''
                               for w in all_words[0]]]

# The distinct non-empty entries of non_stop_words (the set drops duplicates).
unique_words = [(), lambda: {w for w in non_stop_words[0] if w != ''}]

# For each unique word, how many times it occurs in non_stop_words.
# Iterates unique_words in the same order that sorted_data zips it in,
# so counts line up with their words.
counts = [(), lambda: [non_stop_words[0].count(w) for w in unique_words[0]]]

# (word, count) pairs ordered by count; reverse=True gives descending order.
sorted_data = [(), lambda: sorted(zip(list(unique_words[0]), counts[0]),
                                  key=operator.itemgetter(1), reverse=True)]

# The entire spreadsheet. Order matters: each formula reads only columns
# that appear earlier in this list.
all_columns = [all_words, stop_words, non_stop_words,
               unique_words, counts, sorted_data]


#
# The active procedure over the columns of data.
# Call this every time the input data changes, or periodically.
#
def update():
    """Recompute every formula column from the current input columns.

    Walks ``all_columns`` in order and, for each column that has a formula
    (its second element is not ``None``), stores the formula's result as the
    column's data (its first element).  Input columns are left untouched.
    """
    for column in all_columns:
        if column[1] is not None:
            column[0] = column[1]()


def _read_text(path):
    """Return the whole content of the file at *path*, closing it afterwards."""
    with open(path) as handle:
        return handle.read()


def main():
    """Load the input columns, recompute the sheet and print the top 25.

    Reads the text file named by ``sys.argv[1]`` and the comma-separated
    stop-word list in ``stopword.txt``; raises ``IndexError`` when no file
    name is given on the command line.
    """
    # Load the fixed data into the first 2 columns.
    # A word is any run of at least two lowercase letters.
    all_words[0] = re.findall('[a-z]{2,}', _read_text(sys.argv[1]).lower())
    # Surrounding whitespace is stripped so that, e.g., a newline at the end
    # of the file does not stick to the last stop word and stop it matching.
    stop_words[0] = set(word.strip()
                        for word in _read_text('stopword.txt').split(','))

    # Update the columns with formulas
    update()

    # One "word - count" line per entry; this form prints the same text on
    # Python 2 and Python 3.
    for (w, c) in sorted_data[0][:25]:
        print('%s - %s' % (w, c))


# Run the report only when executed as a script, so the columns and
# update() can be imported without touching sys.argv or the file system.
if __name__ == '__main__':
    main()