"""
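Programming Styles

Spreadsheet (style)

Term-frequency counter: reads the text file named by the first
command-line argument, ignores the words listed in 'stopword.txt'
(comma-separated), and prints up to 25 of the most frequent remaining
words together with their counts.

Barbora Houfkova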

"""
#!/usr/bin/env python
import sys, re, itertools, operator

#
# The columns. Each column is a two-element list [data, formula]:
# slot 0 holds the column's current data and slot 1 holds a
# zero-argument callable that recomputes that data (None for the pure
# input columns). Formulas read other columns through slot 0 at call
# time, so they must be evaluated in the order given by all_columns.
#

# Input column: every word of the text, in order of appearance.
all_words = [(), None]
# Input column: the words to be ignored.
stop_words = [(), None]

# all_words with every stop word replaced by the empty string.
# Built as a real list (not a lazy map object) because the formulas
# below traverse it more than once.
non_stop_words = [(), lambda:
                  [w if w not in stop_words[0] else ''
                   for w in all_words[0]]]

# The distinct non-empty entries of non_stop_words.
unique_words = [(), lambda:
                set(w for w in non_stop_words[0] if w != '')]

# For each unique word, how many times it occurs in non_stop_words.
# unique_words is iterated here in the same order as in sorted_data
# (the set is not modified in between), so words and counts line up.
counts = [(), lambda:
          [non_stop_words[0].count(w) for w in unique_words[0]]]

# (word, count) pairs ordered by count, highest first.
sorted_data = [(), lambda: sorted(zip(list(unique_words[0]), counts[0]),
                                  key=operator.itemgetter(1),
                                  reverse=True)]

# The entire spreadsheet, listed in dependency order.
all_columns = [all_words, stop_words, non_stop_words,
               unique_words, counts, sorted_data]

# The active procedure over the columns of data.
# Call this every time the input data changes, or periodically.
#
def update():
    """Recompute every formula column of the spreadsheet, in order.

    Walks the module-level ``all_columns`` list and, for each column
    whose second element (the formula) is not None, calls the formula
    and stores the result as the column's first element (its data).
    Columns whose formula is None are left untouched.
    """
    # No ``global`` statement is needed: all_columns is only read here
    # and the individual columns are mutated in place.
    for column in all_columns:
        formula = column[1]
        # Identity test: only the None placeholder marks "no formula".
        if formula is not None:
            column[0] = formula()


# Load the fixed data into the first 2 columns.
# The context managers close each file as soon as it has been read.
with open(sys.argv[1]) as text_file:
    # any run of at least two lowercase letters counts as a word
    all_words[0] = re.findall(r'[a-z]{2,}', text_file.read().lower())
with open('stopword.txt') as stop_file:
    # Comma-separated list. Each entry is stripped so that a trailing
    # newline or a space after a comma cannot keep a stop word from
    # matching the (purely alphabetic) words extracted above.
    stop_words[0] = set(entry.strip()
                        for entry in stop_file.read().split(','))

# Update the columns with formulas
update()

# Print up to 25 of the most frequent words as "word - count".
# Passing one pre-formatted string keeps this line valid both as a
# Python 2 print statement and as a Python 3 print() call.
for (w, c) in sorted_data[0][:25]:
    print('%s - %s' % (w, c))