"""
 convert_windham1850.py

 convert vernon_1850_edited.txt to .csv

 Jim Mahoney | Jan 2020 | cs.marlboro.college | MIT License
"""
# --- process ---
#  * read all lines except first two (my inserted columns; see 'fields' below)
#  * split at column indices
#  * remove whitespace
#  * put everything into a dictionary
#  * output to .csv file

# Fixed-width layout of each input record: the key is the column at which
# a field starts, the value is the name that field is stored under.
fields = {
    0: 'page',
    6: 'line',
    10: 'last_name',
    25: 'first_name',
    42: 'age',
    48: 'birth_place',
    66: 'filename',
}

# Start columns in layout order, i.e. [0, 6, 10, 25, 42, 48, 66].
# Materialized as a real list so it can be sliced and reused below.
columns = [*fields]

# (start, stop) slice bounds for every field: each field stops where the
# next one starts, and the last field stops at column 80,
# i.e. [(0, 6), (6, 10), ..., (48, 66), (66, 80)].
# Kept as a list because a bare zip() iterator would be used up after the
# first pass, and these bounds are looped over once per input line.
start_stop_columns = list(zip(columns, [*columns[1:], 80]))

# Read the whole fixed-width input file.  The 'with' block closes the file
# as soon as the lines are in memory, even if reading raises.
with open('vernon_1850_edited.txt', 'r') as infile:
    lines = infile.readlines()

# Turn each data line into a dict of {field_name: stripped_text}.
people = []
for line in lines[2:]:                           # skip first two lines
    # Slice each field out by its (start, stop) columns and drop the
    # padding whitespace (this also removes the trailing newline).
    # A line that ends before a field's start column yields '' for it.
    person = {fields[start]: line[start:stop].strip()
              for (start, stop) in start_stop_columns}
    people.append(person)

def personcsv(p):
    """ Build the comma-separated 'last_name,first_name,age' text for
        the person dictionary p; the values are joined as-is, with no
        quoting or escaping. """
    parts = (p['last_name'], p['first_name'], p['age'])
    return ','.join(parts)

# Sort in place using the 'last,first,age' string from personcsv() as the
# key, so the ordering is a plain text comparison of that combined string
# (ages are compared as text, not as numbers).
people.sort(key=personcsv)

# Write a header row, then one row per person.  The 'with' block makes sure
# the output is flushed and the file closed, even if a write fails partway.
with open('vernon_1850.csv', 'w') as output:
    output.write('lastname,firstname,age\n')
    for person in people:
        output.write(personcsv(person) + '\n')