"""
 convert_windham1850.py

 convert vernon_1850_edited.txt to .csv

 Jim Mahoney | Jan 2020 | cs.marlboro.college | MIT License
"""
import csv

# --- process ---
# * read all lines except first two (my inserted columns; see 'fields' below)
# * skip lines that are blank
# * split at column indices
# * remove whitespace
# * put everything into a dictionary
# * sort and output to .csv file

INPUT_FILENAME = 'vernon_1850_edited.txt'
OUTPUT_FILENAME = 'vernon_1850.csv'

HEADER_LINES = 2     # leading lines of the input that hold no person data
LINE_LENGTH = 80     # stop column of the last field; 80-1 is line length

#  start column : field_name
fields = {
    0: 'page',
    6: 'line',
    10: 'last_name',
    25: 'first_name',
    42: 'age',
    48: 'birth_place',
    66: 'filename',
}

# Sorted explicitly so the slices do not depend on the dict's insertion order.
columns = sorted(fields)     # i.e. [0, 6, 10, 25, 42, 48, 66]

# Each field runs from its own start column up to the next field's start.
# Kept as a list (not a zip iterator) because it is looped over once per line.
# i.e. [(0,6), (6,10), ..., (48,66), (66,80)]
start_stop_columns = list(zip(columns, columns[1:] + [LINE_LENGTH]))


def parse_line(line):
    """ return a {field_name: value} dict sliced from one fixed-width line.

        Each value is line[start:stop] with surrounding whitespace removed,
        so a line shorter than a field's start column gives '' for it.
    """
    return {fields[start]: line[start:stop].strip()
            for (start, stop) in start_stop_columns}


def parse_lines(lines):
    """ return a list of person dicts from an iterable of text lines.

        The first HEADER_LINES lines are skipped, and so are lines holding
        only whitespace, which would otherwise become all-empty people.
    """
    people = []
    for (number, line) in enumerate(lines):
        if number < HEADER_LINES or not line.strip():
            continue
        people.append(parse_line(line))
    return people


def personcsv(p):
    """ return csv string last,first,age for a person

        The values are joined as-is, with no quoting; this string is what
        the people are sorted by, not what is written to the output file.
    """
    return p['last_name'] + ',' + p['first_name'] + ',' + p['age']


def write_csv(people, output):
    """ write a header row and one last,first,age row per person.

        output is anything with a write() method.  csv.writer quotes a value
        that contains a comma or a double quote, so such a value stays in
        one column instead of shifting the ones after it.
    """
    # '\n' rather than csv's default '\r\n' : same row ending as before.
    writer = csv.writer(output, lineterminator='\n')
    writer.writerow(['lastname', 'firstname', 'age'])
    for person in people:
        writer.writerow(
            [person['last_name'], person['first_name'], person['age']])


def main():
    """ read INPUT_FILENAME, sort the people, and write OUTPUT_FILENAME. """
    # 'with' closes both files, so the output is flushed even on an error.
    with open(INPUT_FILENAME, 'r') as infile:
        people = parse_lines(infile)

    # sort by the joined "last,first,age" string (age compares as text)
    people.sort(key=personcsv)

    with open(OUTPUT_FILENAME, 'w') as outfile:
        write_csv(people, outfile)


if __name__ == '__main__':
    main()