""" utilties.py Jim Mahoney | cs.marlboro.college | March 2020 """ import csv from scratch.statistics import mean, standard_deviation def read_housing(filename='housing.data'): """ Return housing table as a list of lists of floats """ data = [] with open(filename) as datafile: reader = csv.reader(datafile, delimiter=' ') for raw_row in reader: row = [float(x) for x in raw_row if x != ''] #print(row) data.append(row) return data def try_float(x): """ Return float(x) or x """ try: result = float(x) except: result = x return result def read_abalone(filename='abalone.data'): """ Return abalone data as a list of list of floats or strings """ data = [] with open(filename) as datafile: reader = csv.reader(datafile, delimiter=',') for raw_row in reader: row = [try_float(x) for x in raw_row] data.append(row) return data def normalize(xs): """ Given xs=(list of floats) i.e. "plural of x" subtract mean, divide by standard deviation, return list of floats """ average = mean(xs) sigma = standard_deviation(xs) return [(x - average)/sigma for x in xs] def normalize_column(data, col): """ modify data in place by normalizing one of its columns """ n_rows = len(data) xs = [data[row][col] for row in range(n_rows)] xs_norm = normalize(xs) for row in range(n_rows): data[row][col] = xs_norm[row] def get_features(data, row, col_features): """ return freatures for given row, values [x1, x2, ...] """ return [data[row][col] for col in col_features] def tests(): data = read_data() for row in data: row_string = [f'{x:6.2f}' for x in row] print(' '.join(row_string)) if __name__ == '__main__': tests()