""" feb2.py The huffman part of the Feb 2 homework, using the routines in utilities.py and huffman.py Jim Mahoney | cs.marlboro.edu | Feb 2017 | MIT License """ from utilities import file_to_substrings, probabilities, entropy, mean from huffman import Huffman def analyze(filename, symbolsize, verbose=False): """ Return entropy and compression ratio for a file treated as symbols of a given length. Optionally print out lots of details. """ def _print(message): if verbose: print(message) _print(" -- file {} with symbolsize {} --".format(filename, symbolsize)) symbols = file_to_substrings(filename, symbolsize) p = probabilities(symbols) pvals = p.values() (mean_p, max_p, min_p) = (mean(pvals), max(pvals), min(pvals)) _print(" symbol count = {}; unique symbols = {}".format( len(symbols), len(p))) _print(" probability max,mean,min = {:.4f}, {:.4f}, {:.4f}".format( max_p, mean_p, min_p)) h = entropy(p) _print(" entropy = {:.4f} per symbol , = {:.4f} per bit".format (h, h/symbolsize)) huff = Huffman(p) huff_mean = huff.mean_code_length() huff_max = huff.max_code_length() huff_min = huff.min_code_length() _print(" huffman code has max, mean, min = {}, {}, {}".format( huff_max, huff_mean, huff_min)) compression = huff_mean/symbolsize _print(" compression factor is (huff_mean/symbolsize) = {:.4f}".format( compression)) _print('') return (h/symbolsize, compression) def entropies_and_compressions(verbose=False): """ Return a dictionary of entropy approximations and huffman compression ratios for several files and symbol lengths. Optionally print out lots (and lots) of details. """ n_sizes = 8 filenames = ('stream1.txt', 'stream2.txt') entropy = {file : [None]*n_sizes for file in filenames} compression = {file : [None]*n_sizes for file in filenames} for filename in ('stream1.txt', 'stream2.txt'): for symbolsize in range(1, n_sizes+1): (this_entropy, this_compression) = analyze(filename, symbolsize, verbose) entropy[filename][symbolsize-1] = this_entropy compression[filename][symbolsize-1] = this_compression return (entropy, compression) if __name__=='__main__': entropies_and_compressions(verbose=True)