"""
analyze_wander.py
Find the information entropy and Huffman code for
the first paragraph of text of "The Wandering Inn"
https://wanderinginn.com/2016/07/27/1-00/
(which is what I happen to be reading this week).
Running this :
$ python2 analyze_wander.py ; dot wander.dot -Tpng > wander.png
(The "dot" program is part of Graphviz, a graph-generating tool.)
produces this :
" " 0.17647 111
"," 0.00929 1001111
"." 0.01238 011010
";" 0.00310 110101110
"B" 0.00310 110101111
"I" 0.00310 01101110
"R" 0.00310 01101111
"T" 0.00310 10011100
"a" 0.04644 0000
"c" 0.00929 1101000
"b" 0.01548 100100
"e" 0.08050 1100
"d" 0.05573 0111
"g" 0.01548 100101
"f" 0.01548 100110
"i" 0.04954 0100
"h" 0.04334 11011
"k" 0.00310 10011101
"m" 0.00929 1101001
"l" 0.03715 10110
"o" 0.07430 1010
"n" 0.05882 1000
"p" 0.00929 1101010
"s" 0.04954 0101
"r" 0.04644 0001
"u" 0.03715 10111
"t" 0.09598 001
"w" 0.02477 01100
"y" 0.00619 0110110
"x" 0.00310 11010110
entropy = 4.16674676297
mean code length = 4.21052631579
Jim Mahoney | cs.marlboro.college | Dec 2019 | MIT License
"""
from huffman import *
# From wanderinginn.com, chapter 1
text = 'The inn was dark and empty. It stood, silent, on the grassy hilltop, the ruins of other structures around it. Rot and age had brought low other buildings; the weather and wildlife had reduced stone foundations to rubble and stout wooden walls to a few rotten pieces of timber mixed with the ground. But the inn still stood.'

print('--- info entropy & Huffman ---')
print('text:')
print(text)

# get_probabilities, Huffman and entropy are expected to come from the
# star-import of the local `huffman` module.
# `probabilities` is iterated for its keys and indexed by them below, so it
# is presumably a mapping {character: probability} -- confirm in huffman.py.
print('probabilities & huffman codes ')
probabilities = get_probabilities(text)
h = Huffman(probabilities)

# One row per symbol: the symbol, its probability, and its code word.
for char in probabilities:
    print(' "{}" {:.5f} {}'.format(
        char, probabilities[char], h.huffman_code[char]))

# Print the entropy next to the mean code length so the two can be compared.
print('entropy = ', entropy(probabilities))
print('mean code length = ', h.mean_code_length())

# Write the graphviz description of the Huffman tree.  The context manager
# closes (and therefore flushes) the file deterministically, so a following
# `dot wander.dot ...` command always reads the complete output instead of
# relying on the garbage collector to close the handle.
with open('wander.dot', 'w') as dot_file:
    dot_file.write(h.huffman_tree.graphviz(labels=True))