flips.py

"""
 flips.py

 See https://cs.marlboro.college/cours/spring2020/data/notes/feb18

 The coin flip is https://cs.marlboro.college/tools/coins/one.cgi .

 Is this flip fair ? (It gives a random 't' or 'h' with each page load.) 
 Make and discuss an explicit hypothesis test to decide, 
 in two cases : with 10 coin flips, and with 5000 coin flips. 

    $ python3 flips.py 
    --- testing H0=fair_coin with n = 10 ---
    Simulating the null hypothesis ...
      decision cutoffs: 1.6732065515668273, 8.288793448433172
    flipping the coin ...
    100%|███████████████████████████████████████████████████| 10/10 [00:00<00:00, 15.93it/s]
    Number of flips was 5
    Fail to reject the null hypotheis : cannot say that the coin is not fair

    --- testing H0=fair_coin with n = 5000 ---
    Simulating the null hypothesis ...
      decision cutoffs: 2430.806163623822, 2571.5858363761777
    flipping the coin ...
    100%|███████████████████████████████████████████████████| 5000/5000 [04:55<00:00, 16.90it/s]
    Number of flips was 2378
    Reject the null hypothesis : the coin is not fair

 Jim Mahoney | Feb 2020 | cs.marlboro.college | MIT License
"""
import urllib.request
import tqdm
from math import sqrt
from random import randint

def mean(xs):
    """ average of a collection of values """
    return sum(xs)/len(xs)

def standard_deviation(xs):
    """ best estimate of parent populartion's standard deviation """
    mu = mean(xs)
    n = len(xs)
    offsets_squared = [(x - mu)**2 for x in xs]
    return sqrt( mean(offsets_squared) * (n/(n-1)) )

def count_heads(n):
    """ Return number of heads in n flips """
    url='https://cs.marlboro.college/tools/coins/one.cgi'
    heads = 0
    for i in tqdm.tqdm(range(n)):
        if urllib.request.urlopen(url).read() == b'h\n':
            heads += 1
    return heads

def cutoffs_from_simulation(n, n_sims=1000):
    """ Return inference cutoff values for p=0.5 binomial with n trials,
        at the 2 sigma, p-value = 0.05 confidence level,
        i.e. (mean - 2 * sigma, mean + 2 * sigma) """
    sim_flips = [sum(randint(0,1) for i in range(n)) for _ in range(n_sims)]
    mu = mean(sim_flips)
    sigma = standard_deviation(sim_flips)
    return (mu - 2*sigma, mu + 2*sigma)

def main():
    for n in (10, 5000):
        print("--- testing H0=fair_coin with n = {} ---".format(n))
        print("Simulating the null hypothesis ...")
        (low, high) = cutoffs_from_simulation(n)
        print("  decision cutoffs: {}, {}".format(low, high))
        print("flipping the coin ...")
        flips = count_heads(n)
        print("Number of flips was {}".format(flips))
        if flips < low or flips > high:
            print("Reject the null hypothesis : the coin is not fair")
        else:
            print("Fail to reject the null hypotheis : cannot say that the coin is not fair")
        print()

main()