Birthday Paradox


from pylab import *
import numpy
from decimal import *
from random import *
getcontext().prec = 200


def RandList(n, t):
    return [randrange(1, n) for i in range(0, t)]


def DuplicatesP(t):
    return (len(unique(t)) < len(t))


def TryBirthday(n, t, tr=1000):
    j = 0
    for i in range(0, tr):
        u = RandList(n, t)
        if DuplicatesP(u):
            j += 1.
    return (j / (tr * 1.0))


def GenPlot(n, tr=5000):
    x = list(range(0, n + 2))
    y = [TryBirthday(n, i, tr) for i in x]
    return [x, y]


def ActualEstimate(i, n):
    p = 1
    for j in range(0, i):
        p *= (1. - j / float(n))
    return 1 - p


def ExpEstimate(i, n):
    l = -i * (i + 1) / Decimal(2 * n)
    # SEE Heuer for this constant
    const = sqrt(-2 * Decimal(log(1 - 1 / 2.)))
    v = 1 - const * (Decimal(l).exp())
    v = float(v)
    if v > 0:
        return v
    return 0


def PrintProbs(n, tr=5000):
    for i in range(0, n + 2):
        print(('{0:6d} -> value={1:04f} Est={2:04f} Exp={3:04f}'.format(
            i, TryBirthday(n, i, tr), ActualEstimate(i, n), ExpEstimate(i, n))))


def PlotProbs(n, tr=5000):
    [x, y] = GenPlot(n, tr)
    fig = figure()
    ax1 = fig.add_subplot(111)
    ax1.plot(x, y)
    ax1.axhline(y=0.5, color='r')
    ax1.axvline(x=sqrt(n), color='g')
    ax1.axvline(x=1.77 * sqrt(n), color='g')
    l = list(range(0, n + 2))
    ma = [ExpEstimate(i, n) for i in l]
    ax1.plot(l, ma, color='y')
    mp = [ActualEstimate(i, n) for i in l]
    ax1.plot(l, mp, color='m')
    return fig