Birthday Paradox


from pylab import *
import numpy
from decimal import *
from random import *

getcontext().prec = 200


def RandList(n, t):
    return [randrange(1, n) for i in range(0, t)]


def DuplicatesP(t):
    return len(unique(t)) < len(t)


def TryBirthday(n, t, tr=1000):
    j = 0
    for i in range(0, tr):
        u = RandList(n, t)
        if DuplicatesP(u):
            j += 1.0
    return j / (tr * 1.0)


def GenPlot(n, tr=5000):
    x = list(range(0, n + 2))
    y = [TryBirthday(n, i, tr) for i in x]
    return [x, y]


def ActualEstimate(i, n):
    p = 1
    for j in range(0, i):
        p *= 1.0 - j / float(n)
    return 1 - p


def ExpEstimate(i, n):
    l = -i * (i + 1) / Decimal(2 * n)
    # SEE Heuer for this constant
    const = sqrt(-2 * Decimal(log(1 - 1 / 2.0)))
    v = 1 - const * (Decimal(l).exp())
    v = float(v)
    if v > 0:
        return v
    return 0


def PrintProbs(n, tr=5000):
    for i in range(0, n + 2):
        print(
            (
                "{0:6d} -> value={1:04f} Est={2:04f} Exp={3:04f}".format(
                    i,
                    TryBirthday(n, i, tr),
                    ActualEstimate(i, n),
                    ExpEstimate(i, n),
                )
            )
        )


def PlotProbs(n, tr=5000):
    [x, y] = GenPlot(n, tr)
    fig = figure()
    ax1 = fig.add_subplot(111)
    ax1.plot(x, y)
    ax1.axhline(y=0.5, color="r")
    ax1.axvline(x=sqrt(n), color="g")
    ax1.axvline(x=1.77 * sqrt(n), color="g")
    l = list(range(0, n + 2))
    ma = [ExpEstimate(i, n) for i in l]
    ax1.plot(l, ma, color="y")
    mp = [ActualEstimate(i, n) for i in l]
    ax1.plot(l, mp, color="m")
    return fig