In [2]:
%matplotlib inline
from __future__ import division
from ipywidgets import interact, fixed  # formerly IPython.html.widgets
from scipy import stats
from numpy import var, mean, std, linspace, sqrt  # array helpers used throughout
from matplotlib.pyplot import subplots
import sympy
from sympy import S, init_printing, Sum, summation, expand
from sympy import stats as st
from sympy.abc import k
import re

Maximum Likelihood Estimation

For $n$ independent Bernoulli trials $X_i$ with success probability $p$, the maximum likelihood estimator and its first two moments are

$$ \hat{p} = \frac{1}{n} \sum_{i=1}^n X_i $$

$$ \mathbb{E}(\hat{p}) = p $$

$$ \mathbb{V}(\hat{p}) = \frac{p(1-p)}{n} $$
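As a quick sanity check, these moments can be verified symbolically with sympy.stats for a small fixed $n$ (a sketch with $n=5$; the names pp, xs, and phat are introduced only for this check):

pp = sympy.symbols('p', positive=True)
xs = [st.Bernoulli('X%d'%i, pp) for i in range(5)]  # five i.i.d. Bernoulli samples
phat = sum(xs)/S(5)                                 # the ML estimator
print(sympy.simplify(st.E(phat)))                   # -> p
print(sympy.simplify(st.variance(phat)))            # -> p*(1 - p)/5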
In [3]:
sympy.plot(k*(1-k),(k,0,1),ylabel='$n \mathbb{V}$',xlabel='p',fontsize=28)
Out[3]:
<sympy.plotting.plot.Plot at 0xcfc25f0>
In [4]:
b = stats.bernoulli(p=0.8)
samples = b.rvs(100)    # 100 Bernoulli draws
print(var(samples))     # compare to p*(1-p) = 0.16
print(mean(samples))    # compare to p = 0.8
0.1659
0.79
In [5]:
def slide_figure(n=100, m=1000, p=0.8):
    'histogram of m ML estimates, each computed from n Bernoulli samples'
    fig, ax = subplots()
    ax.set_ylim(top=25)
    b = stats.bernoulli(p=p)
    samples = b.rvs(n*m)
    tmp = samples.reshape(n,-1).mean(axis=0)  # m estimates of p
    ax.hist(tmp, density=True)
    ax1 = ax.twinx()
    ax1.set_ylim(top=25)
    # overlay the CLT normal approximation to the sampling distribution
    ax1.plot(linspace(0,1,200), stats.norm(mean(tmp), std(tmp)).pdf(linspace(0,1,200)), lw=3, color='r')
In [6]:
interact(slide_figure, n=(100,500),p=(0.01,1,.05),m=fixed(500));
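Without the widget, the central-limit scaling can be checked directly: the spread of the ML estimates should match $\sqrt{p(1-p)/n}$ (a sketch with illustrative values $n=200$, $p=0.3$):

# empirical standard error vs. the CLT prediction
b = stats.bernoulli(p=0.3)
est = b.rvs((200,500)).mean(axis=0)  # 500 estimates, each from n=200 samples
print(std(est), sqrt(0.3*0.7/200))   # the two numbers should be close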

Maximum A-Posteriori (MAP) Estimation
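With a symmetric Beta prior on $p$, the MAP estimate maximizes the log-posterior, which is proportional to the Bernoulli likelihood times the prior density:

$$ \hat{p}_{MAP} = \arg\max_p \log\left( p^k (1-p)^{n-k} \, \frac{p^{\alpha-1}(1-p)^{\alpha-1}}{B(\alpha,\alpha)} \right) $$

where $k = \sum_{i=1}^n X_i$. The cells below plot a representative prior and then carry out this maximization with $\alpha=6$.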

In [7]:
from sympy.abc import p, n, k  # symbols for the posterior calculation

# a symmetric Beta(3,3) prior, peaked at p = 1/2
sympy.plot(st.density(st.Beta('p',3,3))(p),(p,0,1),xlabel='p')
Out[7]:
<sympy.plotting.plot.Plot at 0x58725f0>
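Larger $\alpha$ concentrates the prior more tightly around $p=1/2$. Overlaying two priors with the same plotting idiom used later in this notebook makes this visible (a sketch comparing Beta(2,2) and Beta(6,6)):

# weak Beta(2,2) prior (blue) vs. the sharper Beta(6,6) prior (red)
p1=sympy.plot(st.density(st.Beta('p',2,2))(p),(p,0,1),show=False,line_color='b',xlabel='p')
p2=sympy.plot(st.density(st.Beta('p',6,6))(p),(p,0,1),show=False,line_color='r')
p1.append(p2[0])
p1.show()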

Maximize the log-posterior to obtain the form of the MAP estimator.

In [8]:
# log-posterior: Bernoulli likelihood for k successes in n trials times a Beta(6,6) prior
obj=sympy.expand_log(sympy.log(p**k*(1-p)**(n-k) * st.density(st.Beta('p',6,6))(p)))
In [9]:
sol=sympy.solve(sympy.simplify(sympy.diff(obj,p)),p)[0]  # set d(obj)/dp = 0 and solve
print(sol)
(k + 5)/(n + 10)
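The same calculation for a general Beta$(\alpha,\alpha)$ prior sets the derivative of the log-posterior to zero,

$$ \frac{k+\alpha-1}{p} - \frac{n-k+\alpha-1}{1-p} = 0 \quad \Rightarrow \quad \hat{p}_{MAP} = \frac{k+\alpha-1}{n+2\alpha-2} $$

which reduces to the printed solution when $\alpha=6$.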

$$ \hat{p}_{MAP} = \frac{5+\sum_{i=1}^n X_i}{n + 10} $$

with corresponding expectation

$$ \mathbb{E}(\hat{p}_{MAP}) = \frac{5+n p}{n + 10} $$

which is a biased estimator: the bias $\mathbb{E}(\hat{p}_{MAP})-p = \frac{5-10p}{n+10}$ shrinks as $n$ grows. The variance of this estimator is the following:

$$ \mathbb{V}(\hat{p}_{MAP}) = \frac{n (1-p) p}{(n+10)^2} $$

Compare this to the variance of the maximum likelihood estimator, reproduced here:

$$ \mathbb{V}(\hat{p}_{ML}) = \frac{p(1-p)}{n} $$
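For concreteness, the two variances can be compared numerically at the prior peak (illustrative values $p=1/2$, $n=30$):

# ML vs. MAP variance at p=1/2, n=30
nn, pval = 30, 0.5
print(pval*(1-pval)/nn)             # ML:  0.00833...
print(nn*pval*(1-pval)/(nn+10)**2)  # MAP: 0.00468...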
In [10]:
n=5  # fix a numeric sample size; reused by the variance plots further below
def show_bias(n=30):
    # identity line p versus the MAP expectation (5+n*p)/(n+10)
    sympy.plot(p,(5+n*p)/(n+10),(p,0,1),aspect_ratio=1,title='more samples reduce bias')

interact(show_bias,n=(10,500,10));

Compute the variance of the MAP estimator

In [11]:
xs = sympy.var('x1:11')            # ten samples x1..x10 (the slice excludes the endpoint)
expr = (5 + sum(xs))/(len(xs)+10)  # the MAP estimator with n=10
In [12]:
def apply_exp(expr):
    'take the expectation of a polynomial in the i.i.d. Bernoulli samples x_i'
    tmp=re.sub(r'x\d+\*\*2','p',str(expand(expr)))  # E[x_i**2] = p
    tmp=re.sub(r'x\d+\*x\d+','p**2',tmp)            # E[x_i*x_j] = p**2 by independence
    tmp=re.sub(r'x\d+','p',tmp)                     # E[x_i] = p
    return sympy.sympify(tmp)
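A quick check on a toy polynomial shows the substitutions firing in the right order (x1 and x2 here are just for illustration):

x1,x2 = sympy.symbols('x1 x2')
print(apply_exp(x1**2 + x1*x2 + x1))  # -> p**2 + 2*p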
In [13]:
ex2 = apply_exp(expr**2)  # second moment E[estimator**2]
print(ex2)
9*p**2/40 + 11*p/40 + 1/16
In [14]:
tmp=sympy.simplify(ex2 - (apply_exp(expr))**2 )  # variance = E[X**2] - E[X]**2
sympy.plot(tmp,p*(1-p)/10,(p,0,1))               # MAP variance vs. ML variance p*(1-p)/10
Out[14]:
<sympy.plotting.plot.Plot at 0xd6d7410>

General Case

In [15]:
def generate_expr(num_samples=10,alpha=6):
    'return (mean, variance) of the MAP estimator under a Beta(alpha,alpha) prior'
    n = sympy.symbols('n')
    obj=sympy.expand_log(sympy.log(p**k*(1-p)**(n-k) * st.density(st.Beta('p',alpha,alpha))(p)))
    sol=sympy.solve(sympy.simplify(sympy.diff(obj,p)),p)[0]
    # substitute k -> x1+...+x_num_samples (note the x1:%d slice excludes the endpoint)
    expr=sol.replace(k,sum(sympy.var('x1:%d'%(num_samples+1))))
    expr=expr.subs(n,num_samples)
    ex2 = apply_exp(expr**2)  # second moment
    ex  = apply_exp(expr)     # mean
    return (ex,sympy.simplify(ex2-ex**2))
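As a consistency check, $\alpha=6$ with ten samples should reproduce the hand computation above; both differences below should print as 0:

ex,v = generate_expr(10,alpha=6)
print(sympy.simplify(ex - (5+10*p)/20))  # 0: mean matches (5+10p)/20
print(sympy.simplify(v - p*(1-p)/40))    # 0: variance matches p(1-p)/40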
In [16]:
num_samples=10
X_mean,X_v = generate_expr(num_samples,alpha=2)
p1=sympy.plot(X_v,(p,0,1),show=False,line_color='b',ylim=(0,.03),xlabel='p')      # MAP, Beta(2,2)
X_mean,X_v = generate_expr(num_samples,alpha=6)
p2=sympy.plot(X_v,(p,0,1),show=False,line_color='r',xlabel='p')                   # MAP, Beta(6,6)
p3=sympy.plot(p*(1-p)/num_samples,(p,0,1),show=False,line_color='g',xlabel='p')   # ML
p1.append(p2[0])
p1.append(p3[0])
p1.show()
In [17]:
# MAP variance (blue) vs. ML variance (red), using the numeric n set earlier
p1=sympy.plot(n*(1-p)*p/(n+10)**2,(p,0,1),show=False,line_color='b',ylim=(0,.05),xlabel='p',ylabel='variance')
p2=sympy.plot((1-p)*p/n,(p,0,1),show=False,line_color='r',ylim=(0,.05),xlabel='p')
p1.append(p2[0])
p1.show()
In [18]:
def show_variance(n=5):
    p1=sympy.plot(n*(1-p)*p/(n+10)**2,(p,0,1),show=False,line_color='b',ylim=(0,.05),xlabel='p',ylabel='variance')
    p2=sympy.plot((1-p)*p/n,(p,0,1),show=False,line_color='r',ylim=(0,.05),xlabel='p')
    p1.append(p2[0])
    p1.show()    
interact(show_variance,n=(10,120,2));

The obvious question is: what is the value of a biased estimator? The key fact is that the MAP estimator is indeed biased, but it is biased toward the peak of the prior probability of $\theta$, and its variance is uniformly smaller than that of the maximum likelihood estimator (since $n^2 < (n+10)^2$). Suppose the true parameter is $p=1/2$, exactly at the peak of the prior density. In this case, what is the bias?

$$ \mathbb{E}(\hat{p}_{MAP}) - p = \frac{5+n p}{n + 10} - p = \frac{5-10p}{n+10} = 0 \quad \text{at } p=\tfrac{1}{2} $$

and the variance of the MAP estimator at this point is the following:

$$ \mathbb{V}(\hat{p}_{MAP}) = \frac{n p (1-p)}{(n+10)^2}\bigg|_{p=1/2} = \frac{n}{4(n+10)^2} < \frac{1}{4n} = \mathbb{V}(\hat{p}_{ML}) $$
In [23]:
pv=.60      # true parameter, deliberately off the prior peak at 1/2
nsamp=30    # samples per estimate
fig,ax=subplots()
rv = stats.bernoulli(pv)
map_est=(rv.rvs((nsamp,1000)).sum(axis=0)+5)/(nsamp+10)  # 1000 MAP estimates
ml_est=rv.rvs((nsamp,1000)).sum(axis=0)/nsamp            # 1000 ML estimates
ax.hist(map_est,bins=20,alpha=.3,density=True,label='MAP');
ax.hist(ml_est,bins=20,alpha=.3,density=True,label='ML');
ax.vlines(map_est.mean(),0,12,lw=3,linestyle=':',color='b')  # mean MAP estimate
ax.vlines(pv,0,12,lw=3,color='r',linestyle=':')              # true pv
ax.vlines(ml_est.mean(),0,12,lw=3,color='g',linestyle=':')   # mean ML estimate
ax.legend()
Out[23]:
<matplotlib.legend.Legend at 0xe198e70>
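A quick follow-up comparing mean-squared error puts the bias-variance tradeoff in numbers; this reuses map_est, ml_est, and pv from the cell above:

# MSE around the true pv for each estimator; MAP is typically smaller here
# because pv=0.6 sits near the prior peak at 1/2
print(((map_est-pv)**2).mean())  # MAP
print(((ml_est-pv)**2).mean())   # ML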