% sethna/tex/1.14.tex
% 2022-02-15 08:16:37 -06:00
%
% 150 lines
% 7.1 KiB
% TeX

\documentclass{article}
% set up telugu
\usepackage{fontspec}
\newfontfamily\telugufont{Potti Sreeramulu}[Script = Telugu]
\usepackage{polyglossia}
\setdefaultlanguage{english}
\setotherlanguage{telugu}
%other packages
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{physics}
\usepackage{siunitx}
\usepackage{todonotes}
\usepackage{luacode}
\usepackage{titling}
\usepackage{enumitem}
% custom deepak packages
\usepackage{luatrivially}
\usepackage{subtitling}
\usepackage{cleveref}
\begin{luacode*}
math.randomseed(31415926)
\end{luacode*}
\newcommand*\mean[1]{\overline{#1}}
\title{Problem 1.14}
\subtitle{Width of the Height Distribution}
\author{\begin{telugu}హృదయ్ దీపక్ మల్లుభొట్ల\end{telugu}}
% want empty date
\predate{}
\date{}
\postdate{}
% !TeX spellcheck = en_GB
\begin{document}
\maketitle
Consider a Gaussian distribution
\begin{equation}
\mathcal{N}(x | \mu, \sigma^2) = \frac{1}{\sqrt{2 \pi \sigma^2}} e^{\flatfrac{-\left(x - \mu\right)^2}{\left(2 \sigma^2\right)}}.
\end{equation}
Given a sample of $N$ heights $x_n$, the likelihood that they were drawn from a particular normal distribution with $\mu$ and $\sigma^2$ is
\begin{equation}
P(\left[x_n \right] | \mu, \sigma^2) = \prod_{n = 1}^N \mathcal{N}(x_n | \mu, \sigma^2).
\end{equation}
This is a nightmare.
Can we fix it to be easier to deal with?
\begin{enumerate}[label=(\alph*)]
\item Write $P(\left[x_n \right] | \mu, \sigma^2)$ as a formula depending only on $N$, the sample mean $\mean{x}$ and $S = \sum_n \left(x_n - \mean{x} \right)^2$.
\item Show that $P$ takes its maximum at $\mu_{ML} = \mean{x}$ and $\sigma_{ML} = \sqrt{\flatfrac{S}{N}}$.
\item Assume the entire population is drawn from a (not necessarily Gaussian) distribution with variance $\left<x^2\right>_{samp} = \sigma_0^2$.
Let $\mu = 0$ for the population.
Show that $\left< \sigma_{ML}^2 \right>_{samp} = \frac{N - 1}{N} \sigma_0^2$.
\end{enumerate}
Throughout this problem, $\mean{x}$ is a mean over a single sample, and $\left<x \right>$ is an average over many samples.
\section{Solution} \label{sec:solution}
\subsection{(a) Simplify $P$} \label{subsec:sola}
\begin{align}
P(\left[x_n \right] | \mu, \sigma^2) &= \prod_{n = 1}^N \mathcal{N}(x_n | \mu, \sigma^2) \\
P(\left[x_n \right] | \mu, \sigma^2) &= \prod_{n = 1}^N \frac{1}{\sqrt{2 \pi \sigma^2}} e^{\flatfrac{-\left(x_n - \mu\right)^2}{\left(2 \sigma^2\right)}} \\
P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \prod_{n = 1}^N e^{\flatfrac{-\left(x_n - \mu\right)^2}{\left(2 \sigma^2\right)}} \\
P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N e^{-\sum_n^N \flatfrac{\left(x_n - \mu\right)^2}{\left(2 \sigma^2\right)}} \\
P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(-\sum_n^N \frac{\left(x_n - \mu\right)^2}{2 \sigma^2}) \\
P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(-\frac{1}{2 \sigma^2} \sum_n^N \left(x_n - \mu\right)^2) \label{eq:backup}
\end{align}
So we can isolate the grossness to the sum in the numerator.
\begin{align}
\sum_n^N \left(x_n - \mu\right)^2 &= \sum_n^N \left( x_n^2 - 2 \mu x_n + \mu^2 \right) \\
&= \sum_n^N x_n^2 - 2 \sum_n^N \mu x_n + \sum_n^N \mu^2 \\
&= \sum_n^N x_n^2 - 2 \mu \sum_n^N x_n + \mu^2 \sum_n^N 1 \\
&= \sum_n^N x_n^2 - 2 \mu N \mean{x} + N \mu^2
\end{align}
The pesky $x_n^2$ reminds us of $S$, so we expand it:
\begin{align}
S &= \sum_n^N \left(x_n - \mean{x} \right)^2 \\
&= \sum_n^N \left(x_n^2 - 2 x_n \mean{x} + \mean{x}^2 \right) \\
&= \sum_n^N x_n^2 - 2 \sum_n^N x_n \mean{x} + \sum_n^N \mean{x}^2 \\
&= \sum_n^N x_n^2 - 2 \mean{x} \sum_n^N x_n + N \mean{x}^2 \\
&= \sum_n^N x_n^2 - 2 N \mean{x}^2 + N \mean{x}^2 \\
S &= \sum_n^N x_n^2 - N \mean{x}^2,
\end{align}
so then
\begin{align}
\sum_n^N \left(x_n - \mu\right)^2 &= \sum_n^N x_n^2 - 2 \mu N \mean{x} + N \mu^2 \\
\sum_n^N \left(x_n - \mu\right)^2 &= S + N \mean{x}^2 - 2 \mu N \mean{x} + N \mu^2 \\
\sum_n^N \left(x_n - \mu\right)^2 &= S + N \left(\mean{x} - \mu \right)^2
\end{align}
Inserting this into \cref{eq:backup}, we get
\begin{align}
P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(-\frac{1}{2 \sigma^2} \sum_n^N \left(x_n - \mu\right)^2) \\
P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(-\frac{1}{2 \sigma^2} \left[ S + N \left(\mean{x} - \mu \right)^2 \right]) \\
P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(\frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2}).
\end{align}
\subsection{(b) Maximum likelihood}
This section is basically just a lot of gross derivatives.
We want to maximise
\begin{equation}
P(\left[x_n \right] | \mu, \sigma^2) = \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(\frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2})
\end{equation}
Because $\log$ is monotonic, we can maximise $\log P$:
\begin{align}
\log P(\left[x_n \right] | \mu, \sigma^2) &= \log \left[ \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(\frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2}) \right] \\
\log P(\left[x_n \right] | \mu, \sigma^2) &= \log \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N + \log \exp(\frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2}) \\
\log P(\left[x_n \right] | \mu, \sigma^2) &= N \log \frac{1}{\sqrt{2 \pi \sigma^2}} + \frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \\
\log P(\left[x_n \right] | \mu, \sigma^2) &= - \frac{N}{2} \log \sigma^2 + C - \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \\
\log P(\left[x_n \right] | \mu, \sigma^2) &= - N \log \sigma + C - \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2}
\end{align}
Here $C = -\frac{N}{2} \log(2 \pi)$ is a constant independent of $\mu$ and $\sigma$.
First maximise with respect to $\mu$:
\begin{align}
\pdv{\log P}{\mu} &= - \pdv{}{\mu} \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2},
\end{align}
which clearly reaches zero when $\mu = \mean{x}$.
Next, we look at maximising with respect to $\sigma$:
\begin{align}
\pdv{\log P}{\sigma} &= \pdv{}{\sigma} \left( - N \log \sigma + C - \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \right) \\
\pdv{\log P}{\sigma} &= - \frac{N}{\sigma} - \pdv{}{\sigma} \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \\
\pdv{\log P}{\sigma} &= - \frac{N}{\sigma} - \frac{S + N \left(\mean{x} - \mu \right)^2}{2} \pdv{}{\sigma} \frac{1}{\sigma^2} \\
\pdv{\log P}{\sigma} &= - \frac{N}{\sigma} - \frac{S + N \left(\mean{x} - \mu \right)^2}{2} \frac{-2}{\sigma^3} \\
\pdv{\log P}{\sigma} &= - \frac{N}{\sigma} + \frac{S + N \left(\mean{x} - \mu \right)^2}{\sigma^3}
\end{align}
\triv we can now sub $\mu = \mean{x}$, giving us
\begin{align}
\pdv{\log P}{\sigma} &= - \frac{N}{\sigma} + \frac{S}{\sigma^3}.
\end{align}
Setting this derivative to zero to locate the maximum,
\begin{align}
0 &= - \frac{N}{\sigma_{ML}} + \frac{S}{\sigma_{ML}^3} \\
\frac{N}{\sigma_{ML}} &= \frac{S}{\sigma_{ML}^3} \\
\sigma_{ML} &= \sqrt{\frac{S}{N}}.
\end{align}
\newpage
\listoftodos
\end{document}