\documentclass{article}

% set up telugu
\usepackage{fontspec}
\newfontfamily\telugufont{Potti Sreeramulu}[Script = Telugu]
\usepackage{polyglossia}
\setdefaultlanguage{english}
\setotherlanguage{telugu}

%other packages
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{physics}
\usepackage{siunitx}
\usepackage{todonotes}
\usepackage{luacode}
\usepackage{titling}
\usepackage{enumitem}

% custom deepak packages
\usepackage{luatrivially}
\usepackage{subtitling}
\usepackage{cleveref}

\begin{luacode*}
math.randomseed(31415926)
\end{luacode*}

\newcommand*\mean[1]{\overline{#1}}

\title{Problem 1.14}
\subtitle{Width of the Height Distribution}
\author{\begin{telugu}హృదయ్ దీపక్ మల్లుభొట్ల\end{telugu}}
% want empty date
\predate{}
\date{}
\postdate{}

% !TeX spellcheck = en_GB
\begin{document}
\maketitle

We are given a Gaussian distribution
\begin{equation}
  \mathcal{N}(x | \mu, \sigma^2) = \frac{1}{\sqrt{2 \pi \sigma^2}} e^{\flatfrac{-\left(x - \mu\right)^2}{\left(2 \sigma^2\right)}}.
\end{equation}
Given a sample of $N$ heights $x_n$, the likelihood that they were drawn from a particular normal distribution with $\mu$ and $\sigma^2$ is
\begin{equation}
  P(\left[x_n \right] | \mu, \sigma^2) = \prod_{n = 1}^N \mathcal{N}(x_n | \mu, \sigma^2).
\end{equation}
This is a nightmare.
Can we fix it to be easier to deal with?
\begin{enumerate}[label=(\alph*)]
  \item Write $P(\left[x_n \right] | \mu, \sigma^2)$ as a formula depending only on $N$, the sample mean $\mean{x}$ and $S = \sum_n \left(x_n - \mean{x} \right)^2$.
  \item Show that $P$ takes its maximum at $\mu_{ML} = \mean{x}$ and $\sigma_{ML} = \sqrt{\flatfrac{S}{N}}$.
  \item Assume the entire population is drawn from a (not necessarily Gaussian) distribution with variance $\left< x^2 \right>_{samp} = \sigma_0^2$.
        Let $\mu = 0$ for the population.
        Show that $\left< \sigma_{ML}^2 \right>_{samp} = \frac{N - 1}{N} \sigma_0^2$.
\end{enumerate}
Throughout this problem, $\mean{x}$ is a mean over a single sample, and $\left< \cdot \right>_{samp}$ is an average over many samples.
\section{Solution}
\label{sec:solution}

\subsection{(a) Simplify $P$}
\label{subsec:sola}
\begin{align}
  P(\left[x_n \right] | \mu, \sigma^2) &= \prod_{n = 1}^N \mathcal{N}(x_n | \mu, \sigma^2) \\
  P(\left[x_n \right] | \mu, \sigma^2) &= \prod_{n = 1}^N \frac{1}{\sqrt{2 \pi \sigma^2}} e^{\flatfrac{-\left(x_n - \mu\right)^2}{\left(2 \sigma^2\right)}} \\
  P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \prod_{n = 1}^N e^{\flatfrac{-\left(x_n - \mu\right)^2}{\left(2 \sigma^2\right)}} \\
  P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N e^{-\sum_n^N \flatfrac{\left(x_n - \mu\right)^2}{\left(2 \sigma^2\right)}} \\
  P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N e^{-\frac{1}{2 \sigma^2} \sum_n^N \left(x_n - \mu\right)^2} \\
  P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(-\frac{1}{2 \sigma^2} \sum_n^N \left(x_n - \mu\right)^2) \label{eq:backup}
\end{align}
So we can isolate the grossness to the sum in the exponent.
\begin{align}
  \sum_n^N \left(x_n - \mu\right)^2 &= \sum_n^N \left( x_n^2 - 2 \mu x_n + \mu^2 \right) \\
  &= \sum_n^N x_n^2 - 2 \sum_n^N \mu x_n + \sum_n^N \mu^2 \\
  &= \sum_n^N x_n^2 - 2 \mu \sum_n^N x_n + \mu^2 \sum_n^N 1 \\
  &= \sum_n^N x_n^2 - 2 \mu N \mean{x} + N \mu^2
\end{align}
The pesky $x_n^2$ reminds us of $S$, so we expand it:
\begin{align}
  S &= \sum_n^N \left(x_n - \mean{x} \right)^2 \\
  &= \sum_n^N \left(x_n^2 - 2 x_n \mean{x} + \mean{x}^2 \right) \\
  &= \sum_n^N x_n^2 - 2 \sum_n^N x_n \mean{x} + \sum_n^N \mean{x}^2 \\
  &= \sum_n^N x_n^2 - 2 \mean{x} \sum_n^N x_n + N \mean{x}^2 \\
  &= \sum_n^N x_n^2 - 2 N \mean{x}^2 + N \mean{x}^2 \\
  S &= \sum_n^N x_n^2 - N \mean{x}^2,
\end{align}
so then
\begin{align}
  \sum_n^N \left(x_n - \mu\right)^2 &= \sum_n^N x_n^2 - 2 \mu N \mean{x} + N \mu^2 \\
  \sum_n^N \left(x_n - \mu\right)^2 &= S + N \mean{x}^2 - 2 \mu N \mean{x} + N \mu^2 \\
  \sum_n^N \left(x_n - \mu\right)^2 &= S + N \left(\mean{x} - \mu \right)^2
\end{align}
Inserting this into \cref{eq:backup}, we get
\begin{align}
  P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(-\frac{1}{2 \sigma^2} \sum_n^N \left(x_n - \mu\right)^2) \\
  P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(-\frac{1}{2 \sigma^2} \left[ S + N \left(\mean{x} - \mu \right)^2 \right]) \\
  P(\left[x_n \right] | \mu, \sigma^2) &= \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(\frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2}).
\end{align}

\subsection{(b) Maximum likelihood}
This section is basically just a lot of gross derivatives.
We want to maximise
\begin{equation}
  P(\left[x_n \right] | \mu, \sigma^2) = \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(\frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2}).
\end{equation}
Because $\log$ is monotonic, we can maximise $\log P$:
\begin{align}
  \log P(\left[x_n \right] | \mu, \sigma^2) &= \log \left[ \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N \exp(\frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2}) \right] \\
  \log P(\left[x_n \right] | \mu, \sigma^2) &= \log \left( \frac{1}{\sqrt{2 \pi \sigma^2}}\right)^N + \log \exp(\frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2}) \\
  \log P(\left[x_n \right] | \mu, \sigma^2) &= N \log \frac{1}{\sqrt{2 \pi \sigma^2}} + \frac{- S - N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \\
  \log P(\left[x_n \right] | \mu, \sigma^2) &= - \frac{N}{2} \log \sigma^2 + C - \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \\
  \log P(\left[x_n \right] | \mu, \sigma^2) &= - N \log \sigma + C - \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2},
\end{align}
where $C = - \frac{N}{2} \log 2 \pi$ is a constant that depends on neither $\mu$ nor $\sigma$.
First maximise with respect to $\mu$:
\begin{align}
  \pdv{\log P}{\mu} &= - \pdv{}{\mu} \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \\
  \pdv{\log P}{\mu} &= \frac{N \left(\mean{x} - \mu \right)}{\sigma^2},
\end{align}
which clearly reaches zero when $\mu = \mean{x}$.
Next, we look at maximising with respect to $\sigma$:
\begin{align}
  \pdv{\log P}{\sigma} &= \pdv{}{\sigma} \left[ - N \log \sigma + C - \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \right] \\
  \pdv{\log P}{\sigma} &= - \frac{N}{\sigma} - \pdv{}{\sigma} \frac{S + N \left(\mean{x} - \mu \right)^2}{2 \sigma^2} \\
  \pdv{\log P}{\sigma} &= - \frac{N}{\sigma} - \frac{S + N \left(\mean{x} - \mu \right)^2}{2} \pdv{}{\sigma} \frac{1}{\sigma^2} \\
  \pdv{\log P}{\sigma} &= - \frac{N}{\sigma} - \frac{S + N \left(\mean{x} - \mu \right)^2}{2} \frac{-2}{\sigma^3} \\
  \pdv{\log P}{\sigma} &= - \frac{N}{\sigma} + \frac{S + N \left(\mean{x} - \mu \right)^2}{\sigma^3}
\end{align}
Naturally we can now sub $\mu = \mean{x}$, giving us
\begin{align}
  \pdv{\log P}{\sigma} &= - \frac{N}{\sigma} + \frac{S}{\sigma^3}.
\end{align}
Setting this derivative to zero,
\begin{align}
  0 &= - \frac{N}{\sigma_{ML}} + \frac{S}{\sigma_{ML}^3} \\
  \frac{N}{\sigma_{ML}} &= \frac{S}{\sigma_{ML}^3} \\
  \sigma_{ML}^2 &= \frac{S}{N} \\
  \sigma_{ML} &= \sqrt{\frac{S}{N}}.
\end{align}

\newpage
\listoftodos
\end{document}