Skip to content

Commit

Permalink
erm
Browse files Browse the repository at this point in the history
  • Loading branch information
kashefy committed May 18, 2020
1 parent 27008ba commit 187a2d3
Showing 1 changed file with 94 additions and 9 deletions.
103 changes: 94 additions & 9 deletions notes/05_infomax/3_cost.tex
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@ \section{Empirical Risk Minimization}
Consider the following perceptron network with $N$ inputs and $N$ outputs:
\begin{figure}[ht]
\centering
\includegraphics[width=6cm]{img/section2_fig16}
\includegraphics[width=4.5cm]{img/section2_fig16}
%\caption{$N-N$ perceptron network}
\end{figure}
where
\svspace{-4mm}
\begin{equation}
\widehat{u}_i := \underbrace{
\widehat{u}_i := \widehat{f}_i
\big(
s_i
\big) = \underbrace{
\widehat{f}_i
\Big( \sum_{j=1}^{N} \mathrm{w}_{ij}
\mathrm{x}_j
Expand All @@ -33,6 +37,7 @@ \section{Empirical Risk Minimization}
}
\end{equation}
and observations:
\svspace{-4mm}
\begin{equation}
\vec{x}^{(\alpha)} \in \mathbb{R}^N,
\quad \alpha = 1, \ldots, p
Expand All @@ -41,6 +46,8 @@ \section{Empirical Risk Minimization}

\end{frame}

\begin{frame}

Deriving the cost function for this network to find the Infomax solution:
\begin{equation}
\label{eq:conservationvec}
Expand All @@ -57,27 +64,69 @@ \section{Empirical Risk Minimization}
& = \frac{P_{\vec{x}}(\vec{x})}{|\det \vec{J}\,|}
\end{align}
with elements of the Jacobian $\vec J$ given as
\slidesonly{
\begingroup
\small
}
\begin{align}
\label{eq:jacobelement}
J_{ij}=
\frac{\partial \widehat{u}_i}{\partial \mathrm{x}_j}
& = \frac{\partial}{\partial \mathrm{x}_j}
\widehat{f}_i \bigg( \sum\limits_{k = 1}^N \mathrm{w}_{ik}
\mathrm{x}_k \bigg) \\
& = \mathrm{w}_{ij} \widehat{f}_i^{'} \bigg( \sum\limits_{k = 1}^N
\mathrm{w}_{ik} \mathrm{x}_k \bigg) \notesonly{\\
&} = \mathrm{w}_{ij} \widehat{f}_i^{'} \bigg( \sum\limits_{k = 1}^N
\mathrm{w}_{ik} \mathrm{x}_k \bigg).
\end{align}
\slidesonly{
\endgroup
}

We therefore obtain for the value of the Jacobian determinant
\slidesonly{
\begingroup
\footnotesize
}
\begin{equation} \label{eq:functionalDeterminant}
|\det \vec {J}\,| =
\Big| \det \frac{\partial \widehat{\vec{u}}}{\partial \vec{x}} \Big|
= |\det \vec{W}\, | \prod\limits_{l = 1}^N \widehat{f}_l^{'} \Bigg(
\sum\limits_{k = 1}^N \mathrm{w}_{lk} \mathrm{x}_k \Bigg).
\end{equation}
\slidesonly{
\endgroup
}

\end{frame}

\clearpage

Inserting \eqref{eq:conservationvec} and \eqref{eq:uxj} into the Infomax cost function from \eqref{eq:infomax} gives
\begin{frame}

\slidesonly{
\visible<1->{
\vspace{-7mm}
\hspace{8.0cm}
\StickyNote[1.7cm]{
\begingroup
\scriptsize
\begin{equation}
%\label{eq:conservationvec}
P_{\vec{u}} (\widehat{\vec{u}}) d \widehat{\vec{u}}
= P_{\vec{x}}(\vec{x}) d \vec{x}
\end{equation}
\begin{equation}
%\label{eq:uxj}
P_{\vec{u}} (\widehat{\vec{u}})
= \frac{P_{\vec{x}}(\vec{x})}{|\det \vec{J}\,|}
\end{equation}
\endgroup
}[3.cm] % width
\vspace{-22mm}
}
}

\notesonly{Inserting \eqref{eq:conservationvec} and \eqref{eq:uxj} into the Infomax cost function from \eqref{eq:infomax} gives}
\begin{eqnarray}
H & = & -\int d \widehat{\vec{u}} P_{\vec{u}} (\widehat{\vec{u}})
\ln P_{\vec{u}} (\widehat{\vec{u}}) \\
Expand All @@ -89,14 +138,45 @@ \section{Empirical Risk Minimization}
}_{ \text{constant w.r.t. } \vec W }
+ \int d \vec{x} P_{\vec{x}} (\vec{x}) \ln |\det \vec{J}\,|
\end{eqnarray}
and with \eqref{eq:functionalDeterminant} we can formulate the cost
in terms that explicitly depend on $\vec W$ and its components:
and with \notesonly{\eqref{eq:functionalDeterminant}}
\slidesonly{
\begingroup
\footnotesize
}
\begin{equation} \label{eq:functionalDeterminant}
|\det \vec {J}\,| =
\Big| \det \frac{\partial \widehat{\vec{u}}}{\partial \vec{x}} \Big|
= |\det \vec{W}\, | \prod\limits_{l = 1}^N \widehat{f}_l^{'} \Bigg(
\sum\limits_{k = 1}^N \mathrm{w}_{lk} \mathrm{x}_k \Bigg).
\end{equation}
\slidesonly{
\endgroup
}
we can formulate the cost
in terms that depend on components in $\vec W$:
\begin{equation}
H =~\text{const.} \, + \; \ln |\det \vec{W}\,| \underbrace{\int d \vec{x} P_{\vec{x}} (\vec{x})}_{=\,1}
+ \int d \vec{x} P_{\vec{x}} (\vec{x}) \sum\limits_{l = 1}^N
\ln \widehat{f}_l^{'} \Bigg( \sum\limits_{k = 1}^N
\mathrm{w}_{lk} \mathrm{x}_k \Bigg).
\end{equation}

\end{frame}

\begin{frame}{Generalization cost}

\only<1>{
\slidesonly{
\begin{equation}
H = const. \, + \; \ln |\det \vec{W}\,| \underbrace{\int d \vec{x} P_{\vec{x}} (\vec{x})}_{=\,1}
H =~\text{const.} \, + \; \ln |\det \vec{W}\,| \underbrace{\int d \vec{x} P_{\vec{x}} (\vec{x})}_{=\,1}
+ \int d \vec{x} P_{\vec{x}} (\vec{x}) \sum\limits_{l = 1}^N
\ln \widehat{f}_l^{'} \Bigg( \sum\limits_{k = 1}^N
\mathrm{w}_{lk} \mathrm{x}_k \Bigg).
\end{equation}
}
}


This enables us to define the generalization cost $E^G$ for model selection:
\begin{equation} \tag{generalization cost}
E^G = \ln |\det \vec W\,| + \int d \vec{x} P_{\vec{x}} (\vec{x})
Expand All @@ -105,6 +185,8 @@ \section{Empirical Risk Minimization}
\mathrm{w}_{lk} \mathrm{x}_k \Bigg)
\Bigg\}
\end{equation}

\only<2>{
The \emph{principle of empirical risk minimization} (in our particular case maximization) allows
\begin{center}
mathematical expectation $E^G \longrightarrow$ empirical average $E^T$
Expand All @@ -118,7 +200,10 @@ \section{Empirical Risk Minimization}
\end{equation}
can be used for model selection using empirical data
\begin{equation}
E^T \eqexcl \max
E^T \eqexcl \max_{\vec W}
\end{equation}
}

\end{frame}

\newpage

0 comments on commit 187a2d3

Please sign in to comment.