diff --git a/notes/05_infomax/3_cost.tex b/notes/05_infomax/3_cost.tex index 2113712..55196a4 100644 --- a/notes/05_infomax/3_cost.tex +++ b/notes/05_infomax/3_cost.tex @@ -16,12 +16,16 @@ \section{Empirical Risk Minimization} Consider the following perceptron network with $N$ inputs and $N$ outputs: \begin{figure}[ht] \centering -\includegraphics[width=6cm]{img/section2_fig16} +\includegraphics[width=4.5cm]{img/section2_fig16} %\caption{$N-N$ perceptron network} \end{figure} where +\svspace{-4mm} \begin{equation} -\widehat{u}_i := \underbrace{ +\widehat{u}_i := \widehat{f}_i + \big( + s_i + \big) = \underbrace{ \widehat{f}_i \Big( \sum_{j=1}^{N} \mathrm{w}_{ij} \mathrm{x}_j @@ -33,6 +37,7 @@ \section{Empirical Risk Minimization} } \end{equation} and observations: +\svspace{-4mm} \begin{equation} \vec{x}^{(\alpha)} \in \mathbb{R}^N, \quad \alpha = 1, \ldots, p @@ -41,6 +46,8 @@ \section{Empirical Risk Minimization} \end{frame} +\begin{frame} + Deriving the cost function for this network to find the Infomax solution: \begin{equation} \label{eq:conservationvec} @@ -57,27 +64,69 @@ \section{Empirical Risk Minimization} & = \frac{P_{\vec{x}}(\vec{x})}{|\det \vec{J}\,|} \end{align} with elements of the Jacobian $\vec J$ given as +\slidesonly{ + \begingroup + \small +} \begin{align} \label{eq:jacobelement} J_{ij}= \frac{\partial \widehat{u}_i}{\partial \mathrm{x}_j} & = \frac{\partial}{\partial \mathrm{x}_j} \widehat{f}_i \bigg( \sum\limits_{k = 1}^N \mathrm{w}_{ik} - \mathrm{x}_k \bigg) \\ - & = \mathrm{w}_{ij} \widehat{f}_i^{'} \bigg( \sum\limits_{k = 1}^N + \mathrm{x}_k \bigg) \notesonly{\\ + &} = \mathrm{w}_{ij} \widehat{f}_i^{'} \bigg( \sum\limits_{k = 1}^N \mathrm{w}_{ik} \mathrm{x}_k \bigg). 
\end{align} +\slidesonly{ + \endgroup +} + We therefore obtain for the value of the Jacobian determinant +\slidesonly{ + \begingroup + \footnotesize +} \begin{equation} \label{eq:functionalDeterminant} |\det \vec {J}\,| = \Big| \det \frac{\partial \widehat{\vec{u}}}{\partial \vec{x}} \Big| = |\det \vec{W}\, | \prod\limits_{l = 1}^N \widehat{f}_l^{'} \Bigg( \sum\limits_{k = 1}^N \mathrm{w}_{lk} \mathrm{x}_k \Bigg). \end{equation} +\slidesonly{ + \endgroup +} + +\end{frame} \clearpage -Inserting \eqref{eq:conservationvec} and \eqref{eq:uxj} into the Infomax cost function from \eqref{eq:infomax} gives +\begin{frame} + +\slidesonly{ +\visible<1->{ +\vspace{-7mm} +\hspace{8.0cm} +\StickyNote[1.7cm]{ + \begingroup + \scriptsize +\begin{equation} +%\label{eq:conservationvec} + P_{\vec{u}} (\widehat{\vec{u}}) d \widehat{\vec{u}} + = P_{\vec{x}}(\vec{x}) d \vec{x} +\end{equation} +\begin{equation} +%\label{eq:uxj} + P_{\vec{u}} (\widehat{\vec{u}}) + = \frac{P_{\vec{x}}(\vec{x})}{|\det \vec{J}\,|} +\end{equation} + \endgroup +}[3.cm] % width +\vspace{-22mm} +} +} + +\notesonly{Inserting \eqref{eq:conservationvec} and \eqref{eq:uxj} into the Infomax cost function from \eqref{eq:infomax} gives} \begin{eqnarray} H & = & -\int d \widehat{\vec{u}} P_{\vec{u}} (\widehat{\vec{u}}) \ln P_{\vec{u}} (\widehat{\vec{u}}) \\ @@ -89,14 +138,45 @@ \section{Empirical Risk Minimization} }_{ \text{constant w.r.t. 
} \vec W } + \int d \vec{x} P_{\vec{x}} (\vec{x}) \ln |\det \vec{J}\,| \end{eqnarray} -and with \eqref{eq:functionalDeterminant} we can formulate the cost -in terms that explicitly depend on $\vec W$ and its components: +and with \notesonly{\eqref{eq:functionalDeterminant}} +\slidesonly{ + \begingroup + \footnotesize +} +\begin{equation} \label{eq:functionalDeterminant} +|\det \vec {J}\,| = + \Big| \det \frac{\partial \widehat{\vec{u}}}{\partial \vec{x}} \Big| + = |\det \vec{W}\, | \prod\limits_{l = 1}^N \widehat{f}_l^{'} \Bigg( + \sum\limits_{k = 1}^N \mathrm{w}_{lk} \mathrm{x}_k \Bigg). +\end{equation} +\slidesonly{ + \endgroup +} +we can formulate the cost +in terms that depend on components in $\vec W$: +\begin{equation} + H =~\text{const.} \, + \; \ln |\det \vec{W}\,| \underbrace{\int d \vec{x} P_{\vec{x}} (\vec{x})}_{=\,1} + + \int d \vec{x} P_{\vec{x}} (\vec{x}) \sum\limits_{l = 1}^N + \ln \widehat{f}_l^{'} \Bigg( \sum\limits_{k = 1}^N + \mathrm{w}_{lk} \mathrm{x}_k \Bigg). +\end{equation} + +\end{frame} + +\begin{frame}{Generalization cost} + +\only<1>{ +\slidesonly{ \begin{equation} - H = const. \, + \; \ln |\det \vec{W}\,| \underbrace{\int d \vec{x} P_{\vec{x}} (\vec{x})}_{=\,1} + H =~\text{const.} \, + \; \ln |\det \vec{W}\,| \underbrace{\int d \vec{x} P_{\vec{x}} (\vec{x})}_{=\,1} + \int d \vec{x} P_{\vec{x}} (\vec{x}) \sum\limits_{l = 1}^N \ln \widehat{f}_l^{'} \Bigg( \sum\limits_{k = 1}^N \mathrm{w}_{lk} \mathrm{x}_k \Bigg). 
\end{equation} +} +} + + This enables us to define the generalization cost $E^G$ for model selection: \begin{equation} \tag{generalization cost} E^G = \ln |\det \vec W\,| + \int d \vec{x} P_{\vec{x}} (\vec{x}) @@ -105,6 +185,8 @@ \section{Empirical Risk Minimization} \mathrm{w}_{lk} \mathrm{x}_k \Bigg) \Bigg\} \end{equation} + +\only<2>{ The \emph{principle of empirical risk minimization} (in our particular case maximization) allows \begin{center} mathematical expectation $E^G \longrightarrow$ empirical average $E^T$ \end{center} @@ -118,7 +200,10 @@ \section{Empirical Risk Minimization} \end{equation} can be used for model selection using empirical data \begin{equation} -E^T \eqexcl \max +E^T \eqexcl \max_{\vec W} \end{equation} +} + +\end{frame} \newpage