Commit

fixes
kashefy committed May 20, 2020
1 parent 0a00fe4 commit fab6977
Showing 6 changed files with 14 additions and 19 deletions.
4 changes: 2 additions & 2 deletions notes/05_infomax/0_ica_intro.tex
@@ -57,7 +57,7 @@ \section{The ICA problem}
x_1 \\ x_2
\end{array} \right)
= \left( \begin{array}{l}
- w_{11} \hat s_1 + w_{12} \hat s_2 \\ w_{21} \hat s_1 + w_{22} \hat s_2
+ w_{11} x_1 + w_{12} x_2 \\ w_{21} x_1 + w_{22} x_2
\end{array} \right)
\end{equation}
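For context, the corrected relation written compactly — a sketch that assumes the left-hand side of the full equation (outside this hunk) is the source estimate $\hat{\vec s}$ and that the $w_{ij}$ are the entries of the unmixing matrix $\vec W$ applied to the observations $\vec x$:

\begin{equation}
\hat{\vec s} = \vec W \vec x
\qquad\Leftrightarrow\qquad
\hat s_i = \sum_j w_{ij}\, x_j \,, \quad i = 1, 2\,,
\end{equation}

i.e.\ the expanded components on the right must be written in terms of the observed $x_j$, not the estimates $\hat s_j$ themselves.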

@@ -157,7 +157,7 @@ \subsection{Statistical independence}
\E \lbrack \, g(x) h(y) \, \rbrack = \E \lbrack g(x) \rbrack \, \E \lbrack h(y) \rbrack \,,
\end{equation}

- where $g(x)$ and $h(y)$ are absolutely integrable functions of $Y$ and $Y$.
+ where $g(x)$ and $h(y)$ are absolutely integrable functions of $X$ and $Y$.
}
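The factorization above is a direct consequence of statistical independence; as a supporting step (standard probability, not quoted from the notes), if the joint density factorizes then so does the expectation of a product:

\begin{equation}
p_{X,Y}(x,y) = p_X(x)\, p_Y(y)
\quad\Rightarrow\quad
\E \lbrack \, g(X) h(Y) \, \rbrack
= \int\!\!\int g(x)\, h(y)\, p_X(x)\, p_Y(y) \, dx \, dy
= \E \lbrack g(X) \rbrack \, \E \lbrack h(Y) \rbrack \,.
\end{equation}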
\only<2>{

2 changes: 1 addition & 1 deletion notes/05_infomax/1_primer_info_theory.tex
@@ -326,7 +326,7 @@ \subsection{Mutual Information}
\begin{frame}{\subsecname}

\begin{itemize}
- \item (differential) entropy $H(X)$ represents our uncertainty about $X$
+ \item (differential) entropy $h(X)$ represents our uncertainty about $X$
and
\item the conditional (differential) entropy $h(X|Y)$ represents such \textbf{after} observing $Y$.
\end{itemize}
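Together these two quantities give the mutual information; as a reminder (a standard identity stated here for context, not quoted from the hunk):

\begin{equation}
I(X;Y) = h(X) - h(X|Y) \,,
\end{equation}

the reduction in uncertainty about $X$ gained by observing $Y$; it is symmetric, so equally $I(X;Y) = h(Y) - h(Y|X)$.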
11 changes: 6 additions & 5 deletions notes/05_infomax/2_infomax.tex
@@ -269,10 +269,9 @@ \section{Approach 1: Infomax via KL-divergence for the transformed densities}
\dkl
& = \int d \, \widehat{\vec{s}} P_{\vec{s}}(\widehat{\vec{s}}) \ln \frac{P_{\vec{s}}(\widehat{\vec{s}})}{\prod_i \widehat{P}_{s_i}(\widehat{s}_i)} \slidesonly{\hspace{35mm}}\\
\notesonly{ \intertext{Using the factorization in \eqref{eq:facts}:}}
- \pause
+ \visible<2->{
& = \int d \, \widehat{\vec{s}} P_{\vec{s}}(\widehat{\vec{s}}) \ln \frac{P_{\vec{s}}(\widehat{\vec{s}})}{\widehat{P}_{\vec{s}}(\widehat{\vec{s}})}
- \slidesonly{\\
- \pause}
+ \slidesonly{\\}
\notesonly{ \intertext{Applying the density transformation:} }
%& = & \int d \, \widehat{\vec{s}} P_{\vec{s}}(\widehat{\vec{s}}) \ln
%\frac
@@ -289,8 +288,9 @@ \section{Approach 1: Infomax via KL-divergence for the transformed densities}
\Big| \frac{d \widehat{\vec u}}{d \widehat{\vec s}} \Big|
\widehat{P}_{\vec u}(\widehat{\vec u})
}
- \slidesonly{\\
- \pause}
+ }
+ \visible<3->{
+ \slidesonly{\\}
\notesonly{ \intertext{The same factorization in \eqref{eq:facts} equally applies to the transformed variables $\vec u$:} }
& = \int d \widehat{\vec{u}} P_{\vec{u}} (\widehat{\vec{u}})
\ln
@@ -313,6 +313,7 @@ \section{Approach 1: Infomax via KL-divergence for the transformed densities}
}^{\substack{\text{const.\;}\notesonly{ a \\\text{\;see \eqref{eq:dtufs}}}}}}
\bigg)
}_{\text{constant}}
+ }
\end{align}
\slidesonly{
\endgroup
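The step labelled \emph{density transformation} in this derivation relies on the change-of-variables identity; a sketch in the hunk's notation, assuming $\widehat{\vec u}$ is an invertible, differentiable function of $\widehat{\vec s}$ and writing $\big| d\widehat{\vec u} / d\widehat{\vec s} \big|$ for the absolute value of the Jacobian determinant:

\begin{equation}
P_{\vec s}(\widehat{\vec s})
= \Big| \frac{d \widehat{\vec u}}{d \widehat{\vec s}} \Big| \, P_{\vec u}(\widehat{\vec u})
\qquad\text{and}\qquad
d\widehat{\vec s} \; P_{\vec s}(\widehat{\vec s})
= d\widehat{\vec u} \; P_{\vec u}(\widehat{\vec u}) \,,
\end{equation}

which is what turns the integral over $\widehat{\vec s}$ into an integral over $\widehat{\vec u}$ while the logarithm picks up the Jacobian factor.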
2 changes: 1 addition & 1 deletion notes/05_infomax/3_cost.tex
@@ -6,7 +6,7 @@ \section{Empirical Risk Minimization}
\secname
\end{center}
\begin{center}
- Minimize the cost function using training data.
+ Optimize the cost function using training data.
\end{center}
\end{frame}
}
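A generic sketch of what optimizing \emph{using training data} amounts to — the notation here ($p$ samples, per-sample cost $e$) is assumed for illustration rather than taken from the notes: the expectation that defines the cost is replaced by an empirical average over the observed samples,

\begin{equation}
\E \lbrack \, e(\vec x; \vec W) \, \rbrack
\;\approx\;
\frac{1}{p} \sum_{\alpha=1}^{p} e\big(\vec x^{(\alpha)}; \vec W\big) \,,
\end{equation}

and it is this empirical average that gradient-based learning then operates on.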
12 changes: 3 additions & 9 deletions notes/05_infomax/4_gradient.tex
@@ -1,6 +1,6 @@


- \subsection{Learning by Gradient Ascent}
+ \section{Learning by Gradient Ascent}

\mode<presentation>{
\begin{frame}
@@ -45,9 +45,7 @@ \subsection{Learning by Gradient Ascent}

\begin{equation}
\Delta \mathrm{w}_{ij} =
- %\underbrace{ \eta }_{
- %\substack{ \text{learning} \\ \text{rate}} }
- \frac{\partial E^T}{\partial \mathrm{w}_{ij}}
+ \eta~\frac{\partial E^T}{\partial \mathrm{w}_{ij}}
\end{equation}
}
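Spelled out, the corrected expression is a plain gradient-ascent step; a sketch with the learning rate $\eta > 0$ restored by this change (the explicit update form is an assumption, not quoted from the hunk):

\begin{equation}
\mathrm{w}_{ij} \leftarrow \mathrm{w}_{ij} + \Delta \mathrm{w}_{ij}
= \mathrm{w}_{ij} + \eta \, \frac{\partial E^T}{\partial \mathrm{w}_{ij}} \,,
\end{equation}

i.e.\ each weight moves a small step in the direction that increases $E^T$.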

@@ -152,10 +150,6 @@ \subsubsection{Motivation}

\begin{frame}{\subsecname}

- \notesonly{
- The justification for natural gradient\footnote{For a proper explanation and justification for the natural gradient, \citep{amari1998natural}} vs. standard gradient:
- }
-
\only<1->{
The standard gradient\notesonly{ operates on the notion that }\slidesonly{: }the shortest distance between two points is a straight line.\\
}
@@ -200,7 +194,7 @@ \subsubsection{Standard gradient vs. natural gradient}
}
}
\only<2->{
- The natural gradient enables \emph{comparable learning steps over time}.
+ The natural gradient enables \emph{comparable learning steps over time}\footnote{For a more detailed explanation and justification for the natural gradient, see \citep{amari1998natural}}.
It allows for a more stable, and therefore more efficient and faster, learning rule (no matrix inversion of $\vec W$ is
necessary in Infomax) to do steepest ascent under a normalized step size.%\notesonly{ (cf. lecture slides 2.2.1 for details)}
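For completeness, a sketch of the natural-gradient form usually attributed to \citep{amari1998natural} for costs defined over invertible matrices $\vec W$ — stated here as background, not quoted from the notes:

\begin{equation}
\widetilde{\nabla}_{\vec W} E
= \frac{\partial E}{\partial \vec W} \, \vec W^\top \vec W \,.
\end{equation}

Right-multiplying by $\vec W^\top \vec W$ is what cancels the explicit $(\vec W^\top)^{-1}$ appearing in the standard Infomax gradient, which is why no matrix inversion is required.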

2 changes: 1 addition & 1 deletion notes/05_infomax/Makefile
@@ -12,7 +12,7 @@ slides: $(projname).slides.tex $(projname).tex
$(compile) $(projname).slides.tex
$(compile) $(projname).slides.tex
bibtex $(projname).slides
# $(compile) $(projname).slides.tex
$(compile) $(projname).slides.tex
$(compile) --interaction=batchmode $(projname).slides.tex
# $(compile) --interaction=batchmode $(projname).slides.tex
mv $(projname).slides.pdf $(targetname).slides.pdf
