Commit

fixes
kashefy committed May 20, 2020
1 parent 0a00fe4 commit fab6977
Showing 6 changed files with 14 additions and 19 deletions.
4 changes: 2 additions & 2 deletions notes/05_infomax/0_ica_intro.tex
@@ -57,7 +57,7 @@ \section{The ICA problem}
x_1 \\ x_2
\end{array} \right)
= \left( \begin{array}{l}
- w_{11} \hat s_1 + w_{12} \hat s_2 \\ w_{21} \hat s_1 + w_{22} \hat s_2
+ w_{11} x_1 + w_{12} x_2 \\ w_{21} x_1 + w_{22} x_2
\end{array} \right)
\end{equation}
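For context, the corrected relation written compactly — a sketch that assumes the left-hand side of the full equation (outside this hunk) is the source estimate $\hat{\vec s}$ and that the $w_{ij}$ are the entries of the unmixing matrix $\vec W$ applied to the observations $\vec x$:

\begin{equation}
\hat{\vec s} = \vec W \vec x
\qquad\Leftrightarrow\qquad
\hat s_i = \sum_j w_{ij}\, x_j \,, \quad i = 1, 2\,,
\end{equation}

i.e.\ the expanded components on the right must be written in terms of the observed $x_j$, not the estimates $\hat s_j$ themselves.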

@@ -157,7 +157,7 @@ \subsection{Statistical independence}
\E \lbrack \, g(x) h(y) \, \rbrack = \E \lbrack g(x) \rbrack \, \E \lbrack h(y) \rbrack \,,
\end{equation}

- where $g(x)$ and $h(y)$ are absolutely integrable functions of $Y$ and $Y$.
+ where $g(x)$ and $h(y)$ are absolutely integrable functions of $X$ and $Y$.
}
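The factorization above is a direct consequence of statistical independence; as a supporting step (standard probability, not quoted from the notes), if the joint density factorizes then so does the expectation of a product:

\begin{equation}
p_{X,Y}(x,y) = p_X(x)\, p_Y(y)
\quad\Rightarrow\quad
\E \lbrack \, g(X) h(Y) \, \rbrack
= \int\!\!\int g(x)\, h(y)\, p_X(x)\, p_Y(y) \, dx \, dy
= \E \lbrack g(X) \rbrack \, \E \lbrack h(Y) \rbrack \,.
\end{equation}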
\only<2>{

2 changes: 1 addition & 1 deletion notes/05_infomax/1_primer_info_theory.tex
@@ -326,7 +326,7 @@ \subsection{Mutual Information}
\begin{frame}{\subsecname}

\begin{itemize}
- \item (differential) entropy $H(X)$ represents our uncertainty about $X$
+ \item (differential) entropy $h(X)$ represents our uncertainty about $X$
and
\item the conditional (differential) entropy $h(X|Y)$ represents such \textbf{after} observing $Y$.
\end{itemize}
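Together these two quantities give the mutual information; as a reminder (a standard identity stated here for context, not quoted from the hunk):

\begin{equation}
I(X;Y) = h(X) - h(X|Y) \,,
\end{equation}

the reduction in uncertainty about $X$ gained by observing $Y$; it is symmetric, so equally $I(X;Y) = h(Y) - h(Y|X)$.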
11 changes: 6 additions & 5 deletions notes/05_infomax/2_infomax.tex
@@ -269,10 +269,9 @@ \section{Approach 1: Infomax via KL-divergence for the transformed densities}
\dkl
& = \int d \, \widehat{\vec{s}} P_{\vec{s}}(\widehat{\vec{s}}) \ln \frac{P_{\vec{s}}(\widehat{\vec{s}})}{\prod_i \widehat{P}_{s_i}(\widehat{s}_i)} \slidesonly{\hspace{35mm}}\\
\notesonly{ \intertext{Using the factorization in \eqref{eq:facts}:}}
- \pause
+ \visible<2->{
& = \int d \, \widehat{\vec{s}} P_{\vec{s}}(\widehat{\vec{s}}) \ln \frac{P_{\vec{s}}(\widehat{\vec{s}})}{\widehat{P}_{\vec{s}}(\widehat{\vec{s}})}
- \slidesonly{\\
- \pause}
+ \slidesonly{\\}
\notesonly{ \intertext{Applying the density transformation:} }
%& = & \int d \, \widehat{\vec{s}} P_{\vec{s}}(\widehat{\vec{s}}) \ln
%\frac
@@ -289,8 +288,9 @@ \section{Approach 1: Infomax via KL-divergence for the transformed densities}
\Big| \frac{d \widehat{\vec u}}{d \widehat{\vec s}} \Big|
\widehat{P}_{\vec u}(\widehat{\vec u})
}
- \slidesonly{\\
- \pause}
+ }
+ \visible<3->{
+ \slidesonly{\\}
\notesonly{ \intertext{The same factorization in \eqref{eq:facts} equally applies to the transformed variables $\vec u$:} }
& = \int d \widehat{\vec{u}} P_{\vec{u}} (\widehat{\vec{u}})
\ln
@@ -313,6 +313,7 @@ \section{Approach 1: Infomax via KL-divergence for the transformed densities}
}^{\substack{\text{const.\;}\notesonly{ a \\\text{\;see \eqref{eq:dtufs}}}}}}
\bigg)
}_{\text{constant}}
+ }
\end{align}
\slidesonly{
\endgroup
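The step labelled \emph{density transformation} in this derivation relies on the change-of-variables identity; a sketch in the hunk's notation, assuming $\widehat{\vec u}$ is an invertible, differentiable function of $\widehat{\vec s}$ and writing $\big| d\widehat{\vec u} / d\widehat{\vec s} \big|$ for the absolute value of the Jacobian determinant:

\begin{equation}
P_{\vec s}(\widehat{\vec s})
= \Big| \frac{d \widehat{\vec u}}{d \widehat{\vec s}} \Big| \, P_{\vec u}(\widehat{\vec u})
\qquad\text{and}\qquad
d\widehat{\vec s} \; P_{\vec s}(\widehat{\vec s})
= d\widehat{\vec u} \; P_{\vec u}(\widehat{\vec u}) \,,
\end{equation}

which is what turns the integral over $\widehat{\vec s}$ into an integral over $\widehat{\vec u}$ while the logarithm picks up the Jacobian factor.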
2 changes: 1 addition & 1 deletion notes/05_infomax/3_cost.tex
@@ -6,7 +6,7 @@ \section{Empirical Risk Minimization}
\secname
\end{center}
\begin{center}
- Minimize the cost function using training data.
+ Optimize the cost function using training data.
\end{center}
\end{frame}
}
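A generic sketch of what optimizing \emph{using training data} amounts to — the notation here ($p$ samples, per-sample cost $e$) is assumed for illustration rather than taken from the notes: the expectation that defines the cost is replaced by an empirical average over the observed samples,

\begin{equation}
\E \lbrack \, e(\vec x; \vec W) \, \rbrack
\;\approx\;
\frac{1}{p} \sum_{\alpha=1}^{p} e\big(\vec x^{(\alpha)}; \vec W\big) \,,
\end{equation}

and it is this empirical average that gradient-based learning then operates on.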
12 changes: 3 additions & 9 deletions notes/05_infomax/4_gradient.tex
@@ -1,6 +1,6 @@


- \subsection{Learning by Gradient Ascent}
+ \section{Learning by Gradient Ascent}

\mode<presentation>{
\begin{frame}
@@ -45,9 +45,7 @@ \subsection{Learning by Gradient Ascent}

\begin{equation}
\Delta \mathrm{w}_{ij} =
- %\underbrace{ \eta }_{
- %\substack{ \text{learning} \\ \text{rate}} }
- \frac{\partial E^T}{\partial \mathrm{w}_{ij}}
+ \eta~\frac{\partial E^T}{\partial \mathrm{w}_{ij}}
\end{equation}
}
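Spelled out, the corrected expression is a plain gradient-ascent step; a sketch with the learning rate $\eta > 0$ restored by this change (the explicit update form is an assumption, not quoted from the hunk):

\begin{equation}
\mathrm{w}_{ij} \leftarrow \mathrm{w}_{ij} + \Delta \mathrm{w}_{ij}
= \mathrm{w}_{ij} + \eta \, \frac{\partial E^T}{\partial \mathrm{w}_{ij}} \,,
\end{equation}

i.e.\ each weight moves a small step in the direction that increases $E^T$.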

@@ -152,10 +150,6 @@ \subsubsection{Motivation}

\begin{frame}{\subsecname}

- \notesonly{
- The justification for natural gradient\footnote{For a proper explanation and justification for the natural gradient, \citep{amari1998natural}} vs. standard gradient:
- }
-
\only<1->{
The standard gradient\notesonly{ operates on the notion that }\slidesonly{: }the shortest distance between two points is a straight line.\\
}
@@ -200,7 +194,7 @@ \subsubsection{Standard gradient vs. natural gradient}
}
}
\only<2->{
- The natural gradient enables \emph{comparable learning steps over time}.
+ The natural gradient enables \emph{comparable learning steps over time}\footnote{For a more detailed explanation and justification for the natural gradient, see \citep{amari1998natural}}.
It allows for a more stable, and therefore more efficient and faster, learning rule (no matrix inversion of $\vec W$ is
necessary in Infomax) to do steepest ascent under a normalized step size.%\notesonly{ (cf. lecture slides 2.2.1 for details)}
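For completeness, a sketch of the natural-gradient form usually attributed to \citep{amari1998natural} for costs defined over invertible matrices $\vec W$ — stated here as background, not quoted from the notes:

\begin{equation}
\widetilde{\nabla}_{\vec W} E
= \frac{\partial E}{\partial \vec W} \, \vec W^\top \vec W \,.
\end{equation}

Right-multiplying by $\vec W^\top \vec W$ is what cancels the explicit $(\vec W^\top)^{-1}$ appearing in the standard Infomax gradient, which is why no matrix inversion is required.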

2 changes: 1 addition & 1 deletion notes/05_infomax/Makefile
@@ -12,7 +12,7 @@ slides: $(projname).slides.tex $(projname).tex
$(compile) $(projname).slides.tex
$(compile) $(projname).slides.tex
bibtex $(projname).slides
# $(compile) $(projname).slides.tex
$(compile) $(projname).slides.tex
$(compile) --interaction=batchmode $(projname).slides.tex
# $(compile) --interaction=batchmode $(projname).slides.tex
mv $(projname).slides.pdf $(targetname).slides.pdf
