Skip to content

Commit

Permalink
Merge pull request #16 from kashefy/fastica
Browse files Browse the repository at this point in the history
fastica
  • Loading branch information
kashefy authored Jun 1, 2020
2 parents a2cffa9 + ab8bcc0 commit 9a360c7
Show file tree
Hide file tree
Showing 26 changed files with 1,656 additions and 1,296 deletions.
120 changes: 120 additions & 0 deletions notes/06_fastica/0_recap_ica_whitening.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@

\section{The ICA Problem}
\begin{frame}{\secname}

independent sources: $\vec s = (s_1, s_2, \ldots, s_N)^\top \in \R^N$\\
observations: $\vec x \in \R^N$

\begin{equation}
\label{eq:ica}
\vec x = \vec A \, \vec s
\end{equation}

\begin{equation}
\widehat{\vec s} = \vec W \vec x
\end{equation}

Methods for solving the ICA problem:

\begin{itemize}
\item maximizing the \emph{mutual information} between $\vec x$ and $\vec {\hat s}$ \\
(e.g. Infomax)
\item maximizing the \emph{nongaussianity} of $\widehat {\vec s}$ \\
(e.g. Kurtosis-based ICA, FastICA)
\end{itemize}
\end{frame}

%\begin{frame}
%\underline{Outline:}
%\begin{itemize}
%\item ICA on whitened data
%\begin{itemize}
%\item Whitening/sphering
%\item Ambiguities in ICA
%\item PCA is \emph{half} the ICA Problem
%\end{itemize}
%\item the problem with gaussians
%\item maximizing nongaussianity
%\begin{itemize}
%\item Kurtosis-based
%\item negentropy
%\end{itemize}
%\end{itemize}
%\end{frame}

%\newpage

\section{Whitening revisited}

\mode<presentation>{
\begin{frame}
\begin{center} \huge
\secname
\end{center}
\end{frame}
}

\begin{frame}{\secname}

\notesonly{
The purpose of whitening is to decorrelate the data.
}

Let the data $\vec X \in \R^{N \times p}$ be centered:

\begin{equation}
\label{eq:centered}
\E \lbrack \vec x \rbrack = \vec 0
\end{equation}

\notesonly{
From this follows:
}

\begin{equation}
\label{eq:cov}
\vec \Sigma_x = \mathrm{Cov}(\vec x) = \E \lbrack \, \vec x \, \vec x^\top \rbrack
\end{equation}

The whitening transformation yields:

\begin{equation}
\label{eq:whitening}
\vec v^{(\alpha)} = \vec \Lambda^{-\frac{1}{2}} \vec M^\top \vec x^{(\alpha)}
\end{equation}

where
\begin{itemize}
\item[] $\vec M = (\vec e_1, \vec e_2, \ldots,\vec e_N)$ is the orthonormal eigenbasis of $\vec \Sigma_x$
\item[] and $\vec\Lambda$ is a diagonal matrix with the corresponding eigenvalues.
\end{itemize}

\pause

\question{What do we know about the variables in $\vec v$?}

\pause


\svspace{-5mm}

\begin{equation}
\label{eq:covw}
\vec \Sigma_v = \mathrm{Cov}(\vec v) = \E \lbrack \, \vec v \, \vec v^\top \rbrack = \vec I_N
\end{equation}

\notesonly{
Uncorrelated means zero covariance. Therefore, the covariance matrix for uncorrelated data is a diagonal matrix because it only contains the variances of the individual variables.
Whitening decorrelates the variables and normalizes the variances to 1.
}
\end{frame}

\begin{frame}{\secname}

\begin{figure}[ht]
\centering
\includegraphics[width=12cm]{img/cov.png}
\caption{A visual interpretation of whitening}
\label{fig:sphering}
\end{figure}

\end{frame}
164 changes: 164 additions & 0 deletions notes/06_fastica/1_ica_ambiguous.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@

\section{Ambiguities in ICA and limitations}
\begin{frame}{\secname}

Sources can be recovered up to:
\begin{itemize}
\item sign
\item scale
\item permutation, i.e.\ ordering
\item at most one source may be gaussian distributed (limitation)
\end{itemize}
\begin{align*}
\vec P &:= \text{arbitrary permutation matrix}\\
\vec \Lambda &:= \text{arbitrary diagonal matrix}
\end{align*}
\begin{align}
\vec x &= \vec A \, \vec s\\
\vec x &= \lbrack \, \vec A\, \vec P^{-1} \vec \Lambda^{-1}\, \rbrack \, \lbrack \, \vec \Lambda \, \vec P \, \vec s\, \rbrack
\end{align}

\end{frame}

\notesonly{
ICA cannot resolve if the mixing matrix is $\vec A$ or a permuted and/or scaled version of $\vec A$.
It can \textbf{also} not resolve if the independent sources are $\vec s$ or a permuted and/or scaled version of $\vec s$.
}

\begin{frame}{\secname}

Permutations and scaling are not an issue for ICA because permutation and scaling do not interfere with statistical independence.

\begin{equation}
P_{s_1, s_2}(\widehat {\vec s}) \eqexcl P_{s_1} (\widehat{s}_1) \cdot P_{s_2} (\widehat{s}_2)
\end{equation}

\slidesonly{
\begin{center}
\includegraphics[width=0.4\textwidth]{img/meme_doesnotinterfere}%
\end{center}
}

\end{frame}

\notesonly{
We can verify that ambiguities to scale and permutation do not interfere with statistical independence.
}

\begin{frame}{Verification}
Permutations of sources
{\footnotesize
\begin{equation}
\arraycolsep=1.4pt%\def\arraystretch{2.2}
\begin{array}{ccc}
\left( \begin{array}{ll}
\textcolor{gray}{\widehat{s}_1} \\ \widehat{s}_2
\end{array} \right)
=
\left( \begin{array}{ll}
\textcolor{gray}{\mathrm{w}_{11}} & \textcolor{gray}{\mathrm{w}_{12}} \\
\mathrm{w}_{21} & \mathrm{w}_{22}
\end{array} \right)
\left( \begin{array}{ll}
\mathrm{x}_1 \\ \mathrm{x}_2
\end{array} \right)
& \corresponds &
\left( \begin{array}{ll}
\widehat{s}_2 \\ \textcolor{gray}{\widehat{s}_1}
\end{array} \right)
=
\left( \begin{array}{ll}
\mathrm{w}_{21} & \mathrm{w}_{22} \\
\textcolor{gray}{\mathrm{w}_{11}} & \textcolor{gray}{\mathrm{w}_{12}}
\end{array} \right)
\left( \begin{array}{ll}
\mathrm{x}_1 \\ \mathrm{x}_2
\end{array} \right)
\\\\
P_{s_1} (\widehat{s}_1) \cdot P_{s_2} (\widehat{s}_2)
&&
P_{s_2} (\widehat{s}_2) \cdot P_{s_1} (\widehat{s}_1)
\end{array}
\end{equation}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Scaling of source amplitudes:

{\footnotesize
\begin{equation}
\begin{array}{ccc}
\arraycolsep=1.4pt
\left( \begin{array}{ll}
\widehat{s}_1 \\ \widehat{s}_2
\end{array} \right)
=
\left( \begin{array}{ll}
\mathrm{w}_{11} & \mathrm{w}_{12} \\
\mathrm{w}_{21} & \mathrm{w}_{22}
\end{array} \right)
\left( \begin{array}{ll}
\mathrm{x}_1 \\ \mathrm{x}_2
\end{array} \right)
& \corresponds &
\left( \begin{array}{ll}
\textcolor{gray}{a}\,\widehat{s}_1 \\
\textcolor{gray}{b}\,\widehat{s}_2
\end{array} \right)
=
\left( \begin{array}{ll}
\textcolor{gray}{a}\,\mathrm{w}_{11} & \textcolor{gray}{a}\,\mathrm{w}_{12} \\
\textcolor{gray}{b}\,\mathrm{w}_{21} & \textcolor{gray}{b}\,\mathrm{w}_{22}
\end{array} \right)
\left( \begin{array}{ll}
\mathrm{x}_1 \\ \mathrm{x}_2
\end{array} \right)
\\\\
P_{s_1} (\widehat{s}_1) \cdot P_{s_2} (\widehat{s}_2)
&&
aP_{s_1} (a\widehat{s}_1) \cdot bP_{s_2} (b\, \widehat{s}_2)
\end{array}
\end{equation}
}
\end{frame}

\subsection{Implications of the ambiguities}

\begin{frame}{\subsecname}

We can assume:
\begin{equation}
\E \lbrack \, \vec s \, \rbrack = \vec 0
\end{equation}

Subtracting the mean from $\vec x$ does not change $\vec A$:

\begin{equation}
\vec x - \E \lbrack \, \vec x \, \rbrack = \vec A \left( \vec s - \E \lbrack \, \vec s \, \rbrack \right)
\end{equation}

\notesonly{Note that} $\E \lbrack \, \vec s \, \rbrack$ and $\E \lbrack \, \vec x \, \rbrack$ are not necessarily equal.

\end{frame}

\begin{frame}{\subsecname}

We can also assume:
\begin{equation}
\mathrm{Cov}(\vec s) = \E \lbrack \, \vec s \, \vec s^\top \rbrack = \vec I_N
\end{equation}

Any scaling in $\mathrm{Cov}(\widehat{\vec s})$ can be assumed to come from $\vec A$ and can be undone.

\pause

\begin{align}
\label{eq:expxina}
\vec \Sigma_x = \mathrm{Cov}(\vec x) &= \E \lbrack \, \vec x \, \vec x^\top \rbrack\\
&= \E \lbrack \, \vec A\,\vec s \, \left( \vec A\,\vec s \right)^\top \rbrack\\
&= \vec A\; \underbrace{\E \lbrack \, \vec s \, \vec s^\top \rbrack}_{=\, \vec I_N} \, \vec A^\top\\
\label{eq:sigmax}
&= \vec A\, \vec A^\top
\end{align}

\end{frame}

Loading

0 comments on commit 9a360c7

Please sign in to comment.