Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fastica #16

Merged
merged 7 commits into from
Jun 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions notes/06_fastica/0_recap_ica_whitening.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@

\section{The ICA Problem}
\begin{frame}{\secname}

independent sources: $\vec s = (s_1, s_2, \ldots, s_N)^\top \in \R^N$\\
observations: $\vec x \in \R^N$

\begin{equation}
\label{eq:ica}
\vec x = \vec A \, \vec s
\end{equation}

\begin{equation}
\widehat{\vec s} = \vec W \vec x
\end{equation}

Methods for solving the ICA problem:

\begin{itemize}
\item maximizing the \emph{mutual information} between $\vec x$ and $\vec {\hat s}$ \\
(e.g. Infomax)
\item maximizing the \emph{nongaussianity} of $\widehat {\vec s}$ \\
(e.g. Kurtosis-based ICA, FastICA)
\end{itemize}
\end{frame}

%\begin{frame}
%\underline{Outline:}
%\begin{itemize}
%\item ICA on whitened data
%\begin{itemize}
%\item Whitening/sphering
%\item Ambiguities in ICA
%\item PCA is \emph{half} the ICA Problem
%\end{itemize}
%\item the problem with gaussians
%\item maximizing nongaussianity
%\begin{itemize}
%\item Kurtosis-based
%\item negentropy
%\end{itemize}
%\end{itemize}
%\end{frame}

%\newpage

\section{Whitening revisited}

\mode<presentation>{
\begin{frame}
\begin{center} \huge
\secname
\end{center}
\end{frame}
}

\begin{frame}{\secname}

\notesonly{
The purpose of whitening is to decorrelate the data.
}

Let the data $\vec X \in \R^{N \times p}$ be centered:

\begin{equation}
\label{eq:centered}
\E \lbrack \vec x \rbrack = \vec 0
\end{equation}

\notesonly{
From this follows:
}

\begin{equation}
\label{eq:cov}
\vec \Sigma_x = \mathrm{Cov}(\vec x) = \E \lbrack \, \vec x \, \vec x^\top \rbrack
\end{equation}

The whitening transformation yields:

\begin{equation}
\label{eq:whitening}
\vec v^{(\alpha)} = \vec \Lambda^{-\frac{1}{2}} \vec M^\top \vec x^{(\alpha)}
\end{equation}

where
\begin{itemize}
\item[] $\vec M = (\vec e_1, \vec e_2, \ldots,\vec e_N)$ is the orthonormal eigenbasis of $\vec \Sigma_x$
\item[] and $\vec\Lambda$ is a diagonal matrix with the corresponding eigenvalues.
\end{itemize}

\pause

\question{What do we know about the variables in $\vec v$?}

\pause


\svspace{-5mm}

\begin{equation}
\label{eq:covw}
\vec \Sigma_v = \mathrm{Cov}(\vec v) = \E \lbrack \, \vec v \, \vec v^\top \rbrack = \vec I_N
\end{equation}

\notesonly{
Uncorrelated means zero covariance. Therefore, the covariance matrix for uncorrelated data is a diagonal matrix because it only contains the variances of the individual variables.
Whitening decorrelates the variables and normalizes the variances to 1.
}
\end{frame}

\begin{frame}{\secname}

\begin{figure}[ht]
\centering
\includegraphics[width=12cm]{img/cov.png}
\caption{A visual interpretation of whitening}
\label{fig:sphering}
\end{figure}

\end{frame}
164 changes: 164 additions & 0 deletions notes/06_fastica/1_ica_ambiguous.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@

\section{Ambiguities in ICA and limitations}
\begin{frame}{\secname}

Sources can be recovered up to:
\begin{itemize}
\item sign
\item scale
\item permutation, i.e.\ ordering
\item limitation: at most one source may be gaussian distributed
\end{itemize}
\begin{align*}
\vec P &:= \text{arbitrary permutation matrix}\\
\vec \Lambda &:= \text{arbitrary diagonal matrix}
\end{align*}
\begin{align}
\vec x &= \vec A \, \vec s\\
\vec x &= \lbrack \, \vec A\, \vec P^{-1} \vec \Lambda^{-1}\, \rbrack \, \lbrack \, \vec \Lambda \, \vec P \, \vec s\, \rbrack
\end{align}

\end{frame}

\notesonly{
ICA cannot resolve if the mixing matrix is $\vec A$ or a permuted and/or scaled version of $\vec A$.
It can \textbf{also} not resolve if the independent sources are $\vec s$ or a permuted and/or scaled version of $\vec s$.
}

\begin{frame}{\secname}

Permutations and scaling are not an issue for ICA because they do not interfere with statistical independence.

\begin{equation}
P_{s_1, s_2}(\widehat {\vec s}) \eqexcl P_{s_1} (\widehat{s}_1) \cdot P_{s_2} (\widehat{s}_2)
\end{equation}

\slidesonly{
\begin{center}
\includegraphics[width=0.4\textwidth]{img/meme_doesnotinterfere}%
\end{center}
}

\end{frame}

\notesonly{
We can verify that ambiguities with respect to scale and permutation do not interfere with statistical independence.
}

\begin{frame}{Verification}
Permutations of sources
{\footnotesize
\begin{equation}
\arraycolsep=1.4pt%\def\arraystretch{2.2}
\begin{array}{ccc}
\left( \begin{array}{ll}
\textcolor{gray}{\widehat{s}_1} \\ \widehat{s}_2
\end{array} \right)
=
\left( \begin{array}{ll}
\textcolor{gray}{\mathrm{w}_{11}} & \textcolor{gray}{\mathrm{w}_{12}} \\
\mathrm{w}_{21} & \mathrm{w}_{22}
\end{array} \right)
\left( \begin{array}{ll}
\mathrm{x}_1 \\ \mathrm{x}_2
\end{array} \right)
& \corresponds &
\left( \begin{array}{ll}
\widehat{s}_2 \\ \textcolor{gray}{\widehat{s}_1}
\end{array} \right)
=
\left( \begin{array}{ll}
\mathrm{w}_{21} & \mathrm{w}_{22} \\
\textcolor{gray}{\mathrm{w}_{11}} & \textcolor{gray}{\mathrm{w}_{12}}
\end{array} \right)
\left( \begin{array}{ll}
\mathrm{x}_1 \\ \mathrm{x}_2
\end{array} \right)
\\\\
P_{s_1} (\widehat{s}_1) \cdot P_{s_2} (\widehat{s}_2)
&&
P_{s_2} (\widehat{s}_2) \cdot P_{s_1} (\widehat{s}_1)
\end{array}
\end{equation}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Scaling of source amplitudes:

{\footnotesize
\begin{equation}
\begin{array}{ccc}
\arraycolsep=1.4pt
\left( \begin{array}{ll}
\widehat{s}_1 \\ \widehat{s}_2
\end{array} \right)
=
\left( \begin{array}{ll}
\mathrm{w}_{11} & \mathrm{w}_{12} \\
\mathrm{w}_{21} & \mathrm{w}_{22}
\end{array} \right)
\left( \begin{array}{ll}
\mathrm{x}_1 \\ \mathrm{x}_2
\end{array} \right)
& \corresponds &
\left( \begin{array}{ll}
\textcolor{gray}{a}\,\widehat{s}_1 \\
\textcolor{gray}{b}\,\widehat{s}_2
\end{array} \right)
=
\left( \begin{array}{ll}
\textcolor{gray}{a}\,\mathrm{w}_{11} & \textcolor{gray}{a}\,\mathrm{w}_{12} \\
\textcolor{gray}{b}\,\mathrm{w}_{21} & \textcolor{gray}{b}\,\mathrm{w}_{22}
\end{array} \right)
\left( \begin{array}{ll}
\mathrm{x}_1 \\ \mathrm{x}_2
\end{array} \right)
\\\\
P_{s_1} (\widehat{s}_1) \cdot P_{s_2} (\widehat{s}_2)
&&
aP_{s_1} (a\widehat{s}_1) \cdot bP_{s_2} (b\, \widehat{s}_2)
\end{array}
\end{equation}
}
\end{frame}

\subsection{Implications of the ambiguities}

\begin{frame}{\subsecname}

We can assume:
\begin{equation}
\E \lbrack \, \vec s \, \rbrack = \vec 0
\end{equation}

Subtracting the mean from $\vec x$ does not change $\vec A$:

\begin{equation}
\vec x - \E \lbrack \, \vec x \, \rbrack = \vec A \left( \vec s - \E \lbrack \, \vec s \, \rbrack \right)
\end{equation}

\notesonly{Note that} $\E \lbrack \, \vec s \, \rbrack$ and $\E \lbrack \, \vec x \, \rbrack$ are not necessarily equal.

\end{frame}

\begin{frame}{\subsecname}

We can also assume:
\begin{equation}
\mathrm{Cov}(\vec s) = \E \lbrack \, \vec s \, \vec s^\top \rbrack = \vec I_N
\end{equation}

Any scaling in $\mathrm{Cov}(\widehat{\vec s})$ can be assumed to come from $\vec A$ and can be undone.

\pause

\begin{align}
\label{eq:expxina}
\vec \Sigma_x = \mathrm{Cov}(\vec x) &= \E \lbrack \, \vec x \, \vec x^\top \rbrack\\
&= \E \lbrack \, \vec A\,\vec s \, \left( \vec A\,\vec s \right)^\top \rbrack\\
&= \vec A\; \underbrace{\E \lbrack \, \vec s \, \vec s^\top \rbrack}_{= \vec I_N} \, \vec A^\top\\
\label{eq:sigmax}
&= \vec A\, \vec A^\top
\end{align}

\end{frame}

Loading