diff --git a/latex/headerMIslides.tex b/latex/headerMIslides.tex index 7030e27..fab550c 100644 --- a/latex/headerMIslides.tex +++ b/latex/headerMIslides.tex @@ -28,6 +28,27 @@ \tikzset{myrect/.style={rectangle, fill=#1!20, draw=#1!75, text=black}} \tikzstyle{axes}=[] +% sticky note +\usepackage{xparse} +\usetikzlibrary{shadows} +\definecolor{myyellow}{RGB}{242,226,149} +\NewDocumentCommand\StickyNote{O{6cm}mO{6cm}}{% +\begin{tikzpicture} +\node[ +drop shadow={ + shadow xshift=2pt, + shadow yshift=-4pt +}, +inner xsep=7pt, +fill=myyellow, +xslant=0.05, +yslant=-0.05, +inner ysep=10pt +] +{\parbox[t][#1][c]{#3}{#2}}; +\end{tikzpicture}% +} + % from tex.stackexchange useful to align substacks \makeatletter diff --git a/notes/02_online-pca/2_apply-online-pca.tex b/notes/02_online-pca/2_apply-online-pca.tex index a6514f9..c75ea0b 100644 --- a/notes/02_online-pca/2_apply-online-pca.tex +++ b/notes/02_online-pca/2_apply-online-pca.tex @@ -44,14 +44,14 @@ \subsection{Novelty filter} } } +\notesonly{- Yes by using the \emph{Anti-Hebbian rule}} + \end{frame} -\subsubsecname{Novelty Filter with normalization} +\subsubsection{Novelty Filter with normalization} \begin{frame}\frametitle{\subsubsecname} -\notesonly{- Yes by using the} - \begin{block}{Anti-Hebbian rule:} \begin{equation} \Delta w_j = \overbrace{-}^{\substack{ \text{``Anti''-} \\ \text{Hebbian} }} \varepsilon y^{(\alpha)} x_j^{(\alpha)} @@ -200,7 +200,7 @@ \section{PCA vs. online PCA} \only<2>{ \slidesonly{ \begin{center} - \includegraphics[width=0.2\textwidth]{img/mem_pca_vs_online_pca}% + \includegraphics[width=0.2\textwidth]{img/meme_pca_vs_online_pca}% \end{center} } } diff --git a/notes/02_online-pca/Makefile b/notes/02_online-pca/Makefile index bc2ba48..1b6e9d6 100644 --- a/notes/02_online-pca/Makefile +++ b/notes/02_online-pca/Makefile @@ -12,6 +12,7 @@ slides: $(projname).slides.tex $(projname).tex $(compile) $(projname).slides.tex bibtex $(projname).slides $(compile) $(projname).slides.tex + $(compile) $(projname).slides.tex # $(compile) --interaction=batchmode $(projname).slides.tex mv $(projname).slides.pdf $(targetname).slides.pdf @@ -24,6 +25,7 @@ notes: $(projname).notes.tex $(projname).tex $(compile) $(projname).notes.tex bibtex $(projname).notes $(compile) $(projname).notes.tex + $(compile) $(projname).notes.tex # $(compile) --interaction=batchmode $(projname).notes.tex mv $(projname).notes.pdf $(targetname).notes.pdf diff --git a/notes/02_online-pca/img/mem_pca_vs_online_pca.jpeg b/notes/02_online-pca/img/meme_pca_vs_online_pca.jpeg similarity index 100% rename from notes/02_online-pca/img/mem_pca_vs_online_pca.jpeg rename to notes/02_online-pca/img/meme_pca_vs_online_pca.jpeg diff --git a/notes/03_kernel-pca/0_recap.tex b/notes/03_kernel-pca/0_recap.tex new file mode 100644 index 0000000..46580fc --- /dev/null +++ b/notes/03_kernel-pca/0_recap.tex @@ -0,0 +1,42 @@ + +\section{linear PCA: Recap} + +\begin{frame}\frametitle{\secname} +\begin{itemize} +\item Requires \pause + + centering the data. +\only<2>{ +\slidesonly{ +\begin{center} + \includegraphics[width=0.2\textwidth]{img/mem_notthisagain}% +\end{center} +} +} + +\pause + +\item Eigenvalue problem: $\vec C\,\vec e = \lambda \vec e$ +\item limited to \underline{linear} correlations +\end{itemize} + + +\begin{center} + \includegraphics[width=0.5\textwidth]{img/scatter}% + \captionof{figure}{linear vs. 
non-linear correlations} +\end{center} + +Kernel PCA for finding non-linear features.\\ + + +\end{frame} + +\begin{frame}\frametitle{What is Kernel PCA about?} + +Don't panic! Kernel PCA is essentially standard linear PCA applied to a non-linearly transformed version of the data. + +\begin{center} + \includegraphics[width=0.3\textwidth]{img/koffer}% +\end{center} + +\end{frame} diff --git a/notes/03_kernel-pca/1_kernel-pca.tex b/notes/03_kernel-pca/1_kernel-pca.tex deleted file mode 100644 index 611f96a..0000000 --- a/notes/03_kernel-pca/1_kernel-pca.tex +++ /dev/null @@ -1,431 +0,0 @@ - -\section{linear PCA: Recap} -\begin{itemize} -\item Requires centering the data. -\item Eigenvalue problem: $\vec C\,\vec e = \lambda \vec e$ -\item \underline{limited to linear correlations} -\end{itemize} - -Kernel PCA for finding non-linear features. - -Don't panic! Kernel PCA is essentially standard linear PCA applied to a non-linearly transformed version of the data. - -\section{Non-linear transformations} - -We use $N$ to denote the dimensionality of our input space and $p$ to denote the number of observations.\\ -Let $\vec x^{(\alpha)} \in \R^N$ and $\alpha = 1, \ldots, p$. - -The following mapping describes the non-linear transformation of each observation: -$$ -\vec{\phi}: \vec{x} \mapsto \vec{\phi}_{(\vec{x})} -$$ - -An example of such a transformation - 2\textsuperscript{nd}-order monomials: -$$ -\vec{\phi}_{(\vec{x})} = ( - 1, \; - \mathrm{x}_1, \; - \mathrm{x}_2, \; - \ldots \; - \mathrm{x}_N, \; - \mathrm{x}_1^2, \; - \mathrm{x}_1 \mathrm{x}_2, \; - \mathrm{x}_2^2, \; - \mathrm{x}_1 \mathrm{x}_3, \; - \mathrm{x}_2 \mathrm{x}_3, \; - \mathrm{x}_3^2, \; \ldots, \; - \mathrm{x}_N^2 - )^\top -$$ - -We actually don't need to define this transformation. -All we need to know is that the dimensionality of $\vec{\phi}_{(\vec{x})}$ can be larger than $N$, possibly infinitely large.\\ - -\underline{The purpose of non-linear transformations:} - -Two or more components in the original $\vec x$ (e.g. $x_1$, $x_2$) could have non-linear correlations (e.g. plotting those two components reveals a parabola). -Expanding the dimensionality of $\vec x$ through the above mapping introduces new dimensions in which correlations between the components in $\vec \phi_{(\vec x)}$ become \emph{linear}. - -\textbf{Caveat}:\\ -Directly applying this transformation on a single observation is not applicable. -We might never find a transformation that causes all non-linear correlations within $\vec x$ to become linear. - -We turn to the ``kernel trick'' to solve this problem. - -\newpage - -\section{The kernel trick} - -Representing a non-linear transformation using inner products of the data: -\begin{equation} - \label{eq:trick} - \vec{\phi}_{(\vec{x})}^\top - \vec{\phi}_{(\vec{x}')} = - k(\vec{x}, \vec{x}') -\end{equation} - -where $k(\vec{x}, \vec{x}')$ is a kernel function applied -to any two pairs of observations. - -Applying the kernel function to \emph{each} pair in our dataset; \\ -$\vec x^{(\alpha)}$ and $\vec{x}^{(\beta)}$ -with $\alpha, \beta = 1, \ldots, p$ yields the scalar $K_{\alpha \beta}$. -Storing all scalars $K_{\alpha \beta} \; \forall (\alpha,\beta)$ yields -the un-normalized kernel matrix $\widetilde {\vec K}=\{K_{\alpha \beta}\}$: - -$$ -\widetilde {\vec K} = -\rmat{ -K_{11} & K_{12} & \ldots & K_{1p} \\ -K_{21} & K_{12} & \ldots & K_{2p} \\ -\vdots & & \ddots\\ -K_{p1} & & & K_{pp} -} -$$ - -$\vec K$ (without the ``\textasciitilde'') denotes the normalized or ``centered'' kernel matrix. 
- -\question{What is the dimensionality of $\widetilde {\vec K}$?} - -\question{Is $K_{\alpha \beta}$ sensitive to translation and rotation of the data?} - -- No. $K_{\alpha \beta}$ is the pairwise relation between two observations. -Rotating the data or translating it will result in the same $K_{\alpha \beta}$. -However, scaling the data while keeping the kernel function fixed would produce a different $K_{\alpha \beta}$. - -\newpage - -\section{Kernel PCA} - -We apply standard linear PCA on the \emph{transformed} version of the data -$ -\left\{ -\vec{\phi}_{(\vec{x}^{(\alpha)})} -\right\}_{\alpha=1}^{p} -$. -We will first assume we have $\vec{\phi}_{(\vec{x})}$ -but we will eventually turn to $K_{\alpha \beta}$ -which we can actually obtain.\\ - -\underline{Remark:} -A difference between these notes and the lecture slides is that -the lecture slides employ ``identity'' as the mapping. -This is why you don't see $\phi$ in the derivations of Kernel PCA in the slides but rather see $\vec x$ used directly.\\ - -\begin{enumerate} -\item PCA assumes its input is centered. -$\frac{1}{p} \sum^{p}_{\alpha=1} \vec{\phi}_{(\vec{x}^{(\alpha)})}$\\ -Centering $\vec X$ does not guranatee it stays centered after transformation. -Therefore, there is no need to center $\vec X$ beforehand. - -\item Compute the covariance matrix $\vec C_{\phi}$ for $\vec{\phi}_{(\vec{x})}$: - - -\begin{equation} \label{eq:cov} -\vec C_{\phi} = \frac{1}{p} \sum_{\alpha=1}^{p} \vec{\phi}_{(\vec{x}^{(\alpha)})} \vec{\phi}^{\top}_{(\vec{x}^{(\alpha)})} -\end{equation} - -\item Solve the eigenvalue problem: - -\begin{equation} \label{eq:eig} -\vec C_{\phi} \, \vec e = \lambda \vec e -\end{equation} - -Each eigenvector $\vec e_i$ with corresponding $\lambda_i \ne 0$ lies in the span of -$ -\left\{ -\vec{\phi}_{(\vec{x}^{(\alpha)})} -\right\}_{\alpha=1}^{p}. -$ - -Consequently, there exists a set of coefficients (i.e. a coefficient for each transformed observation) -$ -\left\{ -a^{(\alpha)} -\right\}_{\alpha=1}^{p} -$, which satisfies the following: - -\begin{equation} -\label{eq:ephi} -\vec e = \sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})} -\end{equation} - -Eq.\ref{eq:ephi} tells us that we can describe $\vec e$ in terms of the transformed observations (a weighted summation of $\phi$'s). - The use of the index $\beta$ is only to avoid collisions with $\alpha$ later. - -Substituting Eq.\ref{eq:cov} and Eq.\ref{eq:ephi} into the eignevalue problem Eq.\ref{eq:eig}: - -\begin{equation*} -\underbrace{\frac{1}{p} \sum_{\alpha=1}^{p} \vec{\phi}_{(\vec{x}^{(\alpha)})} \vec{\phi}^{\top}_{(\vec{x}^{(\alpha)})} -}_{=\,\vec C_{\phi}} - \, -\underbrace{\sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})}}_{=\,\vec e} - = \lambda \;\, -\underbrace{\sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})}}_{=\,\vec e} -\end{equation*} - -After rearranging the terms we get: -\begin{equation} \label{eq:eig2} -\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} -a^{(\beta)} \vec{\phi}_{(\vec{x}^{(\alpha)})} -\underbrace{ - \vec{\phi}^{\top}_{(\vec{x}^{(\alpha)})} \, \vec{\phi}_{(\vec{x}^{(\beta)})} -}_{\substack{\text{scalar product}\\ = K_{\alpha\beta}}} - = \lambda -\sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})} -\end{equation} - -Recall that we cannot compute $\vec{\phi}_{(\vec{x})}$ but can now -exploit the kernel trick (cf. 
Eq.\ref{eq:trick}) by substituing -$ K_{\alpha \beta} $ for -$ -\vec{\phi}^{\top}_{(\vec{x}^{(\alpha)})} - \, - \vec{\phi}_{(\vec{x}^{(\beta)})} -$ - -Eq.\ref{eq:eig2} becomes: -\begin{equation} \label{eq:eig3} -\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} -a^{(\beta)} -\vec{\phi}_{(\vec{x}^{(\alpha)})} K_{\alpha \beta} - = \lambda -\sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})} -\end{equation} - -We will now proceed with reformulating the above until we no longer have any $\phi$'s. - -We left-multiply Eq.\ref{eq:eig3} with $\left(\vec \phi^{(\gamma)}\right)^\top$, where $\gamma = 1, \ldots, p$. - We can pull $\left(\vec \phi^{(\gamma)}\right)^\top$ directly into the sum on the left and the sum on the right: - -\begin{equation} \label{eq:eig4} -\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} -a^{(\beta)} -\underbrace{ -\left(\vec \phi^{(\gamma)}\right)^\top -\vec{\phi}_{(\vec{x}^{(\alpha)})} -}_{=K_{\gamma \alpha}} -K_{\alpha \beta} - = \lambda -\sum^{p}_{\beta=1} a^{(\beta)} -\underbrace{ -\left(\vec \phi^{(\gamma)}\right)^\top \vec{\phi}_{(\vec{x}^{(\beta)})} -}_{=K_{\gamma \beta}} -\end{equation} - -\newpage - -Eq.\ref{eq:eig4} without the clutter: - -\begin{equation} \label{eq:eigK} -\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} -a^{(\beta)} -K_{\gamma \alpha} -K_{\alpha \beta} - = \lambda -\sum^{p}_{\beta=1} a^{(\beta)} -K_{\gamma \beta} \quad \forall \gamma -\end{equation} - -Since we want to compute this for all training samples $\gamma$, -we can reduce the clutter even more by using matrix notation. -Specifically, by using the \emph{kernel matrix} $\vec K=\{K_{\alpha\beta}\}$, where \\ -$ -K_{\alpha \beta} = -k(\vec x^{(\alpha)}, \vec x^{(\beta)}) = -\vec{\phi}_{(\vec{x}^{(\alpha)})}^\top - \vec{\phi}_{(\vec{x}^{(\beta)})} -$ - -We end up with this formulation of the eigenvalue problem: - -\begin{equation*} - \vec{K}^2 \vec{a} = p \lambda \vec{K} \mspace{2mu} \vec{a} -\end{equation*} - -$\vec K$ appears on both sides. All the solutions that are of interest remain represented in -the following simpler eigenvalue problem, which we refer to as the \emph{transformed eigenvalue problem}: -\begin{equation} -\label{eq:eigsimple1} - \vec{K} \, \vec{a} = p \lambda \mspace{2mu} \vec{a} -\end{equation} - -We can interpret $\vec a$ as the \emph{eigenvector} of $\vec K$ - -By omitting the constant $p$, we can rely on finding solutions for $\lambda$ that absorb it: - -\begin{equation} -\label{eq:eigsimple2} - \vec{K} \, \vec{a} = \lambda \mspace{2mu} \vec{a} -\end{equation} - -All we've been doing so far is reformulate the eigenvalue problem such that we end up -with a formulation that only contains terms of the inner product kernel.\\ -Why was all this necesary? Because (1) we want to enable PCA to find non-linear correlations and (2) we don't have access to $\vec \phi_{(\vec x)}$. - -Now that we've solved the eigenvalue problem, we continue with the remaining steps for PCA. - -\newpage -\item Normalize the eigenvectors: - -Before we can project anything onto the space spanned by the PCs $\widetilde{\vec a}_k$ where $k=1,\ldots,p$, -we need to ensure these vectors are normalized. -$\widetilde {\vec a}_k$ is only used to indicate that the vector has not been normalized yet. 
- -%$\widetilde {\vec a}_k$ can be normalized explicitly by: -%$$ -%\vec a_k^{norm.} = \frac{1}{||\widetilde{\vec a}_k||} \cdot \widetilde{\vec a}_k = -%\frac{1}{\sqrt{\left(\widetilde{\vec a}_k\right)^\top \widetilde{\vec a}_k}} \cdot \widetilde{\vec a}_k -%$$ - -%However, we want to demonstrate how to normalize $\widetilde {\vec a}_k$ in a way that can be more efficient than an explicit normalization. - -Recalling Eq.\ref{eq:ephi} (we add the index $k$ to denote which eigenvector): -\begin{equation} -\label{eq:ephik} -\vec e_k = \sum^{p}_{\beta=1} a_k^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})}, -\end{equation} - -Again, we try to reformulate things such that we end up with the inner-product kernel rather than $\phi$. -We left-multiply Eq.\ref{eq:ephik} with $\left(\vec e_k\right)^\top$: -\begin{align} -\vec e^{\top}_k \vec e_k &= \sum^{p}_{\alpha=1} a_k^{(\alpha)} \vec{\phi}_{(\vec{x}^{(\alpha)})}^\top \sum^{p}_{\beta=1} a_k^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})} \\ -&= \sum^{p}_{\alpha=1} \sum^{p}_{\beta=1} a_k^{(\beta)} \underbrace{\vec{\phi}_{(\vec{x}^{(\alpha)})}^\top \vec{\phi}_{(\vec{x}^{(\beta)})}} a_k^{(\alpha)} \\ -&= \sum^{p}_{\alpha=1} \sum^{p}_{\beta=1} a_k^{(\beta)} \quad \; K_{\alpha\beta} \quad \; a_k^{(\alpha)} \\ -&= \widetilde {\vec a}_k^\top \vec K \, \widetilde {\vec a}_k -\end{align} - -And when we plug Eq.\ref{eq:eigsimple1} into the above: - -\begin{equation} -\label{eq:eignorm} -\vec e^{\top}_k \vec e_k = -\widetilde {\vec a}_k^\top -\underbrace{p \lambda_k \, \widetilde {\vec a}_k}_{=\, \vec K \, \widetilde {\vec a}_k} -= p \lambda_k \, \widetilde {\vec a}_k^\top \widetilde {\vec a}_k \eqexcl 1 -\end{equation} - -Scaling $\widetilde {\vec a}_k$ by $\frac{1}{\sqrt{p \lambda_k}}$ yields -a unit vector with the same direction as $\widetilde {\vec a}_k$ to satisfy Eq.\ref{eq:eignorm}.\\ -With -\begin{equation} -\vec a_k^{norm.} := \frac{1}{\sqrt{p \lambda_k}} \widetilde {\vec a}_k, -\end{equation} -follows: -\begin{align} -\vec e^{\top}_k \vec e_k -&= p \lambda_k \, \; \widetilde {\vec a}_k^\top \; \widetilde {\vec a}_k\\ -&= p \lambda_k \, \left(\vec a_k^{\text{norm.}}\right)^\top \vec a_k^{\text{norm.}} \\ -&= p \lambda_k \left(\frac{1}{\sqrt{p \lambda_k}} \widetilde {\vec a}_k\right)^\top \left(\frac{1}{\sqrt{p \lambda_k}} \widetilde {\vec a}_k\right) -= 1 -\end{align} - - -\newpage - -\item Sort the eigenvectors such that the corresponding eigenvalues are arranged in decreasing order. - - -\item Projection: - -In order to project some observation $\vec x$ into the PC space, we first map it into the non-linear space of $\phi$ -and then project that into the space spanned by the PCs. -By now we should expect that the transformation can only be performed via the kernel trick. -We basically represent this sample $\vec x$ by its relation to the \emph{training data} -(i.e. the $p$ observations that were used to compute the PCs). - -We derive the projection for Kernel PCA by starting with the projection used in linear PCA (cf. 
slides 1.3 \#11): - -The projection for linear PCA, specifically the component of $\vec x$ in the direction of the $k$-th PC is: -\begin{equation} -\label{eq:projlin} -u_k(\vec x) = \vec e_k^\top \vec x -\end{equation} - -We substitute $\vec \phi_{(\vec x)}$ for $\vec x$ and plug Eq.\ref{eq:ephi} into Eq.\ref{eq:projlin}: - -\begin{align} -\label{eq:projk1} -u_k(\vec \phi_{(\vec x)}) &= \sum^{p}_{\beta=1} a^{(\beta)} -\underbrace{ -\vec{\phi}^\top_{(\vec{x}^{(\beta)})} \vec \phi_{(\vec x)} -}_{\substack{ -\text{recognize the familiar}\\ -\text{scalar product?} -\\ =k(\vec x^{(\beta)}, \vec x) = K_{\beta,\vec x}}}\\ -&= \sum_{\beta=1}^{p} a_k^{(\beta)} K_{\beta, \vec x} -\end{align} - -Note that $\vec x$ can be a sample that was used in computing the PCs or a completly new ``test'' point. - -\item Reconstruction: - -Since we never had the transformation $\phi$ to begin with. -It is not psssible to simply project a sample from PC space back into the original $N$-dimensional input space. -Algorithms exist that approximate a ``pre-image'' of some new observation. -\end{enumerate} - -\subsection{Centering the kernel matrix} - -PCA operates on centered data. This is common for all forms of PCA, -whether it is linear PCA, online PCA or Kernel PCA. -In Kernel PCA, the $\vec X$ is transformed into a higher dimensional space by applying the kernel trick. -Applying it to all $p$ training samples yields the kernel matrix, which is used in solving the transformed eigenvalue problem. - -All of the above assumed that we have successfully centered the kernel matrix. We now look at how this is done. - -\question{Do I need to center $\vec X$ before computing $ K_{\alpha \beta}$?} - -- No, Centering $\vec X$ before computing $K_{\alpha \beta}$ does not guarantee the kernel matrix to be centered. You only end up with $\vec{\widetilde K}$. -It is simply irrelevant whether the original data $\vec X$ is centered or not. -Therefore, we need to center the kernel matrix before solving the transformed eigenvalue problem.\\ - -We use $\vec{\widetilde K}$ to denote the \emph{un-normalized} kernel matrix and $\vec K$ -to denote the kernel matrix \emph{after} it has been centered. - -\underline{Centering $\vec{\widetilde K}$:} - -The centering is built on the kernel trick as in Eq.\ref{eq:trick}, except that we compute the inner product using centered $\phi_{(\vec x)}$: - -\begin{equation*} - \underbrace{ \vec{\phi}_{\big( \vec{x}^{(\alpha)} \big)} }_{ - \substack{ \text{``centered''} \\ - \text{feature vectors}} } - = \widetilde{\vec{\phi}}_{\big( \vec{x}^{(\alpha)} \big)} - - \frac{1}{p} \sum\limits_{\gamma = 1}^p - \underbrace{ \widetilde{\vec{\phi}}_{\big( \vec{x}^{(\gamma)} - \big)} }_{ - \substack{ \text{uncentered} \\ - \text{feature vectors}} } - \qquad \forall \alpha -\end{equation*} - -\begin{equation*} - K_{\alpha \beta} = \underbrace{\widetilde{K}_{\alpha \beta}}_{ _{= \, k \left ( \vec{x}^{(\alpha)}, - \vec{x}^{(\beta)} \right )}} - - \;\underbrace{\frac{1}{p} \sum\limits_{\delta = 1}^p - \widetilde{K}_{\alpha \delta}}_\text{\scriptsize{row avg.}} - \; - \; \underbrace{\frac{1}{p} - \sum\limits_{\gamma = 1}^p - \widetilde{K}_{\gamma \beta}}_\text{\scriptsize{col. avg.}} - \;+ \;\underbrace{\frac{1}{p^2} - \sum\limits_{\gamma, \delta = 1}^p - \widetilde{K}_{\gamma \delta}}_\text{\scriptsize{matrix avg.}} -\end{equation*}% - -(cf. slides 1.3 \#16-\#17 on how we arrive at the above centering) - -\subsection{Applying Kernel PCA:} - -\question{How do I interpret the role of PCs?} - - -(cf. 
1.3 \#28) - -\subsection{A note on implementation:} - -\question{Should you solve the transformed eigenvalue problem using \emph{eigenvalue decomposition} or \emph{SVD}?} - -- \emph{eigenvalue decomposition} is the only option for Kernel PCA. \emph{SVD} is simply not applicable since we don't have access to $\vec \phi_((\vec x))$ - -The kernel function is symmetric. $k(\vec x^{(\alpha)}, \vec x^{(\beta)}) = k(\vec x^{(\beta)}, \vec x^{(\alpha)})$. One can exploit this by reducing how many times the kernel function is actually applied while traversing the training samples when computing $\widetilde {\vec{K}}$. diff --git a/notes/03_kernel-pca/1_nonlin.tex b/notes/03_kernel-pca/1_nonlin.tex new file mode 100644 index 0000000..c31e95f --- /dev/null +++ b/notes/03_kernel-pca/1_nonlin.tex @@ -0,0 +1,109 @@ + +\section{Non-linear transformations} +\label{sec:nonlin} + +\begin{frame}\frametitle{\secname} + +\only<1>{ +\notesonly{We use $N$ to denote the dimensionality of our input space and $p$ to denote the number of observations.\\} +Let $\vec x^{(\alpha)} \in \R^N$ and $\alpha = 1, \ldots, p$. + +\notesonly{The following} mapping\notesonly{ describes the non-linear transformation of each observation}: +\svspace{-3mm} +\begin{equation} +\vec{\phi}: \vec{x} \mapsto \vec{\phi}_{(\vec{x})} +\end{equation} + +} +\only<1->{ +\notesonly{An e}\slidesonly{E}xample\notesonly{ of such a transformation }- 2\textsuperscript{nd}-order monomials: +\begin{equation} +\vec{\phi}_{(\vec{x})} = ( + 1, \; + \mathrm{x}_1, \; + \mathrm{x}_2, \; + \ldots \; + \mathrm{x}_N, \; + \mathrm{x}_1^2, \; + \mathrm{x}_1 \mathrm{x}_2, \; + \mathrm{x}_2^2, \; + \mathrm{x}_1 \mathrm{x}_3, \; + \mathrm{x}_2 \mathrm{x}_3, \; + \mathrm{x}_3^2, \; \ldots, \; + \mathrm{x}_N^2 + )^\top +\end{equation} + +\svspace{-5mm} +\begin{center} +\only<2>{ + \includegraphics[width=0.87\textwidth]{img/monomials}% + } + \only<3>{ + \slidesonly{ + \includegraphics[width=0.7\textwidth]{img/meme_candononlinpca}% + } + } + \mode
{ + \captionof{figure}{Monomials for 2-d input} + } +\end{center} +} +\only<2->{ +\svspace{-5mm} +We could perform PCA on the transformed data $\vec \phi_{(\vec x)}$.\\ +\notesonly{The reasoning behind this is that }non-linear correlations between variables in $\vec x$ could be linear between variables in $\vec \phi_{(\vec x)}$. + +\notesonly{ +This is the \underline{purpose} of the non-linear transformaiton. That, two or more components in the original $\vec x$ (e.g. $x_1$, $x_2$) could have non-linear correlations (e.g. plotting those two components reveals a parabola). +Expanding the dimensionality of $\vec x$ through the above mapping introduces new dimensions in which correlations between the components in $\vec \phi_{(\vec x)}$ become \emph{linear}. + +} +} + +\end{frame} + +\begin{frame}\frametitle{\secname} + +\pause +\notesonly{However,} we might not know how to define $\vec \phi_{(\vec x)}$. + +\pause + +The potentially very high dimensionality of $\vec \phi_{(\vec x)}$ could prohibit us from storing the transformed data.\\ + +\begin{center} + \includegraphics[width=0.5\textwidth]{img/storehd}% + \notesonly{ + \captionof{figure}{Storing 2-nd order monomials applied to an HD image.} + } +\end{center} + +\notesonly{ +2-nd order monomial applied to one HD ($1280 \times 720$ pixels) image would require more than $400 GB$ of storage alone\footnote{ +$N=1280 \times 720 = 921600$ pixels, $d = \# x^1_j + \# x^2_j + \# \binom{N}{2} = (N+1) + N + \binom{N}{2} = 424,674,662,401$\\ +Let's say it's a grayscale image, so 1 Byte per pixel. This way we end up with $424$ GB and some change. +} +} + + +\pause + +\svspace{5mm} + +\textbf{Caveat}:\\ +Directly applying this transformation on a single observation is not applicable. +We might never find a transformation that causes all non-linear correlations within $\vec x$ to become linear.\\ + +\end{frame} + +\begin{frame}\frametitle{\secname} + +The upside:\\ + +We actually don't need to define this transformation. +All we need to know is that the dimensionality of $\vec{\phi}_{(\vec{x})}$ can be larger than $N$, possibly infinitely large.\\ + +We turn to the ``kernel trick'' to solve this problem and avoid an epxlicit definition for the non-linear transformation. + +\end{frame} diff --git a/notes/03_kernel-pca/2_trick.tex b/notes/03_kernel-pca/2_trick.tex new file mode 100644 index 0000000..c982b9e --- /dev/null +++ b/notes/03_kernel-pca/2_trick.tex @@ -0,0 +1,146 @@ +\section{The kernel trick} + +\begin{frame}\frametitle{\secname} + +Representing a non-linear transformation using inner products of the data: +\begin{equation} + \label{eq:trick} + \vec{\phi}_{(\vec{x})}^\top + \vec{\phi}_{(\vec{x}')} = + k(\vec{x}, \vec{x}') +\end{equation} + +where $k(\vec{x}, \vec{x}')$ is a kernel function applied +to any two observations. + +\end{frame} + +\subsection{The Kernel matrix} + +\begin{frame}\frametitle{\subsecname} + +Applying the kernel function to \emph{each} pair in our dataset; \\ +$\vec x^{(\alpha)}$ and $\vec{x}^{(\beta)}$ +with $\alpha, \beta = 1, \ldots, p$ yields the scalar $K_{\alpha \beta}$. +Storing all scalars $K_{\alpha \beta} \; \forall (\alpha,\beta)$ yields +the un-normalized kernel matrix $\widetilde {\vec K}=\{K_{\alpha \beta}\}$: + +\begin{equation} +\widetilde {\vec K} = +\rmat{ +K_{11} & K_{12} & \ldots & K_{1p} \\ +K_{21} & K_{12} & \ldots & K_{2p} \\ +\vdots & & \ddots\\ +K_{p1} & & & K_{pp} +} +\end{equation} + +$\vec K$ (without the ``\textasciitilde'') denotes the normalized or ``centered'' kernel matrix. \notesonly{cf. 
\sectionref{sec:centerkernel} why and how to center the Kernel matrix.} + +\question{What is the dimensionality of $\widetilde {\vec K}$?} + +\end{frame} + +\subsubsection{Properties of the Kernel matrix} + +\begin{frame}\frametitle{\subsubsecname} + +\begin{block}{From Mercer's theorem} +Every positive semidefinite kernel $k$ corresponds to a scalar product in +some metric feature space. +\end{block} + +\end{frame} + +\subsubsection{The Radial Basis function (RBF) Kernel} + +\begin{frame}\frametitle{The Radial Basis function (RBF) } + +\notesonly{The radial basis function (RBF) as depicted in \figref{fig:rbf} is a popular choice for a kernel. It is defined as:} + +\begin{equation} + k_{(\vec{x},\vec{x}')} = \exp \bigg\{ -\frac{ \big( \vec{x} - \vec{x}' + \big)^2 }{ 2 \sigma^2 } \bigg\} +\end{equation} + +where $\sigma$ is referred to as the width of the kernel. + +\begin{figure}[ht] + \centering + \savebox{\imagebox}{ + \includegraphics[width=\slidesonly{0.45}\notesonly{0.35}\textwidth]{img/guassian_function_1d}}% + \begin{subfigure}[t]{\slidesonly{0.45}\notesonly{0.35}\textwidth} + \centering + \usebox{\imagebox}% Place largest image + \caption{for data in 1D} + \label{fig:quadratic} + \end{subfigure} + \hspace{2mm} + \begin{subfigure}[t]{\slidesonly{0.45}\notesonly{0.35}\textwidth} + \centering + \raisebox{\dimexpr.5\ht\imagebox-.5\height}{% Raise smaller image into place + \includegraphics[width=0.99\textwidth]{img/guassian_function_2d_3Dview} + } + \caption{for data in 2D} + \label{fig:linear} + \end{subfigure} + \mode
{ + \caption{The Gaussian kernel function} + } + \label{fig:rbf} +\end{figure} + +\end{frame} + +\begin{frame}\frametitle{The RBF Kernel} + +\slidesonly{ +Applied to all pairs\\ + +\begin{minipage}{0.4\textwidth} + +\only<1>{ +\begin{equation} + k_{(\vec{x},\vec{x}')} = \exp \bigg\{ -\frac{ \big( \vec{x} - \vec{x}' + \big)^2 }{ 2 \sigma^2 } \bigg\} +\end{equation} +} +\only<2>{ + \includegraphics[width=0.8\textwidth]{img/points_rbf} +} +\only<3>{ + \includegraphics[width=0.8\textwidth]{img/points_rbf_rot_transl} +} + + +\end{minipage} +\begin{minipage}{0.5\textwidth} + \includegraphics[width=0.99\textwidth]{img/guassian_function_1d} +\end{minipage} + +} + +\pause + +\visible<2->{ +\question{For an RBF kernel, is $K_{\alpha \beta}$ sensitive to translation and rotation of the input data?} +} + +\mode
{ +\begin{center} + \includegraphics[width=0.2\textwidth]{img/points_rbf_rot_transl} + \captionof{figure}{Rotation and translation of the input data} +\end{center} + +} + + +\pause + +- No.\notesonly{ For an RBF Kernel, $K_{\alpha \beta}$ is the pairwise relation between two observations. +Rotating the data or translating it will result in the same $K_{\alpha \beta}$.} +However, scaling the data while keeping the kernel width fixed would produce a different $K_{\alpha \beta}$. + +\end{frame} + + diff --git a/notes/03_kernel-pca/3_kpca.tex b/notes/03_kernel-pca/3_kpca.tex new file mode 100644 index 0000000..cba4959 --- /dev/null +++ b/notes/03_kernel-pca/3_kpca.tex @@ -0,0 +1,778 @@ +\section{Kernel PCA} + +\mode{ +\begin{frame} + \begin{center} \huge + \secname + \end{center} + \begin{center} + \includegraphics[width=0.3\textwidth]{img/koffer}% + + \vspace{5mm} + non-linear transformation $\Leftrightarrow$ kernel trick + \end{center} + +\end{frame} +} + +\begin{frame}\frametitle{\secname} + +We apply standard linear PCA on the \emph{transformed} version of the data +$ +\big\{ +\vec{\phi}_{(\vec{x}^{(\alpha)})} +\big\}_{\alpha=1}^{p} +$. + +We will first assume we have $\vec{\phi}_{(\vec{x})}$ +but we will eventually turn to $K_{\alpha \beta}$ +which we can actually obtain.\\ + +\pause + +\svspace{10mm} + +\underline{Remark:} +\slidesonly{It might look different from the slides, but it's not} +\notesonly{A difference between these notes and the lecture slides is that} +the lecture slides employ ``identity'' as the mapping. +This is why you don't see $\phi$ in the derivations of Kernel PCA in the slides but rather see $\vec x$ used directly.\\ + +\end{frame} + +\subsection{The Method} + +\begin{frame}{Kernel PCA: Outline the method} + +\mode{ +\begin{itemize} +\item center the ``immediate'' input to PCA + +\pause +\only<2>{ +\begin{center} + \includegraphics[width=0.5\textwidth]{img/meme_realinput}% +\end{center} +} + +\pause + +\item compute covariance $\vec C_\phi$ +\item solve the eigenvalue problem +\item normalize the eigenvectors +\item sort the eigenvectors and eigenvalues +\end{itemize} + +Use Kernel PCA: + +\begin{itemize} +\item projection +\item reconstruction + +\end{itemize} + +\pause + +\only<4>{ +\placeimage{9}{6}{img/meme_exactly}{width=5cm}% + +\vspace{5mm} +The difference is in the details. +} + +} + +\end{frame} + +\subsubsection{Centering the immediate input to PCA} + +\begin{frame}{\subsubsecname} + +Remember, we will first assume that we have the non-linear mapping $\phi$.\\ + +PCA assumes its input is centered. +It's direct input are the $\phi$'s. Therefore, +\begin{equation} +\frac{1}{p} \sum^{p}_{\alpha=1} \vec{\phi}_{(\vec{x}^{(\alpha)})} \eqexcl \vec 0 +\end{equation} + +\question{Isn't it enough to center $\vec X$?} + +\pause + +- No, Centering $\vec X$ does not guranatee it stays centered after the transformation. +Therefore, there is no need to center $\vec X$ beforehand. 
+ +Example: +\begin{equation} +\E[\vec x] = 0 \quad \nRightarrow \quad \E[\vec x^2] = 0 +\end{equation} + +\end{frame} + +\subsubsection{The covariance matrix of the transformed data} + +\begin{frame}{\subsubsecname} + +Compute the covariance matrix $\vec C_{\phi}$ for $\vec{\phi}_{(\vec{x})}$: + + +\begin{equation} \label{eq:cov} +\vec C_{\phi} = \frac{1}{p} \sum_{\alpha=1}^{p} \vec{\phi}_{(\vec{x}^{(\alpha)})} \vec{\phi}^{\top}_{(\vec{x}^{(\alpha)})} +\end{equation} + +\end{frame} + +\subsubsection{The eigenvalue problem} + +\begin{frame}{\subsubsecname} + +Solve the eigenvalue problem: + +\begin{equation} \label{eq:eig} +\vec C_{\phi} \, \vec e = \lambda \vec e +\end{equation} + +Each eigenvector $\vec e_i$ with corresponding $\lambda_i \ne 0$ lies in the span of +\begin{equation} +\big\{ +\vec{\phi}_{(\vec{x}^{(\alpha)})} +\big\}_{\alpha=1}^{p}. \qquad \big(\vec{\phi}_{(\vec{x}^{(\alpha)})} = \vec{\phi}^{(\alpha)} \text{ for brevity}\big) +\end{equation} + +\pause + +Consequently, there exists a set of coefficients (i.e. a coefficient for each transformed observation) +\begin{equation} +\big\{ +a^{(\alpha)} +\big\}_{\alpha=1}^{p}\,, +\end{equation} which satisfies the following: + +\begin{equation} +\label{eq:ephi} +\vec e = \sum^{p}_{\beta=1} a^{(\beta)} +%\vec{\phi}_{(\vec{x}^{(\beta)})} +\vec{\phi}^{(\beta)} +\end{equation} + +\notesonly{ +Eq.\ref{eq:ephi} tells us that we can describe $\vec e$ in terms of the transformed observations (a weighted summation of $\phi$'s). + The use of the index $\beta$ is only to avoid collisions with $\alpha$ later. +} + +\end{frame} + +\begin{frame}{\subsubsecname} + +\slidesonly{ +\vspace{-12mm} +\hspace{7.8cm} +\StickyNote[1.7cm]{ + \begingroup + \footnotesize + \begin{equation} \label{eq:cov} + \vec C_{\phi} = \frac{1}{p} \sum_{\alpha=1}^{p} \vec{\phi}^{(\alpha)} \big(\vec{\phi}^{(\alpha)}\big)^\top + \end{equation} + \begin{equation} + \label{eq:ephi} + \vec e = \sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}^{(\beta)} + \end{equation} + \endgroup +}[3.5cm] +\vspace{-22mm} +} + +\notesonly{ +Substituting Eq.\ref{eq:cov} and Eq.\ref{eq:ephi} into the eignevalue problem Eq.\ref{eq:eig}: +} +\slidesonly{ +Express the eigenvalue problem in terms of $\phi$'s: + +\begin{equation} \label{eq:eig} +\vec C_{\phi} \;\; \vec e \; = \; \lambda \;\; \vec e +\end{equation} + +} + +\begin{equation} +\underbrace{\frac{1}{p} \sum_{\alpha=1}^{p} \vec{\phi}^{(\alpha)} \big(\vec{\phi}^{(\alpha)}\big)^\top +\vphantom{\sum^{p}_{\beta=1}} +}_{=\,\vec C_{\phi}} + \, +\underbrace{\sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}^{(\beta)}}_{=\,\vec e} + = \lambda \;\, +\underbrace{\sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}^{(\beta)}}_{=\,\vec e} +\end{equation} + +\pause + +After rearranging the terms we get: +\begin{equation} \label{eq:eig2} +\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} +a^{(\beta)} \vec{\phi}^{(\alpha)} +\underbrace{ + \big(\vec{\phi}^{(\alpha)}\big)^\top \, \vec{\phi}^{(\beta)} +}_{\substack{\text{scalar product}\\ = K_{\alpha\beta}}} + = \lambda +\sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}^{(\beta)} +\end{equation} + +We are one step closer to not needing the eplxicit mapping $\phi$. 
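+
+\notesonly{
+As a small worked illustration of why the scalar product is all we need (a sketch only; the $\sqrt{2}$ scaling below differs from the monomial map in \sectionref{sec:nonlin}): for 2-d input take $\vec{\phi}_{(\vec{x})} = (1, \; \sqrt{2}\,\mathrm{x}_1, \; \sqrt{2}\,\mathrm{x}_2, \; \mathrm{x}_1^2, \; \sqrt{2}\,\mathrm{x}_1 \mathrm{x}_2, \; \mathrm{x}_2^2)^\top$. Multiplying out the scalar product collapses it to $\vec{\phi}_{(\vec{x})}^\top \vec{\phi}_{(\vec{x}')} = \big(1 + \vec x^\top \vec x'\big)^2$, i.e. a polynomial kernel $k(\vec x, \vec x')$ that can be evaluated directly in the 2-d input space without ever forming the 6-dimensional vector $\vec{\phi}_{(\vec{x})}$.
+}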
+ +\end{frame} + +\begin{frame}{\subsubsecname} + +\slidesonly{ +\vspace{-12mm} +\hspace{7.8cm} +\StickyNote[1.7cm]{ + \begingroup + \footnotesize + \begin{equation} \label{eq:cov} + \vec C_{\phi} = \frac{1}{p} \sum_{\alpha=1}^{p} \vec{\phi}^{(\alpha)} \big(\vec{\phi}^{(\alpha)}\big)^\top + \end{equation} + \begin{equation} + \label{eq:ephi} + \vec e = \sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}^{(\beta)} + \end{equation} + \endgroup +}[3.5cm] +\vspace{-22mm} +} + +\notesonly{ +Recall from \sectionref{sec:nonlin} that we not even be able compute $\vec{\phi}_{(\vec{x})}$ but we now see it is possible to avoid the transformation altogether by exploiting the kernel trick (cf. Eq.\ref{eq:trick}) by substituing +$ K_{\alpha \beta} $ for +$ +\vec{\phi}^{\top}_{(\vec{x}^{(\alpha)})} + \, + \vec{\phi}_{(\vec{x}^{(\beta)})} +$ + +Eq.\ref{eq:eig2} becomes:} + +\begin{equation} \label{eq:eig3} +\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} +a^{(\beta)} +\vec{\phi}^{(\alpha)} +K_{\alpha \beta} += \lambda +\sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}^{(\beta)} +\slidesonly{\hspace{40mm}} +\end{equation} + +We now proceed with reformulating the above until we no longer have any $\phi$'s: + +\pause + +\notesonly{We }left-multiply\notesonly{ Eq.\ref{eq:eig3}} with $\big(\vec \phi_{(\vec x^{(\gamma)})}\big)^\top$, where $\gamma = 1, \ldots, p$. + We can pull $\big(\vec \phi^{(\gamma)}\big)^\top$ directly into the sum on the \slidesonly{LHS}\notesonly{left-hand-side} and the sum on the \slidesonly{RHS}\notesonly{right-hand-side}: + +\only<2,3>{ +\begin{equation} \label{eq:eig4} +\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} +a^{(\beta)} +\underbrace{ +\left(\vec \phi^{(\gamma)}\right)^\top +\vec{\phi}^{(\alpha)} +}_{=K_{\gamma \alpha}} +K_{\alpha \beta} + = \lambda +\sum^{p}_{\beta=1} a^{(\beta)} +\underbrace{ +\left(\vec \phi^{(\gamma)}\right)^\top \vec{\phi}^{(\beta)} +}_{=K_{\gamma \beta}} +\end{equation} +} + +\pause + +\newpage + +\notesonly{Eq.\ref{eq:eig4} without the clutter:} + +\begin{equation} \label{eq:eigK} +\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} +a^{(\beta)} +K_{\gamma \alpha} +K_{\alpha \beta} + = \lambda +\sum^{p}_{\beta=1} a^{(\beta)} +K_{\gamma \beta} \quad {\only<4>{\color{red}}\forall \gamma} +\end{equation} + +\slidesonly{ +\only<4>{ +Use matrix notation to reduce the clutter. +} +} + + + +\end{frame} + +\begin{frame} + +\slidesonly{ +\begin{equation} \label{eq:eigK} +\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} +a^{(\beta)} +K_{\gamma \alpha} +K_{\alpha \beta} + = \lambda +\sum^{p}_{\beta=1} a^{(\beta)} +K_{\gamma \beta} \quad {\color{red}\forall \gamma} +\end{equation} +} + +We want to solve \notesonly{\eqref{eq:eq:eigK}}\slidesonly{the above} \textbf{for all} training samples $\gamma$. 
+We can further reduce the clutter by using matrix notation.\\ +Specifically, by computing the \emph{kernel matrix} $\vec K=\{K_{\alpha\beta}\}$, where \\ +\begin{equation} +K_{\alpha \beta} = +k(\vec x^{(\alpha)}, \vec x^{(\beta)}) = +\big(\vec{\phi}^{(\alpha)}\big)^\top + \vec{\phi}^{(\beta)} +\end{equation} + +We end up with this formulation of the eigenvalue problem: + +\begin{align} + \frac{1}{p} \vec{K}^2 \vec{a} = \lambda \vec{K} \, \vec{a}\\ + \vec{K}^2 \vec{a} = p \lambda \vec{K} \, \vec{a} +\end{align} + + +\end{frame} + +\begin{frame}{Side-note: Arriving at the matrx notation} + +\notesonly{Side note: Going from \eqref{eq:eigK} to the matrix notation:\\} + +\slidesonly{ + +\svspace{-3mm} + +\begingroup +\footnotesize +\begin{equation} \label{eq:eigK} +\frac{1}{p} \sum_{\alpha=1}^{p} \sum^{p}_{\beta=1} +a^{(\beta)} +K_{\gamma \alpha} +K_{\alpha \beta} + = \lambda + \sum^{p}_{\beta=1} a^{(\beta)} +K_{{\gamma} \beta} \quad {\forall \gamma} +\end{equation} +\endgroup + +\svspace{-1mm} +} + +First, we look at the RHS\notesonly{ of \eqref{eq:eigK}}: + +\svspace{-5mm} + +\begin{equation} + \ldots = \lambda + \sum^{{\color{blue}p}}_{\color{blue}\beta=1} a^{({\color{blue}\beta})} +K_{{\color{red}\gamma} {\color{blue}\beta}} \quad {\color{red}\forall \gamma} +\end{equation} + +\begin{itemize} +\item ${\color{red}\gamma}$ corresponds to a specific row in the Kernel matrix $\vec K$. +\item Let $\vec a$ be a vector that stores the values $a^{({\color{blue}\beta})}$, ${\color{blue}\beta=1,\ldots,p}$. +\item ${\color{blue}\beta}$ iterates over the elements in the vector $\vec a$ and columns in $\vec K$. +\item If we switch terms to: +\svspace{-3mm} +\begin{equation} + \ldots = \lambda + \sum^{{\color{blue}p}}_{\color{blue}\beta=1} +K_{{\color{red}\gamma} {\color{blue}\beta}} \, a^{({\color{blue}\beta})} \quad {\color{red}\forall \gamma} +\label{eq:rearrangerhs} +\end{equation} + +\notesonly{We see that the }RHS is essentially the inner product of the $\gamma$-th row with $\vec a$. +Recall that we're doing this ${\color{red}\forall \gamma}$ (i.e. all rows). Therefore RHS $\Rightarrow \lambda \vec K \, \vec a$ (a vector). + + +\end{itemize} + +\end{frame} + +\definecolor{darkgreen}{rgb}{0,0.6,0} + +\begin{frame}{Side-note: Arriving at the matrx notation - LHS} + +Next, we look at the LHS\notesonly{ of \eqref{eq:eigK}}: + +\svspace{-3mm} + +\begin{equation} + \frac{1}{p} \sum_{{\color{darkgreen}\alpha=1}}^{p} \sum^{p}_{{\color{blue}\beta=1}} +a^{({\color{blue}\beta})} +K_{{\color{red}\gamma} {\color{darkgreen}\alpha}} +K_{{\color{darkgreen}\alpha} {\color{blue}\beta}} = \ldots + \quad {\color{red}\forall \gamma} +\end{equation} + +\svspace{-1mm} + +\begin{itemize} +\item Same as before: +\begin{itemize} +\item ${\color{red}\gamma}$ corresponds to a specific row in the Kernel matrix $\vec K$. +\item Let $\vec a$ be a vector that stores the values $a^{({\color{blue}\beta})}$, ${\color{blue}\beta=1,\ldots,p}$. +\item ${\color{blue}\beta}$ iterates over the elements in the vector $\vec a$ and columns in $\vec K$. +\end{itemize} +\item ${\color{darkgreen}\alpha}$ iterates over the columns in $\vec K$. 
+\item $K_{{\color{red}\gamma} {\color{darkgreen}\alpha}}$ doesn't change with ${\color{blue}\beta}$, so we can move it out of the inner sum: +\svspace{-3mm} +\begin{align} + \frac{1}{p} \sum_{{\color{darkgreen}\alpha=1}}^{p} + K_{{\color{red}\gamma} {\color{darkgreen}\alpha}} + \underbrace{ +\sum^{p}_{{\color{blue}\beta=1}} +a^{({\color{blue}\beta})} +K_{{\color{darkgreen}\alpha} {\color{blue}\beta}}}_{\circledast} = \ldots + \quad {\color{red}\forall \gamma} +\end{align} +\item $\circledast$ is the inner-product of the ${\color{darkgreen}\alpha}$-th row and $\vec a$. +\item LHS is essentially the inner product of the $\gamma$-th row with the \underline{vector} $\circledast$. +\item Recall ${\color{red}\forall \gamma}$. Therefore LHS $\Rightarrow \vec K \, \vec K \, \vec a = \vec K^2 \vec a$. \notesonly{$\vec K$ is a symmetric matrix} + +\end{itemize} + +\end{frame} + +\begin{frame}{Back to the eigenvalue problem} + +\notesonly{Back to the eigenvalue problem:\\} + +\slidesonly{ +\begin{align} + \vec{K}^2 \vec{a} = p \lambda \vec{K} \, \vec{a} +\end{align} +} + +\pause + +$\vec K$ appears on both sides. All the solutions that are of interest remain represented in +the following simpler eigenvalue problem, which we refer to as the \emph{transformed eigenvalue problem}: +\begin{equation} +\label{eq:eigsimple1} + \vec{K} \, \vec{a} = p \lambda \vec{a} +\end{equation} + +\pause + +We can interpret $\vec a$ as the \emph{eigenvector} of $\vec K$ + +\pause + +\question{Do we really need $p$ in the transformed eigenvalue problem?} + +\pause + +No, by omitting the constant $p$ (optional), we can rely on finding solutions for $\lambda$ that absorb it: + +\begin{equation} +\label{eq:eigsimple2} + \vec{K} \, \vec{a} = \lambda \vec{a} +\end{equation} + +\end{frame} + +\begin{frame} + +\question{What was all this about?} + +All we've been doing so far is reformulate the eigenvalue problem such that we end up +with a formulation that only contains terms of the inner product kernel.\\ + +\pause + +\slidesonly{ +\begin{minipage}{0.45\textwidth} + \begin{center} + \includegraphics[width=3cm]{img/meme_necessary} + \end{center} +\end{minipage} +\pause +\begin{minipage}{0.45\textwidth} + \begin{center} + \includegraphics[width=4cm]{img/meme_ofcourse2} + \end{center} +\end{minipage} +} + +\notesonly{ +\question{Was all this necessary?} + +Yes, }because + +\begin{enumerate} +\item we want to enable PCA to find non-linear correlations and +\item we don't have access to $\vec \phi_{(\vec x)}$. +\end{enumerate} + +\end{frame} + +\begin{frame} + +\slidesonly{ +\begin{center} + \includegraphics[width=6cm]{img/meme_nowwhat} +\end{center} +} + +\pause + +Now that we've solved the eigenvalue problem, we continue with the remaining steps for PCA. + +\pause + + +\slidesonly{ +\begin{center} + \includegraphics[width=2cm]{img/meme_right} +\end{center} +} + +\end{frame} + +\subsubsection{Normalize the eigenvectors} + +\begin{frame}{\subsecname} + +Before we can project anything onto the space spanned by the PCs, we need to know they are normalized first. + +In linear PCA, we've relied on \textit{Eig} giving us normalized eigenvectors $\vec e$. + +\question{If we feed it with the kernel matrix will we get normalized $\vec a$?} + +\pause + +- Yes, Eig() gives us \underline{unit} vectors regardless. + +\pause + +\question{So, are we done talking about normalization?} + +\pause + +-No, because it's the $\vec e$'s that PCA needs normalized. 
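+
+\notesonly{
+A hypothetical numerical example of why this matters (the values are made up; the relation $\vec e_k^\top \vec e_k = p \lambda_k \, \widetilde{\vec a}_k^\top \widetilde{\vec a}_k$ is derived in the following frames): with $p = 100$, $\lambda_k = 0.05$ and a unit-length $\widetilde{\vec a}_k$, the corresponding $\vec e_k$ has squared norm $p \lambda_k = 5$, so every projection onto that PC would come out scaled by $\sqrt{5}$ unless $\widetilde{\vec a}_k$ is rescaled first.
+}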
+ +\end{frame} + +\begin{frame}{\subsubsecname} + +Recall, that PCA solves the eigenvalue problem + +\begin{equation} +\vec C_{\phi} \, \vec e = \lambda \vec e +\end{equation} + +At the solution, $\vec e^\top \vec e = 1$. $\vec e$ are normalized eigenvectors to use as PCs. + +We don't have the $\phi$'s so we opted to solve the transformed eigenvalue problem instead: + +\begin{equation} +%\label{eq:eigsimple1} + \vec{K} \, \vec{a} = p \lambda \vec{a} +\end{equation} + +At the solution, $\widetilde{\vec a}^\top \widetilde{\vec a} = 1 \quad \nRightarrow \quad \vec e^\top \vec e \ne 1$\\ + +\notesonly{Now that we are aware that we need further normalization, }we'll use $\widetilde{\vec a}_k$ where $k=1,\ldots,p$ to denote the \textbf{un}normalized eigenvectors from the transformed eigenvalue problem and $\vec a_k$ to denote that the vector has been normalized to use to correctly project onto the corresponding PC. + +\end{frame} + +\begin{frame}{\subsubsecname} + +Recall\notesonly{ing Eq.\ref{eq:ephi} (we add the index $k$ to denote which eigenvector):} +\begin{equation} +\label{eq:ephik} +\vec e_k = \sum^{p}_{\beta=1} a_k^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})}, +\end{equation} + +We want an expression for the norm $\vec e^{\top}_k \vec e_k$ that does not involve $\phi$'s. We left-multiply\slidesonly{ the above}\notesonly{ Eq.\ref{eq:ephik}} with $\left(\vec e_k\right)^\top$: +\begin{align} +\vec e^{\top}_k \vec e_k &= \sum^{p}_{\alpha=1} a_k^{(\alpha)} \vec{\phi}_{(\vec{x}^{(\alpha)})}^\top \sum^{p}_{\beta=1} a_k^{(\beta)} \vec{\phi}_{(\vec{x}^{(\beta)})} \\ +&= \sum^{p}_{\alpha=1} \sum^{p}_{\beta=1} a_k^{(\beta)} \underbrace{\vec{\phi}_{(\vec{x}^{(\alpha)})}^\top \vec{\phi}_{(\vec{x}^{(\beta)})}} a_k^{(\alpha)} \\ +&= \sum^{p}_{\alpha=1} \sum^{p}_{\beta=1} a_k^{(\beta)} \quad \; K_{\alpha\beta} \quad \; a_k^{(\alpha)} \\ +&= \widetilde {\vec a}_k^\top \vec K \, \widetilde {\vec a}_k +\end{align} + +\end{frame} + +\begin{frame}{\subsubsecname} + +\notesonly{ +And when we plug Eq.\ref{eq:eigsimple1} into the above: +} +\slidesonly{ +From $\vec{K} \, \widetilde {\vec a}_k = p \lambda \widetilde {\vec a}_k$ follows: +} + +\begin{equation} +\label{eq:eignorm} +\vec e^{\top}_k \vec e_k = +\widetilde {\vec a}_k^\top +\underbrace{p \lambda_k \, \widetilde {\vec a}_k}_{=\, \vec K \, \widetilde {\vec a}_k} += p \lambda_k \, \widetilde {\vec a}_k^\top \widetilde {\vec a}_k \eqexcl 1 +\end{equation} + +\pause + +w.l.o.g. + +\svspace{-7mm} + +\begin{equation} +\widetilde {\vec a}_k^\top \widetilde {\vec a}_k = 1 +\end{equation} + +Remark: a unit-length vector $\widetilde {\vec a}_k$ does not imply \emph{normlized} for PCA anymore. 
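+
+\notesonly{
+Here, \emph{normalized} is meant in the sense PCA requires: the corresponding eigenvector $\vec e_k$ of $\vec C_\phi$ must have unit length, not $\widetilde{\vec a}_k$ itself.
+}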
+ +\pause + +\notesonly{ +Scaling $\widetilde {\vec a}_k$ by $\frac{1}{\sqrt{p \lambda_k}}$ yields +a vector in the same direction as $\widetilde {\vec a}_k$ to satisfy \notesonly{Eq.\ref{eq:eignorm}}\slidesonly{$\vec e^{\top}_k \vec e_k \eqexcl 1$}.\\ +} +With +\svspace{-5mm} +\begin{equation} +\vec a_k^{norm.} := \frac{1}{\sqrt{p \lambda_k}} \widetilde {\vec a}_k, +\end{equation} +follows: +\svspace{-5mm} +\begin{align} +\vec e^{\top}_k \vec e_k +&= p \lambda_k \, \; \widetilde {\vec a}_k^\top \; \widetilde {\vec a}_k\\ +&= p \lambda_k \, \left(\vec a_k^{\text{norm.}}\right)^\top \vec a_k^{\text{norm.}} \\ +&= p \lambda_k \left(\frac{1}{\sqrt{p \lambda_k}} \widetilde {\vec a}_k\right)^\top \left(\frac{1}{\sqrt{p \lambda_k}} \widetilde {\vec a}_k\right) += 1 +\end{align} + +\end{frame} + +\subsubsection{Sorting} + +\begin{frame}{\subsubsecname} + +Sort the eigenvectors such that the corresponding eigenvalues are arranged in decreasing order. + +\slidesonly{ +\begin{center} + \includegraphics[width=4cm]{img/meme_sort} +\end{center} +} + +\end{frame} + +\subsubsection{Projection} + +\begin{frame}{\subsubsecname} +\notesonly{In order t}\slidesonly{T}o project some observation $\vec x$ into the PC space, we first map it into \notesonly{the non-linear }space of $\phi$ +and then project that into the space spanned by the PCs.\\ + +\pause + +\slidesonly{ +\begin{center} + \includegraphics[width=4cm]{img/meme_mapx} +\end{center} +} + +\notesonly{ +By now we should expect that the transformation will be performed}\slidesonly{Transform} via the kernel trick. +\notesonly{We basically }represent this sample $\vec x$ by its relation to the \emph{training data} +(i.e. the $p$ observations that were used to compute the PCs). + +\end{frame} + +\begin{frame}{\subsubsecname} + +\slidesonly{ +\visible<2->{ +\vspace{-12mm} +\hspace{8.cm} +\StickyNote[1.cm]{ + \begingroup + \footnotesize + \begin{equation} + \label{eq:ephi} + \vec e = \sum^{p}_{\beta=1} a^{(\beta)} \vec{\phi}^{(\beta)} + \end{equation} + \endgroup +}[3.cm] +\vspace{-5mm} +} +} + +\notesonly{We derive the projection for Kernel PCA by starting}\slidesonly{Start} with the projection used in linear PCA. + +Recall in linear PCA, to get the component of $\vec x$ in the direction of the $k$-th PC we use: + +\svspace{-5mm} + +\begin{equation} +\label{eq:projlinx} +u_k(\vec x) = \vec e_k^\top \vec x +\end{equation} + +Assuming we have a the $\phi$'s: + +\svspace{-3mm} + +\begin{equation} +\label{eq:projlin} +u_k(\phi_{(\vec x)}) = \vec e_k^\top \vec \phi_{(\vec x)} +\end{equation} + +\pause + +\notesonly{ +We substitute $\vec \phi_{(\vec x)}$ for $\vec x$ and plug Eq.\ref{eq:ephi} into Eq.\ref{eq:projlin}: +} + +\visible<3>{ + +\svspace{-2mm} + +\begin{align} +\label{eq:projk1} +u_k(\vec \phi_{(\vec x)}) &= \sum^{p}_{\beta=1} a^{(\beta)} +\hspace{-3mm} +\underbrace{ +\vec{\phi}^\top_{(\vec{x}^{(\beta)})} \, \vec \phi_{(\vec x)} +}_{\substack{ +\text{recognize the familiar}\\ +\text{scalar product?} +\\ =k(\vec x^{(\beta)}, \vec x) = K_{\beta,\vec x}}}\notesonly{\\ +&}= \sum_{\beta=1}^{p} a_k^{(\beta)} K_{\beta, \vec x} +\end{align} + +\notesonly{Note that }$\vec x$ can be a sample that was used in computing the PCs or a completly new ``test'' point. +} +\end{frame} + +\subsubsection{Reconstruction} + +\begin{frame}{\subsubsecname} + +\slidesonly{ +\begin{center} + \includegraphics[width=4cm]{img/meme_reconstruct} +\end{center} +} + +Since we never had the transformation $\phi$ to begin with. 
+It is not psssible to simply project a sample from PC space back into the original $N$-dimensional input space. +Algorithms exist that \emph{approximate} a ``pre-image'' of some new observation. + +\end{frame} diff --git a/notes/03_kernel-pca/4_centering.tex b/notes/03_kernel-pca/4_centering.tex new file mode 100644 index 0000000..ef88a54 --- /dev/null +++ b/notes/03_kernel-pca/4_centering.tex @@ -0,0 +1,80 @@ +\subsection{Centering the kernel matrix} + +\begin{frame}{\subsecname} + +PCA operates on centered data. This is common for all forms of PCA, +whether it is linear PCA, online PCA or Kernel PCA.\\ +In Kernel PCA, the $\vec X$ is transformed into a higher dimensional space by applying the kernel trick. +Applying it to all $p$ training samples yields the kernel matrix, which is used in solving the transformed eigenvalue problem. + +All of the above assumed that we have successfully centered the kernel matrix. We now look at how this is done. + +\end{frame} + + +\begin{frame}{\subsecname} + + +\question{Do I need to center $\vec X$ before computing $ K_{\alpha \beta}$?} + +\pause + +- No, Centering $\vec X$ before computing $K_{\alpha \beta}$ does not guarantee the kernel matrix to be centered. You only end up with $\vec{\widetilde K}$. The same applies if we had the $\phi$'s. + +It is simply irrelevant whether the original data $\vec X$ is centered or not. +Therefore, we need to center the kernel matrix before solving the transformed eigenvalue problem.\\ + +\end{frame} + +\begin{frame}{\subsecname} + +\notesonly{ +We use $\vec{\widetilde K}$ to denote the \emph{un-normalized} kernel matrix and $\vec K$ +to denote the kernel matrix \emph{after} it has been centered.\\ +} + +%\underline{Centering $\vec{\widetilde K}$:} + +The centering is built on the kernel trick\notesonly{ as in Eq.\ref{eq:trick}}: +\begin{equation} + k(\vec{x}, \vec{x}') = K_{\vec x,\vec x'} = \vec{\phi}_{(\vec{x})}^\top + \vec{\phi}_{(\vec{x}')}, +\end{equation} + +except that we compute the inner product using \emph{centered} $\phi_{(\vec x)}$: + +\begin{equation} + \underbrace{ \vec{\phi}_{\big( \vec{x}^{(\alpha)} \big)} }_{ + \substack{ \text{centered} \\ + \text{feature vectors}} } + \hspace{-3mm} + = \widetilde{\vec{\phi}}_{\big( \vec{x}^{(\alpha)} \big)} + \hspace{-1mm} + - \frac{1}{p} \sum\limits_{\gamma = 1}^p + \hspace{-2mm} + \underbrace{ \widetilde{\vec{\phi}}_{\big( \vec{x}^{(\gamma)} + \big)} }_{ + \substack{ \text{uncentered} \\ + \text{feature vectors}} } + \qquad \forall \alpha +\end{equation} + +\notesonly{ +Plugging \emph{centered} $\vec{\phi}_{(\vec{x})}$ into the kernel trick equation\slidesonly{ above} reveals how to obtain centered elements $K_{\alpha\beta}$ for }forming a \emph{centered} \notesonly{Kernel matrix }$\vec K$ from an \emph{unnormalized} \notesonly{Kernel matrix }$\widetilde{\vec K}$. + +\begin{equation} + K_{\alpha \beta} = \underbrace{\widetilde{K}_{\alpha \beta}}_{ _{= \, k \left ( \vec{x}^{(\alpha)}, + \vec{x}^{(\beta)} \right )}} + - \;\underbrace{\frac{1}{p} \sum\limits_{\delta = 1}^p + \widetilde{K}_{\alpha \delta}}_\text{\scriptsize{row avg.}} + \; - \; \underbrace{\frac{1}{p} + \sum\limits_{\gamma = 1}^p + \widetilde{K}_{\gamma \beta}}_\text{\scriptsize{col. 
avg.}} + \;+ \;\underbrace{\frac{1}{p^2} + \sum\limits_{\gamma, \delta = 1}^p + \widetilde{K}_{\gamma \delta}}_\text{\scriptsize{matrix avg.}} +\end{equation}% + + +\end{frame} + diff --git a/notes/03_kernel-pca/5_apply.tex b/notes/03_kernel-pca/5_apply.tex new file mode 100644 index 0000000..0b4c4fc --- /dev/null +++ b/notes/03_kernel-pca/5_apply.tex @@ -0,0 +1,55 @@ + + +\subsection{Applying Kernel PCA} + +\begin{frame}{How do I interpret the role of PCs?} + +\notesonly{\question{How do I interpret the role of PCs?}} + +\svspace{-7mm} + +\begin{center} + \includegraphics[height=5cm]{img/contourplot_kpca_rbf} + \notesonly{\captionof{figure}{Projections onto individual PCs}} +\end{center} +\svspace{-0.8cm} +\begin{center} + \includegraphics[height=3.2cm]{img/screeplot_kpca_rbf.pdf} + \notesonly{\captionof{figure}{Scree plot}} +\end{center} + +\end{frame} + +\subsection{A note on implementation} + +\begin{frame}{\subsecname} + +\question{Should you solve the transformed eigenvalue problem using \emph{eigenvalue decomposition} or \emph{SVD}?} + +\slidesonly{ +\begin{equation} +%\label{eq:eigsimple1} + \vec{K} \, \vec{a} = p \lambda \vec{a} +\end{equation} +} + +\pause + +- \emph{eigenvalue decomposition} is the only option for Kernel PCA. \emph{SVD} is simply not applicable since we don't have access to $\vec \phi_{((\vec x))}$ + +\begin{equation} + %\label{eq:cov} +\vec C_{\phi} = \frac{1}{p} \sum_{\alpha=1}^{p} \vec{\phi}_{(\vec{x}^{(\alpha)})} \vec{\phi}^{\top}_{(\vec{x}^{(\alpha)})} +\end{equation} + +\end{frame} + +\begin{frame}{\subsecname} + +\question{Which property of $\widetilde {\vec{K}}$ can we exploit to speed up computation?} + +\pause + +The kernel function is symmetric. $k(\vec x^{(\alpha)}, \vec x^{(\beta)}) = k(\vec x^{(\beta)}, \vec x^{(\alpha)})$. One can exploit this by reducing how many times the kernel function is actually applied while traversing the training samples when computing $\widetilde {\vec{K}}$. 
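+
+\notesonly{
+Concretely, symmetry means only the $p(p+1)/2$ entries on and above the diagonal of $\widetilde {\vec{K}}$ require an actual kernel evaluation; the remaining entries are copies of already computed ones.
+}
+
+\notesonly{
+A related implementation sketch (not from the lecture; $\vec 1_p$ denotes the $p \times p$ matrix of ones, a symbol not used elsewhere in these notes): the element-wise centering from the previous section can be carried out with plain matrix operations,
+$\vec K = \widetilde{\vec K} - \frac{1}{p} \vec 1_p \widetilde{\vec K} - \frac{1}{p} \widetilde{\vec K} \, \vec 1_p + \frac{1}{p^2} \vec 1_p \widetilde{\vec K} \, \vec 1_p$,
+which subtracts the column and row averages and adds back the overall matrix average in a single step.
+}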
+ +\end{frame} diff --git a/notes/03_kernel-pca/Makefile b/notes/03_kernel-pca/Makefile index 9ab797d..3b82f68 100644 --- a/notes/03_kernel-pca/Makefile +++ b/notes/03_kernel-pca/Makefile @@ -10,6 +10,7 @@ projnameA = $(projname).notes slides: $(projname).slides.tex $(projname).tex $(compile) $(projname).slides.tex + $(compile) $(projname).slides.tex # bibtex $(projname).slides # $(compile) --interaction=batchmode $(projname).slides.tex # $(compile) --interaction=batchmode $(projname).slides.tex diff --git a/notes/03_kernel-pca/img/contourplot_kpca_rbf.pdf b/notes/03_kernel-pca/img/contourplot_kpca_rbf.pdf new file mode 100644 index 0000000..44c63f4 Binary files /dev/null and b/notes/03_kernel-pca/img/contourplot_kpca_rbf.pdf differ diff --git a/notes/03_kernel-pca/img/guassian_function_1d.pdf b/notes/03_kernel-pca/img/guassian_function_1d.pdf new file mode 100644 index 0000000..7ebc0e5 Binary files /dev/null and b/notes/03_kernel-pca/img/guassian_function_1d.pdf differ diff --git a/notes/03_kernel-pca/img/guassian_function_2d.pdf b/notes/03_kernel-pca/img/guassian_function_2d.pdf new file mode 100644 index 0000000..70154f0 Binary files /dev/null and b/notes/03_kernel-pca/img/guassian_function_2d.pdf differ diff --git a/notes/03_kernel-pca/img/guassian_function_2d_3Dview.pdf b/notes/03_kernel-pca/img/guassian_function_2d_3Dview.pdf new file mode 100644 index 0000000..5454e76 Binary files /dev/null and b/notes/03_kernel-pca/img/guassian_function_2d_3Dview.pdf differ diff --git a/notes/03_kernel-pca/img/koffer.pdf b/notes/03_kernel-pca/img/koffer.pdf new file mode 100644 index 0000000..50d627c Binary files /dev/null and b/notes/03_kernel-pca/img/koffer.pdf differ diff --git a/notes/03_kernel-pca/img/koffer.svg b/notes/03_kernel-pca/img/koffer.svg new file mode 100644 index 0000000..396e35f --- /dev/null +++ b/notes/03_kernel-pca/img/koffer.svg @@ -0,0 +1,205 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + KPCA + + + + + + + + non-lineartransformation+standard linearPCA + + diff --git a/notes/03_kernel-pca/img/mem_notthisagain.jpg b/notes/03_kernel-pca/img/mem_notthisagain.jpg new file mode 100644 index 0000000..29b7a55 Binary files /dev/null and b/notes/03_kernel-pca/img/mem_notthisagain.jpg differ diff --git a/notes/03_kernel-pca/img/mem_waitforit.jpg b/notes/03_kernel-pca/img/mem_waitforit.jpg new file mode 100644 index 0000000..251d66a Binary files /dev/null and b/notes/03_kernel-pca/img/mem_waitforit.jpg differ diff --git a/notes/03_kernel-pca/img/meme_candononlinpca.jpg b/notes/03_kernel-pca/img/meme_candononlinpca.jpg new file mode 100644 index 0000000..f84b46c Binary files /dev/null and b/notes/03_kernel-pca/img/meme_candononlinpca.jpg differ diff --git a/notes/03_kernel-pca/img/meme_exactly.jpg b/notes/03_kernel-pca/img/meme_exactly.jpg new file mode 100644 index 0000000..4f390c2 Binary files /dev/null and b/notes/03_kernel-pca/img/meme_exactly.jpg differ diff --git a/notes/03_kernel-pca/img/meme_mapx.jpg b/notes/03_kernel-pca/img/meme_mapx.jpg new file mode 100644 index 0000000..34b87ef Binary files /dev/null and b/notes/03_kernel-pca/img/meme_mapx.jpg differ diff --git a/notes/03_kernel-pca/img/meme_necessary.jpg b/notes/03_kernel-pca/img/meme_necessary.jpg new file mode 100644 index 0000000..15b7e1b Binary files /dev/null and b/notes/03_kernel-pca/img/meme_necessary.jpg differ diff --git a/notes/03_kernel-pca/img/meme_nowwhat.jpg b/notes/03_kernel-pca/img/meme_nowwhat.jpg new file mode 100644 index 0000000..e42f9a5 Binary files /dev/null and 
b/notes/03_kernel-pca/img/meme_nowwhat.jpg differ diff --git a/notes/03_kernel-pca/img/meme_ofcourse.jpg b/notes/03_kernel-pca/img/meme_ofcourse.jpg new file mode 100644 index 0000000..f0447c6 Binary files /dev/null and b/notes/03_kernel-pca/img/meme_ofcourse.jpg differ diff --git a/notes/03_kernel-pca/img/meme_ofcourse2.jpg b/notes/03_kernel-pca/img/meme_ofcourse2.jpg new file mode 100644 index 0000000..ae15ca4 Binary files /dev/null and b/notes/03_kernel-pca/img/meme_ofcourse2.jpg differ diff --git a/notes/03_kernel-pca/img/meme_realinput.jpg b/notes/03_kernel-pca/img/meme_realinput.jpg new file mode 100644 index 0000000..8305851 Binary files /dev/null and b/notes/03_kernel-pca/img/meme_realinput.jpg differ diff --git a/notes/03_kernel-pca/img/meme_reconstruct.jpg b/notes/03_kernel-pca/img/meme_reconstruct.jpg new file mode 100644 index 0000000..240c87e Binary files /dev/null and b/notes/03_kernel-pca/img/meme_reconstruct.jpg differ diff --git a/notes/03_kernel-pca/img/meme_right.jpg b/notes/03_kernel-pca/img/meme_right.jpg new file mode 100644 index 0000000..ef40d5d Binary files /dev/null and b/notes/03_kernel-pca/img/meme_right.jpg differ diff --git a/notes/03_kernel-pca/img/meme_sort.jpg b/notes/03_kernel-pca/img/meme_sort.jpg new file mode 100644 index 0000000..7aaaad9 Binary files /dev/null and b/notes/03_kernel-pca/img/meme_sort.jpg differ diff --git a/notes/03_kernel-pca/img/monomials.pdf b/notes/03_kernel-pca/img/monomials.pdf new file mode 100644 index 0000000..d8d7504 Binary files /dev/null and b/notes/03_kernel-pca/img/monomials.pdf differ diff --git a/notes/03_kernel-pca/img/points_rbf.pdf b/notes/03_kernel-pca/img/points_rbf.pdf new file mode 100644 index 0000000..b36dbc1 Binary files /dev/null and b/notes/03_kernel-pca/img/points_rbf.pdf differ diff --git a/notes/03_kernel-pca/img/points_rbf_rot_transl.pdf b/notes/03_kernel-pca/img/points_rbf_rot_transl.pdf new file mode 100644 index 0000000..aa911eb Binary files /dev/null and b/notes/03_kernel-pca/img/points_rbf_rot_transl.pdf differ diff --git a/notes/03_kernel-pca/img/scatter.pdf b/notes/03_kernel-pca/img/scatter.pdf new file mode 100644 index 0000000..18cfd2e Binary files /dev/null and b/notes/03_kernel-pca/img/scatter.pdf differ diff --git a/notes/03_kernel-pca/img/screeplot_kpca_rbf.pdf b/notes/03_kernel-pca/img/screeplot_kpca_rbf.pdf new file mode 100644 index 0000000..0e3a47b Binary files /dev/null and b/notes/03_kernel-pca/img/screeplot_kpca_rbf.pdf differ diff --git a/notes/03_kernel-pca/img/storehd.pdf b/notes/03_kernel-pca/img/storehd.pdf new file mode 100644 index 0000000..d03b633 Binary files /dev/null and b/notes/03_kernel-pca/img/storehd.pdf differ diff --git a/notes/03_kernel-pca/tutorial.tex b/notes/03_kernel-pca/tutorial.tex index d4a0340..cc633ff 100644 --- a/notes/03_kernel-pca/tutorial.tex +++ b/notes/03_kernel-pca/tutorial.tex @@ -62,7 +62,39 @@ \newpage \mode -\input{./1_kernel-pca} +\input{./0_recap} +\mode* + +\clearpage + + +\mode +\input{./1_nonlin} +\mode* + +\clearpage + + +\mode +\input{./2_trick} +\mode* + +\clearpage + +\mode +\input{./3_kpca} +\mode* + +\clearpage + +\mode +\input{./4_centering} +\mode* + +\clearpage + +\mode +\input{./5_apply} \mode* \clearpage