% move all configuration stuff into includes file so we can focus on the content
\input{include}
\subtitle{module 4.2: regression \& clustering}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
% generate title page
\input{include/titlepage}
\section[overview]{lecture overview}
\begin{frame}{introduction}{overview}
\begin{block}{corresponding textbook section}
%\href{http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6331125}{Chapter 8: Musical Genre, Similarity, and Mood} (pp.~155)
sections~4.2 -- 4.4
\end{block}
\begin{itemize}
\item \textbf{lecture content}
\begin{itemize}
\item regression: non-categorical data analysis
\item clustering: unsupervised data analysis
\end{itemize}
\bigskip
\item<2-> \textbf{learning objectives}
\begin{itemize}
\item describe the basic principles of data-driven machine learning approaches
\item implement linear regression in Python
\item implement kMeans clustering in Python
\end{itemize}
\end{itemize}
\inserticon{directions}
\end{frame}
\section[intro]{introduction}
\begin{frame}{regression}{introduction}
remember the flow chart of a general ACA system:
\vspace{-3mm}
\begin{figure}
\input{pict/introduction_ACASystem_3}
\end{figure}
\begin{itemize}
\item<2-> \textit{classification}:
\begin{itemize}
\item assign class labels to data
\end{itemize}
\item<2-> \color<3->{highlight}{\textit{regression}}:
\begin{itemize}
\item estimate numerical labels for data
\end{itemize}
\item<2-> \color<3->{highlight}{\textit{clustering}}:
\begin{itemize}
\item find grouping patterns in data
\end{itemize}
\end{itemize}
\end{frame}
\section{regression}
\begin{frame}{regression}{introduction}
\vspace{-3mm}
\begin{itemize}
\item given a set of pairs of input data and corresponding output observations
\item find a model that maps the input to the output
\bigskip
\item<2-> the model can then be used to predict a (continuous-valued) output for a new, unknown input
\end{itemize}
\end{frame}
\begin{frame}{regression}{linear regression}
\vspace{-3mm}
\begin{columns}
\column{.4\linewidth}
\begin{itemize}
\item estimate the slope $m$ and offset $b$ of a straight line that fits the data best:
\begin{footnotesize}
\begin{equation*}
\hat{y}(r) = m\cdot v(r) + b
\end{equation*}
\end{footnotesize}
\bigskip
\item minimizing the mean squared error leads to:
\begin{footnotesize}
\begin{eqnarray*}
b &=& \mu_y - m\cdot \mu_v \\
m &=& \frac{\sum\limits_{r=0}^{\mathcal{R}-1}\left(y(r)- \mu_y\right)\cdot \left(v(r) - \mu_v\right)}{\sum\limits_{r=0}^{\mathcal{R}-1}\left(v(r) - \mu_v\right)^2}
\end{eqnarray*}
\end{footnotesize}
\end{itemize}
\column{.6\linewidth}
\vspace{-10mm}
\figwithmatlab{LinearRegression}
\end{columns}
\end{frame}
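% added example: a hedged python sketch of the closed-form solution above;
% the function and variable names are illustrative, not from the textbook code
\begin{frame}[fragile]{regression}{linear regression: python sketch}
a minimal \texttt{numpy} sketch of the closed-form solution above (function and variable names are illustrative):
\begin{footnotesize}
\begin{verbatim}
import numpy as np

def linear_regression(v, y):
    """estimate slope m and offset b minimizing the MSE."""
    mu_v, mu_y = np.mean(v), np.mean(y)
    m = np.sum((y - mu_y) * (v - mu_v)) / np.sum((v - mu_v)**2)
    b = mu_y - m * mu_v
    return m, b

v = np.array([0., 1., 2., 3.])
y = np.array([1.1, 2.9, 5.2, 6.8])
m, b = linear_regression(v, y)
y_hat = m * v + b  # predicted output for the input
\end{verbatim}
\end{footnotesize}
\end{frame}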
\section{clustering}
\begin{frame}{clustering}{introduction}
\begin{itemize}
\item clustering is usually unsupervised and exploratory
\item group observations
\begin{itemize}
\item `similar' observations are grouped together
\item `dissimilar' observations are in different groups
\end{itemize}
\item depends on the definition of `similarity'/distance
\end{itemize}
\begin{figure}%
\centering
\input{pict/inference_clustering_example}
\end{figure}
\end{frame}
\begin{frame}{clustering}{kMeans clustering}
\only<1>{
\vspace{-5mm}
\begin{columns}
\column{.33\linewidth}
\begin{enumerate}
\item \emph{Initialization}: randomly select $K$ observations from the data set as the initial cluster means.
\item \emph{Assignment}: assign each observation to the cluster with the closest mean.
\item \emph{Update}: recompute the mean of each cluster.
\item \emph{Iteration}: go to step $2$ until the clusters converge.
\end{enumerate}
\column{.67\linewidth}
%\vspace{-10mm}
\includeanimation
{Kmeans}
{00}
{04}
{.5}
%
\end{columns}
}
\only<2>{\figwithmatlab{Kmeans}}
\end{frame}
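% added example: a hedged python sketch of the algorithm above; names are
% illustrative, and empty clusters are deliberately not handled
\begin{frame}[fragile]{clustering}{kMeans: python sketch}
a minimal \texttt{numpy} sketch of the steps above, using the Euclidean distance (function and variable names are illustrative; empty clusters are not handled):
\begin{footnotesize}
\begin{verbatim}
import numpy as np

def kmeans(V, K, max_iter=100, seed=0):
    """V: one observation per row, K: number of clusters."""
    rng = np.random.default_rng(seed)
    # initialization: K random observations as cluster means
    means = V[rng.choice(len(V), K, replace=False)]
    for _ in range(max_iter):
        # assignment: each observation to the closest mean
        d = np.linalg.norm(V[:, None] - means[None], axis=2)
        idx = np.argmin(d, axis=1)
        # update: recompute the mean of each cluster
        new_means = np.array([V[idx == k].mean(axis=0)
                              for k in range(K)])
        # iteration: stop once the means no longer change
        if np.allclose(new_means, means):
            break
        means = new_means
    return idx, means
\end{verbatim}
\end{footnotesize}
\end{frame}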
\section{distances}
\begin{frame}{distances}{overview}
\vspace{-5mm}
\begin{columns}
\column{.5\linewidth}
\begin{itemize}
\item<1-> \emph{\only<1>{\textcolor{blue}}{Euclidean Distance}} (L2 Distance)
\smallskip
\item<2-> \emph{\only<2>{\textcolor{blue}}{Manhattan Distance}} (L1 Distance)
\smallskip
\item<3-> \emph{\only<3>{\textcolor{blue}}{Cosine Similarity/Distance}}
\begin{itemize}
\item range is $[-1;1]$ ($[0;1]$ for non-negative input)
\item a similarity measure, not a distance
\item depends only on the angle between the vectors, not on their lengths
\end{itemize}
\smallskip
\item<4-> \emph{\only<4>{\textcolor{blue}}{Kullback-Leibler Divergence}}
\begin{itemize}
\item not symmetric: $d_\mathrm{KL}(\vec{v}_\mathrm{a},\vec{v}_\mathrm{b})\neq d_\mathrm{KL}(\vec{v}_\mathrm{b},\vec{v}_\mathrm{a})$,
\item designed to measure distance between probability distributions
\end{itemize}
\end{itemize}
\column{.5\linewidth}
\only<1>{
\begin{footnotesize}\begin{equation*}\label{eq:dist_eucl}
d_\mathrm{EU}(\vec{v}_\mathrm{a},\vec{v}_\mathrm{b}) = \left\|\vec{v}_\mathrm{a}-\vec{v}_\mathrm{b}\right\|_2 = \sqrt{\sum\limits_{j = 0}^{\mathcal{J}-1}{\big(v_\mathrm{a}(j)-v_\mathrm{b}(j)\big)^2}} .
\end{equation*}\end{footnotesize}
}
\only<2>{
\begin{footnotesize}\begin{equation*}\label{eq:dist_manh}
d_\mathrm{M}(\vec{v}_\mathrm{a},\vec{v}_\mathrm{b}) = \left\|\vec{v}_\mathrm{a}-\vec{v}_\mathrm{b}\right\|_1 = \sum\limits_{j = 0}^{\mathcal{J}-1}{\big|v_\mathrm{a}(j)-v_\mathrm{b}(j)\big|} .
\end{equation*}\end{footnotesize}
}
\only<3>{
\begin{footnotesize}\begin{equation*}\label{eq:dist_cos}
s_\mathrm{C}(\vec{v}_\mathrm{a},\vec{v}_\mathrm{b}) = \frac{\sum\limits_{j = 0}^{\mathcal{J}-1}{v_\mathrm{a}(j)\cdot v_\mathrm{b}(j)}}{\sqrt{\sum\limits_{j = 0}^{\mathcal{J}-1}{v_\mathrm{a}(j)^2}}\cdot\sqrt{ \sum\limits_{j = 0}^{\mathcal{J}-1}{v_\mathrm{b}(j)^2}}} .
\end{equation*}\end{footnotesize}
\begin{footnotesize}\begin{equation*}
d_\mathrm{C}(\vec{v}_\mathrm{a},\vec{v}_\mathrm{b}) = 1-s_\mathrm{C}(\vec{v}_\mathrm{a},\vec{v}_\mathrm{b}) .
\end{equation*}\end{footnotesize}
}
\only<4>{
\begin{footnotesize}\begin{equation*}\label{eq:dist_kl}
d_\mathrm{KL}(\vec{v}_\mathrm{a},\vec{v}_\mathrm{b}) = \sum\limits_{j = 0}^{\mathcal{J}-1}{v_\mathrm{a}(j)\cdot\log\left(\frac{v_\mathrm{a}(j)}{v_\mathrm{b}(j)}\right)} . %-v_\mathrm{a}(j)+v_\mathrm{b}(j)
\end{equation*}\end{footnotesize}
}
\end{columns}
\end{frame}
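% added example: the four measures above as a hedged python sketch; all
% functions assume equally sized 1-d numpy arrays, and the KL divergence
% additionally assumes strictly positive probability vectors
\begin{frame}[fragile]{distances}{python sketch}
the four measures above in \texttt{numpy} (a sketch; all functions assume equally sized 1-d arrays, and $d_\mathrm{KL}$ strictly positive probability vectors):
\begin{footnotesize}
\begin{verbatim}
import numpy as np

def d_euclidean(a, b):                 # L2 distance
    return np.sqrt(np.sum((a - b)**2))

def d_manhattan(a, b):                 # L1 distance
    return np.sum(np.abs(a - b))

def d_cosine(a, b):                    # 1 - cosine similarity
    s = np.sum(a * b) / (np.sqrt(np.sum(a**2)) *
                         np.sqrt(np.sum(b**2)))
    return 1 - s

def d_kl(a, b):                        # not symmetric
    return np.sum(a * np.log(a / b))
\end{verbatim}
\end{footnotesize}
\end{frame}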
\section{summary}
\begin{frame}{summary}{lecture content}
\begin{itemize}
\item \textbf{regression}
\begin{itemize}
\item model to estimate numeric labels from features
\item linear regression assumes the model is a straight line
\end{itemize}
\bigskip
\item \textbf{clustering}
\begin{itemize}
\item unsupervised grouping
\item feature space and distance measure determine the result
\item the number of clusters usually has to be known
\item kMeans is a simple way of clustering
\end{itemize}
\bigskip
\item \textbf{distances}
\begin{itemize}
\item L1 and L2 are the most common distances
\item not all `distances' are true metrics; a proper distance
\begin{itemize}
\item cannot be negative
\item is symmetric
\end{itemize}
\end{itemize}
\end{itemize}
\inserticon{summary}
\end{frame}
\end{document}