day1/session3.tex
author Puneeth Chaganti <punchagan@fossee.in>
Mon, 26 Oct 2009 12:41:06 +0530
changeset 164 b03c0d1be31f
parent 161 ff22fae4fde5
child 167 5f13be28532d
permissions -rw-r--r--
Changed Least squares fit in session3.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Tutorial slides on Python.
%
% Author: FOSSEE
% Copyright (c) 2009, FOSSEE, IIT Bombay
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\documentclass[14pt,compress]{beamer}
%\documentclass[draft]{beamer}
%\documentclass[compress,handout]{beamer}
%\usepackage{pgfpages} 
%\pgfpagesuselayout{2 on 1}[a4paper,border shrink=5mm]

% Modified from: generic-ornate-15min-45min.de.tex
\mode<presentation>
{
  \usetheme{Warsaw}
  \useoutertheme{split}
  \setbeamercovered{transparent}
}

\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
%\usepackage{times}
\usepackage[T1]{fontenc}

% Taken from Fernando's slides.
\usepackage{ae,aecompl}
\usepackage{mathpazo,courier,euler}
\usepackage[scaled=.95]{helvet}
\usepackage{amsmath}

\definecolor{darkgreen}{rgb}{0,0.5,0}

\usepackage{listings}
% Default appearance of all listings: Python highlighting in a bold
% typewriter face, with coloured keywords, comments and strings.
\lstset{
  language=Python,
  basicstyle=\ttfamily\bfseries,
  keywordstyle=\color{blue}\bfseries,
  commentstyle=\color{red}\itshape,
  stringstyle=\color{darkgreen},
  showstringspaces=false
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Macros
% Colours of the "emphbar" box: 20% blue background, black foreground.
\setbeamercolor{emphbar}{bg=blue!20, fg=black}
% \emphbar{<content>}: typeset <content> inside a rounded beamercolorbox
% that uses the "emphbar" colours defined above.
\newcommand{\emphbar}[1]
{\begin{beamercolorbox}[rounded=true]{emphbar} 
      {#1}
 \end{beamercolorbox}
}
% Counter "time", initialised to 0 and advanced only through \inctime.
\newcounter{time}
\setcounter{time}{0}
% \inctime{<n>}: add <n> to the "time" counter, then print the new value
% followed by " m" in \tiny type.
\newcommand{\inctime}[1]{\addtocounter{time}{#1}{\tiny \thetime\ m}}

% \typ{<code>}: typeset <code> inline by passing it to listings' \lstinline.
\newcommand{\typ}[1]{\lstinline{#1}}

% \kwrd{<text>}: typeset <text> as a keyword (typewriter, bold, blue).
% The definition contains no spaces, so none leak into the surrounding
% text, and \textcolor limits the colour to its argument.
\newcommand{\kwrd}[1]{\texttt{\textbf{\textcolor{blue}{#1}}}}

%%% This is from Fernando's setup.
% \usepackage{color}
% \definecolor{orange}{cmyk}{0,0.4,0.8,0.2}
% % Use and configure listings package for nicely formatted code
% \usepackage{listings}
% \lstset{
%    language=Python,
%    basicstyle=\small\ttfamily,
%    commentstyle=\ttfamily\color{blue},
%    stringstyle=\ttfamily\color{orange},
%    showstringspaces=false,
%    breaklines=true,
%    postbreak = \space\dots
% }

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title page
\title[]{Least Squares Fit\\Statistical Plotting}

\author[FOSSEE] {FOSSEE}

\institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
\date[] {31 October 2009\\Day 1, Session 3}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
%\logo{\pgfuseimage{iitmlogo}}


%% Delete this, if you do not want the table of contents to pop up at
%% the beginning of each subsection:
\AtBeginSubsection[]
{
  \begin{frame}<beamer>
    \frametitle{Outline}
    \tableofcontents[currentsection,currentsubsection]
  \end{frame}
}

%% The same "Outline" frame is also inserted at the beginning of each
%% section; delete this as well to suppress it.
\AtBeginSection[]
{
  \begin{frame}<beamer>
    \frametitle{Outline}
    \tableofcontents[currentsection,currentsubsection]
  \end{frame}
}

\newcommand{\num}{\texttt{numpy}}


% If you wish to uncover everything in a step-wise fashion, uncomment
% the following command: 
%\beamerdefaultoverlayspecification{<+->}

%\includeonlyframes{current,current1,current2,current3,current4,current5,current6}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DOCUMENT STARTS
\begin{document}

\begin{frame}
  \maketitle
\end{frame}

%% \begin{frame}
%%   \frametitle{Outline}
%%   \tableofcontents
%%   % You might wish to add the option [pausesections]
%% \end{frame}

\begin{frame}[fragile]
\frametitle{Least Squares Fit}
\vspace{-0.15in}
\begin{figure}
\includegraphics[width=4in]{data/least-sq-fit.png}
\end{figure}
\end{frame}

\begin{frame}[fragile]
\frametitle{Calculating $T^2$ Efficiently}
\begin{lstlisting}
In []: for t in T:
 ....:     Tsq.append(t*t)
\end{lstlisting}
\begin{itemize}
\item This is not very efficient
\item We use arrays to make it efficient
\end{itemize}
\begin{lstlisting}
In []: L = array(L)
In []: T = array(T)
In []: Tsq = T*T
In []: plot(L, Tsq, 'o')
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
\frametitle{Arrays}
\begin{itemize}
\item \typ{T} and \typ{L} are now arrays
\item Arrays are very efficient and powerful
\item Very easy to perform element-wise operations
\item \typ{+, -, *, /, \%}
\item More about arrays later
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Least Squares Fit Curve}
\begin{itemize}
\item $T^2$ and $L$ have a linear relationship
\item Hence, the least squares fit curve is a line
\item We shall use the \typ{lstsq} function
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{\typ{lstsq}}
\begin{itemize}
\item We need to fit a line through points for the equation $T^2 = m \cdot L+c$
\item The equation can be re-written as $T^2 = A \cdot p$
\item where $A$ is
  $\begin{bmatrix}
  L_1 & 1 \\
  L_2 & 1 \\
  \vdots & \vdots\\
  L_N & 1 \\
  \end{bmatrix}$
  and $p$ is
  $\begin{bmatrix}
  m\\
  c\\
  \end{bmatrix}$
\item We need to find $p$ to plot the line
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Vandermonde Matrix}
\begin{itemize}
\item $A$ is also called a Vandermonde matrix
\item It can be generated using \typ{vander}
\end{itemize}
Vandermonde matrix of order $M$
\begin{equation*}
  \begin{bmatrix}
  l_1^{M-1} & \ldots & l_1 & 1 \\
  l_2^{M-1} & \ldots &l_2 & 1 \\
  \vdots & \ddots & \vdots & \vdots\\
  l_N^{M-1} & \ldots & l_N & 1 \\
  \end{bmatrix}
\end{equation*}
\begin{lstlisting}
In []: A = vander(L,2)
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
\frametitle{\typ{lstsq} \ldots}
\begin{itemize}
\item Now use the \typ{lstsq} function
\item Along with several other values, it returns the least squares solution
\end{itemize}
\begin{lstlisting}
In []: coef, res, r, s = lstsq(A,Tsq)
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
\frametitle{Least Squares Fit Line \ldots}
We get the points of the line from \typ{coef}
\begin{lstlisting}
In []: Tline = coef[0]*L + coef[1]
\end{lstlisting}
\begin{itemize}
\item Now plot Tline vs.\ L to get the least squares fit line.
\end{itemize}
\begin{lstlisting}
In []: plot(L, Tline)
\end{lstlisting}
\end{frame}

\begin{frame}
  \frametitle{Statistical Analysis and Parsing}
  Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
  \begin{itemize}
    \item Average total marks scored in each region
    \item Subject-wise average score of each region
    \item \alert{??Subject-wise average score for all regions combined??}
    \item Subject-wise standard deviation of scores for each region
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Statistical Analysis and Parsing \ldots}
  Machinery required:
  \begin{itemize}
    \item File reading and parsing
    \item NumPy arrays --- sum by rows and sum by columns
    \item Dictionaries
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{File reading and parsing}
  Understanding the structure of sslc1.txt
  \begin{itemize}
    \item Each line in the file, i.e.\ each row, is a single record.
    \item Each record holds the data of a single student
    \item Each record consists of several fields separated by a `;'
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{File reading and parsing \ldots}
  Each record consists of:
  \begin{itemize}
    \item Region Code
    \item Roll Number
    \item Name
    \item Marks of 5 subjects
    \item Total marks
    \item Pass (P)
    \item Withdrawn (W)
    \item Fail (F)
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{File reading and parsing \ldots}
  \begin{lstlisting}
for record in open('sslc1.txt'):
    fields = record.split(';')
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Dictionary}
  \begin{itemize}
    \item lists index: 0 \ldots n
    \item dictionaries index using any hashable objects
    \item d = \{ ``Hitchhiker's guide'' : 42, ``Terminator'' : ``I'll be back''\}
    \item d[``Terminator''] => ``I'll be back''
    \item ``Terminator'' is called the key of \typ{d}
    \item ``I'll be back'' is called the value of the key ``Terminator''
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Dictionary - Building parsed data}
  \begin{itemize}
    \item Let the parsed data be stored in dictionary \typ{data}
    \item Keys of \typ{data} are strings - region codes
    \item Value of the key is another dictionary.
    \item This dictionary contains:
    \begin{itemize}
      \item `marks': A list of NumPy arrays
      \item `total': Total marks of each student
      \item `P': Number of passes
      \item `F': Number of failures
      \item `W': Number of withdrawals
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Dictionary - Building parsed data \ldots}
  \small
  \begin{lstlisting}
data = {}
for record in open('sslc1.txt'):
    fields = record.split(';')
    if fields[0] not in data:
        # lists, so that we can append
        data[fields[0]] = {
            'marks': [],
            'total': [],
            'P': 0,
            'F': 0,
            'W': 0
            }
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Dictionary - Building parsed data \ldots}
  \begin{lstlisting}
marks = []
for field in fields[3:8]:
    score_str = field.strip()
    # 'AA', 'AAA', blank count as 0
    no_score = ('AA', 'AAA', '')
    score = (0 if score_str in no_score
             else int(score_str))
    marks.append(score)

data[fields[0]]['marks'].append(marks)
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Dictionary - Building parsed data \ldots}
  \begin{lstlisting}
total_str = fields[8].strip()
# 'AA', 'AAA', blank count as 0
total = (0 if total_str in
         ('AA', 'AAA', '')
         else int(total_str))
data[fields[0]]['total'].append(total)

pfw_key = (fields[9] or fields[10]
           or 'F')
data[fields[0]][pfw_key] += 1
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Dictionary - Building parsed data \ldots}
  \begin{lstlisting}
# first non-empty of fields 9 and 10,
# otherwise 'F'
pfw_key = (fields[9]
    or fields[10]
    or 'F')
data[fields[0]][pfw_key] += 1
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Calculations}
  \small
  \begin{lstlisting}
for k in data:
    data[k]['marks'] = array(data[k]['marks'])
    data[k]['total'] = array(data[k]['total'])

    data[k]['avg'] = average(
        data[k]['total'])
    marks = data[k]['marks']
    # rows: students, columns: subjects
    # axis=0 gives one value per subject
    sub_avg = average(marks, axis=0)
    sub_std = sqrt(average(square(
        marks - sub_avg), axis=0))
    data[k]['sub_avg'] = sub_avg
    data[k]['sub_std'] = sub_std
  \end{lstlisting}
\end{frame}

\end{document}