% Updated session 2, day 1 slides.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tutorial slides on Python.
%
% Author: Prabhu Ramachandran <prabhu at aero.iitb.ac.in>
% Copyright (c) 2005-2009, Prabhu Ramachandran
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentclass[14pt,compress]{beamer}
%\documentclass[draft]{beamer}
%\documentclass[compress,handout]{beamer}
%\usepackage{pgfpages}
%\pgfpagesuselayout{2 on 1}[a4paper,border shrink=5mm]
% Modified from: generic-ornate-15min-45min.de.tex
% Appearance settings applied in presentation mode: the Warsaw theme with the
% "split" outer theme; covered overlay material is set to "transparent".
\mode<presentation>
{
\usetheme{Warsaw}
\useoutertheme{split}
\setbeamercovered{transparent}
}
\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
%\usepackage{times}
\usepackage[T1]{fontenc}
% Taken from Fernando's slides.
\usepackage{ae,aecompl}
\usepackage{mathpazo,courier,euler}
\usepackage[scaled=.95]{helvet}
\usepackage{amsmath}
% Colour used below as the string style of code listings.
\definecolor{darkgreen}{rgb}{0,0.5,0}
\usepackage{listings}
% Default style for every listing in the deck: Python syntax, bold typewriter
% base font, red italic comments, dark green strings, bold blue keywords, and
% no visible-space glyphs inside string literals.
\lstset{language=Python,
basicstyle=\ttfamily\bfseries,
commentstyle=\color{red}\itshape,
stringstyle=\color{darkgreen},
showstringspaces=false,
keywordstyle=\color{blue}\bfseries}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Macros
% Colours of the highlight box drawn by \emphbar.
\setbeamercolor{emphbar}{bg=blue!20, fg=black}
% \emphbar{<text>}: typeset <text> in a rounded beamercolorbox using the
% emphbar colours.  Line ends are protected with % so that the definition
% itself contributes no stray spaces.
\newcommand{\emphbar}[1]{%
  \begin{beamercolorbox}[rounded=true]{emphbar}%
    {#1}%
  \end{beamercolorbox}%
}
% Running total for the talk (printed with an "m" suffix, presumably minutes).
\newcounter{time}
\setcounter{time}{0}
% \inctime{<n>}: add <n> to the running total and print the new total in tiny type.
\newcommand{\inctime}[1]{\addtocounter{time}{#1}{\tiny \thetime\ m}}
% \typ{<code>}: inline code, typeset with the listings style set by \lstset.
\newcommand{\typ}[1]{\lstinline{#1}}
% \kwrd{<word>}: bold blue typewriter text for keywords.  \textcolor takes the
% text as an argument (\color is a declaration), and the definition carries no
% leading/trailing blanks, so no extra spaces appear around the keyword.
\newcommand{\kwrd}[1]{\texttt{\textbf{\textcolor{blue}{#1}}}}
%%% This is from Fernando's setup.
% \usepackage{color}
% \definecolor{orange}{cmyk}{0,0.4,0.8,0.2}
% % Use and configure listings package for nicely formatted code
% \usepackage{listings}
% \lstset{
% language=Python,
% basicstyle=\small\ttfamily,
% commentstyle=\ttfamily\color{blue},
% stringstyle=\ttfamily\color{orange},
% showstringspaces=false,
% breaklines=true,
% postbreak = \space\dots
% }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title page
\title[]{Arrays \& Least Squares Fit}
\author[FOSSEE] {FOSSEE}
\institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
\date[] {31 October 2009\\Day 1, Session 3}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
%\logo{\pgfuseimage{iitmlogo}}
%% Delete this, if you do not want the table of contents to pop up at
%% the beginning of each subsection:
% At the start of every subsection, insert an "Outline" frame (beamer mode
% only, per the <beamer> specification) showing the table of contents with the
% currentsection/currentsubsection options.
\AtBeginSubsection[]
{
\begin{frame}<beamer>
\frametitle{Outline}
\tableofcontents[currentsection,currentsubsection]
\end{frame}
}
% The same outline frame is inserted at the start of every section.
\AtBeginSection[]
{
\begin{frame}<beamer>
\frametitle{Outline}
\tableofcontents[currentsection,currentsubsection]
\end{frame}
}
% \num: the word "numpy" set in typewriter type.
\newcommand{\num}{\texttt{numpy}}
% If you wish to uncover everything in a step-wise fashion, uncomment
% the following command:
%\beamerdefaultoverlayspecification{<+->}
%\includeonlyframes{current,current1,current2,current3,current4,current5,current6}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DOCUMENT STARTS
\begin{document}
\begin{frame}
\maketitle
\end{frame}
%% \begin{frame}
%% \frametitle{Outline}
%% \tableofcontents
%% % You might wish to add the option [pausesections]
%% \end{frame}
\begin{frame}
\frametitle{Least Squares Fit}
In this session -
\begin{itemize}
\item We shall plot a least squares fit curve for the $T^2$ (time period squared) vs.\ $L$ (length) plot of a simple pendulum.
\item Given a file containing L and T values
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Least Squares Fit \ldots}
Machinery Required -
\begin{itemize}
\item Reading files and parsing data
\item Plotting points, lines
\item Calculating the Coefficients of the Least Squares Fit curve
\begin{itemize}
\item Arrays
\end{itemize}
\end{itemize}
\end{frame}
% Frame: read the two whitespace-separated columns of pendulum.txt into the
% lists L and T.  The loop variable is called "length" rather than "len" so
% that the builtin len() is still usable later in the same session, and the
% loop body is indented as Python requires.
\begin{frame}[fragile]
\frametitle{Reading pendulum.txt}
\begin{itemize}
\item The file has two columns
\item Column1 - L; Column2 - T
\end{itemize}
\begin{lstlisting}
In []: L = []
In []: T = []
In []: for line in open('pendulum.txt'):
  ....     length, t = line.split()
  ....     L.append(float(length))
  ....     T.append(float(t))
\end{lstlisting}
We now have two lists L and T
\end{frame}
% Frame: convert both lists to arrays, square T element-wise and plot T^2
% against L.  The result of array(L) is assigned back to L; a bare array(L)
% would only echo the array and leave L a list.
\begin{frame}[fragile]
\frametitle{Calculating $T^2$}
\begin{itemize}
\item Each element of the list T must be squared
\item Iterating over each element of the list works
\item But very slow \ldots
\item Instead, we use arrays
\end{itemize}
\begin{lstlisting}
In []: L = array(L)
In []: T = array(T)
In []: Tsq = T*T
In []: plot(L, Tsq, 'o')
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Arrays}
\begin{itemize}
\item T is now a \typ{numpy array}
\item \typ{numpy} arrays are very efficient and powerful
\item Very easy to perform element-wise operations
\item \typ{+, -, *, /, \%}
\item More about arrays later
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Least Square Polynomial}
\begin{enumerate}
\item $T^2 = \frac{4\pi^2}{g}L$
\item $T^2$ and $L$ have a linear relationship
\item We find an approximate solution to $Ax = y$, where $A$ is the Vandermonde matrix, to get the coefficients of the least squares fit line.
\end{enumerate}
\end{frame}
% Frame: layout of the Vandermonde matrix built from the lengths l_1..l_N, and
% the vander() call that builds the two-column version used for a line fit.
\begin{frame}[fragile]
\frametitle{Vandermonde Matrix}
Vandermonde matrix of order $M$
\begin{equation*}
\begin{bmatrix}
l_1^{M-1} & \ldots & l_1 & 1 \\
l_2^{M-1} & \ldots & l_2 & 1 \\
\vdots & \ddots & \vdots & \vdots \\
l_N^{M-1} & \ldots & l_N & 1 \\
\end{bmatrix}
\end{equation*}
\begin{lstlisting}
In []: A=vander(L,2)
\end{lstlisting}
\end{frame}
% Frame: solve the least squares problem A x = Tsq with lstsq and list the
% four values it returns, in the order they are unpacked below.
\begin{frame}[fragile]
\frametitle{Least Square Fit Line}
\begin{itemize}
\item We use the \typ{lstsq} function of pylab
\item It returns the
\begin{enumerate}
\item Least squares solution
\item Sum of residuals
\item Rank of matrix A
\item Singular values of A
\end{enumerate}
\end{itemize}
\begin{lstlisting}
In []: coef, res, r, s = lstsq(A,Tsq)
\end{lstlisting}
\end{frame}
% Frame: wrap the fitted coefficients in a poly1d object, evaluate it at the
% measured lengths L to get the fitted T^2 values, and plot them against L.
\begin{frame}[fragile]
\frametitle{Least Square Fit Line \ldots}
\begin{itemize}
\item Use the poly1d function of pylab, to create a function for the line equation using the coefficients obtained
\begin{lstlisting}
In []: p=poly1d(coef)
\end{lstlisting}
\item Get new $T^2$ values using the function \typ{p} obtained
\begin{lstlisting}
In []: Tline = p(L)
\end{lstlisting}
\item Now plot Tline vs. L, to get the Least squares fit line.
\begin{lstlisting}
In []: plot(L, Tline)
\end{lstlisting}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Statistical Analysis and Parsing}
Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
\begin{itemize}
\item Average total marks scored in each region
\item Subject wise average score of each region
\item \alert{??Subject wise average score for all regions combined??}
\item Find the subject wise standard deviation of scores for each region
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Statistical Analysis and Parsing \ldots}
Machinery Required -
\begin{itemize}
\item File reading and parsing
\item NumPy arrays - sum by rows and sum by columns
\item Dictionaries
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{File reading and parsing}
Understanding the structure of sslc1.txt
\begin{itemize}
\item Each line in the file, i.e.\ each row, is a single record.
\item Each record corresponds to a single student
\item Each record consists of several fields separated by a ';'
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{File reading and parsing \ldots}
Each record consists of:
\begin{itemize}
\item Region Code
\item Roll Number
\item Name
\item Marks of 5 subjects
\item Total marks
\item Pass (P)
\item Withdrawn (W)
\item Fail (F)
\end{itemize}
\end{frame}
% Frame: iterate over the lines of sslc1.txt and split each record on ';'.
% The loop body is indented so that the listing is valid Python.
\begin{frame}[fragile]
\frametitle{File reading and parsing \ldots}
\begin{lstlisting}
for record in open('sslc1.txt'):
    fields = record.split(';')
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Dictionary}
\begin{itemize}
\item lists index: 0 \ldots n
\item dictionaries index using any hashable objects
\item d = \{ ``Hitchhiker's guide'' : 42, ``Terminator'' : ``I'll be back''\}
\item d[``Terminator''] $\Rightarrow$ ``I'll be back''
\item ``Terminator'' is called the key of \typ{d}
\item ``I'll be back'' is called the value of the key ``Terminator''
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data}
\begin{itemize}
\item Let the parsed data be stored in dictionary \typ{data}
\item Keys of \typ{data} are strings - region codes
\item Value of the key is another dictionary.
\item This dictionary contains:
\begin{itemize}
\item 'marks': A list of NumPy arrays
\item 'total': Total marks of each student
\item 'P': Number of passes
\item 'F': Number of failures
\item 'W': Number of withdrawals
\end{itemize}
\end{itemize}
\end{frame}
% Frame: start of the parsing loop.  The first time a region code (fields[0])
% is seen, an entry is created for it.  'marks' and 'total' start as plain
% lists because the following frames grow them with .append(), which NumPy
% arrays do not provide; the "Calculations" frame converts them with array().
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
\small
\begin{lstlisting}
data = {}
for record in open('sslc1.txt'):
    fields = record.split(';')
    if fields[0] not in data:
        data[fields[0]] = {
            'marks': [],
            'total': [],
            'P': 0,
            'F': 0,
            'W': 0
        }
\end{lstlisting}
\end{frame}
% Frame (continues the body of the per-record loop, hence the indentation):
% convert fields 3..7 to integer scores, treating 'AA', 'AAA' and empty fields
% as 0.  The conditional expression is parenthesised so that it may span
% several lines.  \small keeps the indented lines within the frame width.
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
\small
\begin{lstlisting}
    marks = []
    for field in fields[3:8]:
        score_str = field.strip()
        score = (0 if score_str == 'AA'
                 or score_str == 'AAA'
                 or score_str == ''
                 else int(score_str))
        marks.append(score)
    data[fields[0]]['marks'].append(marks)
\end{lstlisting}
\end{frame}
% Frame (continues the body of the per-record loop): convert the total-marks
% field (fields[8]) to an integer, treating 'AA', 'AAA' and an empty field as
% 0.  The test is made on the total field itself, not on score_str, which
% still holds the last subject score from the previous frame.  The
% pass/fail/withdrawn tally has its own frame immediately after this one, so
% it is not repeated here (repeating it would count every record twice).
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
\small
\begin{lstlisting}
    total_str = fields[8].strip()
    total = (0 if total_str == 'AA'
             or total_str == 'AAA'
             or total_str == ''
             else int(total_str))
    data[fields[0]]['total'].append(total)
\end{lstlisting}
\end{frame}
% Frame (continues the body of the per-record loop): pick the first non-empty
% of fields[9] and fields[10], falling back to 'F', and increment that counter
% for the region.  The expression is parenthesised so that it may span several
% lines; the fields are stripped, as the score fields are, so that padding or
% a trailing newline is not mistaken for a flag.
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
\begin{lstlisting}
    pfw_key = (fields[9].strip()
               or fields[10].strip()
               or 'F')
    data[fields[0]][pfw_key] += 1
\end{lstlisting}
\end{frame}
% Frame: per-region statistics.  'marks' becomes a 2-D array with one row per
% student and one column per subject, so subject-wise statistics are taken
% along axis 0 (down the columns).  std() with its default settings divides by
% the number of rows, i.e. the number of students.
\begin{frame}[fragile]
\frametitle{Calculations}
\small
\begin{lstlisting}
for k in data:
    data[k]['marks'] = array(
        data[k]['marks'])
    data[k]['total'] = array(
        data[k]['total'])
    data[k]['avg'] = average(
        data[k]['total'])
    marks = data[k]['marks']
    sub_avg = average(marks, axis=0)
    sub_std = std(marks, axis=0)
    data[k]['sub_avg'] = sub_avg
    data[k]['sub_std'] = sub_std
\end{lstlisting}
\end{frame}
\end{document}