Workshop materials: comparison day1/session3.tex

equal deleted inserted replaced

-:217c38c06ebd
+:4442da6bf693
 %    postbreak = \space\dots
 % }
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % Title page
-\title[Statistics]{Python for Science and Engg: Statistics}
+\title[Statistics]{Python for Science and Engg:\\ Basic data processing}
 \author[FOSSEE] {FOSSEE}
 \institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
-\date[] {30 April, 2010\\Day 1, Session 3}
+\date[] {SciPy 2010, Introductory tutorials,\\Day 1, Session 3}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
 %\logo{\pgfuseimage{iitmlogo}}
 %%   \frametitle{Outline}
 %%   \tableofcontents
 %%   % You might wish to add the option [pausesections]
 %% \end{frame}
-\section{Computing mean}
+\section{Computing the mean}
 \begin{frame}
 \frametitle{Value of acceleration due to gravity?}
 \begin{itemize}
-\item We already have pendulum.txt
+\item We already have \typ{pendulum.txt}
 \item We know that $ T = 2\pi \sqrt{\frac{L}{g}} $
 \item So $ g = \frac{4 \pi^2 L}{T^2}  $
-\item Calculate ``g'' - acceleration due to gravity for each pair of L and T
+\item Calculate $g$ - acceleration due to gravity for each pair of
-\item Hence calculate mean ``g''
+$L$ and $T$
-\end{itemize}
+\item Hence calculate mean $g$
-\end{frame}
+\end{itemize}
+\end{frame}
-\begin{frame}[fragile]
-\frametitle{Acceleration due to gravity - ``g''\ldots}
+\begin{frame}[fragile]
+\frametitle{Acceleration due to gravity - $g$\ldots}
 \begin{lstlisting}
 In []: g_list = []
 In []: for line in open('pendulum.txt'):
 ....     point = line.split()
 ....     L = float(point[0])
 ....     g_list.append(g)
 \end{lstlisting}
 \end{frame}
 \begin{frame}[fragile]
-\frametitle{Mean ``g'' - Classical method}
+\frametitle{Mean $g$ - Classical method}
 \begin{lstlisting}
 In []: total = 0
 In []: for g in g_list:
 ....:     total += g
 ....:
 In []: print 'Mean: ', g_mean
 \end{lstlisting}
 \end{frame}
 \begin{frame}[fragile]
-\frametitle{Mean ``g'' - Slightly improved method}
+\frametitle{Mean $g$ - Slightly improved method}
 \begin{lstlisting}
 In []: g_mean = sum(g_list) / len(g_list)
 In []: print 'Mean: ', g_mean
 \end{lstlisting}
 \end{frame}
 \begin{frame}[fragile]
-\frametitle{Mean ``g'' - One liner}
+\frametitle{Mean $g$ - One liner}
 \begin{lstlisting}
 In []: g_mean = mean(g_list)
 In []: print 'Mean: ', g_mean
 \end{lstlisting}
 \inctime{10}
 \section{Processing voluminous data}
 \begin{frame}
 \frametitle{More on data processing}
 \begin{block}{}
-We have a huge data file--180,000 records.\\How do we do \emph{efficient} statistical computations, i.e. find mean, median, standard deviation etc; draw pie charts?
+We have a huge data file--180,000 records.\\How do we do
+\emph{efficient} statistical computations, i.e. find mean, median,
+standard deviation etc.;\\How do we draw pie charts?
 \end{block}
 \end{frame}
 \begin{frame}
 \frametitle{Structure of the file}
-Understanding the structure of sslc1.txt
+Understanding the structure of \typ{sslc1.txt}
 \begin{itemize}
 \item Each line in the file has a student's details (record)
 \item Each record consists of fields separated by `;'
 \end{itemize}
 \emphbar{A;015162;JENIL T P;081;060;77;41;74;333;P;;}
 Each record consists of:
 \begin{itemize}
 \item Region Code
 \item Roll Number
 \item Name
-\item Marks of 5 subjects: SLang, Flang Maths, Science, Social
+\item Marks of 5 subjects: second lang., first lang., Math, Science,
+Social Studies
 \item Total marks
 \item Pass/Fail (P/F)
 \item Withheld (W)
 \end{itemize}
 \inctime{5}
 \end{frame}
 \begin{frame}
 \frametitle{Statistical Analysis: Problem statement}
-1. Read the data supplied in the file \emph{sslc1.txt} and carry out the following:
+1. Read the data supplied in the file \typ{sslc1.txt} and carry out the following:
 \begin{itemize}
 \item[a] Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.
 \item[b] Print mean, median and standard deviation of math scores for all regions combined.
 \end{itemize}
 \end{frame}
 \end{lstlisting}
 \begin{itemize}
 \item Keys will be region codes
 \item Values will be the number of students who scored more than 90\% in that region in Science
 \end{itemize}
-\begin{block}{Sample \emph{science} dictionary}
+\begin{block}{Sample \typ{science} dictionary}
 \{'A': 729, 'C': 764, 'B': 1120,'E': 414, 'D': 603, 'F': 500\}
 \end{block}
 \end{frame}
 \end{lstlisting}
 \inctime{5}
 \end{frame}
 \begin{frame}[fragile]
+\frametitle{IPython tip: Timing}
+Try the following:
+\begin{lstlisting}
+In []: %timeit mean(math_scores)
+In []: %timeit mean(math_array)
+In []: %timeit?
+\end{lstlisting}
+\begin{itemize}
+\item \typ{\%timeit}: accurate, many measurements
+\item Can also use \typ{\%time}
+\item \typ{\%time}: less accurate, one measurement
+\end{itemize}
+\inctime{5}
+\end{frame}
+\begin{frame}[fragile]
 \frametitle{What tools did we use?}
 \begin{itemize}
+\item More on parsing data
 \item Dictionaries for storing data
 \item Facilities for drawing pie charts
+\item Functions for statistical computations - mean, median, standard deviation
 \item Efficient array manipulations
-\item Functions for statistical computations - mean, median, standard deviation
+\item Timing in IPython
 \end{itemize}
 \end{frame}
 \end{document}
 %% Questions for Quiz %%

branch	scipy2010
changeset 409	4442da6bf693
parent 389	aa392117454f
child 423	11c942a85b3f