--- a/day1/session3.tex Fri Jun 18 01:21:32 2010 -0400
+++ b/day1/session3.tex Sat Jun 19 01:27:20 2010 -0400
@@ -73,13 +73,13 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title page
-\title[Statistics]{Python for Science and Engg: Statistics}
+\title[Statistics]{Python for Science and Engg:\\ Basic data processing}
\author[FOSSEE] {FOSSEE}
\institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
-\date[] {30 April, 2010\\Day 1, Session 3}
+\date[] {SciPy 2010, Introductory tutorials,\\Day 1, Session 3}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
@@ -127,20 +127,21 @@
%% % You might wish to add the option [pausesections]
%% \end{frame}
-\section{Computing mean}
+\section{Computing the mean}
\begin{frame}
\frametitle{Value of acceleration due to gravity?}
\begin{itemize}
- \item We already have pendulum.txt
+ \item We already have \typ{pendulum.txt}
\item We know that $ T = 2\pi \sqrt{\frac{L}{g}} $
\item So $ g = \frac{4 \pi^2 L}{T^2} $
- \item Calculate ``g'' - acceleration due to gravity for each pair of L and T
- \item Hence calculate mean ``g''
+ \item Calculate $g$ - acceleration due to gravity for each pair of
+ $L$ and $T$
+ \item Hence calculate mean $g$
\end{itemize}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Acceleration due to gravity - ``g''\ldots}
+ \frametitle{Acceleration due to gravity - $g$\ldots}
\begin{lstlisting}
In []: g_list = []
In []: for line in open('pendulum.txt'):
@@ -153,7 +154,7 @@
\end{frame}
\begin{frame}[fragile]
- \frametitle{Mean ``g'' - Classical method}
+ \frametitle{Mean $g$ - Classical method}
\begin{lstlisting}
In []: total = 0
In []: for g in g_list:
@@ -166,7 +167,7 @@
\end{frame}
\begin{frame}[fragile]
- \frametitle{Mean ``g'' - Slightly improved method}
+ \frametitle{Mean $g$ - Slightly improved method}
\begin{lstlisting}
In []: g_mean = sum(g_list) / len(g_list)
In []: print 'Mean: ', g_mean
@@ -174,7 +175,7 @@
\end{frame}
\begin{frame}[fragile]
- \frametitle{Mean ``g'' - One liner}
+ \frametitle{Mean $g$ - One liner}
\begin{lstlisting}
In []: g_mean = mean(g_list)
In []: print 'Mean: ', g_mean
@@ -186,13 +187,15 @@
\begin{frame}
\frametitle{More on data processing}
\begin{block}{}
- We have a huge data file--180,000 records.\\How do we do \emph{efficient} statistical computations, i.e. find mean, median, standard deviation etc; draw pie charts?
+ We have a huge data file---180,000 records.\\How do we do
+ \emph{efficient} statistical computations, i.e.\ find mean, median,
+ standard deviation, etc.?\\How do we draw pie charts?
\end{block}
\end{frame}
\begin{frame}
\frametitle{Structure of the file}
- Understanding the structure of sslc1.txt
+ Understanding the structure of \typ{sslc1.txt}
\begin{itemize}
\item Each line in the file has a student's details(record)
\item Each record consists of fields separated by ';'
@@ -208,7 +211,8 @@
\item Region Code
\item Roll Number
\item Name
- \item Marks of 5 subjects: SLang, Flang Maths, Science, Social
+ \item Marks of 5 subjects: second lang., first lang., Math, Science,
+ Social Studies
\item Total marks
\item Pass/Fail (P/F)
\item Withheld (W)
@@ -218,7 +222,7 @@
\begin{frame}
\frametitle{Statistical Analysis: Problem statement}
- 1. Read the data supplied in the file \emph{sslc1.txt} and carry out the following:
+ 1. Read the data supplied in the file \typ{sslc1.txt} and carry out the following:
\begin{itemize}
\item[a] Draw a pie chart representing proportion of students who scored more than 90\% in each region in Science.
\item[b] Print mean, median and standard deviation of math scores for all regions combined.
@@ -336,7 +340,7 @@
\item Keys will be region codes
\item Values will be the number students who scored more than 90\% in that region in Science
\end{itemize}
- \begin{block}{Sample \emph{science} dictionary}
+ \begin{block}{Sample \typ{science} dictionary}
\{'A': 729, 'C': 764, 'B': 1120,'E': 414, 'D': 603, 'F': 500\}
\end{block}
@@ -458,13 +462,38 @@
\end{frame}
\begin{frame}[fragile]
+ \frametitle{IPython tip: Timing}
+
+Try the following:
+ \begin{lstlisting}
+In []: %timeit mean(math_scores)
+
+In []: %timeit mean(math_array)
+
+In []: %timeit?
+
+ \end{lstlisting}
+
+ \begin{itemize}
+ \item \typ{\%timeit}: accurate, many measurements
+ \item Can also use \typ{\%time}
+ \item \typ{\%time}: less accurate, one measurement
+ \end{itemize}
+
+ \inctime{5}
+\end{frame}
+
+\begin{frame}[fragile]
\frametitle{What tools did we use?}
\begin{itemize}
+ \item More on parsing data
\item Dictionaries for storing data
\item Facilities for drawing pie charts
+ \item Functions for statistical computations - mean, median, standard deviation
\item Efficient array manipulations
- \item Functions for statistical computations - mean, median, standard deviation
+ \item Timing in IPython
\end{itemize}
+
\end{frame}
\end{document}