day1/session3.tex
branchscipy2010
changeset 409 4442da6bf693
parent 389 aa392117454f
child 423 11c942a85b3f
equal deleted inserted replaced
408:217c38c06ebd 409:4442da6bf693
    71 %    postbreak = \space\dots
    71 %    postbreak = \space\dots
    72 % }
    72 % }
    73 
    73 
    74 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    74 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    75 % Title page
    75 % Title page
    76 \title[Statistics]{Python for Science and Engg: Statistics}
    76 \title[Statistics]{Python for Science and Engg:\\ Basic data processing}
    77 
    77 
    78 \author[FOSSEE] {FOSSEE}
    78 \author[FOSSEE] {FOSSEE}
    79 
    79 
    80 \institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
    80 \institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
    81 
    81 
    82 \date[] {30 April, 2010\\Day 1, Session 3}
    82 \date[] {SciPy 2010, Introductory tutorials,\\Day 1, Session 3}
    83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    84 
    84 
    85 %\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
    85 %\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
    86 %\logo{\pgfuseimage{iitmlogo}}
    86 %\logo{\pgfuseimage{iitmlogo}}
    87 
    87 
   125 %%   \frametitle{Outline}
   125 %%   \frametitle{Outline}
   126 %%   \tableofcontents
   126 %%   \tableofcontents
   127 %%   % You might wish to add the option [pausesections]
   127 %%   % You might wish to add the option [pausesections]
   128 %% \end{frame}
   128 %% \end{frame}
   129 
   129 
   130 \section{Computing mean}
   130 \section{Computing the mean}
   131 \begin{frame}
   131 \begin{frame}
   132   \frametitle{Value of acceleration due to gravity?}
   132   \frametitle{Value of acceleration due to gravity?}
   133   \begin{itemize}
   133   \begin{itemize}
   134     \item We already have pendulum.txt
   134     \item We already have \typ{pendulum.txt}
   135     \item We know that $ T = 2\pi \sqrt{\frac{L}{g}} $
   135     \item We know that $ T = 2\pi \sqrt{\frac{L}{g}} $
   136     \item So $ g = \frac{4 \pi^2 L}{T^2}  $
   136     \item So $ g = \frac{4 \pi^2 L}{T^2}  $
   137     \item Calculate ``g'' - acceleration due to gravity for each pair of L and T
   137     \item Calculate $g$ - acceleration due to gravity for each pair of
   138     \item Hence calculate mean ``g''
   138         $L$ and $T$
   139   \end{itemize}
   139     \item Hence calculate mean $g$
   140 \end{frame}
   140   \end{itemize}
   141 
   141 \end{frame}
   142 \begin{frame}[fragile]
   142 
   143   \frametitle{Acceleration due to gravity - ``g''\ldots}
   143 \begin{frame}[fragile]
       
   144   \frametitle{Acceleration due to gravity - $g$\ldots}
   144   \begin{lstlisting}
   145   \begin{lstlisting}
   145 In []: g_list = []
   146 In []: g_list = []
   146 In []: for line in open('pendulum.txt'):
   147 In []: for line in open('pendulum.txt'):
   147   ....     point = line.split()
   148   ....     point = line.split()
   148   ....     L = float(point[0])
   149   ....     L = float(point[0])
   151   ....     g_list.append(g)
   152   ....     g_list.append(g)
   152   \end{lstlisting}
   153   \end{lstlisting}
   153 \end{frame}
   154 \end{frame}
   154 
   155 
   155 \begin{frame}[fragile]
   156 \begin{frame}[fragile]
   156   \frametitle{Mean ``g'' - Classical method}
   157   \frametitle{Mean $g$ - Classical method}
   157   \begin{lstlisting}
   158   \begin{lstlisting}
   158 In []: total = 0
   159 In []: total = 0
   159 In []: for g in g_list:
   160 In []: for g in g_list:
   160  ....:     total += g
   161  ....:     total += g
   161  ....:
   162  ....:
   164 In []: print 'Mean: ', g_mean
   165 In []: print 'Mean: ', g_mean
   165   \end{lstlisting}
   166   \end{lstlisting}
   166 \end{frame}
   167 \end{frame}
   167 
   168 
   168 \begin{frame}[fragile]
   169 \begin{frame}[fragile]
   169   \frametitle{Mean ``g'' - Slightly improved method}
   170   \frametitle{Mean $g$ - Slightly improved method}
   170   \begin{lstlisting}
   171   \begin{lstlisting}
   171 In []: g_mean = sum(g_list) / len(g_list)
   172 In []: g_mean = sum(g_list) / len(g_list)
   172 In []: print 'Mean: ', g_mean
   173 In []: print 'Mean: ', g_mean
   173   \end{lstlisting}
   174   \end{lstlisting}
   174 \end{frame}
   175 \end{frame}
   175 
   176 
   176 \begin{frame}[fragile]
   177 \begin{frame}[fragile]
   177   \frametitle{Mean ``g'' - One liner}
   178   \frametitle{Mean $g$ - One liner}
   178   \begin{lstlisting}
   179   \begin{lstlisting}
   179 In []: g_mean = mean(g_list)
   180 In []: g_mean = mean(g_list)
   180 In []: print 'Mean: ', g_mean
   181 In []: print 'Mean: ', g_mean
   181   \end{lstlisting}
   182   \end{lstlisting}
   182   \inctime{10}
   183   \inctime{10}
   184 
   185 
   185 \section{Processing voluminous data}
   186 \section{Processing voluminous data}
   186 \begin{frame}
   187 \begin{frame}
   187   \frametitle{More on data processing}
   188   \frametitle{More on data processing}
   188   \begin{block}{}
   189   \begin{block}{}
   189     We have a huge data file--180,000 records.\\How do we do \emph{efficient} statistical computations, i.e. find mean, median, standard deviation etc; draw pie charts?
   190     We have a huge data file--180,000 records.\\How do we do
       
   191     \emph{efficient} statistical computations, i.e. find mean, median,
       
   192     standard deviation etc.;\\How do we draw pie charts?
   190   \end{block}
   193   \end{block}
   191 \end{frame}
   194 \end{frame}
   192 
   195 
   193 \begin{frame}
   196 \begin{frame}
   194   \frametitle{Structure of the file}
   197   \frametitle{Structure of the file}
   195   Understanding the structure of sslc1.txt
   198   Understanding the structure of \typ{sslc1.txt}
   196   \begin{itemize}
   199   \begin{itemize}
   197     \item Each line in the file has a student's details (record)
   200     \item Each line in the file has a student's details (record)
   198     \item Each record consists of fields separated by ';'
   201     \item Each record consists of fields separated by ';'
   199   \end{itemize}
   202   \end{itemize}
   200 \emphbar{A;015162;JENIL T P;081;060;77;41;74;333;P;;}
   203 \emphbar{A;015162;JENIL T P;081;060;77;41;74;333;P;;}
   206   Each record consists of:
   209   Each record consists of:
   207   \begin{itemize}
   210   \begin{itemize}
   208     \item Region Code
   211     \item Region Code
   209     \item Roll Number
   212     \item Roll Number
   210     \item Name
   213     \item Name
   211     \item Marks of 5 subjects: SLang, Flang Maths, Science, Social
   214     \item Marks of 5 subjects: second lang, first lang., Math, Science,
       
   215         Social Studies
   212     \item Total marks
   216     \item Total marks
   213     \item Pass/Fail (P/F)
   217     \item Pass/Fail (P/F)
   214     \item Withheld (W)
   218     \item Withheld (W)
   215   \end{itemize}
   219   \end{itemize}
   216   \inctime{5}
   220   \inctime{5}
   217 \end{frame}
   221 \end{frame}
   218 
   222 
   219 \begin{frame}
   223 \begin{frame}
   220   \frametitle{Statistical Analysis: Problem statement}
   224   \frametitle{Statistical Analysis: Problem statement}
   221   1. Read the data supplied in the file \emph{sslc1.txt} and carry out the following:
   225   1. Read the data supplied in the file \typ{sslc1.txt} and carry out the following:
   222   \begin{itemize}
   226   \begin{itemize}
   223     \item[a] Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.
   227     \item[a] Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.
   224     \item[b] Print mean, median and standard deviation of math scores for all regions combined.
   228     \item[b] Print mean, median and standard deviation of math scores for all regions combined.
   225   \end{itemize}
   229   \end{itemize}
   226 \end{frame}
   230 \end{frame}
   334   \end{lstlisting}
   338   \end{lstlisting}
   335 \begin{itemize}
   339 \begin{itemize}
   336     \item Keys will be region codes
   340     \item Keys will be region codes
   337     \item Values will be the number of students who scored more than 90\% in that region in Science
   341     \item Values will be the number of students who scored more than 90\% in that region in Science
   338   \end{itemize}
   342   \end{itemize}
   339   \begin{block}{Sample \emph{science} dictionary}
   343   \begin{block}{Sample \typ{science} dictionary}
   340     \{'A': 729, 'C': 764, 'B': 1120,'E': 414, 'D': 603, 'F': 500\}
   344     \{'A': 729, 'C': 764, 'B': 1120,'E': 414, 'D': 603, 'F': 500\}
   341   \end{block}
   345   \end{block}
   342 
   346 
   343 \end{frame}
   347 \end{frame}
   344 
   348 
   456   \end{lstlisting}
   460   \end{lstlisting}
   457   \inctime{5}
   461   \inctime{5}
   458 \end{frame}
   462 \end{frame}
   459 
   463 
   460 \begin{frame}[fragile]
   464 \begin{frame}[fragile]
       
   465   \frametitle{IPython tip: Timing}
       
   466 
       
   467 Try the following:
       
   468   \begin{lstlisting}
       
   469 In []: %timeit mean(math_scores)
       
   470 
       
   471 In []: %timeit mean(math_array)
       
   472 
       
   473 In []: %timeit?
       
   474 
       
   475   \end{lstlisting}
       
   476 
       
   477   \begin{itemize}
       
   478       \item \typ{\%timeit}: accurate, many measurements
       
   479       \item Can also use \typ{\%time}
       
   480       \item \typ{\%time}: less accurate, one measurement 
       
   481   \end{itemize}
       
   482 
       
   483   \inctime{5}
       
   484 \end{frame}
       
   485 
       
   486 \begin{frame}[fragile]
   461   \frametitle{What tools did we use?}
   487   \frametitle{What tools did we use?}
   462   \begin{itemize}
   488   \begin{itemize}
       
   489    \item More parsing data
   463    \item Dictionaries for storing data
   490    \item Dictionaries for storing data
   464    \item Facilities for drawing pie charts
   491    \item Facilities for drawing pie charts
       
   492    \item Functions for statistical computations - mean, median, standard deviation
   465    \item Efficient array manipulations
   493    \item Efficient array manipulations
   466    \item Functions for statistical computations - mean, median, standard deviation
   494    \item Timing in IPython
   467   \end{itemize}
   495   \end{itemize}
       
   496 
   468 \end{frame}
   497 \end{frame}
   469 
   498 
   470 \end{document}
   499 \end{document}
   471 
   500 
   472 %% Questions for Quiz %%
   501 %% Questions for Quiz %%