day1/session3.tex
changeset 292 2622aebff64a
parent 288 c4e25269a86c
child 296 2d08c45681a1
equal deleted inserted replaced
291:ec70a2048871 292:2622aebff64a
    76 \title[Statistics]{Python for Science and Engg: Statistics}
    76 \title[Statistics]{Python for Science and Engg: Statistics}
    77 
    77 
    78 \author[FOSSEE] {FOSSEE}
    78 \author[FOSSEE] {FOSSEE}
    79 
    79 
    80 \institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
    80 \institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
       
    81 
    81 \date[] {7 November, 2009\\Day 1, Session 3}
    82 \date[] {7 November, 2009\\Day 1, Session 3}
    82 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    83 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    83 
    84 
    84 %\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
    85 %\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
    85 %\logo{\pgfuseimage{iitmlogo}}
    86 %\logo{\pgfuseimage{iitmlogo}}
   124 %%   \frametitle{Outline}
   125 %%   \frametitle{Outline}
   125 %%   \tableofcontents
   126 %%   \tableofcontents
   126 %%   % You might wish to add the option [pausesections]
   127 %%   % You might wish to add the option [pausesections]
   127 %% \end{frame}
   128 %% \end{frame}
   128 
   129 
       
   130 \section{Computing mean}
       
   131 \begin{frame}
       
   132   \frametitle{Value of acceleration due to gravity?}
       
   133   \begin{itemize}
       
   134     \item We already have pendulum.txt
       
   135     \item We know that $ T = 2\pi \sqrt{\frac{L}{g}} $
       
   136     \item So $ g = \frac{4 \pi^2 L}{T^2}  $
       
   137     \item Calculate ``g'' - acceleration due to gravity for each pair of L and T
       
   138     \item Hence calculate mean ``g''
       
   139   \end{itemize}
       
   140 \end{frame}
       
   141 
       
   142 \begin{frame}[fragile]
       
   143   \frametitle{Acceleration due to gravity - ``g''\ldots}
       
   144   \begin{lstlisting}
       
   145 In []: G = []
       
   146 In []: for line in open('pendulum.txt'):
       
   147   ....     points = line.split()
       
   148   ....     l = float(points[0])
       
   149   ....     t = float(points[1])
       
   150   ....     g = 4 * pi * pi * l / (t * t)
       
   151   ....     G.append(g)
       
   152   \end{lstlisting}
       
   153 \end{frame}
       
   154 
       
   155 \begin{frame}
       
   156   \frametitle{Computing mean ``g''}
       
   157   \begin{block}{Exercise}
       
   158     Obtain the mean of ``g''
       
   159   \end{block}
       
   160 \end{frame}
       
   161 
       
   162 \begin{frame}[fragile]
       
   163   \frametitle{Mean ``g''}
       
   164   \begin{lstlisting}
       
   165 total = 0
       
   166 for g in G:
       
   167     total += g
       
   168 
       
   169 mean_g = total / len(G)
       
   170 print "Mean: ", mean_g
       
   171   \end{lstlisting}
       
   172 \end{frame}
       
   173 
       
   174 \begin{frame}[fragile]
       
   175   \frametitle{Mean ``g''}
       
   176   \begin{lstlisting}
       
   177 mean_g = sum(G) / len(G)
       
   178 print "Mean: ", mean_g
       
   179   \end{lstlisting}
       
   180 \end{frame}
       
   181 
       
   182 \begin{frame}[fragile]
       
   183   \frametitle{Mean ``g''}
       
   184   \begin{lstlisting}
       
   185 mean_g = mean(G)
       
   186 print "Mean: ", mean_g
       
   187   \end{lstlisting}
       
   188   \inctime{10}
       
   189 \end{frame}
       
   190 
   129 \section{Processing voluminous data}
   191 \section{Processing voluminous data}
   130 \begin{frame}
   192 \begin{frame}
   131   \frametitle{More on data processing}
   193   \frametitle{More on data processing}
   132   \begin{block}{}
   194   \begin{block}{}
   133     We have a huge--1m records--data file.\\How do we do \emph{efficient} statistical computations, that is find mean, median, mode, standard deveiation etc; draw pie charts?
   195     We have a huge data file--180,000 records.\\How do we do \emph{efficient} statistical computations, i.e. find mean, median, standard deviation etc; draw pie charts?
   134   \end{block}
   196   \end{block}
   135 \end{frame}
   197 \end{frame}
   136 
   198 
   137 
   199 \begin{frame}
   138 \begin{frame}
   200   \frametitle{Structure of the file}
   139   \frametitle{Statistical Analysis: Problem statement}
       
   140   Read the data supplied in \emph{sslc1.txt} and carry out the following:
       
   141   \begin{enumerate}
       
   142     \item Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.
       
   143     \item Draw a pie chart representing the proportion of students who scored more than 90\% in each subject across regions.
       
   144     \item Print mean, median, mode and standard deviation of math scores for all regions combined.
       
   145   \end{enumerate}
       
   146 \end{frame}
       
   147 
       
   148 \begin{frame}
       
   149   \frametitle{Problem statement: explanation}
       
   150     \emphbar{Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.}
       
   151     \begin{enumerate}
       
   152       \item Complete(100\%) data - Number of students who scored more than 90\% in Science
       
   153       \item Each slice - Number of students who scored more than 90\% in Science in one region
       
   154     \end{enumerate}
       
   155 \end{frame}
       
   156 
       
   157 \begin{frame}
       
   158   \frametitle{Problem statement: explanation}
       
   159     \emphbar{Draw a pie chart representing the proportion of students who scored more than 90\% in each subject across regions.}
       
   160     \begin{enumerate}
       
   161       \item Complete(100\%) data - Number of students who scored more than 90\% across all regions
       
   162       \item Each slice - Number of students who scored more than 90\% in each subject across all regions
       
   163     \end{enumerate}
       
   164 \end{frame}
       
   165 
       
   166 \begin{frame}
       
   167   \frametitle{Statistical Analysis and Parsing \ldots}
       
   168   Machinery Required -
       
   169   \begin{itemize}
       
   170     \item File reading
       
   171     \item Parsing
       
   172     \item Dictionaries
       
   173     \item NumPy arrays
       
   174     \item Statistical operations
       
   175   \end{itemize}
       
   176 \end{frame}
       
   177 
       
   178 \begin{frame}
       
   179   \frametitle{File reading and parsing}
       
   180   Understanding the structure of sslc1.txt
   201   Understanding the structure of sslc1.txt
   181   \begin{itemize}
   202   \begin{itemize}
   182     \item One line in file corresponds to a student's details
   203     \item Each line in the file has a student's details (record)
   183     \item aka record
       
   184     \item Each record consists of fields separated by ';'
   204     \item Each record consists of fields separated by ';'
   185   \end{itemize}
   205   \end{itemize}
   186 \end{frame}
   206 \emphbar{A;015162;JENIL T P;081;060;77;41;74;333;P;;}
   187 
   207 \end{frame}
   188 \begin{frame}
   208 
   189   \frametitle{File reading and parsing \ldots}
   209 \begin{frame}
       
   210   \frametitle{Structure of the file \ldots}
       
   211 \emphbar{A;015163;JOSEPH RAJ S;083;042;47;AA;72;244;;;}
   190   Each record consists of:
   212   Each record consists of:
   191   \begin{itemize}
   213   \begin{itemize}
   192     \item Region Code
   214     \item Region Code
   193     \item Roll Number
   215     \item Roll Number
   194     \item Name
   216     \item Name
   195     \item Marks of 5 subjects: English, Hindi, Maths, Science, Social
   217     \item Marks of 5 subjects: English, Hindi, Maths, Science, Social
   196     \item Total marks
   218     \item Total marks
   197     \item Pass/Fail (P/F)
   219     \item Pass/Fail (P/F)
   198     \item Withdrawn (W)
   220     \item Withheld (W)
   199   \end{itemize}
   221   \end{itemize}
   200   \inctime{5}
   222   \inctime{5}
       
   223 \end{frame}
       
   224 
       
   225 \begin{frame}
       
   226   \frametitle{Statistical Analysis: Problem statement}
       
   227   1. Read the data supplied in the file \emph{sslc1.txt} and carry out the following:
       
   228   \begin{itemize}
       
   229     \item[a] Draw a pie chart representing proportion of students who scored more than 90\% in each region in Science.
       
   230     \item[b] Print mean, median and standard deviation of math scores for all regions combined.
       
   231   \end{itemize}
       
   232 \end{frame}
       
   233 
       
   234 \begin{frame}
       
   235   \frametitle{Problem statement: explanation}
       
   236     \emphbar{a. Draw a pie chart representing proportion of students who scored more than 90\% in each region in Science.}
       
   237 \begin{columns}
       
   238     \column{5.25\textwidth}
       
   239     \hspace*{.5in}
       
   240 \includegraphics[height=2.6in, interpolate=true]{data/science}
       
   241     \column{0.8\textwidth}
       
   242 \end{columns}
       
   243 \end{frame}
       
   244 
       
   245 \begin{frame}
       
   246   \frametitle{Machinery Required}
       
   247   \begin{itemize}
       
   248     \item File reading
       
   249     \item Parsing
       
   250     \item Dictionaries 
       
   251     \item List enumeration
       
   252     \item Arrays
       
   253     \item Statistical operations
       
   254   \end{itemize}
   201 \end{frame}
   255 \end{frame}
   202 
   256 
   203 \subsection{Data processing}
   257 \subsection{Data processing}
   204 \begin{frame}[fragile]
   258 \begin{frame}[fragile]
   205   \frametitle{File reading and parsing \ldots}
   259   \frametitle{File reading and parsing \ldots}
   206   \begin{lstlisting}
   260   \begin{lstlisting}
   207 for record in open('sslc1.txt'):
   261 for record in open('sslc1.txt'):
   208     fields = record.split(';')
   262     fields = record.split(';')
   209   \end{lstlisting}
   263   \end{lstlisting}
   210 \end{frame}
   264 \begin{block}{}
   211 
   265 \centerline{Recall pendulum example!}
   212 \subsection{Dictionary}
   266 \end{block}
   213 \begin{frame}[fragile]
   267 \end{frame}
   214   \frametitle{Dictionary: Introduction}
   268 
       
   269 \subsection{Dictionaries}
       
   270 \begin{frame}[fragile]
       
   271   \frametitle{Dictionaries: Introduction}
   215   \begin{itemize}
   272   \begin{itemize}
   216     \item lists index: 0 \ldots n
   273     \item lists index: 0 \ldots n
   217     \item dictionaries index using strings
   274     \item dictionaries index using strings
   218   \end{itemize}
   275   \end{itemize}
   219   \begin{block}{Example}
   276 \end{frame}
   220 d = \{ ``Hitchhiker's guide'' : 42,
   277 
   221      ``Terminator'' : ``I'll be back''\}\\
   278 \begin{frame}[fragile]
   222 d[``Terminator''] => ``I'll be back''
   279   \frametitle{Dictionaries \ldots}
   223   \end{block}
   280   \begin{lstlisting}
   224 \end{frame}
   281 In []: d = {"jpg" : "image file",
   225 
   282       "txt" : "text file", 
   226 \begin{frame}[fragile]
   283       "py" : "python code"}
   227   \frametitle{Dictionary: Introduction}
   284 
   228   \begin{lstlisting}
   285 In []: d["txt"]
   229 In [1]: d = {"Hitchhiker's guide" : 42,
   286 Out[]: 'text file'
   230       "Terminator" : "I'll be back"}
   287   \end{lstlisting}
   231 
   288 \end{frame}
   232 In [2]: d["Hitchhiker's guide"]
   289 
   233 Out[2]: 42
   290 \begin{frame}[fragile]
   234 
   291   \frametitle{Dictionaries \ldots}
   235 In [3]: "Hitchhiker's guide" in d
   292   \begin{lstlisting}
   236 Out[3]: True
   293 In []: "py" in d
   237 
   294 Out[]: True
   238 In [4]: "Guido" in d
   295 
   239 Out[4]: False
   296 In []: "cpp" in d
   240   \end{lstlisting}
   297 Out[]: False
   241 \end{frame}
   298   \end{lstlisting}
   242 
   299 \end{frame}
   243 \begin{frame}[fragile]
   300 
   244   \frametitle{Dictionary: Introduction}
   301 \begin{frame}[fragile]
   245   \begin{lstlisting}
   302   \frametitle{Dictionaries \ldots}
   246 In [5]: d.keys()
   303   \begin{lstlisting}
   247 Out[5]: ['Terminator', "Hitchhiker's 
   304 In []: d.keys()
   248                               guide"]
   305 Out[]: ['py', 'txt', 'jpg']
   249 
   306 
   250 In [6]: d.values()
   307 In []: d.values()
   251 Out[6]: ["I'll be back", 42]
   308 Out[]: ['python code', 'text file',
   252   \end{lstlisting}
   309        'image file']
   253 \end{frame}
   310   \end{lstlisting}
   254 
   311   \inctime{10}
   255 \begin{frame}[fragile]
   312 \end{frame}
   256   \frametitle{Back to lists: Iterating}
   313 
   257   \begin{itemize}
   314 \begin{frame}[fragile]
   258     \item Python's \kwrd{for} loop iterates through list items
   315   \frametitle{Getting back to the problem}
   259     \item In other languages (C/C++) we run through indices and pick items from the array using these indices
       
   260     \item In Python, while iterating through list items current position is not available
       
   261   \end{itemize}
       
   262   \begin{block}{Iterating through indices}
       
   263     What if we want the index of an item of a list?
       
   264   \end{block}
       
   265 
       
   266 \end{frame}
       
   267 
       
   268 \begin{frame}[fragile]
       
   269   \frametitle{enumerate: Iterating through list indices}
       
   270   \begin{lstlisting}
       
   271 In [1]: names = ["Guido","Alex", "Tim"]
       
   272 
       
   273 In [2]: for i, name in enumerate(names):
       
   274    ...:     print i, name
       
   275    ...: 
       
   276 0 Guido
       
   277 1 Alex
       
   278 2 Tim
       
   279   \end{lstlisting}
       
   280   \inctime{5}
       
   281 \end{frame}
       
   282 
       
   283 \begin{frame}[fragile]
       
   284   \frametitle{Continuing with our Dictionary}
       
   285   Let our dictionary be:
   316   Let our dictionary be:
   286   \begin{lstlisting}
   317   \begin{lstlisting}
   287 science = {} # is an empty dictionary
   318 science = {}
   288   \end{lstlisting}
   319   \end{lstlisting}
   289 \end{frame}
   320 \begin{itemize}
   290 
   321     \item Keys will be region codes
   291 \begin{frame}[fragile]
   322     \item Values will be the number of students who scored more than 90\% in that region
   292   \frametitle{Dictionary - Building parsed data}
       
   293   \begin{itemize}
       
   294     \item \emph{Keys} of \emph{science} will be region codes
       
   295     \item Value of a \emph{science} will be the number students who scored more than 90\% in that region
       
   296   \end{itemize}
   323   \end{itemize}
   297 \end{frame}
   324 \end{frame}
   298 
   325 
   299 \begin{frame}[fragile]
   326 \begin{frame}[fragile]
   300   \frametitle{Building parsed data \ldots}
   327   \frametitle{Building parsed data \ldots}
   301   \begin{lstlisting}
   328   \begin{lstlisting}
   302 from pylab import pie
       
   303 
       
   304 science = {}
   329 science = {}
   305 
   330 
   306 for record in open('sslc1.txt'):
   331 for record in open('sslc1.txt'):
   307     record = record.strip()
   332     record = record.strip()
   308     fields = record.split(';')
   333     fields = record.split(';')
   315   \frametitle{Building parsed data \ldots}
   340   \frametitle{Building parsed data \ldots}
   316   \begin{lstlisting}
   341   \begin{lstlisting}
   317 if region_code not in science:
   342 if region_code not in science:
   318     science[region_code] = 0
   343     science[region_code] = 0
   319 
   344 
   320 score_str = fields[4].strip()
   345 score_str = fields[6].strip()
   321 
   346 
   322 score = int(score_str) if
   347 score = int(score_str) if \
   323     score_str != 'AA' else 0
   348     score_str != 'AA' else 0
   324 
   349 
   325 if score > 90:
   350 if score > 90:
   326     science[region_code] += 1
   351     science[region_code] += 1
   327   \end{lstlisting}
   352   \end{lstlisting}
   328 \end{frame}
   353 \end{frame}
   329 
   354 
       
   355 \begin{frame}[fragile]
       
   356   \frametitle{Building parsed data \ldots}
       
   357   \begin{lstlisting}
       
   358 print science
       
   359 print science.keys()
       
   360 print science.values()
       
   361   \end{lstlisting}
       
   362 \end{frame}
       
   363 
   330 \subsection{Visualizing data}
   364 \subsection{Visualizing data}
   331 \begin{frame}[fragile]
   365 \begin{frame}[fragile]
   332   \frametitle{Pie charts}
   366   \frametitle{Pie chart}
   333   \small
   367   \small
   334   \begin{lstlisting}
   368   \begin{lstlisting}
   335 figure(1)
       
   336 pie(science.values(), 
   369 pie(science.values(), 
   337     labels=science.keys())
   370     labels = science.keys())
   338 title('Students scoring 90% and above 
   371 title('Students scoring 90% and above 
   339       in science by region')
   372       in science by region')
   340 savefig('/tmp/science.png')
   373 savefig('science.png')
   341   \end{lstlisting}
   374   \end{lstlisting}
   342 \begin{columns}
   375 \begin{columns}
   343     \column{5.25\textwidth}
   376     \column{5.25\textwidth}
   344     \hspace*{1.1in}
   377     \hspace*{1.1in}
   345 \includegraphics[height=2in, interpolate=true]{data/science}
   378 \includegraphics[height=2in, interpolate=true]{data/science}
   346     \column{0.8\textwidth}
   379     \column{0.8\textwidth}
   347 \end{columns}
   380 \end{columns}
   348   \inctime{5}
   381   \inctime{10}
   349 \end{frame}
   382 \end{frame}
   350 
   383 
   351 \begin{frame}[fragile]
   384 \begin{frame}
   352   \frametitle{Building data for all subjects \ldots}
   385   \frametitle{Problem statement}
   353   \begin{lstlisting}
   386     \emphbar{b. Print mean, median and standard deviation of math scores for all regions combined.}
   354 from pylab import pie
   387 \end{frame}
   355 from scipy import mean, median, std
   388 
   356 from scipy import stats
   389 \begin{frame}[fragile]
   357 
   390   \frametitle{Building data for statistics}
   358 scores = [[], [], [], [], []]
   391   \begin{lstlisting}
   359 ninety_percents = [{}, {}, {}, {}, {}]
   392 math_scores = []
   360   \end{lstlisting}
   393 
   361 \end{frame}
       
   362 
       
   363 \begin{frame}[fragile]
       
   364   \frametitle{Building data for all subjects \ldots}
       
   365   \begin{lstlisting}
       
   366 for record in open('sslc1.txt'):
   394 for record in open('sslc1.txt'):
   367     record = record.strip()
   395     record = record.strip()
   368     fields = record.split(';')
   396     fields = record.split(';')
   369 
   397 
   370     region_code = fields[0].strip()
   398     score_str = fields[5].strip()
   371   \end{lstlisting}
   399     score = int(score_str) if \
   372 \end{frame}
       
   373 
       
   374 \begin{frame}[fragile]
       
   375   \frametitle{Building data for all subjects \ldots}
       
   376   \small
       
   377   \begin{lstlisting}
       
   378 for i, field in enumerate(fields[3:8]):
       
   379     if region_code not in ninety_percents[i]:
       
   380         ninety_percents[i][region_code] = 0
       
   381 
       
   382     score_str = field.strip()
       
   383     score = int(score_str) if
       
   384       score_str != 'AA' else 0
   400       score_str != 'AA' else 0
   385 
   401 
   386     scores[i].append(score)
   402     math_scores.append(score)
   387 
   403   \end{lstlisting}
   388     if score > 90:
       
   389         ninety_percents[i][region_code] += 1
       
   390   \end{lstlisting}
       
   391 \end{frame}
       
   392 
       
   393 \begin{frame}[fragile]
       
   394   \frametitle{Consolidating data}
       
   395   \begin{lstlisting}
       
   396 subj_total = []
       
   397 for subject in ninety_percents:
       
   398     subj_total.append(sum(
       
   399          subject.values()))
       
   400   \end{lstlisting}
       
   401 \end{frame}
       
   402 
       
   403 \begin{frame}[fragile]
       
   404   \frametitle{Pie charts}
       
   405   \begin{lstlisting}
       
   406 figure(2)
       
   407 pie(subj_total, labels=['English',
       
   408     'Hindi', 'Maths', 'Science',
       
   409     'Social'])
       
   410 title('Students scoring more than
       
   411       90% by subject(All regions
       
   412       combined).')
       
   413 savefig('/tmp/all_regions.png')
       
   414   \end{lstlisting}
       
   415 \end{frame}
       
   416 
       
   417 \begin{frame}[fragile]
       
   418   \frametitle{Pie charts}
       
   419   \includegraphics[height=3in, interpolate=true]{data/all_regions}
       
   420 \end{frame}
   404 \end{frame}
   421 
   405 
   422 \subsection{Obtaining statistics}
   406 \subsection{Obtaining statistics}
   423 \begin{frame}[fragile]
   407 \begin{frame}[fragile]
   424   \frametitle{Obtaining statistics}
   408   \frametitle{Obtaining statistics}
   425   \begin{block}{Statistics: Mean}
   409   \begin{block}{Exercise}
   426     Obtain the mean of Math scores
   410     Obtain the mean of Math scores
   427   \end{block}
   411   \end{block}
   428 \end{frame}
   412 \end{frame}
   429 
   413 
   430 \begin{frame}[fragile]
   414 \begin{frame}[fragile]
   431   \frametitle{Obtaining statistics: Solution}
       
   432   \begin{block}{Statistics: Mean}
       
   433     Obtain the mean of Math scores
       
   434   \end{block}
       
   435   \begin{lstlisting}
       
   436 math_scores = scores[2]
       
   437 total = 0
       
   438 for i, score in enumerate(math_scores):
       
   439     total += score
       
   440 
       
   441 mean = total / (i + 1)
       
   442 print "Mean: ", mean
       
   443   \end{lstlisting}
       
   444 \end{frame}
       
   445 
       
   446 \begin{frame}[fragile]
       
   447   \frametitle{Obtaining statistics: Another solution}
       
   448   \begin{block}{Statistics: Mean}
       
   449     Obtain the mean of Math scores
       
   450   \end{block}
       
   451   \begin{lstlisting}
       
   452 math_scores = scores[2]
       
   453 mean = sum(math_scores) /
       
   454           len(math_scores)
       
   455   \end{lstlisting}
       
   456 \end{frame}
       
   457 
       
   458 \begin{frame}[fragile]
       
   459 \frametitle{NumPy arrays}
       
   460   \begin{itemize}
       
   461     \item NumPy provides arrays
       
   462     \item arrays are very efficient and powerful 
       
   463     \item Very easy to perform element-wise operations - \typ{+, -, *, /, \%}
       
   464     \begin{lstlisting}
       
   465 In [1]: a = array([1, 2, 3])
       
   466 In [2]: b = array([4, 5, 6])
       
   467 
       
   468 In [3]: a + b
       
   469 Out[3]: array([5, 7, 9])
       
   470     \end{lstlisting}
       
   471     \item Very easy to compute statistics
       
   472   \end{itemize}
       
   473 \end{frame}
       
   474 
       
   475 \begin{frame}[fragile]
       
   476   \frametitle{Obtaining statistics}
   415   \frametitle{Obtaining statistics}
   477   \begin{lstlisting}
   416   \begin{lstlisting}
   478 math_scores = array(scores[2])
       
   479 
       
   480 print "Mean: ", mean(math_scores)
   417 print "Mean: ", mean(math_scores)
   481 
   418 
   482 print "Median: ", median(math_scores)
   419 print "Median: ", median(math_scores)
   483 
       
   484 print "Mode: ", stats.mode(math_scores)
       
   485 
   420 
   486 print "Standard Deviation: ",
   421 print "Standard Deviation: ",
   487               std(math_scores)
   422               std(math_scores)
   488   \end{lstlisting}
   423   \end{lstlisting}
   489   \inctime{15}
   424   \inctime{10}
       
   425 \end{frame}
       
   426 
       
   427 \begin{frame}[fragile]
       
   428   \frametitle{Obtaining statistics: efficiently!}
       
   429   \begin{lstlisting}
       
   430 math_array = array(math_scores)
       
   431 
       
   432 print "Mean: ", mean(math_array)
       
   433 
       
   434 print "Median: ", median(math_array)
       
   435 
       
   436 print "Standard Deviation: ",
       
   437               std(math_array)
       
   438   \end{lstlisting}
       
   439   \inctime{5}
   490 \end{frame}
   440 \end{frame}
   491 
   441 
   492 \begin{frame}[fragile]
   442 \begin{frame}[fragile]
   493   \frametitle{What tools did we use?}
   443   \frametitle{What tools did we use?}
   494   \begin{itemize}
   444   \begin{itemize}
   495    \item Dictionaries for storing data
   445    \item Dictionaries for storing data
   496    \item Facilities for drawing pie charts
   446    \item Facilities for drawing pie charts
   497    \item NumPy arrays for efficient array manipulations
   447    \item Efficient array manipulations
   498    \item Functions for statistical computations - mean, median, mode, standard deviation
   448    \item Functions for statistical computations - mean, median, standard deviation
   499   \end{itemize}
   449   \end{itemize}
   500 \end{frame}
       
   501 
       
   502 \section{Least square fit}
       
   503 \begin{frame}
       
   504 \frametitle{L vs $T^2$ \ldots}
       
   505 Let's go back to the L vs $T^2$ plot
       
   506 \begin{itemize}
       
   507 \item We first look at obtaining $T^2$ from T
       
   508 \item Then, we look at plotting a Least Squares fit
       
   509 \end{itemize}
       
   510 \end{frame}
       
   511 
       
   512 \begin{frame}[fragile]
       
   513 \frametitle{Dealing with data whole-sale}
       
   514 \begin{lstlisting}
       
   515 In []: for t in T:
       
   516  ....:     TSq.append(t*t)
       
   517 \end{lstlisting}
       
   518 \begin{itemize}
       
   519 \item This is not very efficient
       
   520 \item We are squaring element after element
       
   521 \item We use arrays to make this efficient
       
   522 \end{itemize}
       
   523 \begin{lstlisting}
       
   524 In []: L = array(L)
       
   525 In []: T = array(T)
       
   526 In []: TSq = T*T
       
   527 \end{lstlisting}
       
   528 \end{frame}
   450 \end{frame}
   529 
   451 
   530 \end{document}
   452 \end{document}