day1/session3.tex
changeset 125 99ca3cb18fd2
parent 120 055b199c46c2
child 131 b3a78754c4a9
equal deleted inserted replaced
120:055b199c46c2 125:99ca3cb18fd2
   256   \frametitle{Statistical Analysis and Parsing}
   256   \frametitle{Statistical Analysis and Parsing}
   257   Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
   257   Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
   258   \begin{itemize}
   258   \begin{itemize}
   259     \item Average total marks scored in each region
   259     \item Average total marks scored in each region
   260     \item Subject wise average score of each region
   260     \item Subject wise average score of each region
   261     \item ??Subject wise average score for all regions combined??
   261     \item \alert{??Subject wise average score for all regions combined??}
       
   262     \item Subject wise standard deviation of scores for each region
   262   \end{itemize}
   263   \end{itemize}
   263 \end{frame}
   264 \end{frame}
   264 
   265 
   265 \begin{frame}
   266 \begin{frame}
   266   \frametitle{Statistical Analysis and Parsing \ldots}
   267   \frametitle{Statistical Analysis and Parsing \ldots}
   352   \end{lstlisting}
   353   \end{lstlisting}
   353 \end{frame}
   354 \end{frame}
   354 
   355 
   355 \begin{frame}[fragile]
   356 \begin{frame}[fragile]
   356   \frametitle{Dictionary - Building parsed data \ldots}
   357   \frametitle{Dictionary - Building parsed data \ldots}
       
   358   \begin{lstlisting}
       
   359 marks = []
       
   360 for field in fields[3:8]:
       
   361     score_str = field.strip()
       
   362     score = 0 if score_str == 'AA'
       
   363         or score_str == 'AAA'
       
   364         or score_str == ''
       
   365         else int(score_str)
       
   366     marks.append(score)
       
   367 
       
   368 data[fields[0]]['marks'].append(marks)
       
   369   \end{lstlisting}
       
   370 \end{frame}
       
   371 
       
   372 \begin{frame}[fragile]
       
   373   \frametitle{Dictionary - Building parsed data \ldots}
       
   374   \begin{lstlisting}
       
   375 total = 0 if fields[8].strip() == 'AA'
       
   376     or fields[8].strip() == 'AAA'
       
   377     or fields[8].strip() == ''
       
   378     else int(fields[8])
       
   379 data[fields[0]]['total'].append(total)
       
   380 
       
   381 pfw_key = fields[9]
       
   382     or fields[10]
       
   383     or 'F'
       
   384 data[fields[0]][pfw_key] += 1
       
   385   \end{lstlisting}
       
   386 \end{frame}
       
   387 
       
   388 \begin{frame}[fragile]
       
   389   \frametitle{Dictionary - Building parsed data \ldots}
       
   390   \begin{lstlisting}
       
   391 pfw_key = fields[9]
       
   392     or fields[10]
       
   393     or 'F'
       
   394 data[fields[0]][pfw_key] += 1
       
   395   \end{lstlisting}
       
   396 \end{frame}
       
   397 
       
   398 \begin{frame}[fragile]
       
   399   \frametitle{Calculations}
   357   \small
   400   \small
   358   \begin{lstlisting}
   401   \begin{lstlisting}
   359 data[fields[0]]['marks'] = append(
   402 for k in data:
   360     data[fields[0]]['marks'], 
   403     data[k]['marks'] = array(data[k]['marks'])
   361     [int(fields[3]), int(fields[4]),
   404     data[k]['total'] = array(data[k]['total'])
   362     int(fields[5]), int(fields[6]),
   405 
   363     int(fields[7])
       
   364     ])
       
   365 
       
   366 data[fields[0]]['total'].append(fields[8])
       
   367 
       
   368 pfw_key = fields[9] or fields[10] or fields[11]
       
   369 data[fields[0]][pfw_key] += 1
       
   370   \end{lstlisting}
       
   371 \end{frame}
       
   372 
       
   373 \begin{frame}[fragile]
       
   374   \frametitle{Calculations}
       
   375   \begin{lstlisting}
       
   376 all_sub_avg = array([])
       
   377 for k, v in data:
       
   378     data[k]['avg'] = average(
   406     data[k]['avg'] = average(
   379         data[k]['total'])
   407         data[k]['total'])
   380     data[k]['sub_avg'] = average(
   408     marks = data[k]['marks']
   381         data[k]['marks'], axis=1)
   409     sub_avg = average(marks, axis=1)
       
   410     sub_std = sqrt(sum(square(
       
   411         sub_avg[:,newaxis] - marks), axis=0) /
       
   412         len(marks))
       
   413     data[k]['sub_avg'] = sub_avg
       
   414     data[k]['sub_std'] = sub_std
   382   \end{lstlisting}
   415   \end{lstlisting}
   383 \end{frame}
   416 \end{frame}
   384 
   417 
   385 \end{document}
   418 \end{document}
   386 
   419