diff -r ac715f4826f2 -r 73374e1ae4f3 day1/session3.tex --- a/day1/session3.tex Thu Oct 15 11:23:01 2009 +0530 +++ b/day1/session3.tex Thu Oct 15 11:23:47 2009 +0530 @@ -252,6 +252,136 @@ \end{itemize} \end{frame} +\begin{frame} + \frametitle{Statistical Analysis and Parsing} + Read the data supplied in \emph{sslc1.txt} and obtain the following statistics: + \begin{itemize} + \item Average total marks scored in each region + \item Subject wise average score of each region + \item ??Subject wise average score for all regions combined?? + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Statistical Analysis and Parsing \ldots} + Machinery Required - + \begin{itemize} + \item File reading and parsing + \item NumPy arrays - sum by rows and sum by columns + \item Dictionaries + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{File reading and parsing} + Understanding the structure of sslc1.txt + \begin{itemize} + \item Each line in the file, i.e.\ each row of the file, is a single record. 
+ \item Each record corresponds to a record of a single student + \item Each record consists of several fields separated by a ';' + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{File reading and parsing \ldots} + Each record consists of: + \begin{itemize} + \item Region Code + \item Roll Number + \item Name + \item Marks of 5 subjects + \item Total marks + \item Pass (P) + \item Withdrawn (W) + \item Fail (F) + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{File reading and parsing \ldots} + \begin{lstlisting} +for record in open('sslc1.txt'): + fields = record.split(';') + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Dictionary} + \begin{itemize} + \item lists index: 0 \ldots n + \item dictionaries index using any hashable objects + \item d = \{ ``Hitchhiker's guide'' : 42, ``Terminator'' : ``I'll be back''\} + \item d[``Terminator''] => ``I'll be back'' + \item ``Terminator'' is called the key of \typ{d} + \item ``I'll be back'' is called the value of the key ``Terminator'' + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Dictionary - Building parsed data} + \begin{itemize} + \item Let the parsed data be stored in dictionary \typ{data} + \item Keys of \typ{data} are strings - region codes + \item Value of the key is another dictionary. 
+ \item This dictionary contains: + \begin{itemize} + \item 'marks': A list of NumPy arrays + \item 'total': Total marks of each student + \item 'P': Number of passes + \item 'F': Number of failures + \item 'W': Number of withdrawals + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Dictionary - Building parsed data \ldots} + \small + \begin{lstlisting} +data = {} +for record in open('sslc1.txt'): + fields = record.split(';') + if fields[0] not in data: + data[fields[0]] = { + 'marks': array([]), + 'total': array([]), + 'P': 0, + 'F': 0, + 'W': 0 + } + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Dictionary - Building parsed data \ldots} + \small + \begin{lstlisting} +data[fields[0]]['marks'] = append( + data[fields[0]]['marks'], + [int(fields[3]), int(fields[4]), + int(fields[5]), int(fields[6]), + int(fields[7]) + ]) + +data[fields[0]]['total'].append(fields[8]) + +pfw_key = fields[9] or fields[10] or fields[11] +data[fields[0]][pfw_key] += 1 + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Calculations} + \begin{lstlisting} +all_sub_avg = array([]) +for k, v in data: + data[k]['avg'] = average( + data[k]['total']) + data[k]['sub_avg'] = average( + data[k]['marks'], axis=1) + \end{lstlisting} +\end{frame} + \end{document} Least squares: Smooth curve fit.