Added NumPy array operations to session3 day1 for sslc1.txt.
--- a/day1/session3.tex Wed Oct 14 20:10:26 2009 +0530
+++ b/day1/session3.tex Wed Oct 14 20:40:35 2009 +0530
@@ -252,6 +252,136 @@
\end{itemize}
\end{frame}
+\begin{frame}
+ \frametitle{Statistical Analysis and Parsing}
+ Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
+ \begin{itemize}
+ \item Average total marks scored in each region
+ \item Subject wise average score of each region
+ \item ??Subject wise average score for all regions combined??
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Statistical Analysis and Parsing \ldots}
+ Machinery Required -
+ \begin{itemize}
+ \item File reading and parsing
+ \item NumPy arrays - sum by rows and sum by columns
+ \item Dictionaries
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{File reading and parsing}
+ Understanding the structure of sslc1.txt
+ \begin{itemize}
+ \item Each line in the file, i.e.\ each row, is a single record.
+ \item Each record corresponds to a record of a single student
+ \item Each record consists of several fields separated by a ';'
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{File reading and parsing \ldots}
+ Each record consists of:
+ \begin{itemize}
+ \item Region Code
+ \item Roll Number
+ \item Name
+ \item Marks of 5 subjects
+ \item Total marks
+ \item Pass (P)
+ \item Withdrawn (W)
+ \item Fail (F)
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{File reading and parsing \ldots}
+ \begin{lstlisting}
+for record in open('sslc1.txt'):
+ fields = record.split(';')
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary}
+ \begin{itemize}
+ \item lists index: 0 \ldots n
+ \item dictionaries index using any hashable objects
+ \item d = \{ ``Hitchhiker's guide'' : 42, ``Terminator'' : ``I'll be back''\}
+ \item d[``Terminator''] $\Rightarrow$ ``I'll be back''
+ \item ``Terminator'' is called the key of \typ{d}
+ \item ``I'll be back'' is called the value of the key ``Terminator''
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data}
+ \begin{itemize}
+ \item Let the parsed data be stored in dictionary \typ{data}
+ \item Keys of \typ{data} are strings - region codes
+ \item Value of the key is another dictionary.
+ \item This dictionary contains:
+ \begin{itemize}
+ \item 'marks': A list of NumPy arrays
+ \item 'total': Total marks of each student
+ \item 'P': Number of passes
+ \item 'F': Number of failures
+ \item 'W': Number of withdrawals
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data \ldots}
+ \small
+ \begin{lstlisting}
+data = {}
+for record in open('sslc1.txt'):
+ fields = record.split(';')
+ if fields[0] not in data:
+ data[fields[0]] = {
+ 'marks': array([]),
+ 'total': array([]),
+ 'P': 0,
+ 'F': 0,
+ 'W': 0
+ }
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data \ldots}
+ \small
+ \begin{lstlisting}
+data[fields[0]]['marks'] = append(
+ data[fields[0]]['marks'],
+ [int(fields[3]), int(fields[4]),
+ int(fields[5]), int(fields[6]),
+ int(fields[7])
+ ])
+
+data[fields[0]]['total'].append(fields[8])
+
+pfw_key = fields[9] or fields[10] or fields[11]
+data[fields[0]][pfw_key] += 1
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Calculations}
+ \begin{lstlisting}
+all_sub_avg = array([])
+for k, v in data:
+ data[k]['avg'] = average(
+ data[k]['total'])
+ data[k]['sub_avg'] = average(
+ data[k]['marks'], axis=1)
+ \end{lstlisting}
+\end{frame}
+
\end{document}
Least squares: Smooth curve fit.