Added NumPy array operations to session3 day1 for sslc1.txt.
author Madhusudan.C.S <madhusudancs@gmail.com>
Wed, 14 Oct 2009 20:40:35 +0530
changeset 120 055b199c46c2
parent 119 7ae0f756f050
child 122 73374e1ae4f3
child 123 d2f4053a2e85
child 125 99ca3cb18fd2
Added NumPy array operations to session3 day1 for sslc1.txt.
day1/session3.tex
--- a/day1/session3.tex	Wed Oct 14 20:10:26 2009 +0530
+++ b/day1/session3.tex	Wed Oct 14 20:40:35 2009 +0530
@@ -252,6 +252,136 @@
 \end{itemize}
 \end{frame}
 
+\begin{frame}
+  \frametitle{Statistical Analysis and Parsing}
+  Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
+  \begin{itemize}
+    \item Average total marks scored in each region
+    \item Subject wise average score of each region
+    \item ??Subject wise average score for all regions combined??
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Statistical Analysis and Parsing \ldots}
+  Machinery Required -
+  \begin{itemize}
+    \item File reading and parsing
+    \item NumPy arrays - sum by rows and sum by columns
+    \item Dictionaries
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{File reading and parsing}
+  Understanding the structure of sslc1.txt
+  \begin{itemize}
+    \item Each line in the file, i.e.\ each row of the file, is a single record.
+    \item Each record holds the data of a single student
+    \item Each record consists of several fields separated by a ';'
+  \end{itemize}
+\end{frame}
+
+\begin{frame}
+  \frametitle{File reading and parsing \ldots}
+  Each record consists of:
+  \begin{itemize}
+    \item Region Code
+    \item Roll Number
+    \item Name
+    \item Marks of 5 subjects
+    \item Total marks
+    \item Pass (P)
+    \item Withdrawn (W)
+    \item Fail (F)
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{File reading and parsing \ldots}
+  \begin{lstlisting}
+for record in open('sslc1.txt'):
+    fields = record.split(';')
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Dictionary}
+  \begin{itemize}
+    \item lists index: 0 \ldots n
+    \item dictionaries index using any hashable objects
+    \item d = \{ ``Hitchhiker's guide'' : 42, ``Terminator'' : ``I'll be back''\}
+    \item d[``Terminator''] => ``I'll be back''
+    \item ``Terminator'' is called the key of \typ{d}
+    \item ``I'll be back'' is called the value of the key ``Terminator''
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Dictionary - Building parsed data}
+  \begin{itemize}
+    \item Let the parsed data be stored in dictionary \typ{data}
+    \item Keys of \typ{data} are strings - region codes
+    \item Value of the key is another dictionary.
+    \item This dictionary contains:
+    \begin{itemize}
+      \item 'marks': A list of NumPy arrays
+      \item 'total': Total marks of each student
+      \item 'P': Number of passes
+      \item 'F': Number of failures
+      \item 'W': Number of withdrawals
+    \end{itemize}
+  \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Dictionary - Building parsed data \ldots}
+  \small
+  \begin{lstlisting}
+data = {}
+for record in open('sslc1.txt'):
+    fields = record.split(';')
+    if fields[0] not in data:
+        data[fields[0]] = {
+            'marks': array([]),
+            'total': array([]),
+            'P': 0,
+            'F': 0,
+            'W': 0
+            }
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Dictionary - Building parsed data \ldots}
+  \small
+  \begin{lstlisting}
+data[fields[0]]['marks'] = append(
+    data[fields[0]]['marks'], 
+    [int(fields[3]), int(fields[4]),
+    int(fields[5]), int(fields[6]),
+    int(fields[7])
+    ])
+
+data[fields[0]]['total'] = append(
+    data[fields[0]]['total'], int(fields[8]))
+
+pfw_key = fields[9] or fields[10] or fields[11]
+data[fields[0]][pfw_key] += 1
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Calculations}
+  \begin{lstlisting}
+all_sub_avg = array([])
+for k in data:
+    data[k]['avg'] = average(
+        data[k]['total'])
+    data[k]['sub_avg'] = average(
+        data[k]['marks'], axis=1)
+  \end{lstlisting}
+\end{frame}
+
 \end{document}
 
 Least squares: Smooth curve fit.