Session 3 Day 1, sslc data parsing obtaining statistics.
author Madhusudan.C.S <madhusudancs@gmail.com>
Thu, 15 Oct 2009 15:03:27 +0530
changeset 125 99ca3cb18fd2
parent 120 055b199c46c2
child 126 a4bbc14342f9
Session 3 Day 1, sslc data parsing obtaining statistics.
day1/session3.tex
day1/sslc1.txt
--- a/day1/session3.tex	Wed Oct 14 20:40:35 2009 +0530
+++ b/day1/session3.tex	Thu Oct 15 15:03:27 2009 +0530
@@ -258,7 +258,8 @@
   \begin{itemize}
     \item Average total marks scored in each region
     \item Subject wise average score of each region
-    \item ??Subject wise average score for all regions combined??
+    \item \alert{??Subject wise average score for all regions combined??}
+    \item Find the subject wise standard deviation of scores for each region
   \end{itemize}
 \end{frame}
 
@@ -354,31 +355,63 @@
 
 \begin{frame}[fragile]
   \frametitle{Dictionary - Building parsed data \ldots}
-  \small
+  \begin{lstlisting}
+marks = []
+for field in fields[3:8]:
+    score_str = field.strip()
+    score = (0 if score_str == 'AA'
+        or score_str == 'AAA'
+        or score_str == ''
+        else int(score_str))
+    marks.append(score)
+
+data[fields[0]]['marks'].append(marks)
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Dictionary - Building parsed data \ldots}
   \begin{lstlisting}
-data[fields[0]]['marks'] = append(
-    data[fields[0]]['marks'], 
-    [int(fields[3]), int(fields[4]),
-    int(fields[5]), int(fields[6]),
-    int(fields[7])
-    ])
+total_str = fields[8].strip()
+total = 0 if total_str in (
+    'AA', 'AAA', ''
+    ) else int(total_str)
+data[fields[0]]['total'].append(total)
 
-data[fields[0]]['total'].append(fields[8])
+pfw_key = (fields[9]
+    or fields[10]
+    or 'F')
+data[fields[0]][pfw_key] += 1
+  \end{lstlisting}
+\end{frame}
 
-pfw_key = fields[9] or fields[10] or fields[11]
+\begin{frame}[fragile]
+  \frametitle{Dictionary - Building parsed data \ldots}
+  \begin{lstlisting}
+pfw_key = (fields[9]
+    or fields[10]
+    or 'F')
 data[fields[0]][pfw_key] += 1
   \end{lstlisting}
 \end{frame}
 
 \begin{frame}[fragile]
   \frametitle{Calculations}
+  \small
   \begin{lstlisting}
-all_sub_avg = array([])
-for k, v in data:
+for k in data:
+    data[k]['marks'] = array(data[k]['marks'])
+    data[k]['total'] = array(data[k]['total'])
+
     data[k]['avg'] = average(
         data[k]['total'])
-    data[k]['sub_avg'] = average(
-        data[k]['marks'], axis=1)
+    marks = data[k]['marks']
+    sub_avg = average(marks, axis=0)
+    sub_std = sqrt(sum(square(
+        sub_avg - marks), axis=0) /
+        len(marks))
+    data[k]['sub_avg'] = sub_avg
+    data[k]['sub_std'] = sub_std
   \end{lstlisting}
 \end{frame}
 
--- a/day1/sslc1.txt	Wed Oct 14 20:40:35 2009 +0530
+++ b/day1/sslc1.txt	Thu Oct 15 15:03:27 2009 +0530
@@ -131341,7 +131341,7 @@
 15;144497;KARTHIKEYAN N;090;079; 88; 86; 81;424;P;;
 15;144498;PRABHAKARAN C;091;087; 94; 95; 82;449;P;;
 15;144499;MANIKANDAN J;088;073; 93; 90; 87;431;P;;
-15;144500;VIJAYA KUMAR ;G;086;073; 92; 78; 74;403;P;;
+15;144500;VIJAYA KUMAR G;086;073; 92; 78; 74;403;P;;
 15;144501;SARAVANAN S;077;062; 73; 65; 67;344;P;;
 15;144502;GOWTHAMA PANDIAN M;072;064; 64; 66; 69;335;P;;
 15;144503;BABU R;088;076; 95; 75; 82;416;P;;