Session 3 Day 1: SSLC data parsing and obtaining statistics.
--- a/day1/session3.tex Wed Oct 14 20:40:35 2009 +0530
+++ b/day1/session3.tex Thu Oct 15 15:03:27 2009 +0530
@@ -258,7 +258,8 @@
\begin{itemize}
\item Average total marks scored in each region
\item Subject wise average score of each region
- \item ??Subject wise average score for all regions combined??
+ \item \alert{??Subject wise average score for all regions combined??}
+ \item Find the subject wise standard deviation of scores for each region
\end{itemize}
\end{frame}
@@ -354,31 +355,63 @@
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
- \small
+ \begin{lstlisting}
+marks = []
+for field in fields[3:8]:
+ score_str = field.strip()
+ score = (0 if score_str == 'AA'
+   or score_str == 'AAA'
+   or score_str == ''
+   else int(score_str))
+ marks.append(score)
+
+data[fields[0]]['marks'].append(marks)
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data \ldots}
\begin{lstlisting}
-data[fields[0]]['marks'] = append(
- data[fields[0]]['marks'],
- [int(fields[3]), int(fields[4]),
- int(fields[5]), int(fields[6]),
- int(fields[7])
- ])
+total_str = fields[8].strip()
+total = (0 if total_str in
+    ('AA', 'AAA', '')
+    else int(total_str))
+data[fields[0]]['total'].append(total)
-data[fields[0]]['total'].append(fields[8])
+pfw_key = (fields[9]
+    or fields[10]
+    or 'F')
+data[fields[0]][pfw_key] += 1
+ \end{lstlisting}
+\end{frame}
-pfw_key = fields[9] or fields[10] or fields[11]
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data \ldots}
+ \begin{lstlisting}
+pfw_key = (fields[9]
+    or fields[10]
+    or 'F')
data[fields[0]][pfw_key] += 1
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
\frametitle{Calculations}
+ \small
\begin{lstlisting}
-all_sub_avg = array([])
-for k, v in data:
+for k in data:
+ data[k]['marks'] = array(data[k]['marks'])
+ data[k]['total'] = array(data[k]['total'])
+
data[k]['avg'] = average(
data[k]['total'])
- data[k]['sub_avg'] = average(
- data[k]['marks'], axis=1)
+ marks = data[k]['marks']
+ sub_avg = average(marks, axis=0)
+ sub_std = sqrt(sum(square(
+   sub_avg - marks), axis=0) /
+   len(marks))
+ data[k]['sub_avg'] = sub_avg
+ data[k]['sub_std'] = sub_std
\end{lstlisting}
\end{frame}
--- a/day1/sslc1.txt Wed Oct 14 20:40:35 2009 +0530
+++ b/day1/sslc1.txt Thu Oct 15 15:03:27 2009 +0530
@@ -131341,7 +131341,7 @@
15;144497;KARTHIKEYAN N;090;079; 88; 86; 81;424;P;;
15;144498;PRABHAKARAN C;091;087; 94; 95; 82;449;P;;
15;144499;MANIKANDAN J;088;073; 93; 90; 87;431;P;;
-15;144500;VIJAYA KUMAR ;G;086;073; 92; 78; 74;403;P;;
+15;144500;VIJAYA KUMAR G;086;073; 92; 78; 74;403;P;;
15;144501;SARAVANAN S;077;062; 73; 65; 67;344;P;;
15;144502;GOWTHAMA PANDIAN M;072;064; 64; 66; 69;335;P;;
15;144503;BABU R;088;076; 95; 75; 82;416;P;;