Rework the dictionary/statistics part of day1/session3.tex.

* Add introductory slides on dictionaries and on enumerate.
* Build the science pie chart from a single dictionary first, then collect
  data for all five subjects and print mean/median/mode/std of math scores.
* Field layout of a record: region code, roll number, name, then the marks
  for English, Hindi, Maths, Science, Social (fields[3:8]); hence Science
  is fields[6] and Maths is scores[2].
* The per-subject containers are built with list comprehensions, not with
  "[[]] * 5" / "[{}] * 5": sequence repetition stores five references to
  the same list/dict, which would merge the data of all subjects.
* Wrapped Python statements in the listings use parentheses, adjacent
  string literals or a backslash so that they are valid when typed in.

diff -r 8618546997ac -r 298cd56f4d5a day1/session3.tex
--- a/day1/session3.tex Tue Oct 27 19:25:14 2009 +0530
+++ b/day1/session3.tex Tue Oct 27 19:36:09 2009 +0530
@@ -140,6 +140,7 @@
   \begin{itemize}
     \item Draw a pie chart representing the number of students who scored more than 90\% in Science per region.
     \item Draw a pie chart representing the number of students who scored more than 90\% per subject(All regions combined).
+    \item Print mean, median, mode and standard deviation of math scores for all regions combined.
   \end{itemize}
 \end{frame}
 
@@ -147,8 +148,11 @@
   \frametitle{Statistical Analysis and Parsing \ldots}
   Machinery Required -
   \begin{itemize}
-    \item File reading and parsing
+    \item File reading
+    \item Parsing
     \item Dictionaries
+    \item NumPy arrays
+    \item Statistical operations
   \end{itemize}
 \end{frame}
 
@@ -156,8 +160,8 @@
   \frametitle{File reading and parsing}
   Understanding the structure of sslc1.txt
   \begin{itemize}
-    \item Each line in the file, i.e each row of a file is a single record.
-    \item Each record corresponds to a record of a single student
+    \item Each line in the file corresponds to one student's details
+    \item Each such line is called a \emph{record}
     \item Each record consists of several fields separated by a ';'
   \end{itemize}
 \end{frame}
@@ -169,11 +173,10 @@
     \item Region Code
     \item Roll Number
     \item Name
-    \item Marks of 5 subjects
+    \item Marks of 5 subjects: English, Hindi, Maths, Science, Social
     \item Total marks
-    \item Pass (P)
+    \item Pass/Fail (P/F)
     \item Withdrawn (W)
-    \item Fail (F)
   \end{itemize}
 \end{frame}
 
@@ -186,44 +189,83 @@
 \end{frame}
 
 \begin{frame}[fragile]
-  \frametitle{Dictionary - Building parsed data}
+  \frametitle{Dictionary: Introduction}
   \begin{itemize}
-    \item Let the parsed data be stored in list of dictionaries.
-    \item d = \{\} is an empty dictionary
+    \item Lists are indexed by integers: 0 \ldots $n-1$
+    \item Dictionaries are indexed by keys, such as strings
   \end{itemize}
+\begin{block}{Example}
+d = \{ ``Hitchhiker's guide'' : 42,
+     ``Terminator'' : ``I'll be back''\}\\
+d[``Terminator''] $\Rightarrow$ ``I'll be back''
+\end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Dictionary: Introduction}
+\begin{lstlisting}
+In [1]: d = {"Hitchhiker's guide" : 42,
+      "Terminator" : "I'll be back"}
+
+In [2]: d["Hitchhiker's guide"]
+Out[2]: 42
+
+In [3]: "Hitchhiker's guide" in d
+Out[3]: True
+
+In [4]: "Guido" in d
+Out[4]: False
+\end{lstlisting}
 \end{frame}
 
 \begin{frame}[fragile]
-  \frametitle{Dictionary - Building parsed data}
+  \frametitle{Dictionary: Introduction}
+\begin{lstlisting}
+In [5]: d.keys()
+Out[5]: ['Terminator', "Hitchhiker's
+                              guide"]
+
+In [6]: d.values()
+Out[6]: ["I'll be back", 42]
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{enumerate: Iterating through list indices}
   \begin{lstlisting}
-ninety_percents = [{}, {}, {}, {}, {}]
+In [1]: names = ["Guido","Alex", "Tim"]
+
+In [2]: for i, name in enumerate(names):
+   ...:     print i, name
+   ...:
+0 Guido
+1 Alex
+2 Tim
   \end{lstlisting}
 \end{frame}
 
 \begin{frame}[fragile]
+  \frametitle{Dictionary: Building parsed data}
+    Let our dictionary be:
+  \begin{lstlisting}
+science = {}  # an empty dictionary
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
   \frametitle{Dictionary - Building parsed data}
   \begin{itemize}
-    \item Index of a dictionary is called a \emph{key}
-    \item \emph{Keys} of these dictionaries are strings - region codes
+    \item \emph{Keys} of \emph{science} will be region codes
+    \item Each value in \emph{science} will be the number of students who scored more than 90\% in that region
   \end{itemize}
 \end{frame}
 
 \begin{frame}[fragile]
-  \frametitle{Dictionary - Building parsed data \ldots}
-  \begin{itemize}
-    \item Value of a \emph{key} can be any legal Python value
-    \item In this problem let the value of a \emph{key} be another an integer
-    \item This dictionary contains:
-  \end{itemize}
-'region code': Number of students who scored more than 90\% in this region for this subject
-\end{frame}
-
-\begin{frame}[fragile]
   \frametitle{Building parsed data \ldots}
   \begin{lstlisting}
-from pylab import *
+from pylab import pie
 
-ninety_percents = [{}, {}, {}, {}, {}]
+science = {}
 
 for record in open('sslc1.txt'):
     record = record.strip()
@@ -235,17 +277,92 @@
 
 \begin{frame}[fragile]
   \frametitle{Building parsed data \ldots}
+  \begin{lstlisting}
+if region_code not in science:
+    science[region_code] = 0
+
+score_str = fields[6].strip()
+
+score = (int(score_str) if
+         score_str != 'AA' else 0)
+
+if score > 90:
+    science[region_code] += 1
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Pie charts}
+  \small
+  \begin{lstlisting}
+figure(1)
+pie(science.values(),
+    labels=science.keys())
+title('Students scoring 90% and above '
+      'in science by region')
+savefig('/tmp/science.png')
+  \end{lstlisting}
+\begin{columns}
+    \column{5.25\textwidth}
+    \hspace*{1.1in}
+\includegraphics[height=2in, interpolate=true]{data/science}
+    \column{0.8\textwidth}
+\end{columns}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Building data for all subjects \ldots}
+  \begin{lstlisting}
+from pylab import pie
+from scipy import mean, median, std
+from scipy import stats
+
+scores = [[] for i in range(5)]
+ninety_percents = [{} for i in range(5)]
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Building data for all subjects \ldots}
+  \begin{lstlisting}
+from pylab import pie
+from scipy import mean, median, std
+from scipy import stats
+  \end{lstlisting}
+
+  \begin{block}{One container per subject}
+  \begin{lstlisting}
+scores = [[] for i in range(5)]
+ninety_percents = [{} for i in range(5)]
+  \end{lstlisting}
+  \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Building data for all subjects \ldots}
+  \begin{lstlisting}
+for record in open('sslc1.txt'):
+    record = record.strip()
+    fields = record.split(';')
+
+    region_code = fields[0].strip()
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Building data for all subjects \ldots}
   \small
   \begin{lstlisting}
 for i, field in enumerate(fields[3:8]):
-
     if region_code not in ninety_percents[i]:
         ninety_percents[i][region_code] = 0
 
     score_str = field.strip()
+    score = (int(score_str) if
+             score_str != 'AA' else 0)
 
-    score = 0 if score_str == 'AA' else
-                 int(score_str)
+    scores[i].append(score)
+
     if score > 90:
         ninety_percents[i][region_code] += 1
   \end{lstlisting}
@@ -263,25 +380,6 @@
 
 \begin{frame}[fragile]
   \frametitle{Pie charts}
-  \small
-  \begin{lstlisting}
-figure(1)
-pie(ninety_percents[4].values(),
-    labels=ninety_percents[1].keys())
-title('Students scoring 90% and above
-      in science by region')
-savefig('/tmp/science.png')
-  \end{lstlisting}
-\begin{columns}
-    \column{5.25\textwidth}
-    \hspace*{1.1in}
-\includegraphics[height=2in, interpolate=true]{data/science}
-    \column{0.8\textwidth}
-\end{columns}
-\end{frame}
-
-\begin{frame}[fragile]
-  \frametitle{Pie charts}
   \begin{lstlisting}
 figure(2)
 pie(subj_total, labels=['English',
@@ -299,6 +397,32 @@
 \includegraphics[height=3in, interpolate=true]{data/all_regions}
 \end{frame}
 
+\begin{frame}[fragile]
+  \frametitle{Obtaining statistics}
+  \begin{lstlisting}
+math_scores = array(scores[2])
+
+print "Mean: ", mean(math_scores)
+
+print "Median: ", median(math_scores)
+
+print "Mode: ", stats.mode(math_scores)
+
+print "Standard Deviation: ", \
+      std(math_scores)
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{What tools did we use?}
+  \begin{itemize}
+   \item Dictionaries for storing data
+   \item Facilities for drawing pie charts
+   \item NumPy arrays for efficient array manipulations
+   \item Functions for statistical computations: mean, median, mode, standard deviation
+  \end{itemize}
+\end{frame}
+
 \begin{frame}
   \frametitle{L vs $T^2$ \ldots}
   Let's go back to the L vs $T^2$ plot
@@ -436,14 +560,4 @@
 \end{lstlisting}
 \end{frame}
 
-\begin{frame}[fragile]
-  \frametitle{What did we learn?}
-  \begin{itemize}
-   \item Dictionaries
-   \item Drawing pie charts
-   \item Arrays
-   \item Least Square fitting
-   \item Intro to Matrices
-  \end{itemize}
-\end{frame}
 \end{document}