Workshop materials: comparison day1/session3.tex

equal deleted inserted replaced

-:b34c3a22d726
+:9efa777af2e2
 %%   \tableofcontents
 %%   % You might wish to add the option [pausesections]
 %% \end{frame}
 \begin{frame}
+\frametitle{More on data processing}
+\begin{block}{}
+What do we do if we want to draw pie charts for the data in a huge data file?
+\end{block}
+\end{frame}
+\begin{frame}
 \frametitle{Statistical Analysis and Parsing}
 Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
 \begin{itemize}
-\item Average total marks scored in each region
+\item Draw a pie chart representing the number of students who scored more than 90\% in Science per region.
-\item Subject wise average score of each region
+\item Draw a pie chart representing the number of students who scored more than 90\% per subject (all regions combined).
-\item \alert{??Subject wise average score for all regions combined??}
-\item Find the subject wise standard deviation of scores for each region
 \end{itemize}
 \end{frame}
 \begin{frame}
 \frametitle{Statistical Analysis and Parsing \ldots}
 Machinery Required -
 \begin{itemize}
 \item File reading and parsing
-\item NumPy arrays - sum by rows and sum by coloumns
 \item Dictionaries
 \end{itemize}
 \end{frame}
 \begin{frame}
 \end{frame}
 \begin{frame}[fragile]
 \frametitle{Dictionary - Building parsed data}
 \begin{itemize}
-\item Let the parsed data be stored in dictionary \typ{data}
+\item Let the parsed data be stored in a list of dictionaries.
-\item \begin{lstlisting}
+\item d = \{\} is an empty dictionary
-data = {}  # is an empty dictionary
+\end{itemize}
-\end{lstlisting}
+\end{frame}
+\begin{frame}[fragile]
+\frametitle{Dictionary - Building parsed data}
+\begin{lstlisting}
+ninety_percents = [{}, {}, {}, {}, {}]
+\end{lstlisting}
+\end{frame}
+\begin{frame}[fragile]
+\frametitle{Dictionary - Building parsed data}
+\begin{itemize}
 \item Index of a dictionary is called a \emph{key}
-\item \emph{Keys} of \typ{data} are strings - region codes
+\item \emph{Keys} of these dictionaries are strings - region codes
-\item Value of a \emph{key} can be any Python object
+\end{itemize}
-\end{itemize}
+\end{frame}
-\end{frame}
+\begin{frame}[fragile]
-\begin{frame}[fragile]
+\frametitle{Dictionary - Building parsed data \ldots}
-\frametitle{Dictionary - Building parsed data...}
+\begin{itemize}
-\begin{itemize}
+\item Value of a \emph{key} can be any legal Python value
-\item In this problem let the value of a \emph{key} be another dictionary.
+\item In this problem let the value of a \emph{key} be an integer
 \item This dictionary contains:
-\begin{itemize}
+\end{itemize}
-\item 'marks': A \emph{List} of \emph{Lists} containing all marks
+'region code': Number of students who scored more than 90\% in this region for this subject
-\item 'total': A \emph{List} of total marks of each student
+\end{frame}
-\item 'P': Number of passes
-\item 'F': Number of failures
+\begin{frame}[fragile]
-\item 'W': Number of withdrawls
+\frametitle{Building parsed data \ldots}
-\end{itemize}
+\begin{lstlisting}
-\end{itemize}
+from pylab import *
-\end{frame}
+ninety_percents = [{}, {}, {}, {}, {}]
-\begin{frame}[fragile]
-\frametitle{Dictionary - Building parsed data \ldots}
+for record in open('sslc1.txt'):
+record = record.strip()
+fields = record.split(';')
+region_code = fields[0].strip()
+\end{lstlisting}
+\end{frame}
+\begin{frame}[fragile]
+\frametitle{Building parsed data \ldots}
 \small
 \begin{lstlisting}
-data = {}
+for i, field in enumerate(fields[3:8]):
-for record in open('sslc1.txt'):
-fields = record.split(';')
+if region_code not in ninety_percents[i]:
-if fields[0] not in data:
+ninety_percents[i][region_code] = 0
-data[fields[0]] = {
-'marks': [],
-'total': [],
-'P': 0,
-'F': 0,
-'W': 0
-}
-\end{lstlisting}
-\end{frame}
-\begin{frame}[fragile]
-\frametitle{Dictionary - Building parsed data \ldots}
-\begin{lstlisting}
-marks = []
-for field in fields[3:8]:
 score_str = field.strip()
-score = 0 if score_str == 'AA'
-or score_str == 'AAA'
+score = (0 if score_str == 'AA' else
-or score_str == ''
+int(score_str))
-else int(score_str)
+if score > 90:
-marks.append(score)
+ninety_percents[i][region_code] += 1
+\end{lstlisting}
-data[fields[0]]['marks'].append(marks)
+\end{frame}
-\end{lstlisting}
-\end{frame}
+\begin{frame}[fragile]
+\frametitle{Consolidating data}
-\begin{frame}[fragile]
+\begin{lstlisting}
-\frametitle{Dictionary - Building parsed data \ldots}
+subj_total = []
-\begin{lstlisting}
+for subject in ninety_percents:
-total = 0 if score_str == 'AA'
+subj_total.append(sum(
-or score_str == 'AAA'
+subject.values()))
-or score_str == ''
+\end{lstlisting}
-else int(fields[8])
+\end{frame}
-data[fields[0]]['total'].append(total)
-\end{lstlisting}
+\begin{frame}[fragile]
-\end{frame}
+\frametitle{Pie charts}
-\begin{frame}[fragile]
-\frametitle{Dictionary - Building parsed data \ldots}
-\begin{lstlisting}
-pfw_key = fields[9]
-or fields[10]
-or 'F'
-data[fields[0]][pfw_key] += 1
-\end{lstlisting}
-\end{frame}
-\begin{frame}[fragile]
-\frametitle{NumPy arrays}
-\centerline{\alert{But I lied!?!?!?}}
-\end{frame}
-\begin{frame}[fragile]
-\frametitle{Calculations}
-\begin{lstlisting}
-for k in data:
-data[k]['marks'] = array(
-data[k]['marks'])
-data[k]['total'] = array(
-data[k]['total'])
-\end{lstlisting}
-\end{frame}
-\begin{frame}[fragile]
-\frametitle{Calculations}
 \small
 \begin{lstlisting}
-data[k]['avg'] = average(
+figure(1)
-data[k]['total'])
+pie(ninety_percents[3].values(),
-marks = data[k]['marks']
+labels=ninety_percents[3].keys())
-sub_avg = average(marks, axis=1)
+title('Students scoring 90% and above '
-sub_std = sqrt(sum(square(
+'in science by region')
-sub_avg[:,newaxis] - marks), axis=0) /
+savefig('/tmp/science.png')
-len(marks))
+\end{lstlisting}
-data[k]['sub_avg'] = sub_avg
+\begin{columns}
-data[k]['sub_std'] = sub_std
+\column{5.25\textwidth}
-\end{lstlisting}
+\hspace*{1.1in}
-\end{frame}
+\includegraphics[height=2in, interpolate=true]{data/science}
+\column{0.8\textwidth}
-\begin{frame}[fragile]
+\end{columns}
-\frametitle{New Concepts}
+\end{frame}
-\begin{itemize}
-\item Dictionaries
+\begin{frame}[fragile]
-\item Slicing lists
+\frametitle{Pie charts}
-\item New type of conditional
+\begin{lstlisting}
-\item NumPy arrays
+figure(2)
-\item Slicing NumPy arrays
+pie(subj_total, labels=['English',
-\item NumPy array functions - square, average, sqrt
+'Hindi', 'Maths', 'Science',
-\end{itemize}
+'Social'])
+title('Students scoring more than '
+'90% by subject (All regions '
+'combined).')
+savefig('/tmp/all_regions.png')
+\end{lstlisting}
+\end{frame}
+\begin{frame}[fragile]
+\frametitle{Pie charts}
+\includegraphics[height=3in, interpolate=true]{data/all_regions}
 \end{frame}
 \begin{frame}[fragile]
 \frametitle{Dealing with data whole-sale}
 \begin{lstlisting}
 \begin{lstlisting}
 In []: plot(L, Tline)
 \end{lstlisting}
 \end{frame}
+\begin{frame}[fragile]
+\frametitle{What did we learn?}
+\begin{itemize}
+\item Dictionaries
+\item Drawing pie charts
+\end{itemize}
+\end{frame}
 \end{document}

changeset 184	9efa777af2e2
parent 178	8a3a9d98fa84
child 185	e59ab9ab1a89