day1/session3.tex
changeset 184 9efa777af2e2
parent 178 8a3a9d98fa84
child 185 e59ab9ab1a89
equal deleted inserted replaced
183:b34c3a22d726 184:9efa777af2e2
   125 %%   \tableofcontents
   125 %%   \tableofcontents
   126 %%   % You might wish to add the option [pausesections]
   126 %%   % You might wish to add the option [pausesections]
   127 %% \end{frame}
   127 %% \end{frame}
   128 
   128 
   129 \begin{frame}
   129 \begin{frame}
       
   130   \frametitle{More on data processing}
       
   131   \begin{block}{}
       
   132     What do we do if we want to draw pie charts for the data in a huge data file?
       
   133   \end{block}
       
   134 \end{frame}
       
   135 
       
   136 
       
   137 \begin{frame}
   130   \frametitle{Statistical Analysis and Parsing}
   138   \frametitle{Statistical Analysis and Parsing}
   131   Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
   139   Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
   132   \begin{itemize}
   140   \begin{itemize}
   133     \item Average total marks scored in each region
   141     \item Draw a pie chart representing the number of students who scored more than 90\% in Science per region.
   134     \item Subject wise average score of each region
   142     \item Draw a pie chart representing the number of students who scored more than 90\% per subject (all regions combined).
   135     \item \alert{??Subject wise average score for all regions combined??}
       
   136     \item Find the subject wise standard deviation of scores for each region
       
   137   \end{itemize}
   143   \end{itemize}
   138 \end{frame}
   144 \end{frame}
   139 
   145 
   140 \begin{frame}
   146 \begin{frame}
   141   \frametitle{Statistical Analysis and Parsing \ldots}
   147   \frametitle{Statistical Analysis and Parsing \ldots}
   142   Machinery Required -
   148   Machinery Required -
   143   \begin{itemize}
   149   \begin{itemize}
   144     \item File reading and parsing
   150     \item File reading and parsing
   145     \item NumPy arrays - sum by rows and sum by columns
       
   146     \item Dictionaries
   151     \item Dictionaries
   147   \end{itemize}
   152   \end{itemize}
   148 \end{frame}
   153 \end{frame}
   149 
   154 
   150 \begin{frame}
   155 \begin{frame}
   181 \end{frame}
   186 \end{frame}
   182 
   187 
   183 \begin{frame}[fragile]
   188 \begin{frame}[fragile]
   184   \frametitle{Dictionary - Building parsed data}
   189   \frametitle{Dictionary - Building parsed data}
   185   \begin{itemize}
   190   \begin{itemize}
   186     \item Let the parsed data be stored in dictionary \typ{data}
   191     \item Let the parsed data be stored in a list of dictionaries.
   187     \item \begin{lstlisting}
   192     \item d = \{\} is an empty dictionary
   188 data = {}  # is an empty dictionary
   193   \end{itemize}
   189 \end{lstlisting}
   194 \end{frame}
       
   195 
       
   196 \begin{frame}[fragile]
       
   197   \frametitle{Dictionary - Building parsed data}
       
   198 \begin{lstlisting}
       
   199 ninety_percents = [{}, {}, {}, {}, {}]
       
   200 \end{lstlisting}
       
   201 \end{frame}
       
   202 
       
   203 \begin{frame}[fragile]
       
   204   \frametitle{Dictionary - Building parsed data}
       
   205   \begin{itemize}
   190     \item Index of a dictionary is called a \emph{key}
   206     \item Index of a dictionary is called a \emph{key}
   191     \item \emph{Keys} of \typ{data} are strings - region codes
   207     \item \emph{Keys} of these dictionaries are strings - region codes
   192     \item Value of a \emph{key} can be any Python object
   208   \end{itemize}
   193   \end{itemize}
   209 \end{frame}
   194 \end{frame}
   210 
   195 
   211 \begin{frame}[fragile]
   196 \begin{frame}[fragile]
   212   \frametitle{Dictionary - Building parsed data \ldots}
   197   \frametitle{Dictionary - Building parsed data...}
   213   \begin{itemize}
   198   \begin{itemize}
   214     \item Value of a \emph{key} can be any legal Python value
   199     \item In this problem let the value of a \emph{key} be another dictionary.
   215     \item In this problem let the value of a \emph{key} be an integer
   200     \item This dictionary contains:
   216     \item This dictionary contains:
   201     \begin{itemize}
   217   \end{itemize}
   202       \item 'marks': A \emph{List} of \emph{Lists} containing all marks
   218 'region code': Number of students who scored more than 90\% in this region for this subject
   203       \item 'total': A \emph{List} of total marks of each student
   219 \end{frame}
   204       \item 'P': Number of passes
   220 
   205       \item 'F': Number of failures
   221 \begin{frame}[fragile]
   206       \item 'W': Number of withdrawals
   222   \frametitle{Building parsed data \ldots}
   207     \end{itemize}
   223   \begin{lstlisting}
   208   \end{itemize}
   224 from pylab import *
   209 \end{frame}
   225 
   210 
   226 ninety_percents = [{}, {}, {}, {}, {}]
   211 \begin{frame}[fragile]
   227 
   212   \frametitle{Dictionary - Building parsed data \ldots}
   228 for record in open('sslc1.txt'):
       
   229     record = record.strip()
       
   230     fields = record.split(';')
       
   231 
       
   232     region_code = fields[0].strip()
       
   233   \end{lstlisting}
       
   234 \end{frame}
       
   235 
       
   236 \begin{frame}[fragile]
       
   237   \frametitle{Building parsed data \ldots}
   213   \small
   238   \small
   214   \begin{lstlisting}
   239   \begin{lstlisting}
   215 data = {}
   240 for i, field in enumerate(fields[3:8]):
   216 for record in open('sslc1.txt'):
   241 
   217     fields = record.split(';')
   242     if region_code not in ninety_percents[i]:
   218     if fields[0] not in data:
   243         ninety_percents[i][region_code] = 0
   219         data[fields[0]] = {
   244 
   220             'marks': [],
       
   221             'total': [],
       
   222             'P': 0,
       
   223             'F': 0,
       
   224             'W': 0
       
   225             }
       
   226   \end{lstlisting}
       
   227 \end{frame}
       
   228 
       
   229 \begin{frame}[fragile]
       
   230   \frametitle{Dictionary - Building parsed data \ldots}
       
   231   \begin{lstlisting}
       
   232 marks = []
       
   233 for field in fields[3:8]:
       
   234     score_str = field.strip()
   245     score_str = field.strip()
   235     score = 0 if score_str == 'AA'
   246 
   236         or score_str == 'AAA'
   247     score = 0 if score_str == 'AA' else 
   237         or score_str == ''
   248                          int(score_str)
   238         else int(score_str)
   249     if score > 90:
   239     marks.append(score)
   250         ninety_percents[i][region_code] += 1
   240 
   251   \end{lstlisting}
   241 data[fields[0]]['marks'].append(marks)
   252 \end{frame}
   242   \end{lstlisting}
   253 
   243 \end{frame}
   254 \begin{frame}[fragile]
   244 
   255   \frametitle{Consolidating data}
   245 \begin{frame}[fragile]
   256   \begin{lstlisting}
   246   \frametitle{Dictionary - Building parsed data \ldots}
   257 subj_total = []
   247   \begin{lstlisting}
   258 for subject in ninety_percents:
   248 total = 0 if score_str == 'AA'
   259     subj_total.append(sum(
   249     or score_str == 'AAA'
   260          subject.values()))
   250     or score_str == ''
   261   \end{lstlisting}
   251     else int(fields[8])
   262 \end{frame}
   252 data[fields[0]]['total'].append(total)
   263 
   253   \end{lstlisting}
   264 \begin{frame}[fragile]
   254 \end{frame}
   265   \frametitle{Pie charts}
   255 
       
   256 \begin{frame}[fragile]
       
   257   \frametitle{Dictionary - Building parsed data \ldots}
       
   258   \begin{lstlisting}
       
   259 pfw_key = fields[9]
       
   260     or fields[10]
       
   261     or 'F'
       
   262 data[fields[0]][pfw_key] += 1
       
   263   \end{lstlisting}
       
   264 \end{frame}
       
   265 
       
   266 \begin{frame}[fragile]
       
   267   \frametitle{NumPy arrays}
       
   268   \centerline{\alert{But I lied!?!?!?}}
       
   269 \end{frame}
       
   270 
       
   271 \begin{frame}[fragile]
       
   272   \frametitle{Calculations}
       
   273   \begin{lstlisting}
       
   274 for k in data:
       
   275     data[k]['marks'] = array(
       
   276         data[k]['marks'])
       
   277     data[k]['total'] = array(
       
   278         data[k]['total'])
       
   279   \end{lstlisting}
       
   280 \end{frame}
       
   281 
       
   282 \begin{frame}[fragile]
       
   283   \frametitle{Calculations}
       
   284   \small
   266   \small
   285   \begin{lstlisting}
   267   \begin{lstlisting}
   286     data[k]['avg'] = average(
   268 figure(1)
   287         data[k]['total'])
   269 pie(ninety_percents[4].values(), 
   288     marks = data[k]['marks']
   270     labels=ninety_percents[1].keys())
   289     sub_avg = average(marks, axis=1)
   271 title('Students scoring 90% and above 
   290     sub_std = sqrt(sum(square(
   272       in science by region')
   291         sub_avg[:,newaxis] - marks), axis=0) /
   273 savefig('/tmp/science.png')
   292         len(marks))
   274   \end{lstlisting}
   293     data[k]['sub_avg'] = sub_avg
   275 \begin{columns}
   294     data[k]['sub_std'] = sub_std
   276     \column{5.25\textwidth}
   295   \end{lstlisting}
   277     \hspace*{1.1in}
   296 \end{frame}
   278 \includegraphics[height=2in, interpolate=true]{data/science}
   297 
   279     \column{0.8\textwidth}
   298 \begin{frame}[fragile]
   280 \end{columns}
   299   \frametitle{New Concepts}
   281 \end{frame}
   300   \begin{itemize}
   282 
   301    \item Dictionaries
   283 \begin{frame}[fragile]
   302    \item Slicing lists
   284   \frametitle{Pie charts}
   303    \item New type of conditional
   285   \begin{lstlisting}
   304    \item NumPy arrays
   286 figure(2)
   305    \item Slicing NumPy arrays
   287 pie(subj_total, labels=['English',
   306    \item NumPy array functions - square, average, sqrt
   288     'Hindi', 'Maths', 'Science',
   307   \end{itemize}
   289     'Social'])
       
   290 title('Students scoring more than
       
   291       90% by subject(All regions
       
   292       combined).')
       
   293 savefig('/tmp/all_regions.png')
       
   294   \end{lstlisting}
       
   295 \end{frame}
       
   296 
       
   297 \begin{frame}[fragile]
       
   298   \frametitle{Pie charts}
       
   299   \includegraphics[height=3in, interpolate=true]{data/all_regions}
   308 \end{frame}
   300 \end{frame}
   309 
   301 
   310 \begin{frame}[fragile]
   302 \begin{frame}[fragile]
   311 \frametitle{Dealing with data whole-sale}
   303 \frametitle{Dealing with data whole-sale}
   312 \begin{lstlisting}
   304 \begin{lstlisting}
   425 \begin{lstlisting}
   417 \begin{lstlisting}
   426 In []: plot(L, Tline)
   418 In []: plot(L, Tline)
   427 \end{lstlisting}
   419 \end{lstlisting}
   428 \end{frame}
   420 \end{frame}
   429 
   421 
       
   422 \begin{frame}[fragile]
       
   423   \frametitle{What did we learn?}
       
   424   \begin{itemize}
       
   425    \item Dictionaries
       
   426    \item Drawing pie charts
       
   427   \end{itemize}
       
   428 \end{frame}
   430 \end{document}
   429 \end{document}