day1/session3.tex
changeset 122 73374e1ae4f3
parent 120 055b199c46c2
child 125 99ca3cb18fd2
equal deleted inserted replaced
121:ac715f4826f2 122:73374e1ae4f3
   250 plot(L, Tline)
   250 plot(L, Tline)
   251 \end{lstlisting}
   251 \end{lstlisting}
   252 \end{itemize}
   252 \end{itemize}
   253 \end{frame}
   253 \end{frame}
   254 
   254 
       
   255 \begin{frame}
       
   256   \frametitle{Statistical Analysis and Parsing}
       
   257   Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
       
   258   \begin{itemize}
       
   259     \item Average total marks scored in each region
       
   260     \item Subject wise average score of each region
       
   261     \item ??Subject wise average score for all regions combined??
       
   262   \end{itemize}
       
   263 \end{frame}
       
   264 
       
   265 \begin{frame}
       
   266   \frametitle{Statistical Analysis and Parsing \ldots}
       
   267   Machinery Required -
       
   268   \begin{itemize}
       
   269     \item File reading and parsing
       
   270     \item NumPy arrays - sum by rows and sum by columns
       
   271     \item Dictionaries
       
   272   \end{itemize}
       
   273 \end{frame}
       
   274 
       
   275 \begin{frame}
       
   276   \frametitle{File reading and parsing}
       
   277   Understanding the structure of sslc1.txt
       
   278   \begin{itemize}
       
   279     \item Each line in the file, i.e.\ each row of the file, is a single record.
       
   280     \item Each record corresponds to a record of a single student
       
   281     \item Each record consists of several fields separated by a `;'
       
   282   \end{itemize}
       
   283 \end{frame}
       
   284 
       
   285 \begin{frame}
       
   286   \frametitle{File reading and parsing \ldots}
       
   287   Each record consists of:
       
   288   \begin{itemize}
       
   289     \item Region Code
       
   290     \item Roll Number
       
   291     \item Name
       
   292     \item Marks of 5 subjects
       
   293     \item Total marks
       
   294     \item Pass (P)
       
   295     \item Withdrawn (W)
       
   296     \item Fail (F)
       
   297   \end{itemize}
       
   298 \end{frame}
       
   299 
       
   300 \begin{frame}[fragile]
       
   301   \frametitle{File reading and parsing \ldots}
       
   302   \begin{lstlisting}
       
   303 for record in open('sslc1.txt'):
       
   304     fields = record.split(';')
       
   305   \end{lstlisting}
       
   306 \end{frame}
       
   307 
       
   308 \begin{frame}[fragile]
       
   309   \frametitle{Dictionary}
       
   310   \begin{itemize}
       
   311     \item lists index: 0 \ldots n
       
   312     \item dictionaries index using any hashable objects
       
   313     \item d = \{ ``Hitchhiker's guide'' : 42, ``Terminator'' : ``I'll be back''\}
       
   314     \item d[``Terminator''] $\Rightarrow$ ``I'll be back''
       
   315     \item ``Terminator'' is called the key of \typ{d}
       
   316     \item ``I'll be back'' is called the value of the key ``Terminator''
       
   317   \end{itemize}
       
   318 \end{frame}
       
   319 
       
   320 \begin{frame}[fragile]
       
   321   \frametitle{Dictionary - Building parsed data}
       
   322   \begin{itemize}
       
   323     \item Let the parsed data be stored in dictionary \typ{data}
       
   324     \item Keys of \typ{data} are strings - region codes
       
   325     \item Value of the key is another dictionary.
       
   326     \item This dictionary contains:
       
   327     \begin{itemize}
       
   328       \item 'marks': A list of NumPy arrays
       
   329       \item 'total': Total marks of each student
       
   330       \item 'P': Number of passes
       
   331       \item 'F': Number of failures
       
   332       \item 'W': Number of withdrawals
       
   333     \end{itemize}
       
   334   \end{itemize}
       
   335 \end{frame}
       
   336 
       
   337 \begin{frame}[fragile]
       
   338   \frametitle{Dictionary - Building parsed data \ldots}
       
   339   \small
       
   340   \begin{lstlisting}
       
   341 data = {}
       
   342 for record in open('sslc1.txt'):
       
   343     fields = record.split(';')
       
   344     if fields[0] not in data:
       
   345         data[fields[0]] = {
       
   346             'marks': array([]),
       
   347             'total': array([]),
       
   348             'P': 0,
       
   349             'F': 0,
       
   350             'W': 0
       
   351             }
       
   352   \end{lstlisting}
       
   353 \end{frame}
       
   354 
       
   355 \begin{frame}[fragile]
       
   356   \frametitle{Dictionary - Building parsed data \ldots}
       
   357   \small
       
   358   \begin{lstlisting}
       
   359 data[fields[0]]['marks'] = append(
       
   360     data[fields[0]]['marks'], 
       
   361     [int(fields[3]), int(fields[4]),
       
   362     int(fields[5]), int(fields[6]),
       
   363     int(fields[7])
       
   364     ])
       
   365 data[fields[0]]['total'] = append(
       
   366     data[fields[0]]['total'], int(fields[8]))
       
   367 
       
   368 pfw_key = fields[9] or fields[10] or fields[11]
       
   369 data[fields[0]][pfw_key] += 1
       
   370   \end{lstlisting}
       
   371 \end{frame}
       
   372 
       
   373 \begin{frame}[fragile]
       
   374   \frametitle{Calculations}
       
   375   \begin{lstlisting}
       
   376 all_sub_avg = array([])
       
   377 for k in data:
       
   378     data[k]['avg'] = average(
       
   379         data[k]['total'])
       
   380     data[k]['sub_avg'] = average(
       
   381         data[k]['marks'].reshape(-1, 5), axis=0)
       
   382   \end{lstlisting}
       
   383 \end{frame}
       
   384 
   255 \end{document}
   385 \end{document}
   256 
   386 
   257 Least squares: Smooth curve fit. 
   387 Least squares: Smooth curve fit. 
   258 Array Operations: Mean, average (etc region wise like district wise and state wise from SSLC.txt) 
   388 Array Operations: Mean, average (etc region wise like district wise and state wise from SSLC.txt) 
   259 Subject wise average. Introduce idea of dictionary. 
   389 Subject wise average. Introduce idea of dictionary.