day1/session3.tex
changeset 177 4ec0418ba041
parent 174 a43832c1edc2
child 178 8a3a9d98fa84
equal deleted inserted replaced
176:5e2925eed57e 177:4ec0418ba041
   123 %% \begin{frame}
   123 %% \begin{frame}
   124 %%   \frametitle{Outline}
   124 %%   \frametitle{Outline}
   125 %%   \tableofcontents
   125 %%   \tableofcontents
   126 %%   % You might wish to add the option [pausesections]
   126 %%   % You might wish to add the option [pausesections]
   127 %% \end{frame}
   127 %% \end{frame}
       
   128 
       
   129 \begin{frame}
       
   130   \frametitle{Statistical Analysis and Parsing}
       
   131   Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
       
   132   \begin{itemize}
       
   133     \item Average total marks scored in each region
       
   134     \item Subject wise average score of each region
       
   135     \item \alert{??Subject wise average score for all regions combined??}
       
   136     \item Find the subject wise standard deviation of scores for each region
       
   137   \end{itemize}
       
   138 \end{frame}
       
   139 
       
   140 \begin{frame}
       
   141   \frametitle{Statistical Analysis and Parsing \ldots}
       
   142   Machinery Required -
       
   143   \begin{itemize}
       
   144     \item File reading and parsing
       
   145     \item NumPy arrays - sum by rows and sum by columns
       
   146     \item Dictionaries
       
   147   \end{itemize}
       
   148 \end{frame}
       
   149 
       
   150 \begin{frame}
       
   151   \frametitle{File reading and parsing}
       
   152   Understanding the structure of sslc1.txt
       
   153   \begin{itemize}
       
   154     \item Each line in the file, i.e.\ each row of the file, is a single record.
       
   155     \item Each record corresponds to a record of a single student
       
   156     \item Each record consists of several fields separated by a ';'
       
   157   \end{itemize}
       
   158 \end{frame}
       
   159 
       
   160 \begin{frame}
       
   161   \frametitle{File reading and parsing \ldots}
       
   162   Each record consists of:
       
   163   \begin{itemize}
       
   164     \item Region Code
       
   165     \item Roll Number
       
   166     \item Name
       
   167     \item Marks of 5 subjects
       
   168     \item Total marks
       
   169     \item Pass (P)
       
   170     \item Withdrawn (W)
       
   171     \item Fail (F)
       
   172   \end{itemize}
       
   173 \end{frame}
       
   174 
       
   175 \begin{frame}[fragile]
       
   176   \frametitle{File reading and parsing \ldots}
       
   177   \begin{lstlisting}
       
   178 for record in open('sslc1.txt'):
       
   179     fields = record.split(';')
       
   180   \end{lstlisting}
       
   181 \end{frame}
       
   182 
       
   183 \begin{frame}[fragile]
       
   184   \frametitle{Dictionary - Building parsed data}
       
   185   \begin{itemize}
       
   186     \item Let the parsed data be stored in dictionary \typ{data}
       
   187     \item \begin{lstlisting}
       
   188 data = {}  # is an empty dictionary
       
   189 \end{lstlisting}
       
   190     \item Index of a dictionary is called a \emph{key}
       
   191     \item \emph{Keys} of \typ{data} are strings - region codes
       
   192     \item Value of a \emph{key} can be any Python object
       
   193   \end{itemize}
       
   194 \end{frame}
       
   195 
       
   196 \begin{frame}[fragile]
       
   197   \frametitle{Dictionary - Building parsed data \ldots}
       
   198   \begin{itemize}
       
   199     \item In this problem let the value of a \emph{key} be another dictionary.
       
   200     \item This dictionary contains:
       
   201     \begin{itemize}
       
   202       \item 'marks': A \emph{List} of \emph{Lists} containing all marks
       
   203       \item 'total': A \emph{List} of total marks of each student
       
   204       \item 'P': Number of passes
       
   205       \item 'F': Number of failures
       
   206       \item 'W': Number of withdrawals
       
   207     \end{itemize}
       
   208   \end{itemize}
       
   209 \end{frame}
       
   210 
       
   211 \begin{frame}[fragile]
       
   212   \frametitle{Dictionary - Building parsed data \ldots}
       
   213   \small
       
   214   \begin{lstlisting}
       
   215 data = {}
       
   216 for record in open('sslc1.txt'):
       
   217     fields = record.split(';')
       
   218     if fields[0] not in data:
       
   219         data[fields[0]] = {
       
   220             'marks': [],
       
   221             'total': [],
       
   222             'P': 0,
       
   223             'F': 0,
       
   224             'W': 0
       
   225             }
       
   226   \end{lstlisting}
       
   227 \end{frame}
       
   228 
       
   229 \begin{frame}[fragile]
       
   230   \frametitle{Dictionary - Building parsed data \ldots}
       
   231   \begin{lstlisting}
       
   232 marks = []
       
   233 for field in fields[3:8]:
       
   234     score_str = field.strip()
       
   235     score = (0 if score_str == 'AA'
       
   236         or score_str == 'AAA'
       
   237         or score_str == ''
       
   238         else int(score_str))
       
   239     marks.append(score)
       
   240 
       
   241 data[fields[0]]['marks'].append(marks)
       
   242   \end{lstlisting}
       
   243 \end{frame}
       
   244 
       
   245 \begin{frame}[fragile]
       
   246   \frametitle{Dictionary - Building parsed data \ldots}
       
   247   \begin{lstlisting}
       
   248 total_str = fields[8].strip()
       
   249 total = (0 if total_str == 'AA'
       
   250     or total_str == 'AAA' or total_str == ''
       
   251     else int(total_str))
       
   252 data[fields[0]]['total'].append(total)
       
   253   \end{lstlisting}
       
   254 \end{frame}
       
   255 
       
   256 \begin{frame}[fragile]
       
   257   \frametitle{Dictionary - Building parsed data \ldots}
       
   258   \begin{lstlisting}
       
   259 pfw_key = (fields[9]
       
   260     or fields[10]
       
   261     or 'F')
       
   262 data[fields[0]][pfw_key] += 1
       
   263   \end{lstlisting}
       
   264 \end{frame}
       
   265 
       
   266 \begin{frame}[fragile]
       
   267   \frametitle{NumPy arrays}
       
   268   \centerline{\alert{But I lied!?!?!?}}
       
   269 \end{frame}
       
   270 
       
   271 \begin{frame}[fragile]
       
   272   \frametitle{Calculations}
       
   273   \begin{lstlisting}
       
   274 for k in data:
       
   275     data[k]['marks'] = array(
       
   276         data[k]['marks'])
       
   277     data[k]['total'] = array(
       
   278         data[k]['total'])
       
   279   \end{lstlisting}
       
   280 \end{frame}
       
   281 
       
   282 \begin{frame}[fragile]
       
   283   \frametitle{Calculations}
       
   284   \small
       
   285   \begin{lstlisting}
       
   286     data[k]['avg'] = average(
       
   287         data[k]['total'])
       
   288     marks = data[k]['marks']
       
   289     sub_avg = average(marks, axis=0)
       
   290     sub_std = sqrt(sum(square(
       
   291         sub_avg[newaxis,:] - marks), axis=0) /
       
   292         len(marks))
       
   293     data[k]['sub_avg'] = sub_avg
       
   294     data[k]['sub_std'] = sub_std
       
   295   \end{lstlisting}
       
   296 \end{frame}
       
   297 
       
   298 \begin{frame}[fragile]
       
   299   \frametitle{New Concepts}
       
   300   \begin{itemize}
       
   301    \item Dictionaries
       
   302    \item Slicing lists
       
   303    \item New type of conditional
       
   304    \item NumPy arrays
       
   305    \item Slicing NumPy arrays
       
   306    \item NumPy array functions - square, average, sqrt
       
   307   \end{itemize}
       
   308 \end{frame}
   128 
   309 
   129 \begin{frame}[fragile]
   310 \begin{frame}[fragile]
   130 \frametitle{Least Squares Fit}
   311 \frametitle{Least Squares Fit}
   131 \vspace{-0.15in}
   312 \vspace{-0.15in}
   132 \begin{figure}
   313 \begin{figure}
   236 \begin{lstlisting}
   417 \begin{lstlisting}
   237 In []: plot(L, Tline)
   418 In []: plot(L, Tline)
   238 \end{lstlisting}
   419 \end{lstlisting}
   239 \end{frame}
   420 \end{frame}
   240 
   421 
   241 \begin{frame}
       
   242   \frametitle{Statistical Analysis and Parsing}
       
   243   Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
       
   244   \begin{itemize}
       
   245     \item Average total marks scored in each region
       
   246     \item Subject wise average score of each region
       
   247     \item \alert{??Subject wise average score for all regions combined??}
       
   248     \item Find the subject wise standard deviation of scores for each region
       
   249   \end{itemize}
       
   250 \end{frame}
       
   251 
       
   252 \begin{frame}
       
   253   \frametitle{Statistical Analysis and Parsing \ldots}
       
   254   Machinery Required -
       
   255   \begin{itemize}
       
   256     \item File reading and parsing
       
   257     \item NumPy arrays - sum by rows and sum by columns
       
   258     \item Dictionaries
       
   259   \end{itemize}
       
   260 \end{frame}
       
   261 
       
   262 \begin{frame}
       
   263   \frametitle{File reading and parsing}
       
   264   Understanding the structure of sslc1.txt
       
   265   \begin{itemize}
       
   266     \item Each line in the file, i.e.\ each row of the file, is a single record.
       
   267     \item Each record corresponds to a record of a single student
       
   268     \item Each record consists of several fields separated by a ';'
       
   269   \end{itemize}
       
   270 \end{frame}
       
   271 
       
   272 \begin{frame}
       
   273   \frametitle{File reading and parsing \ldots}
       
   274   Each record consists of:
       
   275   \begin{itemize}
       
   276     \item Region Code
       
   277     \item Roll Number
       
   278     \item Name
       
   279     \item Marks of 5 subjects
       
   280     \item Total marks
       
   281     \item Pass (P)
       
   282     \item Withdrawn (W)
       
   283     \item Fail (F)
       
   284   \end{itemize}
       
   285 \end{frame}
       
   286 
       
   287 \begin{frame}[fragile]
       
   288   \frametitle{File reading and parsing \ldots}
       
   289   \begin{lstlisting}
       
   290 for record in open('sslc1.txt'):
       
   291     fields = record.split(';')
       
   292   \end{lstlisting}
       
   293 \end{frame}
       
   294 
       
   295 \begin{frame}[fragile]
       
   296   \frametitle{Dictionary - Building parsed data}
       
   297   \begin{itemize}
       
   298     \item Let the parsed data be stored in dictionary \typ{data}
       
   299     \item \begin{lstlisting}
       
   300 data = {}  # is an empty dictionary
       
   301 \end{lstlisting}
       
   302     \item Index of a dictionary is called a \emph{key}
       
   303     \item \emph{Keys} of \typ{data} are strings - region codes
       
   304     \item Value of a \emph{key} can be any Python object
       
   305   \end{itemize}
       
   306 \end{frame}
       
   307 
       
   308 \begin{frame}[fragile]
       
   309   \frametitle{Dictionary - Building parsed data \ldots}
       
   310   \begin{itemize}
       
   311     \item In this problem let the value of a \emph{key} be another dictionary.
       
   312     \item This dictionary contains:
       
   313     \begin{itemize}
       
   314       \item 'marks': A \emph{List} of \emph{Lists} containing all marks
       
   315       \item 'total': A \emph{List} of total marks of each student
       
   316       \item 'P': Number of passes
       
   317       \item 'F': Number of failures
       
   318       \item 'W': Number of withdrawals
       
   319     \end{itemize}
       
   320   \end{itemize}
       
   321 \end{frame}
       
   322 
       
   323 \begin{frame}[fragile]
       
   324   \frametitle{Dictionary - Building parsed data \ldots}
       
   325   \small
       
   326   \begin{lstlisting}
       
   327 data = {}
       
   328 for record in open('sslc1.txt'):
       
   329     fields = record.split(';')
       
   330     if fields[0] not in data:
       
   331         data[fields[0]] = {
       
   332             'marks': [],
       
   333             'total': [],
       
   334             'P': 0,
       
   335             'F': 0,
       
   336             'W': 0
       
   337             }
       
   338   \end{lstlisting}
       
   339 \end{frame}
       
   340 
       
   341 \begin{frame}[fragile]
       
   342   \frametitle{Dictionary - Building parsed data \ldots}
       
   343   \begin{lstlisting}
       
   344 marks = []
       
   345 for field in fields[3:8]:
       
   346     score_str = field.strip()
       
   347     score = (0 if score_str == 'AA'
       
   348         or score_str == 'AAA'
       
   349         or score_str == ''
       
   350         else int(score_str))
       
   351     marks.append(score)
       
   352 
       
   353 data[fields[0]]['marks'].append(marks)
       
   354   \end{lstlisting}
       
   355 \end{frame}
       
   356 
       
   357 \begin{frame}[fragile]
       
   358   \frametitle{Dictionary - Building parsed data \ldots}
       
   359   \begin{lstlisting}
       
   360 total_str = fields[8].strip()
       
   361 total = (0 if total_str == 'AA'
       
   362     or total_str == 'AAA' or total_str == ''
       
   363     else int(total_str))
       
   364 data[fields[0]]['total'].append(total)
       
   365   \end{lstlisting}
       
   366 \end{frame}
       
   367 
       
   368 \begin{frame}[fragile]
       
   369   \frametitle{Dictionary - Building parsed data \ldots}
       
   370   \begin{lstlisting}
       
   371 pfw_key = (fields[9]
       
   372     or fields[10]
       
   373     or 'F')
       
   374 data[fields[0]][pfw_key] += 1
       
   375   \end{lstlisting}
       
   376 \end{frame}
       
   377 
       
   378 \begin{frame}[fragile]
       
   379   \frametitle{NumPy arrays}
       
   380   \centerline{\alert{But I lied!?!?!?}}
       
   381 \end{frame}
       
   382 
       
   383 \begin{frame}[fragile]
       
   384   \frametitle{Calculations}
       
   385   \begin{lstlisting}
       
   386 for k in data:
       
   387     data[k]['marks'] = array(
       
   388         data[k]['marks'])
       
   389     data[k]['total'] = array(
       
   390         data[k]['total'])
       
   391   \end{lstlisting}
       
   392 \end{frame}
       
   393 
       
   394 \begin{frame}[fragile]
       
   395   \frametitle{Calculations}
       
   396   \small
       
   397   \begin{lstlisting}
       
   398     data[k]['avg'] = average(
       
   399         data[k]['total'])
       
   400     marks = data[k]['marks']
       
   401     sub_avg = average(marks, axis=0)
       
   402     sub_std = sqrt(sum(square(
       
   403         sub_avg[newaxis,:] - marks), axis=0) /
       
   404         len(marks))
       
   405     data[k]['sub_avg'] = sub_avg
       
   406     data[k]['sub_std'] = sub_std
       
   407   \end{lstlisting}
       
   408 \end{frame}
       
   409 
       
   410 \begin{frame}[fragile]
       
   411   \frametitle{New Concepts}
       
   412   \begin{itemize}
       
   413    \item Dictionaries
       
   414    \item Slicing lists
       
   415    \item New type of conditional
       
   416    \item NumPy arrays
       
   417    \item Slicing NumPy arrays
       
   418    \item NumPy array functions - square, average, sqrt
       
   419   \end{itemize}
       
   420 \end{frame}
       
   421 
       
   422 \end{document}
   422 \end{document}