123 %% \begin{frame} |
123 %% \begin{frame} |
124 %% \frametitle{Outline} |
124 %% \frametitle{Outline} |
125 %% \tableofcontents |
125 %% \tableofcontents |
126 %% % You might wish to add the option [pausesections] |
126 %% % You might wish to add the option [pausesections] |
127 %% \end{frame} |
127 %% \end{frame} |
|
128 |
|
129 \begin{frame} |
|
130 \frametitle{Statistical Analysis and Parsing} |
|
131 Read the data supplied in \emph{sslc1.txt} and obtain the following statistics: |
|
132 \begin{itemize} |
|
133 \item Average total marks scored in each region |
|
134 \item Subject wise average score of each region |
|
135 \item \alert{??Subject wise average score for all regions combined??} |
|
136 \item Find the subject wise standard deviation of scores for each region |
|
137 \end{itemize} |
|
138 \end{frame} |
|
139 |
|
140 \begin{frame} |
|
141 \frametitle{Statistical Analysis and Parsing \ldots} |
|
142 Machinery Required - |
|
143 \begin{itemize} |
|
144 \item File reading and parsing |
|
145 \item NumPy arrays - sum by rows and sum by columns |
|
146 \item Dictionaries |
|
147 \end{itemize} |
|
148 \end{frame} |
|
149 |
|
150 \begin{frame} |
|
151 \frametitle{File reading and parsing} |
|
152 Understanding the structure of sslc1.txt |
|
153 \begin{itemize} |
|
154 \item Each line in the file, i.e.\ each row of the file, is a single record. |
|
155 \item Each record corresponds to a record of a single student |
|
156 \item Each record consists of several fields separated by a ';' |
|
157 \end{itemize} |
|
158 \end{frame} |
|
159 |
|
160 \begin{frame} |
|
161 \frametitle{File reading and parsing \ldots} |
|
162 Each record consists of: |
|
163 \begin{itemize} |
|
164 \item Region Code |
|
165 \item Roll Number |
|
166 \item Name |
|
167 \item Marks of 5 subjects |
|
168 \item Total marks |
|
169 \item Pass (P) |
|
170 \item Withdrawn (W) |
|
171 \item Fail (F) |
|
172 \end{itemize} |
|
173 \end{frame} |
|
174 |
|
175 \begin{frame}[fragile] |
|
176 \frametitle{File reading and parsing \ldots} |
|
177 \begin{lstlisting} |
|
178 for record in open('sslc1.txt'): |
|
179 fields = record.split(';') |
|
180 \end{lstlisting} |
|
181 \end{frame} |
|
182 |
|
183 \begin{frame}[fragile] |
|
184 \frametitle{Dictionary - Building parsed data} |
|
185 \begin{itemize} |
|
186 \item Let the parsed data be stored in dictionary \typ{data} |
|
187 \item \begin{lstlisting} |
|
188 data = {} # is an empty dictionary |
|
189 \end{lstlisting} |
|
190 \item Index of a dictionary is called a \emph{key} |
|
191 \item \emph{Keys} of \typ{data} are strings - region codes |
|
192 \item Value of a \emph{key} can be any Python object |
|
193 \end{itemize} |
|
194 \end{frame} |
|
195 |
|
196 \begin{frame}[fragile] |
|
197 \frametitle{Dictionary - Building parsed data \ldots} |
|
198 \begin{itemize} |
|
199 \item In this problem let the value of a \emph{key} be another dictionary. |
|
200 \item This dictionary contains: |
|
201 \begin{itemize} |
|
202 \item 'marks': A \emph{List} of \emph{Lists} containing all marks |
|
203 \item 'total': A \emph{List} of total marks of each student |
|
204 \item 'P': Number of passes |
|
205 \item 'F': Number of failures |
|
206 \item 'W': Number of withdrawals |
|
207 \end{itemize} |
|
208 \end{itemize} |
|
209 \end{frame} |
|
210 |
|
211 \begin{frame}[fragile] |
|
212 \frametitle{Dictionary - Building parsed data \ldots} |
|
213 \small |
|
214 \begin{lstlisting} |
|
215 data = {} |
|
216 for record in open('sslc1.txt'): |
|
217 fields = record.split(';') |
|
218 if fields[0] not in data: |
|
219 data[fields[0]] = { |
|
220 'marks': [], |
|
221 'total': [], |
|
222 'P': 0, |
|
223 'F': 0, |
|
224 'W': 0 |
|
225 } |
|
226 \end{lstlisting} |
|
227 \end{frame} |
|
228 |
|
229 \begin{frame}[fragile] |
|
230 \frametitle{Dictionary - Building parsed data \ldots} |
|
231 \begin{lstlisting} |
|
232 marks = [] |
|
233 for field in fields[3:8]: |
|
234 score_str = field.strip() |
|
235 score = 0 if score_str == 'AA' |
|
236 or score_str == 'AAA' |
|
237 or score_str == '' |
|
238 else int(score_str) |
|
239 marks.append(score) |
|
240 |
|
241 data[fields[0]]['marks'].append(marks) |
|
242 \end{lstlisting} |
|
243 \end{frame} |
|
244 |
|
245 \begin{frame}[fragile] |
|
246 \frametitle{Dictionary - Building parsed data \ldots} |
|
247 \begin{lstlisting} |
|
248 total = 0 if score_str == 'AA' |
|
249 or score_str == 'AAA' |
|
250 or score_str == '' |
|
251 else int(fields[8]) |
|
252 data[fields[0]]['total'].append(total) |
|
253 \end{lstlisting} |
|
254 \end{frame} |
|
255 |
|
256 \begin{frame}[fragile] |
|
257 \frametitle{Dictionary - Building parsed data \ldots} |
|
258 \begin{lstlisting} |
|
259 pfw_key = fields[9] |
|
260 or fields[10] |
|
261 or 'F' |
|
262 data[fields[0]][pfw_key] += 1 |
|
263 \end{lstlisting} |
|
264 \end{frame} |
|
265 |
|
266 \begin{frame}[fragile] |
|
267 \frametitle{NumPy arrays} |
|
268 \centerline{\alert{But I lied!?!?!?}} |
|
269 \end{frame} |
|
270 |
|
271 \begin{frame}[fragile] |
|
272 \frametitle{Calculations} |
|
273 \begin{lstlisting} |
|
274 for k in data: |
|
275 data[k]['marks'] = array( |
|
276 data[k]['marks']) |
|
277 data[k]['total'] = array( |
|
278 data[k]['total']) |
|
279 \end{lstlisting} |
|
280 \end{frame} |
|
281 |
|
282 \begin{frame}[fragile] |
|
283 \frametitle{Calculations} |
|
284 \small |
|
285 \begin{lstlisting} |
|
286 data[k]['avg'] = average( |
|
287 data[k]['total']) |
|
288 marks = data[k]['marks'] |
|
289 sub_avg = average(marks, axis=1) |
|
290 sub_std = sqrt(sum(square( |
|
291 sub_avg[:,newaxis] - marks), axis=0) / |
|
292 len(marks)) |
|
293 data[k]['sub_avg'] = sub_avg |
|
294 data[k]['sub_std'] = sub_std |
|
295 \end{lstlisting} |
|
296 \end{frame} |
|
297 |
|
298 \begin{frame}[fragile] |
|
299 \frametitle{New Concepts} |
|
300 \begin{itemize} |
|
301 \item Dictionaries |
|
302 \item Slicing lists |
|
303 \item New type of conditional |
|
304 \item NumPy arrays |
|
305 \item Slicing NumPy arrays |
|
306 \item NumPy array functions - square, average, sqrt |
|
307 \end{itemize} |
|
308 \end{frame} |
128 |
309 |
129 \begin{frame}[fragile] |
310 \begin{frame}[fragile] |
130 \frametitle{Least Squares Fit} |
311 \frametitle{Least Squares Fit} |
131 \vspace{-0.15in} |
312 \vspace{-0.15in} |
132 \begin{figure} |
313 \begin{figure} |
236 \begin{lstlisting} |
417 \begin{lstlisting} |
237 In []: plot(L, Tline) |
418 In []: plot(L, Tline) |
238 \end{lstlisting} |
419 \end{lstlisting} |
239 \end{frame} |
420 \end{frame} |
240 |
421 |
241 \begin{frame} |
|
242 \frametitle{Statistical Analysis and Parsing} |
|
243 Read the data supplied in \emph{sslc1.txt} and obtain the following statistics: |
|
244 \begin{itemize} |
|
245 \item Average total marks scored in each region |
|
246 \item Subject wise average score of each region |
|
247 \item \alert{??Subject wise average score for all regions combined??} |
|
248 \item Find the subject wise standard deviation of scores for each region |
|
249 \end{itemize} |
|
250 \end{frame} |
|
251 |
|
252 \begin{frame} |
|
253 \frametitle{Statistical Analysis and Parsing \ldots} |
|
254 Machinery Required - |
|
255 \begin{itemize} |
|
256 \item File reading and parsing |
|
257 \item NumPy arrays - sum by rows and sum by columns |
|
258 \item Dictionaries |
|
259 \end{itemize} |
|
260 \end{frame} |
|
261 |
|
262 \begin{frame} |
|
263 \frametitle{File reading and parsing} |
|
264 Understanding the structure of sslc1.txt |
|
265 \begin{itemize} |
|
266 \item Each line in the file, i.e.\ each row of the file, is a single record. |
|
267 \item Each record corresponds to a record of a single student |
|
268 \item Each record consists of several fields separated by a ';' |
|
269 \end{itemize} |
|
270 \end{frame} |
|
271 |
|
272 \begin{frame} |
|
273 \frametitle{File reading and parsing \ldots} |
|
274 Each record consists of: |
|
275 \begin{itemize} |
|
276 \item Region Code |
|
277 \item Roll Number |
|
278 \item Name |
|
279 \item Marks of 5 subjects |
|
280 \item Total marks |
|
281 \item Pass (P) |
|
282 \item Withdrawn (W) |
|
283 \item Fail (F) |
|
284 \end{itemize} |
|
285 \end{frame} |
|
286 |
|
287 \begin{frame}[fragile] |
|
288 \frametitle{File reading and parsing \ldots} |
|
289 \begin{lstlisting} |
|
290 for record in open('sslc1.txt'): |
|
291 fields = record.split(';') |
|
292 \end{lstlisting} |
|
293 \end{frame} |
|
294 |
|
295 \begin{frame}[fragile] |
|
296 \frametitle{Dictionary - Building parsed data} |
|
297 \begin{itemize} |
|
298 \item Let the parsed data be stored in dictionary \typ{data} |
|
299 \item \begin{lstlisting} |
|
300 data = {} # is an empty dictionary |
|
301 \end{lstlisting} |
|
302 \item Index of a dictionary is called a \emph{key} |
|
303 \item \emph{Keys} of \typ{data} are strings - region codes |
|
304 \item Value of a \emph{key} can be any Python object |
|
305 \end{itemize} |
|
306 \end{frame} |
|
307 |
|
308 \begin{frame}[fragile] |
|
309 \frametitle{Dictionary - Building parsed data \ldots} |
|
310 \begin{itemize} |
|
311 \item In this problem let the value of a \emph{key} be another dictionary. |
|
312 \item This dictionary contains: |
|
313 \begin{itemize} |
|
314 \item 'marks': A \emph{List} of \emph{Lists} containing all marks |
|
315 \item 'total': A \emph{List} of total marks of each student |
|
316 \item 'P': Number of passes |
|
317 \item 'F': Number of failures |
|
318 \item 'W': Number of withdrawals |
|
319 \end{itemize} |
|
320 \end{itemize} |
|
321 \end{frame} |
|
322 |
|
323 \begin{frame}[fragile] |
|
324 \frametitle{Dictionary - Building parsed data \ldots} |
|
325 \small |
|
326 \begin{lstlisting} |
|
327 data = {} |
|
328 for record in open('sslc1.txt'): |
|
329 fields = record.split(';') |
|
330 if fields[0] not in data: |
|
331 data[fields[0]] = { |
|
332 'marks': [], |
|
333 'total': [], |
|
334 'P': 0, |
|
335 'F': 0, |
|
336 'W': 0 |
|
337 } |
|
338 \end{lstlisting} |
|
339 \end{frame} |
|
340 |
|
341 \begin{frame}[fragile] |
|
342 \frametitle{Dictionary - Building parsed data \ldots} |
|
343 \begin{lstlisting} |
|
344 marks = [] |
|
345 for field in fields[3:8]: |
|
346 score_str = field.strip() |
|
347 score = 0 if score_str == 'AA' |
|
348 or score_str == 'AAA' |
|
349 or score_str == '' |
|
350 else int(score_str) |
|
351 marks.append(score) |
|
352 |
|
353 data[fields[0]]['marks'].append(marks) |
|
354 \end{lstlisting} |
|
355 \end{frame} |
|
356 |
|
357 \begin{frame}[fragile] |
|
358 \frametitle{Dictionary - Building parsed data \ldots} |
|
359 \begin{lstlisting} |
|
360 total = 0 if score_str == 'AA' |
|
361 or score_str == 'AAA' |
|
362 or score_str == '' |
|
363 else int(fields[8]) |
|
364 data[fields[0]]['total'].append(total) |
|
365 \end{lstlisting} |
|
366 \end{frame} |
|
367 |
|
368 \begin{frame}[fragile] |
|
369 \frametitle{Dictionary - Building parsed data \ldots} |
|
370 \begin{lstlisting} |
|
371 pfw_key = fields[9] |
|
372 or fields[10] |
|
373 or 'F' |
|
374 data[fields[0]][pfw_key] += 1 |
|
375 \end{lstlisting} |
|
376 \end{frame} |
|
377 |
|
378 \begin{frame}[fragile] |
|
379 \frametitle{NumPy arrays} |
|
380 \centerline{\alert{But I lied!?!?!?}} |
|
381 \end{frame} |
|
382 |
|
383 \begin{frame}[fragile] |
|
384 \frametitle{Calculations} |
|
385 \begin{lstlisting} |
|
386 for k in data: |
|
387 data[k]['marks'] = array( |
|
388 data[k]['marks']) |
|
389 data[k]['total'] = array( |
|
390 data[k]['total']) |
|
391 \end{lstlisting} |
|
392 \end{frame} |
|
393 |
|
394 \begin{frame}[fragile] |
|
395 \frametitle{Calculations} |
|
396 \small |
|
397 \begin{lstlisting} |
|
398 data[k]['avg'] = average( |
|
399 data[k]['total']) |
|
400 marks = data[k]['marks'] |
|
401 sub_avg = average(marks, axis=1) |
|
402 sub_std = sqrt(sum(square( |
|
403 sub_avg[:,newaxis] - marks), axis=0) / |
|
404 len(marks)) |
|
405 data[k]['sub_avg'] = sub_avg |
|
406 data[k]['sub_std'] = sub_std |
|
407 \end{lstlisting} |
|
408 \end{frame} |
|
409 |
|
410 \begin{frame}[fragile] |
|
411 \frametitle{New Concepts} |
|
412 \begin{itemize} |
|
413 \item Dictionaries |
|
414 \item Slicing lists |
|
415 \item New type of conditional |
|
416 \item NumPy arrays |
|
417 \item Slicing NumPy arrays |
|
418 \item NumPy array functions - square, average, sqrt |
|
419 \end{itemize} |
|
420 \end{frame} |
|
421 |
|
422 \end{document} |
422 \end{document} |