181 \end{frame} |
186 \end{frame} |
182 |
187 |
183 \begin{frame}[fragile] |
188 \begin{frame}[fragile] |
184 \frametitle{Dictionary - Building parsed data} |
189 \frametitle{Dictionary - Building parsed data} |
185 \begin{itemize} |
190 \begin{itemize} |
186 \item Let the parsed data be stored in dictionary \typ{data} |
191 \item Let the parsed data be stored in a list of dictionaries. |
187 \item \begin{lstlisting} |
192 \item d = \{\} is an empty dictionary |
188 data = {} # is an empty dictionary |
193 \end{itemize} |
189 \end{lstlisting} |
194 \end{frame} |
|
195 |
|
196 \begin{frame}[fragile] |
|
197 \frametitle{Dictionary - Building parsed data} |
|
198 \begin{lstlisting} |
|
199 ninety_percents = [{}, {}, {}, {}, {}] |
|
200 \end{lstlisting} |
|
201 \end{frame} |
|
202 |
|
203 \begin{frame}[fragile] |
|
204 \frametitle{Dictionary - Building parsed data} |
|
205 \begin{itemize} |
190 \item Index of a dictionary is called a \emph{key} |
206 \item Index of a dictionary is called a \emph{key} |
191 \item \emph{Keys} of \typ{data} are strings - region codes |
207 \item \emph{Keys} of these dictionaries are strings - region codes |
192 \item Value of a \emph{key} can be any Python object |
208 \end{itemize} |
193 \end{itemize} |
209 \end{frame} |
194 \end{frame} |
210 |
195 |
211 \begin{frame}[fragile] |
196 \begin{frame}[fragile] |
212 \frametitle{Dictionary - Building parsed data \ldots} |
197 \frametitle{Dictionary - Building parsed data...} |
213 \begin{itemize} |
198 \begin{itemize} |
214 \item Value of a \emph{key} can be any legal Python value |
199 \item In this problem let the value of a \emph{key} be another dictionary. |
215 \item In this problem let the value of a \emph{key} be an integer |
200 \item This dictionary contains: |
216 \item This dictionary contains: |
201 \begin{itemize} |
217 \end{itemize} |
202 \item 'marks': A \emph{List} of \emph{Lists} containing all marks |
218 'region code': Number of students who scored more than 90\% in this region for this subject |
203 \item 'total': A \emph{List} of total marks of each student |
219 \end{frame} |
204 \item 'P': Number of passes |
220 |
205 \item 'F': Number of failures |
221 \begin{frame}[fragile] |
206 \item 'W': Number of withdrawals |
222 \frametitle{Building parsed data \ldots} |
207 \end{itemize} |
223 \begin{lstlisting} |
208 \end{itemize} |
224 from pylab import * |
209 \end{frame} |
225 |
210 |
226 ninety_percents = [{}, {}, {}, {}, {}] |
211 \begin{frame}[fragile] |
227 |
212 \frametitle{Dictionary - Building parsed data \ldots} |
228 for record in open('sslc1.txt'): |
|
229 record = record.strip() |
|
230 fields = record.split(';') |
|
231 |
|
232 region_code = fields[0].strip() |
|
233 \end{lstlisting} |
|
234 \end{frame} |
|
235 |
|
236 \begin{frame}[fragile] |
|
237 \frametitle{Building parsed data \ldots} |
213 \small |
238 \small |
214 \begin{lstlisting} |
239 \begin{lstlisting} |
215 data = {} |
240 for i, field in enumerate(fields[3:8]): |
216 for record in open('sslc1.txt'): |
241 |
217 fields = record.split(';') |
242 if region_code not in ninety_percents[i]: |
218 if fields[0] not in data: |
243 ninety_percents[i][region_code] = 0 |
219 data[fields[0]] = { |
244 |
220 'marks': [], |
|
221 'total': [], |
|
222 'P': 0, |
|
223 'F': 0, |
|
224 'W': 0 |
|
225 } |
|
226 \end{lstlisting} |
|
227 \end{frame} |
|
228 |
|
229 \begin{frame}[fragile] |
|
230 \frametitle{Dictionary - Building parsed data \ldots} |
|
231 \begin{lstlisting} |
|
232 marks = [] |
|
233 for field in fields[3:8]: |
|
234 score_str = field.strip() |
245 score_str = field.strip() |
235 score = 0 if score_str == 'AA' |
246 |
236 or score_str == 'AAA' |
247 score = 0 if score_str == 'AA' else |
237 or score_str == '' |
248 int(score_str) |
238 else int(score_str) |
249 if score > 90: |
239 marks.append(score) |
250 ninety_percents[i][region_code] += 1 |
240 |
251 \end{lstlisting} |
241 data[fields[0]]['marks'].append(marks) |
252 \end{frame} |
242 \end{lstlisting} |
253 |
243 \end{frame} |
254 \begin{frame}[fragile] |
244 |
255 \frametitle{Consolidating data} |
245 \begin{frame}[fragile] |
256 \begin{lstlisting} |
246 \frametitle{Dictionary - Building parsed data \ldots} |
257 subj_total = [] |
247 \begin{lstlisting} |
258 for subject in ninety_percents: |
248 total = 0 if score_str == 'AA' |
259 subj_total.append(sum( |
249 or score_str == 'AAA' |
260 subject.values())) |
250 or score_str == '' |
261 \end{lstlisting} |
251 else int(fields[8]) |
262 \end{frame} |
252 data[fields[0]]['total'].append(total) |
263 |
253 \end{lstlisting} |
264 \begin{frame}[fragile] |
254 \end{frame} |
265 \frametitle{Pie charts} |
255 |
|
256 \begin{frame}[fragile] |
|
257 \frametitle{Dictionary - Building parsed data \ldots} |
|
258 \begin{lstlisting} |
|
259 pfw_key = fields[9] |
|
260 or fields[10] |
|
261 or 'F' |
|
262 data[fields[0]][pfw_key] += 1 |
|
263 \end{lstlisting} |
|
264 \end{frame} |
|
265 |
|
266 \begin{frame}[fragile] |
|
267 \frametitle{NumPy arrays} |
|
268 \centerline{\alert{But I lied!?!?!?}} |
|
269 \end{frame} |
|
270 |
|
271 \begin{frame}[fragile] |
|
272 \frametitle{Calculations} |
|
273 \begin{lstlisting} |
|
274 for k in data: |
|
275 data[k]['marks'] = array( |
|
276 data[k]['marks']) |
|
277 data[k]['total'] = array( |
|
278 data[k]['total']) |
|
279 \end{lstlisting} |
|
280 \end{frame} |
|
281 |
|
282 \begin{frame}[fragile] |
|
283 \frametitle{Calculations} |
|
284 \small |
266 \small |
285 \begin{lstlisting} |
267 \begin{lstlisting} |
286 data[k]['avg'] = average( |
268 figure(1) |
287 data[k]['total']) |
269 pie(ninety_percents[4].values(), |
288 marks = data[k]['marks'] |
270 labels=ninety_percents[1].keys()) |
289 sub_avg = average(marks, axis=1) |
271 title('Students scoring 90% and above |
290 sub_std = sqrt(sum(square( |
272 in science by region') |
291 sub_avg[:,newaxis] - marks), axis=0) / |
273 savefig('/tmp/science.png') |
292 len(marks)) |
274 \end{lstlisting} |
293 data[k]['sub_avg'] = sub_avg |
275 \begin{columns} |
294 data[k]['sub_std'] = sub_std |
276 \column{5.25\textwidth} |
295 \end{lstlisting} |
277 \hspace*{1.1in} |
296 \end{frame} |
278 \includegraphics[height=2in, interpolate=true]{data/science} |
297 |
279 \column{0.8\textwidth} |
298 \begin{frame}[fragile] |
280 \end{columns} |
299 \frametitle{New Concepts} |
281 \end{frame} |
300 \begin{itemize} |
282 |
301 \item Dictionaries |
283 \begin{frame}[fragile] |
302 \item Slicing lists |
284 \frametitle{Pie charts} |
303 \item New type of conditional |
285 \begin{lstlisting} |
304 \item NumPy arrays |
286 figure(2) |
305 \item Slicing NumPy arrays |
287 pie(subj_total, labels=['English', |
306 \item NumPy array functions - square, average, sqrt |
288 'Hindi', 'Maths', 'Science', |
307 \end{itemize} |
289 'Social']) |
|
290 title('Students scoring more than |
|
291 90% by subject(All regions |
|
292 combined).') |
|
293 savefig('/tmp/all_regions.png') |
|
294 \end{lstlisting} |
|
295 \end{frame} |
|
296 |
|
297 \begin{frame}[fragile] |
|
298 \frametitle{Pie charts} |
|
299 \includegraphics[height=3in, interpolate=true]{data/all_regions} |
308 \end{frame} |
300 \end{frame} |
309 |
301 |
310 \begin{frame}[fragile] |
302 \begin{frame}[fragile] |
311 \frametitle{Dealing with data whole-sale} |
303 \frametitle{Dealing with data whole-sale} |
312 \begin{lstlisting} |
304 \begin{lstlisting} |