day1/session3.tex
changeset 249 135062d6f91f
parent 240 5a96cf81bdc5
child 252 359f77927931
--- a/day1/session3.tex	Wed Oct 28 20:31:29 2009 +0530
+++ b/day1/session3.tex	Wed Oct 28 20:32:06 2009 +0530
@@ -126,7 +126,7 @@
 %%   % You might wish to add the option [pausesections]
 %% \end{frame}
 
-\section{Statistics}
+\section{Processing voluminous data}
 \begin{frame}
   \frametitle{More on data processing}
   \begin{block}{}
@@ -136,13 +136,31 @@
 
 
 \begin{frame}
-  \frametitle{Statistical Analysis and Parsing}
-  Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
-  \begin{itemize}
-    \item Draw a pie chart representing the number of students who scored more than 90\% in Science per region.
-    \item Draw a pie chart representing the number of students who scored more than 90\% per subject(All regions combined).
+  \frametitle{Statistical Analysis: Problem statement}
+  Read the data supplied in \emph{sslc1.txt} and carry out the following:
+  \begin{enumerate}
+    \item Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.
+    \item Draw a pie chart representing the proportion of students who scored more than 90\% in each subject across regions.
     \item Print mean, median, mode and standard deviation of math scores for all regions combined.
-  \end{itemize}
+  \end{enumerate}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Problem statement: explanation}
+    \emphbar{Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.}
+    \begin{enumerate}
+      \item Complete (100\%) data -- Number of students who scored more than 90\% in Science
+      \item Each slice -- Number of students who scored more than 90\% in Science in one region
+    \end{enumerate}
+\end{frame}
+
+\begin{frame}
+  \frametitle{Problem statement: explanation}
+    \emphbar{Draw a pie chart representing the proportion of students who scored more than 90\% in each subject across regions.}
+    \begin{enumerate}
+      \item Complete (100\%) data -- Number of students who scored more than 90\% across all regions
+      \item Each slice -- Number of students who scored more than 90\% in one subject across all regions
+    \end{enumerate}
 \end{frame}
 
 \begin{frame}
@@ -161,9 +179,9 @@
   \frametitle{File reading and parsing}
   Understanding the structure of sslc1.txt
   \begin{itemize}
-    \item Each line in the file corresponds to one student's details
+    \item One line in the file corresponds to a student's details
     \item aka record
-    \item Each record consists of several fields separated by a ';'
+    \item Each record consists of fields separated by ';'
   \end{itemize}
 \end{frame}
 
@@ -235,6 +253,19 @@
 \end{frame}
 
 \begin{frame}[fragile]
+  \frametitle{Back to lists: Iterating}
+  \begin{itemize}
+    \item Python's \kwrd{for} loop iterates through list items
+    \item In other languages (C/C++) we run through indices and pick items from the array using these indices
+    \item In Python, while iterating through list items, the current position is not available
+  \end{itemize}
+  \begin{block}{Iterating through indices}
+    What if we want the index of an item of a list?
+  \end{block}
+
+\end{frame}
+
+\begin{frame}[fragile]
   \frametitle{enumerate: Iterating through list indices}
   \begin{lstlisting}
 In [1]: names = ["Guido","Alex", "Tim"]
@@ -250,7 +281,7 @@
 \end{frame}
 
 \begin{frame}[fragile]
-  \frametitle{Dictionary: Building parsed data}
+  \frametitle{Continuing with our Dictionary}
   Let our dictionary be:
   \begin{lstlisting}
 science = {} # is an empty dictionary
@@ -296,7 +327,7 @@
   \end{lstlisting}
 \end{frame}
 
-\subsection{Visualizing the data}
+\subsection{Visualizing data}
 \begin{frame}[fragile]
   \frametitle{Pie charts}
   \small
@@ -388,7 +419,59 @@
   \includegraphics[height=3in, interpolate=true]{data/all_regions}
 \end{frame}
 
-\subsection{Obtaining stastics}
+\subsection{Obtaining statistics}
+\begin{frame}[fragile]
+  \frametitle{Obtaining statistics}
+  \begin{block}{Statistics: Mean}
+    Obtain the mean of Math scores
+  \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Obtaining statistics: Solution}
+  \begin{block}{Statistics: Mean}
+    Obtain the mean of Math scores
+  \end{block}
+  \begin{lstlisting}
+math_scores = scores[2]
+total = 0
+for i, score in enumerate(math_scores):
+    total += score
+
+mean = total / (i + 1)
+print "Mean: ", mean
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Obtaining statistics: Another solution}
+  \begin{block}{Statistics: Mean}
+    Obtain the mean of Math scores
+  \end{block}
+  \begin{lstlisting}
+math_scores = scores[2]
+mean = sum(math_scores) / \
+          len(math_scores)
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+\frametitle{NumPy arrays}
+  \begin{itemize}
+    \item NumPy provides arrays
+    \item Arrays are very efficient and powerful
+    \item Very easy to perform element-wise operations - \typ{+, -, *, /, \%}
+    \begin{lstlisting}
+In [1]: a = array([1, 2, 3])
+In [2]: b = array([4, 5, 6])
+
+In [3]: a + b
+Out[3]: array([5, 7, 9])
+    \end{lstlisting}
+    \item Very easy to compute statistics
+  \end{itemize}
+\end{frame}
+
 \begin{frame}[fragile]
   \frametitle{Obtaining statistics}
   \begin{lstlisting}
@@ -416,6 +499,7 @@
   \end{itemize}
 \end{frame}
 
+\section{Least squares fit}
 \begin{frame}
 \frametitle{L vs $T^2$ \ldots}
 Let's go back to the L vs $T^2$ plot
@@ -444,17 +528,6 @@
 \end{frame}
 
 \begin{frame}[fragile]
-\frametitle{Arrays}
-\begin{itemize}
-\item \typ{T} and \typ{L} are now arrays
-\item arrays are very efficient and powerful 
-\item Very easy to perform element-wise operations
-\item \typ{+, -, *, /, \%}
-\item More about arrays later
-\end{itemize}
-\end{frame}
-
-\begin{frame}[fragile]
 \frametitle{Least Squares Fit}
 \vspace{-0.15in}
 \begin{figure}
@@ -508,6 +581,7 @@
 \end{itemize}
 \end{frame}
 
+\subsection{Vandermonde matrix generation}
 \begin{frame}[fragile]
 \frametitle{Van der Monde Matrix}
 \begin{itemize}
@@ -539,6 +613,7 @@
 \end{lstlisting}
 \end{frame}
 
+\subsection{Plotting}
 \begin{frame}[fragile]
 \frametitle{Least Square Fit Line \ldots}
 We get the points of the line from \typ{coef}
@@ -553,4 +628,13 @@
 \end{lstlisting}
 \end{frame}
 
+\begin{frame}[fragile]
+  \frametitle{What did we learn?}
+  \begin{itemize}
+   \item Least squares fit
+   \item Vandermonde matrix generation
+   \item Plotting the least squares fit curve
+  \end{itemize}
+\end{frame}
+
 \end{document}