--- a/day1/session3.tex Wed Oct 28 20:31:29 2009 +0530
+++ b/day1/session3.tex Wed Oct 28 20:32:06 2009 +0530
@@ -126,7 +126,7 @@
%% % You might wish to add the option [pausesections]
%% \end{frame}
-\section{Statistics}
+\section{Processing voluminous data}
\begin{frame}
\frametitle{More on data processing}
\begin{block}{}
@@ -136,13 +136,31 @@
\begin{frame}
- \frametitle{Statistical Analysis and Parsing}
- Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
- \begin{itemize}
- \item Draw a pie chart representing the number of students who scored more than 90\% in Science per region.
- \item Draw a pie chart representing the number of students who scored more than 90\% per subject(All regions combined).
+ \frametitle{Statistical Analysis: Problem statement}
+ Read the data supplied in \emph{sslc1.txt} and carry out the following:
+ \begin{enumerate}
+ \item Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.
+ \item Draw a pie chart representing the proportion of students who scored more than 90\% in each subject across regions.
\item Print mean, median, mode and standard deviation of math scores for all regions combined.
- \end{itemize}
+ \end{enumerate}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Problem statement: explanation}
+ \emphbar{Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.}
+ \begin{enumerate}
+ \item Complete (100\%) data --- Number of students who scored more than 90\% in Science
+ \item Each slice --- Number of students who scored more than 90\% in Science in one region
+ \end{enumerate}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Problem statement: explanation}
+ \emphbar{Draw a pie chart representing the proportion of students who scored more than 90\% in each subject across regions.}
+ \begin{enumerate}
+ \item Complete (100\%) data --- Number of students who scored more than 90\% across all regions
+ \item Each slice --- Number of students who scored more than 90\% in one subject across all regions
+ \end{enumerate}
\end{frame}
\begin{frame}
@@ -161,9 +179,9 @@
\frametitle{File reading and parsing}
Understanding the structure of sslc1.txt
\begin{itemize}
- \item Each line in the file corresponds to one student's details
+ \item One line in the file corresponds to one student's details
\item aka record
- \item Each record consists of several fields separated by a ';'
+ \item Each record consists of fields separated by ';'
\end{itemize}
\end{frame}
@@ -235,6 +253,19 @@
\end{frame}
\begin{frame}[fragile]
+ \frametitle{Back to lists: Iterating}
+ \begin{itemize}
+ \item Python's \kwrd{for} loop iterates through list items
+ \item In other languages (C/C++) we run through indices and pick items from the array using these indices
+ \item In Python, while iterating through list items, the current position is not available
+ \end{itemize}
+ \begin{block}{Iterating through indices}
+ What if we want the index of an item of a list?
+ \end{block}
+
+\end{frame}
+
+\begin{frame}[fragile]
\frametitle{enumerate: Iterating through list indices}
\begin{lstlisting}
In [1]: names = ["Guido","Alex", "Tim"]
@@ -250,7 +281,7 @@
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary: Building parsed data}
+ \frametitle{Continuing with our Dictionary}
Let our dictionary be:
\begin{lstlisting}
science = {} # is an empty dictionary
@@ -296,7 +327,7 @@
\end{lstlisting}
\end{frame}
-\subsection{Visualizing the data}
+\subsection{Visualizing data}
\begin{frame}[fragile]
\frametitle{Pie charts}
\small
@@ -388,7 +419,59 @@
\includegraphics[height=3in, interpolate=true]{data/all_regions}
\end{frame}
-\subsection{Obtaining stastics}
+\subsection{Obtaining statistics}
+\begin{frame}[fragile]
+ \frametitle{Obtaining statistics}
+ \begin{block}{Statistics: Mean}
+ Obtain the mean of Math scores
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Obtaining statistics: Solution}
+ \begin{block}{Statistics: Mean}
+ Obtain the mean of Math scores
+ \end{block}
+ \begin{lstlisting}
+math_scores = scores[2]
+total = 0
+for i, score in enumerate(math_scores):
+ total += score
+
+mean = total / (i + 1)
+print "Mean: ", mean
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Obtaining statistics: Another solution}
+ \begin{block}{Statistics: Mean}
+ Obtain the mean of Math scores
+ \end{block}
+ \begin{lstlisting}
+math_scores = scores[2]
+mean = (sum(math_scores) /
+        len(math_scores))
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+\frametitle{NumPy arrays}
+ \begin{itemize}
+ \item NumPy provides arrays
+ \item arrays are very efficient and powerful
+ \item Very easy to perform element-wise operations: \typ{+, -, *, /, \%}
+ \begin{lstlisting}
+In [1]: a = array([1, 2, 3])
+In [2]: b = array([4, 5, 6])
+
+In [3]: a + b
+Out[3]: array([5, 7, 9])
+ \end{lstlisting}
+ \item Very easy to compute statistics
+ \end{itemize}
+\end{frame}
+
\begin{frame}[fragile]
\frametitle{Obtaining statistics}
\begin{lstlisting}
@@ -416,6 +499,7 @@
\end{itemize}
\end{frame}
+\section{Least squares fit}
\begin{frame}
\frametitle{L vs $T^2$ \ldots}
Let's go back to the L vs $T^2$ plot
@@ -444,17 +528,6 @@
\end{frame}
\begin{frame}[fragile]
-\frametitle{Arrays}
-\begin{itemize}
-\item \typ{T} and \typ{L} are now arrays
-\item arrays are very efficient and powerful
-\item Very easy to perform element-wise operations
-\item \typ{+, -, *, /, \%}
-\item More about arrays later
-\end{itemize}
-\end{frame}
-
-\begin{frame}[fragile]
\frametitle{Least Squares Fit}
\vspace{-0.15in}
\begin{figure}
@@ -508,6 +581,7 @@
\end{itemize}
\end{frame}
+\subsection{Van der Monde matrix generation}
\begin{frame}[fragile]
\frametitle{Van der Monde Matrix}
\begin{itemize}
@@ -539,6 +613,7 @@
\end{lstlisting}
\end{frame}
+\subsection{Plotting}
\begin{frame}[fragile]
\frametitle{Least Square Fit Line \ldots}
We get the points of the line from \typ{coef}
@@ -553,4 +628,13 @@
\end{lstlisting}
\end{frame}
+\begin{frame}[fragile]
+ \frametitle{What did we learn?}
+ \begin{itemize}
+ \item Least squares fit
+ \item Van der Monde matrix generation
+ \item Plotting the least squares fit curve
+ \end{itemize}
+\end{frame}
+
\end{document}