# HG changeset patch # User Santosh G. Vattam # Date 1256742207 -19800 # Node ID 63cf968627a37a770817e81ce1fd79394df6e8f9 # Parent cc2424a28d249026cd3b7d4d3cea9ab58181ce57# Parent 359f779279313a399972ab6f89f49eddda74f867 Merged branches. diff -r cc2424a28d24 -r 63cf968627a3 day1/session1.tex --- a/day1/session1.tex Wed Oct 28 20:32:29 2009 +0530 +++ b/day1/session1.tex Wed Oct 28 20:33:27 2009 +0530 @@ -159,27 +159,23 @@ \end{itemize} \end{block} - \begin{block}{Goal} - Successful participants will be able to - \begin{itemize} - \item use Python as their scripting and problem solving language. - \item train the students to use Python for the same - \end{itemize} + \begin{block}{Goal: Successful participants will be able to} + \begin{itemize} + \item Use Python as a plotting and computational toolkit + \item Understand how Python can be used as a scripting and problem-solving language. + \item Train the students to use Python for the same + \end{itemize} \end{block} \end{frame} - +\section{Getting started} \begin{frame} -\frametitle{Bucketlist} - \begin{block}{IPython} - Type ipython at the command line. Is it available? - \end{block} - \begin{block}{Editor} - We recommend scite. - \end{block} - \begin{block}{Data files} - Make sure you have all data files. - \end{block} +\frametitle{Checklist} + \begin{enumerate} + \item IPython: Type ipython at the command line. Is it available? + \item Editor: We recommend scite. + \item Data files: Make sure you have all data files. 
+ \end{enumerate} \end{frame} \begin{frame}[fragile] @@ -215,6 +211,9 @@ \end{lstlisting} \end{frame} +\section{Plotting} + +\subsection{Drawing plots} \begin{frame}[fragile] \frametitle{First Plot} \begin{columns} @@ -249,6 +248,7 @@ \end{block} \end{frame} +\subsection{Decoration} \begin{frame}[fragile] \frametitle{Adding Labels} \begin{columns} @@ -287,6 +287,7 @@ \emphbar{By default plots would be overlaid!} \end{frame} +\subsection{More decoration} \begin{frame}[fragile] \frametitle{Title and Legends} \vspace*{-0.15in} @@ -358,6 +359,7 @@ \end{lstlisting} \end{frame} +\section{Multiple plots} \begin{frame}[fragile] \frametitle{Plotting separate figures} \begin{lstlisting} @@ -433,6 +435,7 @@ $\vdots$ \end{frame} +\section{Exercises} \begin{frame}[fragile] \frametitle{Review Problem \ldots} \small{ @@ -454,19 +457,16 @@ } \end{frame} \begin{frame} - \frametitle{Things we have learned} + \frametitle{What did we learn?} \begin{itemize} - \item Creating simple plots. - \item Adding labels and legends. - \item Annotating plots. - \item Changing the looks: size, linewidth + \item Creating simple plots. + \item Adding labels and legends. + \item Annotating plots. 
+ \item Changing the looks: size, linewidth \end{itemize} -\end{frame} -\begin{frame}[fragile] - \begin{center} - End of Session-1\\ - \alert{Don't Close \typ{IPython}} - \end{center} + \begin{block}{Note} + \centerline{\alert{Don't Close \typ{IPython}}} + \end{block} \end{frame} \end{document} diff -r cc2424a28d24 -r 63cf968627a3 day1/session3.tex --- a/day1/session3.tex Wed Oct 28 20:32:29 2009 +0530 +++ b/day1/session3.tex Wed Oct 28 20:33:27 2009 +0530 @@ -126,7 +126,7 @@ %% % You might wish to add the option [pausesections] %% \end{frame} -\section{Statistics} +\section{Processing voluminous data} \begin{frame} \frametitle{More on data processing} \begin{block}{} @@ -136,13 +136,31 @@ \begin{frame} - \frametitle{Statistical Analysis and Parsing} - Read the data supplied in \emph{sslc1.txt} and obtain the following statistics: - \begin{itemize} - \item Draw a pie chart representing the number of students who scored more than 90\% in Science per region. - \item Draw a pie chart representing the number of students who scored more than 90\% per subject(All regions combined). + \frametitle{Statistical Analysis: Problem statement} + Read the data supplied in \emph{sslc1.txt} and carry out the following: + \begin{enumerate} + \item Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science. + \item Draw a pie chart representing the proportion of students who scored more than 90\% in each subject across regions. \item Print mean, median, mode and standard deviation of math scores for all regions combined. 
- \end{itemize} + \end{enumerate} +\end{frame} + +\begin{frame} + \frametitle{Problem statement: explanation} + \emphbar{Draw a pie chart representing the proportion of students who scored more than 90\% in each region in Science.} + \begin{enumerate} + \item Complete (100\%) data: number of students who scored more than 90\% in Science + \item Each slice: number of students who scored more than 90\% in Science in one region + \end{enumerate} +\end{frame} + +\begin{frame} + \frametitle{Problem statement: explanation} + \emphbar{Draw a pie chart representing the proportion of students who scored more than 90\% in each subject across regions.} + \begin{enumerate} + \item Complete (100\%) data: number of students who scored more than 90\% across all regions + \item Each slice: number of students who scored more than 90\% in each subject across all regions + \end{enumerate} \end{frame} \begin{frame} @@ -161,9 +179,9 @@ \frametitle{File reading and parsing} Understanding the structure of sslc1.txt \begin{itemize} - \item Each line in the file corresponds to one student's details + \item One line in the file corresponds to one student's details \item aka record - \item Each record consists of several fields separated by a ';' + \item Each record consists of fields separated by ';' \end{itemize} \end{frame} @@ -235,6 +253,19 @@ \end{frame} \begin{frame}[fragile] + \frametitle{Back to lists: Iterating} + \begin{itemize} + \item Python's \kwrd{for} loop iterates through list items + \item In other languages (C/C++) we run through indices and pick items from the array using these indices + \item In Python, while iterating through list items, the current position is not available + \end{itemize} + \begin{block}{Iterating through indices} + What if we want the index of an item of a list? 
+ \end{block} + +\end{frame} + +\begin{frame}[fragile] \frametitle{enumerate: Iterating through list indices} \begin{lstlisting} In [1]: names = ["Guido","Alex", "Tim"] @@ -250,7 +281,7 @@ \end{frame} \begin{frame}[fragile] - \frametitle{Dictionary: Building parsed data} + \frametitle{Continuing with our Dictionary} Let our dictionary be: \begin{lstlisting} science = {} # is an empty dictionary @@ -296,7 +327,7 @@ \end{lstlisting} \end{frame} -\subsection{Visualizing the data} +\subsection{Visualizing data} \begin{frame}[fragile] \frametitle{Pie charts} \small @@ -388,7 +419,59 @@ \includegraphics[height=3in, interpolate=true]{data/all_regions} \end{frame} -\subsection{Obtaining stastics} +\subsection{Obtaining statistics} +\begin{frame}[fragile] + \frametitle{Obtaining statistics} + \begin{block}{Statistics: Mean} + Obtain the mean of Math scores + \end{block} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Obtaining statistics: Solution} + \begin{block}{Statistics: Mean} + Obtain the mean of Math scores + \end{block} + \begin{lstlisting} +math_scores = scores[2] +total = 0 +for i, score in enumerate(math_scores): + total += score + +mean = total / (i + 1) +print "Mean: ", mean + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Obtaining statistics: Another solution} + \begin{block}{Statistics: Mean} + Obtain the mean of Math scores + \end{block} + \begin{lstlisting} +math_scores = scores[2] +mean = (sum(math_scores) / + len(math_scores)) + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] +\frametitle{NumPy arrays} + \begin{itemize} + \item NumPy provides arrays + \item arrays are very efficient and powerful + \item Very easy to perform element-wise operations - \typ{+, -, *, /, \%} + \begin{lstlisting} +In [1]: a = array([1, 2, 3]) +In [2]: b = array([4, 5, 6]) + +In [3]: a + b +Out[3]: array([5, 7, 9]) + \end{lstlisting} + \item Very easy to compute statistics + \end{itemize} +\end{frame} + \begin{frame}[fragile] 
\frametitle{Obtaining statistics} \begin{lstlisting} @@ -416,6 +499,7 @@ \end{itemize} \end{frame} +\section{Least square fit} \begin{frame} \frametitle{L vs $T^2$ \ldots} Let's go back to the L vs $T^2$ plot @@ -444,17 +528,6 @@ \end{frame} \begin{frame}[fragile] -\frametitle{Arrays} -\begin{itemize} -\item \typ{T} and \typ{L} are now arrays -\item arrays are very efficient and powerful -\item Very easy to perform element-wise operations -\item \typ{+, -, *, /, \%} -\item More about arrays later -\end{itemize} -\end{frame} - -\begin{frame}[fragile] \frametitle{Least Squares Fit} \vspace{-0.15in} \begin{figure} @@ -508,6 +581,7 @@ \end{itemize} \end{frame} +\subsection{Van der Monde matrix generation} \begin{frame}[fragile] \frametitle{Van der Monde Matrix} \begin{itemize} @@ -540,6 +614,7 @@ \end{lstlisting} \end{frame} +\subsection{Plotting} \begin{frame}[fragile] \frametitle{Least Square Fit Line \ldots} We get the points of the line from \typ{coef} @@ -554,4 +629,13 @@ \end{lstlisting} \end{frame} +\begin{frame}[fragile] + \frametitle{What did we learn?} + \begin{itemize} + \item Least square fit + \item Van der Monde matrix generation + \item Plotting the least square fit curve + \end{itemize} +\end{frame} + \end{document} diff -r cc2424a28d24 -r 63cf968627a3 day1/sslc1.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/day1/sslc1.py Wed Oct 28 20:33:27 2009 +0530 @@ -0,0 +1,50 @@ +from pylab import * +from scipy import * +from scipy import stats + +scores = [[], [], [], [], []] +ninety_percents = [{}, {}, {}, {}, {}] + +for record in open('sslc1.txt'): + record = record.strip() + fields = record.split(';') + + region_code = fields[0].strip() + + for i, field in enumerate(fields[3:8]): + if region_code not in ninety_percents[i]: + ninety_percents[i][region_code] = 0 + score_str = field.strip() + score = 0 if score_str == 'AA' else int(score_str) + scores[i].append(score) + if score > 90: + ninety_percents[i][region_code] += 1 + +subj_total = [] +for subject 
in ninety_percents: + subj_total.append(sum(subject.values())) + + +figure(1) +pie(ninety_percents[3].values(), labels=ninety_percents[3].keys()) +title('Students scoring 90% and above in science by region') +savefig('/tmp/science.png') + +figure(2) +pie(subj_total, labels=['English', 'Hindi', 'Maths', 'Science', 'Social']) +title('Students scoring more than 90% by subject(All regions combined).') +savefig('/tmp/all_regions.png') + +math_scores = array(scores[2]) +# Mean score in Maths(All regions combined) +print "Mean: ", mean(math_scores) + +# Median score in Maths(All regions combined) +print "Median: ", median(math_scores) + +# Mode score in Maths(All regions combined) +print "Mode: ", stats.mode(math_scores) + +# Standard deviation of scores in Maths(All regions combined) +print "Standard Deviation: ", std(math_scores) + diff -r cc2424a28d24 -r 63cf968627a3 day2/session1.tex --- a/day2/session1.tex Wed Oct 28 20:32:29 2009 +0530 +++ b/day2/session1.tex Wed Oct 28 20:33:27 2009 +0530 @@ -73,7 +73,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Title page -\title[Basic Python]{Python:\\A formal approach} +\title[Basic Python]{Python language: Basics} \author[FOSSEE Team] {The FOSSEE Group} @@ -138,7 +138,7 @@ \begin{frame}[fragile] \frametitle{Numbers} \begin{itemize} - \item \kwrd{int}\\ Any whole number is an \kwrd{int}, no matter what the size! + \item \kwrd{int}\\ \kwrd{int} = whole number, no matter what the size! \begin{lstlisting} In [1]: a = 13 @@ -302,8 +302,8 @@ In [3]: s + p Out[3]: 'Hello World' -In [4]: s * 12 -Out[4]: 'Hello Hello Hello Hello ...' +In [4]: s * 4 +Out[4]: 'Hello Hello Hello Hello' \end{lstlisting} \end{frame} @@ -369,24 +369,36 @@ \item Comments: \begin{lstlisting} In [4]: a = 1 # In-line comments -In [5]: # Comment in a line to itself. -In [6]: a = "# This is not a comment!" +In [5]: # A comment line. +In [6]: a = "# Not a comment!" 
\end{lstlisting} \end{itemize} \inctime{15} \end{frame} \section{Simple IO} -\begin{frame}{Simple IO} - \begin{block} - {Console Input} - \texttt{raw\_input()} waits for user input.\\Prompt string is optional.\\ - All keystrokes are Strings!\\\texttt{int()} converts string to int. - \end{block} - \begin{block} - {Console output} - \texttt{print} is straight forward. Note the distinction between \texttt{print x} and \texttt{print x,} - \end{block} +\begin{frame}[fragile] + \frametitle{Simple IO: Console Input} + \begin{itemize} + \item \texttt{raw\_input()} waits for user input. + \begin{lstlisting} +In [1]: a = raw_input() +5 + +In [2]: a = raw_input('prompt > ') +prompt > 5 + \end{lstlisting} + \item Prompt string is optional. + \item All keystrokes are Strings! + \item \texttt{int()} converts string to int. + \end{itemize} +\end{frame} + +\begin{frame}{Simple IO: Console output} + \begin{itemize} + \item \texttt{print} is straightforward + \item Note the distinction between \texttt{print x} and \texttt{print x,} + \end{itemize} \end{frame} \section{Control flow} @@ -437,8 +449,9 @@ \frametitle{\typ{range()}} \kwrd{range([start,] stop[, step])}\\ \begin{itemize} - \item \alert {range() returns a list of integers} - \item \alert {The start and the step arguments are optional} + \item range() returns a list of integers + \item The \emph{start} and the \emph{step} arguments are optional + \item The \emph{stop} value is not included in the list \end{itemize} \end{frame} @@ -448,8 +461,8 @@ \begin{lstlisting} In []: for i in range(5): ....: print i, i * i - ....: - ....: + ....: + ....: 0 0 1 1 2 4 @@ -460,20 +473,13 @@ \end{frame} \subsection{Exercises} -\begin{frame} - \frametitle{Problem set 1} - \begin{itemize} - \item All the problems can be\\ - solved using \kwrd{if} and \kwrd{while} - \end{itemize} -\end{frame} -\begin{frame}{Problem 1.1} +\begin{frame}{Problem set 1: Problem 1.1} Write a program that displays all three digit numbers that are equal to the sum of the 
cubes of their digits. That is, print numbers $abc$ that have the property $abc = a^3 + b^3 + c^3$\\ \vspace*{0.2in} \emphbar{These are called $Armstrong$ numbers.} \end{frame} - + \begin{frame}{Problem 1.2 - Collatz sequence} \begin{enumerate} \item Start with an arbitrary (positive) integer. @@ -495,14 +501,14 @@ The number of lines must be obtained from the user as input.\\ \pause \emphbar{When can your code fail?} -\only<2->{\inctime{10}} +\inctime{5} \end{frame} \begin{frame}[fragile] \frametitle{What did we learn?} \begin{itemize} \item Basic data types - \item Arithematic, logical and relational operations + \item Operators \item Conditional structures \item Loops \end{itemize} diff -r cc2424a28d24 -r 63cf968627a3 day2/session2.tex --- a/day2/session2.tex Wed Oct 28 20:32:29 2009 +0530 +++ b/day2/session2.tex Wed Oct 28 20:33:27 2009 +0530 @@ -73,7 +73,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Title page -\title[Basic Python]{Python:\\A formal approach} +\title[Basic Python]{Python language: Data structures and functions} \author[FOSSEE Team] {The FOSSEE Group} @@ -215,7 +215,7 @@ \end{frame} \begin{frame} {Problem Set 2.1: Problem 2.1.1} -You are given date strings of the form ``29, Jul 2009'', or ``4 January 2008''. In other words a number a string and another number, with a comma sometimes separating the items.Write a function that takes such a string and returns a tuple (yyyy, mm, dd) where all three elements are ints. +You are given date strings of the form ``29, Jul 2009'', or ``4 January 2008''. In other words a number, a string and another number, with a comma sometimes separating the items.\\Write a function that takes such a string and returns a tuple (yyyy, mm, dd) where all three elements are ints. \end{frame} \subsection{Set} @@ -261,7 +261,7 @@ \frametitle{Problem set 2.2} \begin{description} \item[2.2.1] Given a dictionary of the names of students and their marks, identify how many duplicate marks are there? 
and what are these? - \item[2.2.2] Given a string of the form ``4-7, 9, 12, 15'' find the numbers missing in this list for a given range. + \item[2.2.2] Given a string of the form ``4-7, 9, 12, 15'' find the missing numbers in the given range. \end{description} \inctime{15} \end{frame}