In [1]:
import numpy as np
import pandas as pd

# Read the iris data set from the CSV file in the working directory,
# then preview its first five rows.
df1 = pd.read_csv(filepath_or_buffer='iris.csv')
df1.head(n=5)
Out[1]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 1 6.5 3.2 5.1 2.0 Iris-virginica
1 2 6.1 2.8 4.0 1.3 Iris-versicolor
2 3 5.1 3.5 1.4 0.3 Iris-setosa
3 4 6.4 3.1 5.5 1.8 Iris-virginica
4 5 6.7 3.1 4.7 1.5 Iris-versicolor
In [2]:
# Preview the first five entries of the Species column as a Series.
df1.loc[:, 'Species'].head(n=5)
Out[2]:
0     Iris-virginica
1    Iris-versicolor
2        Iris-setosa
3     Iris-virginica
4    Iris-versicolor
Name: Species, dtype: object
In [3]:
# First five sepal lengths converted from a Series to a plain Python list.
df1['SepalLengthCm'].head().tolist()
Out[3]:
[6.5, 6.1, 5.1, 6.4, 6.7]
In [4]:
# First five rows as a nested dict: {column name -> {row index -> value}}.
df1.head(n=5).to_dict(orient='dict')
Out[4]:
{'Id': {0: 1, 1: 2, 2: 3, 3: 4, 4: 5},
 'SepalLengthCm': {0: 6.5, 1: 6.1, 2: 5.1, 3: 6.4, 4: 6.7},
 'SepalWidthCm': {0: 3.2, 1: 2.8, 2: 3.5, 3: 3.1, 4: 3.1},
 'PetalLengthCm': {0: 5.1, 1: 4.0, 2: 1.4, 3: 5.5, 4: 4.7},
 'PetalWidthCm': {0: 2.0, 1: 1.3, 2: 0.3, 3: 1.8, 4: 1.5},
 'Species': {0: 'Iris-virginica',
  1: 'Iris-versicolor',
  2: 'Iris-setosa',
  3: 'Iris-virginica',
  4: 'Iris-versicolor'}}
In [5]:
# Mean of every remaining column, computed separately for each species.
# NOTE(review): 'Id' looks like a row identifier, so its per-species mean
# is probably not meaningful -- confirm before interpreting that column.
(
    df1
    .groupby('Species')
    .mean()
)
Out[5]:
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
Species
Iris-setosa 83.50 5.006 3.418 1.464 0.244
Iris-versicolor 66.76 5.936 2.770 4.260 1.326
Iris-virginica 76.24 6.588 2.974 5.552 2.026
In [6]:
# Column-wise maximum within each species (indexed by species name).
max_df = df1.groupby('Species').max()

# Largest sepal length observed for Iris-setosa, read with a single
# row/column label lookup.
max_df.loc['Iris-setosa', 'SepalLengthCm']
Out[6]:
5.8
In [7]:
import matplotlib.pyplot as plt

# A short hand-written sequence of sample values; plt.plot receives only
# the y-values, so each sample is drawn at its position in the list.
signal = [
    0, 2, 2, 2.1, 2, 1.8, 0,
    0, 1, 4, -1, -2, -1,
]
plt.plot(signal)
plt.show()
In [8]:
# Student names and the subject each one majors in (parallel lists:
# subject[i] is the major of stnames[i]).
stnames = [ 'Anna', 'Basheer', 'Charan', 'David', 'Emily', 'Feroz',
           'Ganesh', 'Hanifa', 'Irfan', 'Jane', 'Kamal' ]
subject = [ 'Bio', 'Maths', 'Phy', 'Bio', 'Maths', 'Bio',
           'Maths', 'Phy', 'Bio', 'Maths', 'Bio']

df_st = pd.DataFrame( { 'Names': stnames, 'Major': subject } )

# Number of students per major, kept as a one-column frame named 'Count'.
# size() counts the rows of each group directly, so the result does not
# depend on how many other columns df_st has (count() followed by a
# positional rename of the columns breaks as soon as a column is added,
# and count() would also skip rows whose name is missing).
stats = df_st.groupby(by='Major').size().to_frame(name='Count')

# Bar chart of the head-count per major; labelled so the figure can be
# read on its own. The trailing ';' suppresses the text repr of the
# last call in the notebook output.
plt.bar(stats.index, stats['Count'])
plt.xlabel('Major')
plt.ylabel('Number of students')
plt.title('Students per major');
In [9]:
# Histogram of the children's ages using 3 equal-width bins.
# Range = 10 - 3 = 7, so with 3 bins each bin is 7 / 3 = 2.33 wide
# (rounded), which gives the bin edges
# [3, 3 + 2.33, 3 + 2.33 + 2.33, 10].
children_ages = [8, 10, 3, 5, 4, 6, 9, 4, 5, 10, 7, 4, 3, 6, 5]
plt.hist(x=children_ages, bins=3)
Out[9]:
(array([8., 3., 4.]),
 array([ 3.        ,  5.33333333,  7.66666667, 10.        ]),
 <a list of 3 Patch objects>)
In [10]:
# Box plot of the SepalWidthCm column of the iris frame.
df1.boxplot(column='SepalWidthCm')
plt.show()