import numpy as np
import pandas as pd
# Load the iris measurements; 'iris.csv' is resolved against the current
# working directory.
df1 = pd.read_csv('iris.csv')

# Peek at the first rows in a few different shapes. These are bare
# expressions: their values are only displayed when evaluated interactively
# and are discarded when the file runs as a plain script.
first_rows = df1.head()
first_rows                                # leading rows as a DataFrame
first_rows['Species']                     # one column of those rows as a Series
list(first_rows['SepalLengthCm'])         # one column as a plain Python list
first_rows.to_dict()                      # {column: {row label: value}}

# Per-species aggregates. The grouping is built once and reused for both the
# mean and the max.
by_species = df1.groupby(by='Species')
by_species.mean()
max_df = by_species.max()

# Largest sepal length recorded for the 'Iris-setosa' group.
max_df.loc['Iris-setosa', 'SepalLengthCm']
import matplotlib.pyplot as plt
# Sample series for a simple line chart. No x values are supplied, so each
# sample is plotted against its position in the list.
signal = [
    0, 2, 2, 2.1, 2, 1.8, 0,
    0, 1, 4, -1, -2, -1,
]
signal_axes = plt.gca()  # current axes (created on demand if none exist yet)
signal_axes.plot(signal)
plt.show()
# Students and their majors; the two lists are paired by position.
stnames = [
    'Anna', 'Basheer', 'Charan', 'David', 'Emily', 'Feroz',
    'Ganesh', 'Hanifa', 'Irfan', 'Jane', 'Kamal',
]
subject = [
    'Bio', 'Maths', 'Phy', 'Bio', 'Maths', 'Bio',
    'Maths', 'Phy', 'Bio', 'Maths', 'Bio',
]
df_st = pd.DataFrame({'Names': stnames, 'Major': subject})

# Number of students per major, as a one-column DataFrame named 'Count'
# indexed by major. Counting the 'Names' column explicitly (instead of
# overwriting the column labels of a whole-frame count) keeps this valid even
# if df_st later gains additional columns.
stats = df_st.groupby(by='Major')['Names'].count().to_frame('Count')

# Draw the bar chart on its own figure and show it right away. Without this,
# when the file runs as a script, the charts plotted afterwards land on the
# same axes as the bars before anything is displayed.
plt.figure()
plt.bar(stats.index, stats['Count'])
plt.show()
# Histogram of children's ages, drawn on a dedicated figure so it is not
# overlaid on whatever was plotted before it.
children_ages = [8, 10, 3, 5, 4, 6, 9, 4, 5, 10, 7, 4, 3, 6, 5]
plt.figure()
plt.hist(children_ages, bins=3)
# Range = 10 - 3 = 7, number of bins = 3, so bin width = 7 / 3 ~= 2.33.
# Bin edges are therefore approximately [3, 5.33, 7.67, 10].
plt.show()

# Boxplot of sepal width on its own axes. The axes are passed explicitly:
# when no `ax` is given, pandas draws on the current axes, which would put the
# boxplot on top of the histogram above.
_, box_ax = plt.subplots()
df1.boxplot(column='SepalWidthCm', ax=box_ax)
plt.show()