import numpy as np
import pandas as pd
# Load the iris dataset from a CSV file in the working directory.
iris_df = pd.read_csv('iris.csv')

# Peek at the first and last rows. Bare expressions like these only display
# output in an interactive session or notebook.
iris_df.head()
iris_df.tail()

# Drop the 'Id' column in place, since it does not contribute to the data.
iris_df.drop(columns='Id', inplace=True)
iris_df

# Summaries: dimensions, column info, transposed descriptive statistics,
# and the number of missing values per column.
iris_df.shape
iris_df.info()
iris_df.describe().transpose()
iris_df.isna().sum()
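# NOTE(review): 'Purpose' is not a column used in this analysis; this remark about null values looks stale — verify against the isnull() counts.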
# List the distinct species labels before encoding them.
iris_df['Species'].unique()

# Replace each species label with an integer code; any label missing from
# the mapping becomes NaN.
species_codes = {
    'Iris-virginica': 1,
    'Iris-versicolor': 2,
    'Iris-setosa': 3,
}
iris_df['Species'] = iris_df['Species'].map(species_codes)
iris_df
import matplotlib.pyplot as plt
# Plot a histogram of every column on a 12x12 figure.
iris_df.hist(figsize=(12, 12))
plt.show()

# Pearson correlation of each column against the encoded Species column.
iris_df_corr = iris_df.corr(method='pearson')['Species']

# Keep only correlations at or above 0.3, or at or below -0.2.
at_or_above_upper = iris_df_corr >= 0.3
at_or_below_lower = iris_df_corr <= -0.2
iris_df_corr_valuable = iris_df_corr[at_or_above_upper | at_or_below_lower]
iris_df_corr_valuable
import seaborn as sns
# Pairwise plots for every pair of columns, with kernel density estimates
# on the diagonal.
sns.pairplot(data=iris_df, diag_kind='kde')
plt.show()
# Draw one box plot per column, each on its own figure.
for colname in iris_df.columns:
    # Double brackets select a one-column DataFrame, so boxplot() labels the
    # box with the column name.
    iris_df[[colname]].boxplot()
    # Show inside the loop: boxplot() draws on the current axes, so a single
    # show() after the loop would stack every column's box on one plot.
    plt.show()