import numpy as np
import pandas as pd # pd is the alias used
# --- pandas Series basics ---
# Build a Series from a plain Python list
fruits = ['apple', 'orange', 'mango']
sr1 = pd.Series(fruits)
print(sr1)
# Build a Series from a NumPy array
colors = np.array(['Green', 'Orange', 'Pink', 'Blue'])  # ndarray
sr2 = pd.Series(colors)
print(sr2)
# Get operation: look up the element stored under index label 1
print('Element at index 1 =', sr2.get(1))
# Searching for a value: index label of the first element equal to 'Blue'
sr2[sr2 == 'Blue'].index[0]
# add method - appends the text ' color' to every existing element
sr2 = sr2.add(' color')
sr2
# drop returns a new Series; sr2 itself is unchanged unless the result is
# assigned back to sr2 (or inplace=True is passed)
sr2.drop(2)
# Series.append was removed in pandas 2.0, so pd.concat is used to add the
# extra element. Like drop, it returns a new Series and leaves sr2 untouched;
# ignore_index=True renumbers the result 0..n-1.
sr2_extended = pd.concat([sr2, pd.Series(['Yellow'])], ignore_index=True)
sr2_extended
# --- Single-column DataFrame ---
# A flat list of values becomes a DataFrame with one column
names = ['Albert', 'Amir', 'Max', 'Ram']
df1 = pd.DataFrame(data=names)
# Show the raw list, then a separator line before the DataFrame itself
print(names)
print('-------------------------------------')
df1
# --- Spreadsheet-like DataFrame built from a dictionary ---
# Each dictionary key becomes a column label and each list supplies that
# column's values. Rebinding `names` is enough to replace any earlier value;
# no `del` is needed first (a `del` would raise NameError if `names` did not
# exist yet, e.g. when this section is run on its own).
names = ['Adam', 'Anwar', 'Basheer', 'Priya']
countries = ['Nigeria', 'UAE', 'KSA', 'Singapore']
major = ['Economics', 'Computer Science', 'Accountancy', 'Mathematics']
students = {
    'Student_Name': names,
    'Student_Country': countries,
    'Degree_major': major,
}
df_students = pd.DataFrame(students)
df_students
# Extract a single column and show the type of the extracted object
print(df_students['Student_Name'])
print('-----------------------------------')
print('Data type of the column: ', type(df_students['Student_Name']))
# Replace the default row index and the column labels
rollno_array = ['12001', '12002', '12003', '12004']  # hypothetical roll numbers used as row labels
# NOTE(review): 'SUbject' looks like a typo for 'Subject'; left unchanged
# because the label is runtime data that other code may reference.
column_title = ['Name', 'Country', 'SUbject']  # new column labels, in column order
df_students.index = rollno_array
df_students.columns = column_title
df_students
# --- Loading a DataFrame from a file ---
# CSV files are read with pd.read_csv(); for Excel files use pd.read_excel()
df_iris = pd.read_csv(filepath_or_buffer='iris.csv')
# Preview the first rows of the loaded data
df_iris.head()
# --- Grouping, joining and merging ---
# Create a city population dataframe
cities = ['Tokyo', 'Jakarta', 'Delhi', 'Manila', 'Seoul', 'Shanghai', 'Karachi', 'Beijing', 'Mumbai', 'Chongqing']
ct_ctry = ['JA', 'ID', 'IN', 'PH', 'KR', 'CN', 'PK', 'CN', 'IN', 'CN']
pop_mil = [37.84, 30.53, 24.99, 24.13, 23.48, 23.41, 22.12, 21.00, 17.7, 15.7]
city_df = pd.DataFrame({'City': cities, 'Country_Code': ct_ctry, 'Population(Mil)': pop_mil})
city_df
# Create a country master dataframe
country = ['Japan', 'Indonesia', 'India', 'Philippines', 'S.Korea', 'China', 'Pakistan', 'Singapore', 'Malaysia']
ctry_cd = ['JA', 'ID', 'IN', 'PH', 'KR', 'CN', 'PK', 'SG', 'MY']
ctry_df = pd.DataFrame({'Country_Code': ctry_cd, 'Country_Name': country})
ctry_df
# Total population per country code. numeric_only=True restricts the sum to
# numeric columns; without it pandas 2.x also "sums" the text 'City' column
# by concatenating the city names.
population_by_country = city_df.groupby(by='Country_Code').sum(numeric_only=True)
population_by_country
# Number of cities per country code (non-null count of the first remaining column)
city_df.groupby(by='Country_Code').count().iloc[:, 0]
# join aligns rows on the index, not on Country_Code, and defaults to
# how='left' (pass how='inner' or how='outer' to change that). The rows
# therefore pair up by position: Beijing (index 7) lands next to Singapore,
# and Chongqing (index 9) has no partner in the 9-row country table.
# The suffixes disambiguate the Country_Code column present in both frames.
city_df.join(ctry_df, lsuffix='_city', rsuffix='_ctry')
# merge matches rows on the shared Country_Code key; how='outer' keeps codes
# found in only one frame (SG and MY have no city) and sort=True orders the
# result by key.
pd.merge(city_df, ctry_df, on='Country_Code', sort=True, how='outer')