Data Visualization in Python using numpy and matplotlib

In this notebook, I show different visualizations, as they are a pivotal part of Data Science. I tried all of these visualizations on the famous Titanic dataset, for which EDA and ETL were already done. The dataset and the notebook can also be found here.
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [4]:
# Read the Titanic CSV into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer='data_titanic.csv')
df.head()
Out[4]:
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked Initial
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S Mr
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C Mrs
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S Miss
3 3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S Mrs
4 4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S Mr
In [23]:
# Plotting multiple series on the same axes.
# Both series are plotted against PassengerId so that they share the x-axis
# described by the "Passenger ID" label (Fare used to be plotted against the
# row index instead).
plt.plot(df.PassengerId, df.Age, color='red', label='Age', alpha=0.8)
plt.plot(df.PassengerId, df.Fare, color='blue', label='Fare', alpha=0.2)
plt.legend(loc='best')
plt.xlabel("Passenger ID")
plt.show()
In [16]:
# Plotting using subplots: two stacked panels in ONE figure.
# plt.show() is called only once, at the end. Calling it after the first panel
# renders that panel on its own and the second plt.subplot() then starts a
# fresh figure, so the panels would no longer be subplots of the same figure.

# Top panel: age per passenger.
plt.subplot(2, 1, 1)
plt.title('Using subplots')
plt.plot(df.PassengerId, df.Age, color='red', marker='o')
plt.xlabel('Passenger ID')
plt.ylabel('Age')

# Bottom panel: fare per passenger.
plt.subplot(2, 1, 2)
plt.title('Using subplots 2')
plt.plot(df.PassengerId, df.Fare, color='green', marker='*')
plt.xlabel('Passenger ID')
plt.ylabel('Fare')

# Keep the top panel's x label from colliding with the bottom panel's title.
plt.tight_layout()
plt.show()
In [52]:
# Using axes to specify [xlo, ylo, width, height] instead of subplot.
# All four numbers are fractions of the figure size, so xlo + width and
# ylo + height must stay <= 1 for the axes to lie inside the figure.

# Left panel: age.
plt.axes([0.05, 0.05, 0.4, 0.9])
plt.plot(df.Age, c='orange')
plt.xlabel('passengers')
plt.ylabel('Age')
plt.title('Age Plot')

# Right panel: fare. The gap between the panels leaves room for its y label.
plt.axes([0.575, 0.05, 0.4, 0.9])
plt.plot(df.Fare, c='magenta')
plt.xlabel('passengers')
plt.ylabel('Fare')
plt.title('Fare Plot')
plt.show()
In [55]:
# Restrict the visible x range with a custom limit (the y range is left automatic).
plt.plot(df['PassengerId'], df['Age'], c='blue', marker='*')
plt.xlim(0, 200)
plt.show()
In [65]:
# plt.axis sets both ranges in one call, given as (xmin, xmax, ymin, ymax).
x_min, x_max, y_min, y_max = 75, 90, 0, 40

plt.plot(df['PassengerId'], df['Age'], color='blue')
plt.axis((x_min, x_max, y_min, y_max))
plt.show()
In [80]:
# Placing an arrow at the maximum point of the age curve.

# Plot the two columns as usual.
plt.plot(df.PassengerId, df.Age, color='Green')
plt.xlabel('Passenger ID')
plt.ylabel('Age')
plt.title('Checking Arrow')

# Raise the y limit so the annotation text fits above the peak.
plt.ylim(0, 100)

# Highest age in the data.
age = df['Age']
passengerid = df['PassengerId']
max_age = age.max()

# idxmax() returns the index *label* of the row holding the maximum age, and
# .loc looks that label up. argmax() returns a *position*, which only matches
# the label when the frame still has its default 0..n-1 index.
pidmax = passengerid.loc[age.idxmax()]

# xy is the arrow tip (the peak); xytext is where the label is written.
plt.annotate('Maximum', xy=(pidmax, max_age), xytext=(pidmax+20, max_age+20), arrowprops=dict(facecolor='black'))

plt.show()
In [81]:
# List the style-sheet names exposed by matplotlib (usable with plt.style.use).

plt.style.available
Out[81]:
['bmh',
 'classic',
 'dark_background',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark-palette',
 'seaborn-dark',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'seaborn',
 '_classic_test']
In [100]:
# Pick one of the available styles, then draw three panels on a 2x2 grid.
plt.style.use('classic')

# One entry per panel: the column to plot and any extra format arguments.
style_panels = [
    (df.Age, ()),
    (df.Survived, ('ro',)),
    (df.Fare, ()),
]
for position, (series, fmt_args) in enumerate(style_panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(df.PassengerId, series, *fmt_args)

plt.tight_layout()
plt.show()
In [104]:
# Visualizing a function evaluated on a mesh grid.

# 41 sample points along x and 21 along y.
u = np.linspace(-2, 2, 41)
v = np.linspace(-1, 1, 21)

# X and Y hold the coordinates of every grid point.
X, Y = np.meshgrid(u, v)

# Sine of three times the distance from the origin.
radius = np.sqrt(X**2 + Y**2)
Z = np.sin(3*radius)

# pcolor(Z) draws one cell per array element, so the axes span the array shape.
n_rows, n_cols = Z.shape
plt.pcolor(Z)
plt.xlim(0, n_cols)
plt.ylim(0, n_rows)
plt.tight_layout()
plt.show()
In [107]:
# Filled contour plots of the same surface under four different colormaps.

plt.style.use('classic')

# (panel title, colormap name), listed in subplot order on the 2x2 grid.
contour_panels = [
    ('Viridis', 'viridis'),
    ('Gray', 'gray'),
    ('Autumn', 'autumn'),
    ('Winter', 'winter'),
]

for position, (panel_title, cmap_name) in enumerate(contour_panels, start=1):
    plt.subplot(2, 2, position)
    # 20 contour levels, each panel with its own colorbar.
    plt.contourf(X, Y, Z, 20, cmap=cmap_name)
    plt.colorbar()
    plt.title(panel_title)

# Improve the spacing between subplots and display them.
plt.tight_layout()
plt.show()
In [124]:
# 2D histogram of Fare (x) against Age (y) on a 30x30 grid of bins.
# Complete list of cmaps available at https://matplotlib.org/users/colormaps.html
plt.hist2d(df['Fare'], df['Age'], bins=(30, 30), cmap='YlGnBu')

plt.title('2D histogram between Age and Fare')
plt.xlabel('Fares')
plt.ylabel('Ages')
plt.colorbar()
plt.show()
In [126]:
# 2D histogram of Survived (x) against Age (y).
plt.hist2d(df.Survived, df.Age, bins=(30, 30), cmap='YlGnBu')

# Complete list of cmaps available at https://matplotlib.org/users/colormaps.html

plt.xlabel('Survived')
plt.ylabel('Ages')
# The title names the variables actually plotted here (it previously still
# read "Age and Fare", which describes a different plot).
plt.title('2D histogram between Survived and Age')
plt.colorbar()
plt.show()
In [132]:
# Using hexagonal bins to visualize Age against Fare.
# Note: Age is on the x-axis and Fare on the y-axis here.
plt.hexbin(df.Age, df.Fare, gridsize=(15, 12))

# Label the axes and add a colorbar so the bin counts can be read off the plot.
plt.xlabel('Ages')
plt.ylabel('Fares')
plt.title('Hexbin plot between Age and Fare')
plt.colorbar()

plt.show()
In [141]:
# Remove the Name column from the working frame.
# The bare df.head() that used to precede this line displayed nothing (only a
# cell's last expression is rendered), so it has been removed.
# errors='ignore' makes the cell safe to re-run once the column is already gone.
df = df.drop(columns=['Name'], errors='ignore')
In [146]:
# Remove the Cabin column from the working frame.
# The bare df.head() that used to precede this line displayed nothing (only a
# cell's last expression is rendered), so it has been removed.
# errors='ignore' makes the cell safe to re-run once the column is already gone.
df = df.drop(columns=['Cabin'], errors='ignore')
In [149]:
# Remove the Ticket column from the working frame.
# The bare df.head() that used to precede this line displayed nothing (only a
# cell's last expression is rendered), so it has been removed.
# errors='ignore' makes the cell safe to re-run once the column is already gone.
df = df.drop(columns=['Ticket'], errors='ignore')
In [150]:
# Preview the first rows of the frame in its current state.
df.head()
Out[150]:
Unnamed: 0 PassengerId Survived Pclass Sex Age SibSp Parch Fare Embarked Initial
0 0 1 0 3 male 22.0 1 0 7.2500 S Mr
1 1 2 1 1 female 38.0 1 0 71.2833 C Mrs
2 2 3 1 3 female 26.0 0 0 7.9250 S Miss
3 3 4 1 1 female 35.0 1 0 53.1000 S Mrs
4 4 5 0 3 male 35.0 0 0 8.0500 S Mr
In [151]:
# One-hot encode the remaining categorical columns of the frame.
# dtype=int pins the indicator columns to 0/1 integers regardless of pandas
# version; newer pandas releases default to boolean indicators instead.
df = pd.get_dummies(df, dtype=int)
In [153]:
# Drop one indicator column from each encoded variable (Sex, Embarked, Initial).
# A single drop() call replaces three separate ones, and errors='ignore' makes
# the cell safe to re-run once the columns are already gone. The bare df.head()
# that used to open this cell displayed nothing and has been removed.
df = df.drop(columns=['Sex_female', 'Embarked_C', 'Initial_Master'], errors='ignore')
In [154]:
# Preview the first rows of the frame in its current state.
df.head()
Out[154]:
Unnamed: 0 PassengerId Survived Pclass Age SibSp Parch Fare Sex_male Embarked_Q Embarked_S Initial_Miss Initial_Mr Initial_Mrs Initial_Other
0 0 1 0 3 22.0 1 0 7.2500 1 0 1 0 1 0 0
1 1 2 1 1 38.0 1 0 71.2833 0 0 0 0 0 1 0
2 2 3 1 3 26.0 0 0 7.9250 0 0 1 1 0 0 0
3 3 4 1 1 35.0 1 0 53.1000 0 0 1 0 0 1 0
4 4 5 0 3 35.0 0 0 8.0500 1 0 1 0 1 0 0
In [167]:
# lmplot: scatter of Fare against Age with a fitted linear regression line.
sns.lmplot(data=df, x='Age', y='Fare')
plt.show()
In [168]:
# Residual plot: residuals of a linear regression of Fare on Age.
sns.residplot(x='Age', y='Fare', data=df, color='green')
# Render the figure explicitly, as every other plotting cell does, instead of
# leaving the Axes object as the cell's output.
plt.show()
In [179]:
# Higher order regression: polynomial fits of order 1-4 over the same scatter.

plt.scatter(df['Age'], df['Fare'], label='data', color='red', marker='o')

# One distinct colour per polynomial order so the curves can be told apart in
# the legend (orders 2-4 were previously all drawn in green).
order_colors = [(1, 'blue'), (2, 'green'), (3, 'orange'), (4, 'purple')]
for poly_order, line_color in order_colors:
    # scatter=False draws only the fitted curve; the points are already plotted above.
    sns.regplot(x='Age', y='Fare', data=df, scatter=False, color=line_color,
                label='order {}'.format(poly_order), order=poly_order)

plt.legend(loc='best')

plt.show()
In [182]:
# Regression grouped by hue: one fitted line per value of Survived.
sns.lmplot(data=df, x='Age', y='Fare', hue='Survived', palette='Set1')
plt.show()
In [187]:
# Same regression, coloured by Survived, with one row of panels per Pclass value.
sns.lmplot(data=df, x='Age', y='Fare', hue='Survived', row='Pclass')
plt.show()
In [193]:
# Visualizing univariate data: strip plot of Fare for each value of Survived.
sns.stripplot(data=df, x='Survived', y='Fare')
plt.show()
In [200]:
# Same strip plot, with jitter requested explicitly and a smaller marker size.
survival_fare = dict(x='Survived', y='Fare', data=df)
sns.stripplot(jitter=True, size=2, **survival_fare)
plt.show()
In [206]:
# Swarm plot: similar to a jittered strip plot; here Age per Survived value,
# coloured by the Sex_male indicator.
sns.swarmplot(data=df, x='Survived', y='Age', hue='Sex_male')
plt.show()
In [211]:
# Violin plot of Age for each value of Survived.
sns.violinplot(data=df, x='Survived', y='Age')
plt.show()
In [213]:
# Violin plot with a strip plot drawn on top of it.
# inner=None removes the inner boxplot, and the light gray colour turns the
# violins into a pale background for the overlaid points.
survival_age = dict(x='Survived', y='Age', data=df)
sns.violinplot(inner=None, color='lightgray', **survival_age)
sns.stripplot(jitter=True, size=1.5, **survival_age)
plt.show()