In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
import matplotlib.pyplot as plt
%matplotlib inline
# Notebook-wide settings: plot theme, warning handling and frame display width.
sns.set_style(style='darkgrid')
# NOTE(review): this mutes every warning, including deprecation notices
# raised by the plotting libraries used further down.
warnings.simplefilter(action="ignore")
# None removes the cap on how many columns are shown when a frame is displayed.
pd.set_option('display.max_columns', None)
In [2]:
# Load the match-level dataset from matches.csv (path is relative to the
# notebook's working directory).
df_matches = pd.read_csv(filepath_or_buffer='matches.csv')
In [3]:
# Preview the first five rows of the matches frame.
df_matches.iloc[:5]
Out[3]:
In [4]:
# (row count, column count) of the matches frame.
(len(df_matches.index), len(df_matches.columns))
Out[4]:
In [5]:
# Number of missing values in each column of the matches frame.
df_matches.isna().sum()
Out[5]:
In [6]:
# Distinct values present in the Season column.
df_matches['Season'].unique()
Out[6]:
In [8]:
# Top five players by number of player_of_match entries.
pom = df_matches['player_of_match'].value_counts().nlargest(5)

fig, ax = plt.subplots(figsize=(10, 5))
# x/y must be passed by keyword: positional data vectors were deprecated in
# seaborn 0.11 and are rejected by later releases. Plain arrays are passed so
# no index alignment is involved.
sns.barplot(x=pom.index, y=pom.values, ax=ax)
ax.set_xlabel('Players Name')
ax.set_ylabel('Count');
In [13]:
# Frequency of each value in the result column.
result_label = df_matches['result'].value_counts()

fig, ax = plt.subplots(figsize=(10, 5))
# x/y must be passed by keyword: positional data vectors were deprecated in
# seaborn 0.11 and are rejected by later releases.
# log=True draws the count axis on a log scale so infrequent result types
# remain visible next to the dominant one.
sns.barplot(x=result_label.index, y=result_label.values, log=True, ax=ax)
ax.set_xlabel('Results')
ax.set_ylabel('Count');
In [18]:
# Count of matches per toss_decision value.
fig, ax = plt.subplots(figsize=(8, 5))
# Name the column explicitly: a positional first argument is interpreted as
# `data` (not `x`) by newer seaborn releases.
sns.countplot(data=df_matches, x='toss_decision', ax=ax);
In [22]:
# For every winner, count how often each toss_decision value occurs.
toss_decision_by_winner = df_matches.groupby('winner')['toss_decision']
toss_decision_by_winner.value_counts()
Out[22]:
In [30]:
# Number of tosses won by each team, drawn as horizontal bars.
toss_win_label = df_matches['toss_winner'].value_counts()

fig, ax = plt.subplots(figsize=(10, 8))
# x/y must be passed by keyword: positional data vectors were deprecated in
# seaborn 0.11 and are rejected by later releases.
sns.barplot(x=toss_win_label.values, y=toss_win_label.index, ax=ax)
ax.set_ylabel('Teams')
ax.set_xlabel('Count')
ax.set_title('Toss Winners');
In [35]:
# Preview matches where the toss decision was 'field' and the result was 'normal'.
chose_field = df_matches['toss_decision'] == 'field'
normal_result = df_matches['result'] == 'normal'
df_matches[chose_field & normal_result].head()
Out[35]:
In [47]:
# Winners of matches where the toss decision was 'bat' and the match was won
# by a non-zero run margin (win_by_runs != 0), counted per team.
bat_first_wins = (df_matches.toss_decision == 'bat') & (df_matches.win_by_runs != 0)
win = df_matches[bat_first_wins]['winner'].value_counts().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 5))
# x/y must be passed by keyword: positional data vectors were deprecated in
# seaborn 0.11 and are rejected by later releases.
sns.barplot(x=win.values, y=win.index, ax=ax)
ax.set_xlabel('Count')
ax.set_ylabel('Teams');
In [48]:
# Winners of matches where the toss decision was 'field' and the match was won
# by a non-zero wicket margin (win_by_wickets != 0), counted per team.
field_first_wins = (df_matches.toss_decision == 'field') & (df_matches.win_by_wickets != 0)
win = df_matches[field_first_wins]['winner'].value_counts().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 5))
# x/y must be passed by keyword: positional data vectors were deprecated in
# seaborn 0.11 and are rejected by later releases.
sns.barplot(x=win.values, y=win.index, ax=ax)
ax.set_xlabel('Count')
ax.set_ylabel('Teams');
In [51]:
# How often each win_by_wickets margin occurs across all matches.
wicket_counts = df_matches['win_by_wickets'].value_counts()  # computed once, reused for both axes

fig, ax = plt.subplots(figsize=(10, 5))
# Both vectors are numeric here, so seaborn cannot infer the orientation and
# would default to vertical bars with the *counts* treated as categories.
# orient='h' pins the wicket margin to the categorical (y) axis, matching the
# horizontal layout used by the other count plots in this notebook.
# x/y are passed by keyword because positional data vectors are rejected by
# seaborn releases after 0.11.
sns.barplot(x=wicket_counts.values, y=wicket_counts.index, orient='h', ax=ax)
ax.set_xlabel('Count')
ax.set_ylabel('Win by wickets');
Out[51]:
In [58]:
# Number of matches recorded for each Season value.
season = df_matches['Season'].value_counts()

fig, ax = plt.subplots(figsize=(10, 5))
# x/y must be passed by keyword: positional data vectors were deprecated in
# seaborn 0.11 and are rejected by later releases.
# orient='h' keeps the seasons on the categorical axis even if Season is
# stored as numbers (its dtype is not visible from this cell).
sns.barplot(x=season.values, y=season.index, orient='h', ax=ax)
ax.set_ylabel('IPL Year')
ax.set_xlabel('Count of Number of Matches Played');
In [63]:
# Ten cities with the most matches. nlargest already selects and orders the
# top entries, so no separate sort is needed beforehand.
city = df_matches['city'].value_counts().nlargest(10)

fig, ax = plt.subplots(figsize=(10, 5))
# x/y must be passed by keyword: positional data vectors were deprecated in
# seaborn 0.11 and are rejected by later releases.
sns.barplot(x=city.values, y=city.index, ax=ax)
ax.set_ylabel('City')
ax.set_xlabel('Count of Number of Matches Played');
In [65]:
# Load the delivery-level dataset from deliveries.csv (path is relative to the
# notebook's working directory).
df_deliveries = pd.read_csv(filepath_or_buffer='deliveries.csv')
In [66]:
# Preview the first five rows of the deliveries frame.
df_deliveries.iloc[:5]
Out[66]:
In [67]:
# (row count, column count) of the deliveries frame.
(len(df_deliveries.index), len(df_deliveries.columns))
Out[67]:
In [70]:
# Keep only the deliveries whose match_id is 1.
df_match1 = df_deliveries.loc[df_deliveries['match_id'] == 1]
# Preview the first five rows of that subset.
df_match1.iloc[:5]
Out[70]:
In [71]:
# (row count, column count) of the match_id 1 subset.
(len(df_match1.index), len(df_match1.columns))
Out[71]:
In [72]:
# Distinct batting_team values appearing in the match_id 1 subset.
df_match1['batting_team'].unique()
Out[72]:
In [76]:
# Rows of the match_id 1 subset that belong to inning 1.
srh = df_match1.loc[df_match1.inning == 1]
In [74]:
# Preview the first five inning-1 rows.
srh.iloc[:5]
Out[74]:
In [75]:
# Frequency of each dismissal_kind among the inning-1 rows.
srh.dismissal_kind.value_counts()
Out[75]:
In [81]:
# Number of delivery rows recorded in `srh` (the inning-1 frame).
srh['ball'].shape[0]
Out[81]:
In [89]:
# Number of inning-1 deliveries whose total_runs equals 4.
# NOTE(review): total_runs presumably includes extras, so this may differ from
# the number of fours hit off the bat; confirm against the dataset's columns.
int((srh['total_runs'] == 4).sum())
Out[89]:
In [90]:
# Number of inning-1 deliveries whose total_runs equals 6.
# NOTE(review): total_runs presumably includes extras, so this may differ from
# the number of sixes hit off the bat; confirm against the dataset's columns.
int((srh['total_runs'] == 6).sum())
Out[90]:
In [77]:
# Rows of the match_id 1 subset that belong to inning 2.
rcb = df_match1.loc[df_match1['inning'] == 2]
In [78]:
# Preview the first five inning-2 rows.
rcb.iloc[:5]
Out[78]:
In [79]:
# Frequency of each dismissal_kind among the inning-2 rows.
rcb.dismissal_kind.value_counts()
Out[79]:
In [80]:
# Number of delivery rows recorded in `rcb` (the inning-2 frame).
rcb['ball'].shape[0]
Out[80]:
In [88]:
# Number of inning-2 deliveries whose total_runs equals 4.
# NOTE(review): total_runs presumably includes extras, so this may differ from
# the number of fours hit off the bat; confirm against the dataset's columns.
int((rcb['total_runs'] == 4).sum())
Out[88]:
In [91]:
# Number of inning-2 deliveries whose total_runs equals 6.
# NOTE(review): total_runs presumably includes extras, so this may differ from
# the number of sixes hit off the bat; confirm against the dataset's columns.
int((rcb['total_runs'] == 6).sum())
Out[91]:
In [ ]:
No comments:
Post a Comment