IPL Data Analysis


 

IPL Data Analysis

In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import warnings
import matplotlib.pyplot as plt 
%matplotlib inline

# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None
# Suppress every Python warning for the rest of the session.
warnings.simplefilter(action="ignore")
# Apply seaborn's dark grid theme to all figures.
sns.set_style(style='darkgrid')
In [2]:
# Load matches.csv into a DataFrame.
df_matches = pd.read_csv(filepath_or_buffer='matches.csv')
In [3]:
# Preview the first five rows of the matches data.
df_matches.head(n=5)
Out[3]:
idSeasoncitydateteam1team2toss_winnertoss_decisionresultdl_appliedwinnerwin_by_runswin_by_wicketsplayer_of_matchvenueumpire1umpire2umpire3
01IPL-2017Hyderabad05-04-2017Sunrisers HyderabadRoyal Challengers BangaloreRoyal Challengers Bangalorefieldnormal0Sunrisers Hyderabad350Yuvraj SinghRajiv Gandhi International Stadium, UppalAY DandekarNJ LlongNaN
12IPL-2017Pune06-04-2017Mumbai IndiansRising Pune SupergiantRising Pune Supergiantfieldnormal0Rising Pune Supergiant07SPD SmithMaharashtra Cricket Association StadiumA Nand KishoreS RaviNaN
23IPL-2017Rajkot07-04-2017Gujarat LionsKolkata Knight RidersKolkata Knight Ridersfieldnormal0Kolkata Knight Riders010CA LynnSaurashtra Cricket Association StadiumNitin MenonCK NandanNaN
34IPL-2017Indore08-04-2017Rising Pune SupergiantKings XI PunjabKings XI Punjabfieldnormal0Kings XI Punjab06GJ MaxwellHolkar Cricket StadiumAK ChaudharyC ShamshuddinNaN
45IPL-2017Bangalore08-04-2017Royal Challengers BangaloreDelhi DaredevilsRoyal Challengers Bangalorebatnormal0Royal Challengers Bangalore150KM JadhavM Chinnaswamy StadiumNaNNaNNaN
In [4]:
# (number of rows, number of columns) of the matches data.
(len(df_matches.index), len(df_matches.columns))
Out[4]:
(756, 18)
In [5]:
# Number of missing values in each column.
df_matches.isna().sum(axis=0)
Out[5]:
id                   0
Season               0
city                 7
date                 0
team1                0
team2                0
toss_winner          0
toss_decision        0
result               0
dl_applied           0
winner               4
win_by_runs          0
win_by_wickets       0
player_of_match      4
venue                0
umpire1              2
umpire2              2
umpire3            637
dtype: int64
In [6]:
# Distinct values present in the Season column, in order of first appearance.
df_matches['Season'].unique()
Out[6]:
array(['IPL-2017', 'IPL-2008', 'IPL-2009', 'IPL-2010', 'IPL-2011',
       'IPL-2012', 'IPL-2013', 'IPL-2014', 'IPL-2015', 'IPL-2016',
       'IPL-2018', 'IPL-2019'], dtype=object)
In [8]:
# Top 5 players by number of player-of-the-match awards.
pom = df_matches.player_of_match.value_counts().nlargest(5)

plt.figure(figsize=(10, 5))
# Pass the vectors as keywords: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onwards.
sns.barplot(x=pom.index, y=pom.values)
plt.xlabel('Players Name')
plt.ylabel('Count');
In [13]:
# Number of matches for each value of the `result` column.
result_label = df_matches.result.value_counts()

plt.figure(figsize=(10, 5))
# Pass the vectors as keywords: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onwards.
sns.barplot(x=result_label.index, y=result_label.values)
# Log-scale the count axis so that small counts remain visible.
plt.yscale('log')
plt.xlabel('Results')
plt.ylabel('Count');
In [18]:
# Number of matches for each toss decision.
plt.figure(figsize=(8, 5))
# Name the column explicitly: from seaborn 0.12 the only positional argument
# is `data`, so a positionally passed Series is no longer read as `x`.
sns.countplot(data=df_matches, x='toss_decision');
In [22]:
# For every match winner, count how often each toss decision occurred.
df_matches.groupby(by='winner').toss_decision.value_counts()
Out[22]:
winner                       toss_decision
Chennai Super Kings          bat              52
                             field            48
Deccan Chargers              field            15
                             bat              14
Delhi Capitals               field             8
                             bat               2
Delhi Daredevils             field            38
                             bat              29
Gujarat Lions                field            11
                             bat               2
Kings XI Punjab              field            61
                             bat              21
Kochi Tuskers Kerala         field             6
Kolkata Knight Riders        field            57
                             bat              35
Mumbai Indians               field            64
                             bat              45
Pune Warriors                bat               9
                             field             3
Rajasthan Royals             field            41
                             bat              34
Rising Pune Supergiant       field            10
Rising Pune Supergiants      field             3
                             bat               2
Royal Challengers Bangalore  field            58
                             bat              26
Sunrisers Hyderabad          field            37
                             bat              21
Name: toss_decision, dtype: int64
In [30]:
# Number of tosses won by each team, most frequent first.
toss_win_label = df_matches.toss_winner.value_counts()

plt.figure(figsize=(10, 8))
# Horizontal bars: counts on x, team names on y. Keywords are required
# because seaborn 0.12+ no longer accepts x/y positionally.
sns.barplot(x=toss_win_label.values, y=toss_win_label.index)
plt.ylabel('Teams')
plt.xlabel('Count')
plt.title('Toss Winners');
In [35]:
# Matches where the toss decision was 'field' and the result was 'normal'
# (first five rows only).
df_matches.loc[
    df_matches['toss_decision'].eq('field') & df_matches['result'].eq('normal')
].head()
Out[35]:
idSeasoncitydateteam1team2toss_winnertoss_decisionresultdl_appliedwinnerwin_by_runswin_by_wicketsplayer_of_matchvenueumpire1umpire2umpire3
01IPL-2017Hyderabad05-04-2017Sunrisers HyderabadRoyal Challengers BangaloreRoyal Challengers Bangalorefieldnormal0Sunrisers Hyderabad350Yuvraj SinghRajiv Gandhi International Stadium, UppalAY DandekarNJ LlongNaN
12IPL-2017Pune06-04-2017Mumbai IndiansRising Pune SupergiantRising Pune Supergiantfieldnormal0Rising Pune Supergiant07SPD SmithMaharashtra Cricket Association StadiumA Nand KishoreS RaviNaN
23IPL-2017Rajkot07-04-2017Gujarat LionsKolkata Knight RidersKolkata Knight Ridersfieldnormal0Kolkata Knight Riders010CA LynnSaurashtra Cricket Association StadiumNitin MenonCK NandanNaN
34IPL-2017Indore08-04-2017Rising Pune SupergiantKings XI PunjabKings XI Punjabfieldnormal0Kings XI Punjab06GJ MaxwellHolkar Cricket StadiumAK ChaudharyC ShamshuddinNaN
56IPL-2017Hyderabad09-04-2017Gujarat LionsSunrisers HyderabadSunrisers Hyderabadfieldnormal0Sunrisers Hyderabad09Rashid KhanRajiv Gandhi International Stadium, UppalA DeshmukhNJ LlongNaN
In [47]:
# Wins per team in matches where the toss decision was 'bat' and the match
# was decided by a run margin (win_by_runs != 0).
# NOTE(review): sides that batted first because the opponent chose to field
# are not matched by this filter — confirm that is the intended scope.
# value_counts() already returns the counts in descending order.
win = df_matches.loc[
    (df_matches.toss_decision == 'bat') & (df_matches.win_by_runs != 0),
    'winner',
].value_counts()

plt.figure(figsize=(10, 5))
# Horizontal bars: counts on x, team names on y. Keywords are required
# because seaborn 0.12+ no longer accepts x/y positionally.
sns.barplot(x=win.values, y=win.index)
plt.xlabel('Count')
plt.ylabel('Teams');
In [48]:
# Wins per team in matches where the toss decision was 'field' and the match
# was decided by a wicket margin (win_by_wickets != 0).
# NOTE(review): sides that fielded first because the opponent chose to bat
# are not matched by this filter — confirm that is the intended scope.
# value_counts() already returns the counts in descending order.
win = df_matches.loc[
    (df_matches.toss_decision == 'field') & (df_matches.win_by_wickets != 0),
    'winner',
].value_counts()

plt.figure(figsize=(10, 5))
# Horizontal bars: counts on x, team names on y. Keywords are required
# because seaborn 0.12+ no longer accepts x/y positionally.
sns.barplot(x=win.values, y=win.index)
plt.xlabel('Count')
plt.ylabel('Teams');
In [51]:
# Distribution of winning margins measured in wickets.
# Rows with win_by_wickets == 0 are matches that were not won by wickets
# (e.g. wins by a run margin), so they are excluded from the distribution.
wicket_margins = (
    df_matches.loc[df_matches.win_by_wickets > 0, 'win_by_wickets']
    .value_counts()
    .sort_index()
)

plt.figure(figsize=(10, 5))
# Margin goes on the categorical x axis and the match count is the bar height.
# Both vectors are numeric, so the orientation is stated explicitly; otherwise
# seaborn may treat the counts as the categories.
sns.barplot(x=wicket_margins.index, y=wicket_margins.values, orient='v')
plt.xlabel('Win by wickets')
plt.ylabel('Count');
Out[51]:
<AxesSubplot:xlabel='win_by_wickets'>
In [58]:
# Number of matches played in each season, ordered by season label so the
# bars read chronologically instead of by descending count.
season = df_matches['Season'].value_counts().sort_index()

plt.figure(figsize=(10, 5))
# Horizontal bars: counts on x, season labels on y. Keywords are required
# because seaborn 0.12+ no longer accepts x/y positionally.
sns.barplot(x=season.values, y=season.index)
plt.ylabel('IPL Year')
plt.xlabel('Number of Matches Played');
In [63]:
# The 10 cities that hosted the most matches, largest first.
# nlargest() does its own ordering, so no separate sort is needed beforehand.
city = df_matches['city'].value_counts().nlargest(10)

plt.figure(figsize=(10, 5))
# Horizontal bars: counts on x, city names on y. Keywords are required
# because seaborn 0.12+ no longer accepts x/y positionally.
sns.barplot(x=city.values, y=city.index)
plt.ylabel('City')
plt.xlabel('Number of Matches Played');
In [65]:
# Load deliveries.csv into a DataFrame for the delivery-level analysis.
df_deliveries = pd.read_csv(filepath_or_buffer='deliveries.csv')
In [66]:
# Preview the first five rows of the deliveries data.
df_deliveries.head(n=5)
Out[66]:
match_idinningbatting_teambowling_teamoverballbatsmannon_strikerbowleris_super_overwide_runsbye_runslegbye_runsnoball_runspenalty_runsbatsman_runsextra_runstotal_runsplayer_dismisseddismissal_kindfielder
011Sunrisers HyderabadRoyal Challengers Bangalore11DA WarnerS DhawanTS Mills000000000NaNNaNNaN
111Sunrisers HyderabadRoyal Challengers Bangalore12DA WarnerS DhawanTS Mills000000000NaNNaNNaN
211Sunrisers HyderabadRoyal Challengers Bangalore13DA WarnerS DhawanTS Mills000000404NaNNaNNaN
311Sunrisers HyderabadRoyal Challengers Bangalore14DA WarnerS DhawanTS Mills000000000NaNNaNNaN
411Sunrisers HyderabadRoyal Challengers Bangalore15DA WarnerS DhawanTS Mills020000022NaNNaNNaN
In [67]:
# (number of rows, number of columns) of the deliveries data.
(len(df_deliveries.index), len(df_deliveries.columns))
Out[67]:
(179078, 21)
In [70]:
# Keep only the deliveries that belong to match_id 1.
df_match1 = df_deliveries.loc[df_deliveries['match_id'].eq(1)]

# Preview the first five rows of that match.
df_match1.head(n=5)
Out[70]:
match_idinningbatting_teambowling_teamoverballbatsmannon_strikerbowleris_super_overwide_runsbye_runslegbye_runsnoball_runspenalty_runsbatsman_runsextra_runstotal_runsplayer_dismisseddismissal_kindfielder
011Sunrisers HyderabadRoyal Challengers Bangalore11DA WarnerS DhawanTS Mills000000000NaNNaNNaN
111Sunrisers HyderabadRoyal Challengers Bangalore12DA WarnerS DhawanTS Mills000000000NaNNaNNaN
211Sunrisers HyderabadRoyal Challengers Bangalore13DA WarnerS DhawanTS Mills000000404NaNNaNNaN
311Sunrisers HyderabadRoyal Challengers Bangalore14DA WarnerS DhawanTS Mills000000000NaNNaNNaN
411Sunrisers HyderabadRoyal Challengers Bangalore15DA WarnerS DhawanTS Mills020000022NaNNaNNaN
In [71]:
# (number of rows, number of columns) of the match_id 1 subset.
(len(df_match1.index), len(df_match1.columns))
Out[71]:
(248, 21)
In [72]:
# Teams that batted in match 1, in order of first appearance.
df_match1['batting_team'].unique()
Out[72]:
array(['Sunrisers Hyderabad', 'Royal Challengers Bangalore'], dtype=object)
In [76]:
# Deliveries of the first innings of match 1.
srh = df_match1.loc[df_match1.inning == 1]
In [74]:
# Preview the first five deliveries of the first innings.
srh.head(n=5)
Out[74]:
match_idinningbatting_teambowling_teamoverballbatsmannon_strikerbowleris_super_overwide_runsbye_runslegbye_runsnoball_runspenalty_runsbatsman_runsextra_runstotal_runsplayer_dismisseddismissal_kindfielder
011Sunrisers HyderabadRoyal Challengers Bangalore11DA WarnerS DhawanTS Mills000000000NaNNaNNaN
111Sunrisers HyderabadRoyal Challengers Bangalore12DA WarnerS DhawanTS Mills000000000NaNNaNNaN
211Sunrisers HyderabadRoyal Challengers Bangalore13DA WarnerS DhawanTS Mills000000404NaNNaNNaN
311Sunrisers HyderabadRoyal Challengers Bangalore14DA WarnerS DhawanTS Mills000000000NaNNaNNaN
411Sunrisers HyderabadRoyal Challengers Bangalore15DA WarnerS DhawanTS Mills020000022NaNNaNNaN
In [75]:
# How the first-innings wickets fell, counted by dismissal kind
# (rows without a dismissal are NaN and are not counted).
srh.dismissal_kind.value_counts()
Out[75]:
caught    3
bowled    1
Name: dismissal_kind, dtype: int64
In [81]:
# Total number of delivery rows in the first innings, i.e. deliveries bowled
# to SRH (the batting side in this subset). Every row is counted.
len(srh['ball'])
Out[81]:
125
In [89]:
# Count of 4s hit by SRH batsmen.
# batsman_runs is compared instead of total_runs: total_runs also contains
# extras, so it would count e.g. four byes as a "4" and miss a four hit off
# a no-ball (total 5).
# NOTE(review): the data has no boundary flag, so four runs scored by running
# are counted as well.
int((srh['batsman_runs'] == 4).sum())
Out[89]:
17
In [90]:
# Count of 6s hit by SRH batsmen.
# batsman_runs is compared instead of total_runs: total_runs also contains
# extras, so a six hit off a no-ball (total 7) would be missed and any
# delivery whose runs plus extras add up to 6 would be counted by mistake.
int((srh['batsman_runs'] == 6).sum())
Out[90]:
9
In [77]:
# Deliveries of the second innings of match 1.
rcb = df_match1.loc[df_match1['inning'] == 2]
In [78]:
# Preview the first five deliveries of the second innings.
rcb.head(n=5)
Out[78]:
match_idinningbatting_teambowling_teamoverballbatsmannon_strikerbowleris_super_overwide_runsbye_runslegbye_runsnoball_runspenalty_runsbatsman_runsextra_runstotal_runsplayer_dismisseddismissal_kindfielder
12512Royal Challengers BangaloreSunrisers Hyderabad11CH GayleMandeep SinghA Nehra000000101NaNNaNNaN
12612Royal Challengers BangaloreSunrisers Hyderabad12Mandeep SinghCH GayleA Nehra000000000NaNNaNNaN
12712Royal Challengers BangaloreSunrisers Hyderabad13Mandeep SinghCH GayleA Nehra000000000NaNNaNNaN
12812Royal Challengers BangaloreSunrisers Hyderabad14Mandeep SinghCH GayleA Nehra000000202NaNNaNNaN
12912Royal Challengers BangaloreSunrisers Hyderabad15Mandeep SinghCH GayleA Nehra000000404NaNNaNNaN
In [79]:
# How the second-innings wickets fell, counted by dismissal kind
# (rows without a dismissal are NaN and are not counted).
rcb.dismissal_kind.value_counts()
Out[79]:
caught     6
run out    2
bowled     2
Name: dismissal_kind, dtype: int64
In [80]:
# Total number of delivery rows in the second innings, i.e. deliveries bowled
# to RCB (the batting side in this subset). Every row is counted.
len(rcb['ball'])
Out[80]:
123
In [88]:
# Count of 4s hit by RCB batsmen.
# batsman_runs is compared instead of total_runs: total_runs also contains
# extras, so it would count e.g. four byes as a "4" and miss a four hit off
# a no-ball (total 5).
# NOTE(review): the data has no boundary flag, so four runs scored by running
# are counted as well.
int((rcb['batsman_runs'] == 4).sum())
Out[88]:
15
In [91]:
# Count of 6s hit by RCB batsmen.
# batsman_runs is compared instead of total_runs: total_runs also contains
# extras, so a six hit off a no-ball (total 7) would be missed and any
# delivery whose runs plus extras add up to 6 would be counted by mistake.
int((rcb['batsman_runs'] == 6).sum())
Out[91]:
8
In [ ]:

No comments:

Post a Comment