Skip to content

Commit 496ed0e

Browse files
committed
Do Defenses Get Tired?
1 parent ad018d0 commit 496ed0e

File tree

1 file changed

+245
-0
lines changed

1 file changed

+245
-0
lines changed

scripts/endurance.py

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
#!/opt/local/bin/python
2+
####!/Library/Frameworks/Python.framework/Versions/Current/bin/python
3+
4+
import sys
5+
import numpy as np
6+
import scipy
7+
import scipy.stats
8+
from scipy.optimize import curve_fit
9+
import math
10+
import os
11+
import matplotlib.pyplot as plt
12+
from matplotlib.ticker import AutoMinorLocator
13+
import MySQLdb as mdb
14+
import astroML.plotting
15+
import time
16+
17+
18+
def gauss(x, *p):
    """Evaluate an unnormalised Gaussian at ``x``.

    ``p`` must hold exactly three values, in order: the peak height, the
    centre and the width (standard deviation).  The variadic signature is
    the form ``scipy.optimize.curve_fit`` calls its model with.  Unpacking
    raises ``ValueError`` if a different number of parameters is given.
    """
    amplitude, centre, width = p
    offset = x - centre
    return amplitude*np.exp(-offset**2/(2.*width**2))
21+
22+
def resetnumplays(gameids, cumplays):
    """Re-zero a running play count at the start of every game.

    Parameters
    ----------
    gameids : array-like
        Game id of each entry, same length as ``cumplays``.
    cumplays : array-like
        Running play count for each entry.

    Returns
    -------
    numpy.ndarray
        ``cumplays`` minus the smallest ``cumplays`` value seen for the same
        game id, so the lowest entry of every game becomes 0.

    The per-game minimum is accumulated by game id rather than by slicing the
    input at the first index of each id, so entries of one game do not have
    to be adjacent or in ascending id order.  Empty input returns an empty
    array instead of raising.
    """
    gameids = np.asarray(gameids).ravel()
    cumplays = np.asarray(cumplays).ravel()
    if cumplays.size == 0:
        return cumplays.copy()

    uniquegameids, uniquegameindices = np.unique(gameids, return_inverse=True)

    # Start every game at the global maximum, then lower it entry by entry.
    # ufunc.at is unbuffered, so repeated game indices are all applied.
    playspergame = np.full(len(uniquegameids), cumplays.max(), dtype=cumplays.dtype)
    np.minimum.at(playspergame, uniquegameindices, cumplays)

    return cumplays - playspergame[uniquegameindices]
28+
29+
# Main analysis: pull every play from the database, group plays into drives,
# count how many plays each defense has already faced in the game when a
# drive starts, and plot how often drives end in points as a function of that.

con = None
data = []

#Querying the database:
# Only the database calls sit inside the try block, so the handler below
# reports MySQL errors and nothing else, and the connection is closed as soon
# as the rows have been fetched.
# NOTE(review): the connection credentials are hard-coded in this call.
try:
    con = mdb.connect('localhost','nfluser','lt56','armchairanalysis')
    cur = con.cursor()

    #Get the table:
    cur.execute('select core.*,games.h,games.ptsv,games.ptsh from core join games on core.gid=games.gid where core.dseq > 0')
    data = np.array(cur.fetchall())
    #0) Game id, 1) play id, 2) offensive team, 3) defensive team, 4) play type, 5) drive sequence, 6) play length, 7) quarter, 8) minutes left, 9) seconds left, 10) offensive points, 11) defensive points, 12) offense timeouts left, 13) defense timeouts left, 14) down, 15) yards to gain, 16) yards from own goal, 17) zone, 18) offensive line ID, 19) home team, 20) visitor points, 21) home points.
    #Get scoring plays:
    cur.execute('select scoring.* from scoring join core on core.pid = scoring.pid where core.dseq > 0')
    scoring = np.array(cur.fetchall())
    #0) play id, 1) number of points (can be negative for return/recovery TDs)
    cur.execute("select penalties.desc,core.pid from penalties inner join core on core.pid = penalties.pid inner join games on core.gid = games.gid where core.dseq > 0 group by penalties.pid")
    penalties = np.array(cur.fetchall())
    #0) Description, 1) play id

except mdb.Error as e:
    print("Error %d: %s" % (e.args[0],e.args[1]))
    sys.exit(1)

finally:
    if con is not None:
        con.close()

# np.isin replaces np.in1d in newer NumPy; fall back for old installations.
_isin = np.isin if hasattr(np, 'isin') else np.in1d

#Connect the penalties and scoring to the data:
# Values are looked up by play id.  None of the queries has an ORDER BY, so
# assigning the scoring/penalty rows through a boolean mask would silently
# depend on all three result sets coming back in the same play order.
playids = data[:,1].astype(int)
playidlist = playids.tolist()

scoringpids = scoring[:,0].astype(int)
connectscorebool = _isin(playids,scoringpids)
pointsbypid = dict(zip(scoringpids.tolist(),scoring[:,1].astype(int).tolist()))
connectpoints = np.array([pointsbypid.get(pid,0) for pid in playidlist],dtype=float)
#remove points scored by defense (return/recovery/safety)
connectpoints[connectpoints < 3] = 0
cumconnectpoints = np.cumsum(connectpoints)

penaltypids = penalties[:,1].astype(int)
connectpenaltiesbool = _isin(playids,penaltypids)
penaltybypid = dict(zip(penaltypids.tolist(),penalties[:,0].tolist()))
connectpenalties = np.array([penaltybypid.get(pid,'No Penalty') for pid in playidlist],dtype=object)

#Compute drive IDs for every play:
offense = data[:,2]
defense = data[:,3]
playtype = data[:,4]
dseq = data[:,5].astype(int)
firstplayofdrive = np.zeros(len(data[:,0]),dtype=bool)
#Sometimes penalties are duplicated - so there's a play and then a NOPL. Need a bool that is true for a 'NOPL' play when one of the adjacent plays has dseq = 1 and the same offense and defense; such a play is not counted as the start of a drive.
dup_penalty_bool = np.zeros(len(dseq),dtype=bool)
prevstartsdrive = (dseq[:-2] == 1) & (offense[:-2] == offense[1:-1]) & (defense[:-2] == defense[1:-1])
nextstartsdrive = (dseq[2:] == 1) & (offense[2:] == offense[1:-1]) & (defense[2:] == defense[1:-1])
dup_penalty_bool[1:-1] = (playtype[1:-1] == 'NOPL') & (prevstartsdrive | nextstartsdrive)
firstplayofdrive[(dseq == 1) & ~dup_penalty_bool] = True
driveid = np.cumsum(firstplayofdrive)
#Minutes left in the game, from quarter, minutes left and seconds left:
timeleft = 60. - (data[:,7].astype(float)-1.)*15. - (15. - data[:,8].astype(float) - data[:,9].astype(float)/60.)

#Largest number of points on any play of a drive, broadcast back to every play of that drive:
uniquedriveids,uniquedrivefirstindices,uniquedriveindices = np.unique(driveid,return_index=True,return_inverse=True)
start = time.time()
# driveid never decreases, so each drive is one contiguous run that starts at
# its first index; reduceat takes the maximum over each run.
pointsperdrive = np.maximum.reduceat(connectpoints,uniquedrivefirstindices).astype(int)
pointsfromdrive = pointsperdrive[uniquedriveindices]
print("Took {0:.2f} s".format(time.time()-start))

#Cut arrays down:
# Keep rushes, passes and no-plays, drop plays carrying one of three penalty
# descriptions, and keep rows whose columns 20 and 21 differ by at most 8.
wantedplaytype = (playtype == "RUSH") | (playtype == "PASS") | (playtype == "NOPL")
wantedpenalty = (connectpenalties != "False Start") & (connectpenalties != "Encroachment") & (connectpenalties != "Delay of Game")
closescore = np.abs(data[:,20].astype(int)-data[:,21].astype(int)) <= 8
goodplays = wantedplaytype & wantedpenalty & closescore
gooddata = data[goodplays,:]
goodpoints = connectpoints[goodplays]
goodpointsfromdrive = pointsfromdrive[goodplays]
gooddriveid = driveid[goodplays]
goodcumpoints = cumconnectpoints[goodplays]
goodtimeleft = timeleft[goodplays]

#Determine the first and last index for each drive:
firstplayindices = np.unique(gooddriveid,return_index=True)[1]
lastplayindices = len(gooddriveid) - 1 - np.unique(gooddriveid[::-1],return_index = True)[1]
firstplays = np.zeros(len(gooddriveid),dtype=bool)
firstplays[firstplayindices] = True
lastplays = np.zeros(len(gooddriveid),dtype=bool)
lastplays[lastplayindices] = True
lastplaytimeleft = goodtimeleft[lastplayindices]
firstplaytimeleft = goodtimeleft[firstplayindices]

#Determine the number of plays in each drive and game ids:
numplaysperdrive = lastplayindices-firstplayindices+1
drivegameids = gooddata[firstplayindices,0].astype(int)

#Determine the resulting number of points for each drive:
driveresults = goodpointsfromdrive[firstplayindices]

#Get which drives are by the home team:
homedrives = (gooddata[firstplayindices,2] == gooddata[firstplayindices,19])

#Cut out drives that start within 2 minutes of the end of the half or the game:
# NOTE(review): this comment used to say drives that *end* in that window, and
# lastplaytimeleft is computed above but never used.  The test has always been
# on the first play of the drive; confirm which one is intended.
baddrivesbool = (firstplaytimeleft < 2.) | ((firstplaytimeleft-30 < 2.) & (firstplaytimeleft >= 30))
gooddrivesbool = np.invert(baddrivesbool)
# gooddata has one row per play while gooddrivesbool has one entry per drive,
# so gooddata is deliberately not indexed with it (current NumPy raises on the
# length mismatch, and nothing below reads gooddata).
numplaysperdrive = numplaysperdrive[gooddrivesbool]
driveresults = driveresults[gooddrivesbool]
drivegameids = drivegameids[gooddrivesbool]
homedrives = homedrives[gooddrivesbool]

print("{0} {1} {2} {3}".format(homedrives.shape,drivegameids.shape,np.sum(gooddrivesbool),len(gooddrivesbool)))

#Sort each drive into home and away arrays for number of plays, game id, and resulting points:
awaydrives = ~homedrives
homeresults = driveresults[homedrives]
homegameids = drivegameids[homedrives]
awayresults = driveresults[awaydrives]
awaygameids = drivegameids[awaydrives]

#Determine the cumulative number of plays at the start of each drive per game:
play_threshold = 60
# Shift the play counts down by one drive so that the running sum at a drive
# is the number of plays run before it started.  np.roll returns a new array,
# which avoids assigning an array onto an overlapping slice of itself.
homenumplays = np.roll(numplaysperdrive[homedrives],1)
homenumplays[:1] = 0
awaynumplays = np.roll(numplaysperdrive[awaydrives],1)
awaynumplays[:1] = 0
cumhomeplays = np.cumsum(homenumplays)
cumawayplays = np.cumsum(awaynumplays)
gamehomeplays = resetnumplays(homegameids,cumhomeplays)
gameawayplays = resetnumplays(awaygameids,cumawayplays)
#Only keep games in which the count passes the threshold at some point:
goodhomeids = np.unique(homegameids[(gamehomeplays > play_threshold)])
goodawayids = np.unique(awaygameids[(gameawayplays > play_threshold)])
goodhomegamebool = _isin(homegameids,goodhomeids)
goodawaygamebool = _isin(awaygameids,goodawayids)
gametotplays = np.append(gamehomeplays[goodhomegamebool],gameawayplays[goodawaygamebool])
totresults = np.append(homeresults[goodhomegamebool],awayresults[goodawaygamebool])

#Make plots:
# Every bar() call passes align='edge' because the x values are left bin
# edges; matplotlib 2.0 changed the default alignment to 'center'.
# The unit-width bin edges run to max+0.5 inclusive (arange stop of max+1.5);
# stopping the arange at max+0.5 would leave the largest value outside the
# last bin.

#Distribution of drive lengths:
lengthax = plt.figure().add_subplot(111)
uniformbins = np.arange(numplaysperdrive.min()-0.5,numplaysperdrive.max()+1.5)
lengthn,lengthbins = np.histogram(numplaysperdrive,bins = uniformbins)
lengthax.bar(lengthbins[:-1],lengthn,width=(lengthbins[1:]-lengthbins[:-1]),align='edge',color='gray',edgecolor='black',alpha=0.5,log=True)
lengthax.set_xlabel('Plays Run on the Drive')
lengthax.set_ylabel('Number of Drives')
lengthax.xaxis.set_minor_locator(AutoMinorLocator())
lengthax.figure.savefig('endurance_drivelength.png',dpi=300)

#Distribution of drives per team per game:
dpgax = plt.figure().add_subplot(111)
# bincount is indexed by game id, so ids with no drives show up as zeros;
# the bins start above zero to leave those out.
homedrivespergame = np.bincount(homegameids)
awaydrivespergame = np.bincount(awaygameids)
homedpgbins = np.arange(homedrivespergame[homedrivespergame > 0].min()-0.5,homedrivespergame.max()+1.5)
homedpgn,homedpgbins = np.histogram(homedrivespergame,bins=homedpgbins)

# p0 is the initial guess for the fitting coefficients (A, mu and sigma above)
p0 = [300., 10., 2.]

homecoeff, homevar_matrix = curve_fit(gauss, (homedpgbins[:-1]+homedpgbins[1:])/2., homedpgn, p0=p0)
print("Home:  {0}".format(scipy.stats.describe(homedrivespergame[homedrivespergame > 0])))
print(homecoeff)
homegaussx = np.linspace(homedpgbins[0],homedpgbins[-1],100)
homegaussy = gauss(homegaussx,*homecoeff)
dpgax.plot(homegaussx,homegaussy,ls='-',color='blue',lw=4)
dpgax.bar(homedpgbins[:-1],homedpgn,width=(homedpgbins[1:]-homedpgbins[:-1]),align='edge',color='blue',edgecolor='blue',alpha=0.5,log=True,label='Home')
awaydpgbins = np.arange(awaydrivespergame[awaydrivespergame > 0].min()-0.5,awaydrivespergame.max()+1.5)
awaydpgn,awaydpgbins = np.histogram(awaydrivespergame,bins=awaydpgbins)
awaycoeff, awayvar_matrix = curve_fit(gauss, (awaydpgbins[:-1]+awaydpgbins[1:])/2., awaydpgn, p0=p0)
print("Away:  {0}".format(scipy.stats.describe(awaydrivespergame[awaydrivespergame > 0])))
print(awaycoeff)
awaygaussx = np.linspace(awaydpgbins[0],awaydpgbins[-1],100)
awaygaussy = gauss(awaygaussx,*awaycoeff)
dpgax.plot(awaygaussx,awaygaussy,ls='-',color='red',lw=4)
dpgax.bar(awaydpgbins[:-1],awaydpgn,width=(awaydpgbins[1:]-awaydpgbins[:-1]),align='edge',color='red',edgecolor='red',alpha=0.5,log=True,label='Away')
dpgax.set_xlabel('Drives per Game')
dpgax.set_ylabel('Number of Games')
dpgax.set_ylim(1,dpgax.get_ylim()[1])
dpgax.xaxis.set_minor_locator(AutoMinorLocator())
dpgax.legend(loc='upper right',prop={'size':10})
dpgax.figure.savefig('endurance_dpg.png',dpi=300)

#Fraction of drives that end in points:
#Making even bins:
# Bin edges are every numperbin-th sorted value, so each bin holds roughly
# the same number of drives.  numperbin is kept at 1 or more because a slice
# step of 0 is an error.
sortedplays = np.sort(gametotplays)
numbins = 15
numperbin = max(1,int(np.floor(len(gametotplays)/float(numbins))))
totbins = sortedplays[::numperbin]
totbins[-1] = sortedplays[-1]
totbins = np.unique(totbins)
#Making histograms:
totn,totbins = np.histogram(gametotplays,bins=totbins)
tdn,tdbins = np.histogram(gametotplays[(totresults > 5) & (totresults < 9)],bins=totbins)
fgn,fgbins = np.histogram(gametotplays[totresults == 3],bins=totbins)
scoren,scorebins = np.histogram(gametotplays[totresults > 2],bins=totbins)

#Fractions:
totnfloat = totn.astype(float)
tdfrac = tdn.astype(float)/totnfloat
fgfrac = fgn.astype(float)/totnfloat
scorefrac = scoren.astype(float)/totnfloat
#Poisson errors:
# sqrt(N) on the counts, propagated through the ratio count/total.
toterr = np.sqrt(totn)
tderr = np.sqrt(tdn+(tdn*toterr/totn)**2)/totn
fgerr = np.sqrt(fgn+(fgn*toterr/totn)**2)/totn
scoreerr = np.sqrt(scoren+(scoren*toterr/totn)**2)/totn
binwidths = totbins[1:]-totbins[:-1]
#Totals:
# The error bars here are the errors on the totals (toterr); tderr is the
# error on the touchdown fraction and belongs to the TD plot only.
totax = plt.figure().add_subplot(111)
totax.bar(totbins[:-1],totn,width=binwidths,align='edge',color='gray',edgecolor='black',alpha=0.5,yerr=toterr,ecolor='black')
totax.set_xlabel('Plays Run on the Defense')
totax.set_ylabel('Number of Drives')
totax.figure.savefig('endurance_totals.png',dpi=300)
#TDs:
tdax = plt.figure().add_subplot(111)
tdax.bar(totbins[:-1],tdfrac,width=binwidths,align='edge',color='gray',edgecolor='black',alpha=0.5,yerr=tderr,ecolor='black')
tdax.set_xlabel('Plays Run on the Defense')
tdax.set_ylabel('Fraction of Drives Ending With TDs')
tdax.figure.savefig('endurance_tds.png',dpi=300)
#FGs:
fgax = plt.figure().add_subplot(111)
fgax.bar(totbins[:-1],fgfrac,width=binwidths,align='edge',color='gray',edgecolor='black',alpha=0.5,yerr=fgerr,ecolor='black')
fgax.set_xlabel('Plays Run on the Defense')
fgax.set_ylabel('Fraction of Drives Ending With FGs')
fgax.figure.savefig('endurance_fgs.png',dpi=300)
#Scores:
scoreax = plt.figure().add_subplot(111)
scoreax.bar(totbins[:-1],scorefrac,width=binwidths,align='edge',color='gray',edgecolor='black',alpha=0.5,yerr=scoreerr,ecolor='black')
scoreax.set_xlabel('Plays Run on the Defense')
scoreax.set_ylabel('Fraction of Drives Ending With Scores')
scoreax.figure.savefig('endurance_scores.png',dpi=300)

0 commit comments

Comments
 (0)