Skip to content

Commit 6738cb1

Browse files
author
LProcopi15
committed
Restructed and updated
1 parent 6745985 commit 6738cb1

File tree

1 file changed

+96
-48
lines changed

1 file changed

+96
-48
lines changed

Week 8 - Time Series Analysis/Lecture8.R

Lines changed: 96 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ abline(bike.trend,col='red') # Overall upward trend
3232

3333
# 2. Modeling for seasonality
3434

35-
# Get the periodogram from bike.ts
35+
# Get the periodogram from bike.ts
36+
# Periodograms 'examining frequency-domain models of an equi-spaced time series.'
3637
pg.bike<-spec.pgram(bike.ts,spans=9,demean=T,log='no')
3738

3839
# Find the peak, max.omega.bike
@@ -42,33 +43,7 @@ max.omega.bike<-pg.bike$freq[which(pg.bike$spec==max(pg.bike$spec))]
4243
1/max.omega.bike # 23.97; there is a repeating cycle every ~24 days
4344

4445
# Conclusion: there is both seasonality and trend in this data set - need to address both of these in the model
45-
# Get the residuals for trend
46-
e.ts.bike <- ts(bike.trend$residuals)
47-
48-
49-
# Plot autocorrelation (acf) and partial autocorrelation (pacf)
50-
par(mfrow=c(1,2))
51-
acf(e.ts.bike, main="ACF of Residuals from e.ts.bike")
52-
pacf(e.ts.bike,main="PACF of Residuals from e.ts.bike")
53-
par(mfrow=c(1,1))
54-
55-
# Use the ACF and PACF to choose a model type
56-
# See table for details
57-
# ACF is sidusodial and PACF cannot see any trends
58-
# Choose AR or ARMA
59-
60-
# Choose r values for AR model
61-
# ar(3) p=3
62-
bike.ar3 <- arima(e.ts.bike, order=c(4,0,0))
63-
summary(bike.ar3)
64-
AIC(bike.ar3)
65-
66-
# Use the auto.arima from 'forecast' library- Automatically find p, q, & d terms
67-
library('forecast')
68-
bike.auto <- auto.arima(e.ts.bike, trace=TRUE)
69-
# Using autoregressive function the best model is ARIMA(2,1,1)
70-
AIC(bike.auto)
71-
46+
# Create a seasonality model
7247
# Transform weekly info
7348
Day <- rep(NA, length(bike.ts)-7)
7449
Day[which((time.bike %% 7) == 1)] <- "Sat"
@@ -88,50 +63,123 @@ contrasts(Day)
8863
bike.season<- lm(bike.ts[time.bike]~Day)
8964
summary(bike.season)
9065

91-
# Get the residuals from the bike.season model above and store in e.ts.bike2:
92-
e.ts.bike2 <- ts(bike.season$residuals)
66+
# Combine the trend model with the seasonal model (because both trend and seasonilty were significant)
67+
bike.trendseason <-lm(bike.ts[time.bike]~time.bike+Day)
68+
summary(bike.trendseason)
69+
70+
# Get the residuals for this model
71+
# An assumption for linear modeling is that the residuals are not correlated
72+
# However, often in time series data that is not true thus we need to check for correlation in the residuals
73+
e.ts.bike <- ts(bike.trendseason$residuals)
9374

94-
# Plot acf and pacf side by side for easier examination
75+
# Plot autocorrelation (acf) and partial autocorrelation (pacf)
76+
# ACF and PACF measure the correlation of residuals and give incite into which models to use (see diagram)
9577
par(mfrow=c(1,2))
96-
acf(e.ts.bike2, main="ACF of Residuals from bike.season")
97-
pacf(e.ts.bike2,main="PACF of Residuals from bike.season")
78+
acf(e.ts.bike, main="ACF of Residuals from e.ts.bike")
79+
pacf(e.ts.bike,main="PACF of Residuals from e.ts.bike")
9880
par(mfrow=c(1,1))
9981

100-
# Sinusodial decay on ACF and not on PACF - AR or ARMA
101-
bike.arma13 <- arima(e.ts.bike2, order=c(1,0,3))
82+
# ACF is sidusodial decay
83+
# PACF cannot see any trends
84+
# Choose AR or ARMA
85+
86+
# Autoregressive: AR(p) models: "The AR model establishes that a realization at time t is a linear combination of the p previous realization plus some noise term."
87+
# Moving Average: MA(q): "can define correlated noise structure in our data and goes beyond the traditional assumption where errors are iid."
88+
# ARMA: combines autoregressive and moving average terms
89+
# ARIMA: combines autoregressive and moving average terms when the data in non-stationary - i.e. mean, variance, autocorrelation, etc. are not constant overtime
90+
91+
# Choose p values for AR model
92+
# ar(3) p=3
93+
bike.ar3 <- arima(e.ts.bike, order=c(3,0,0))
94+
summary(bike.ar3)
95+
96+
# Choose p and r values for ARMA model
97+
bike.arma13 <- arima(e.ts.bike, order = c(1,0,3))
98+
summary(bike.arma13)
10299

103100
# Use the auto.arima from 'forecast' library- Automatically find p, q, & d terms
104-
bike2.auto <- auto.arima(e.ts.bike2, trace=TRUE)
105-
# Best model is ARIMA(2,1,1)
101+
library('forecast')
102+
bike.auto <- auto.arima(e.ts.bike, trace=TRUE)
103+
# Using autoregressive function the best model is ARIMA(2,1,1)
104+
105+
# Compare time series models using AIC and BIC
106+
AIC(bike.trendseason)
107+
AIC(bike.auto) # 206349
108+
AIC(bike.ar3) # 204358.6
109+
AIC(bike.arma13) # 204264.8
110+
111+
AIC(bike.trendseason, k = log(length(bike.ts))) # 228669.7
112+
AIC(bike.auto, k = log(length(e.ts.bike))) # 206380
113+
AIC(bike.ar3, k = log(length(e.ts.bike))) # 204397.4
114+
AIC(bike.arma13, k = log(length(e.ts.bike))) # 204311.4
115+
116+
# Best model is the bike.arma13 [ARMA(1,3)]
117+
118+
###################
119+
#
120+
# Prediction using ts
121+
#
122+
###################
123+
124+
# Create a dataframe which is a 1 week period (recall we removed a week previously)
125+
next.week.time<-c((length(bike.ts)-6):(length(bike.ts)))
126+
127+
# the test data frame
128+
next.week<-data.frame(time.bike = next.week.time, count = bike.ts[next.week.time])
129+
130+
# Prediction for the next week by bike.arma13:
131+
E_Y.pred<-predict(bike.trend,newdata=next.week)
132+
e_t.pred<-forecast(bike.arma13,h=7)
133+
next.week.prediction<-E_Y.pred+e_t.pred$mean
134+
135+
# MSE:
136+
# can compute mse based on predicted error minus counts
137+
mean((next.week.prediction-next.week$count)^2)
138+
# MSE is 22592
139+
140+
####################
141+
#
142+
# Assumption checking
143+
#
144+
####################
145+
146+
# Autocorrelation has been accounted for - all p-values for L-B are below significance line
147+
tsdiag(bike.arma13,gof.lag=20)
148+
149+
# Recheck ACF and PACF for residuals
150+
par(mfrow=c(1,2))
151+
acf(bike.arma13$residuals, main="ACF of Residuals from bike.arma13")
152+
pacf(bike.arma13$residuals,main="PACF of Residuals from bike.arma13")
153+
par(mfrow=c(1,1))
154+
# 1. Looking for no pattern - i.e. not exponential or sinusidal decay
155+
# 2. Looking for insignficant lags - i.e. below the blue line
106156

107157
###################
108158
#
109159
# Practice problems
110160
#
111161
###################
112162

113-
# 1. Create a time series dataset base on 'causual'
163+
# 1. Create a time series dataset bases on 'causual'
114164

115165
# 2. Create a model for the trend
116166

117167
# 3. Plot the ts data and the trend line
118168

119169
# 4. Determine if trend is significant
120170

121-
# 5. Obtain the residuals from the trend model
122-
123-
# 6. Plot the ACF and the PACF for this model and determine if AR, ARMA, or ARIMa should be used
171+
# 5. Create a periodgram for this data set
124172

125-
# 7. Use auto.arima to determine the optimal model; also create a few other models with varying values for p, q and r
173+
# 6. Determine if seasonality is significant
126174

127-
# 8. Compare models using AIC, BIC and adjs-R2
175+
# 7. Create a model based on which terms are significant - seasonality, trend or both
128176

129-
# 9. Create a periodgram for this data
177+
# 8. Obtain the residuals for this model and plot the ACF and PACF
130178

131-
# 10. Determine if seasonality is significant
179+
# 9. Based on the ACF and PACF determine which model should be used (AR, MA, ARMA or ARIMA) and create them
132180

133-
# 11. Obtain the residuals, plot the ACF and PACF and interpret them
181+
# 10. Create a model using the auto-arima function
134182

135-
# 12. Create appropriate models for seasonality
183+
# 11. Compare the models using AIC and BIC - conclude which is best
136184

137-
# 13. Compare models using AIC, BIC and adjst-R2
185+
# 12. Predict the next weeks casual user count

0 commit comments

Comments
 (0)