Skip to content

Commit c0adab1

Browse files
author
LProcopi15
committed
Started lecture 6 material
1 parent 88533ff commit c0adab1

File tree

2 files changed

+72
-1
lines changed

2 files changed

+72
-1
lines changed

Week 5 - Introduction to Modeling/Lecture5.R

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ for (i in 1:ncol(fire)){
2020
summary(fire$area)
2121
areabox <- boxplot(fire$area)
2222
upperwhisk <- areabox$stats[5,]
23-
2423
xtm_fire <- subset(fire, area >= upperwhisk, select = c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain', 'area'))
2524

2625
####################
@@ -83,6 +82,7 @@ lm1_ISI <- lm(area~wind, data = high_ISI)
8382

8483
# 5. Determine if there are any correlated attributes
8584
symnum(cor(high_ISI[c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain")]))
85+
?symnum
8686

8787
# 6. If there are any correlated attributes add the interaction between them to a new model
8888
# Call this model lm2_ISI - RH and temp
@@ -100,13 +100,18 @@ anova(lm1_ISI, lm2_ISI)
100100

101101
#Suppose we want to develop a model to predict the area and also using month and day attributes
102102
class(fire$month)
103+
levels(fire$month)
103104
contrasts(fire$month)
104105
class(fire$day)
105106
contrasts(fire$day)
106107

108+
lm(area~temp+month+day, data = fire)
109+
107110
#Get the right dataset
108111
xtm_fire_withCategorical <- subset(fire, area >= upperwhisk, select = c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain', 'area','month','day'))
109112
lm4 <- lm(area~temp+FFMC+wind+(DC+DMC)^2+(ISI+FFMC)^2+(temp+FFMC)^2+month+day, data = xtm_fire_withCategorical)
113+
summary(lm4)
114+
110115

111116
#Practice problem: is lm4 better than lm3? How do you know?
112117

@@ -149,6 +154,7 @@ pmse.lm.train
149154

150155
#Practice question: how does pmse.lm.train compared to the MSE on the training set? Which value better represents the robustness of the model?
151156
#Hint: use mean((lm.train$residuals)^2) to compute the MSE on training set
157+
mean((lm.train$residuals)^2)
152158

153159
#####################
154160
#
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
fire <- read.csv("C:/Users/Student/Documents/UVA 2016-2017/RWorkshop/Week 5 - Introduction to Modeling/forestfires.csv")
2+
lm1 <- lm(area~temp, data = xtm_fire)
3+
lm3 <- lm(area~temp+FFMC+wind+(DC+DMC)^2+(ISI+FFMC)^2+(temp+FFMC)^2, data = xtm_fire)
4+
5+
######################
6+
#
7+
# Diagnostic plots
8+
#
9+
#####################
10+
# 1. Residual vs. Fitted: constant variance, no pattern, even scatter about the mean
11+
# 2. Normal Q-Q: dots are on the line
12+
# 3. Scale-Location: constant variance, no pattern, even scatter about the mean
13+
# 4. Residual vs. Leverage: Points outside of Cook's distance are influential points
14+
par(mfrow=c(2,2))
15+
plot(lm1, labels.id = NULL)
16+
par(mfrow=c(1,1))
17+
# Based on Cook's distance we see that 32 is an influential point
18+
xtm_fire[32,]
19+
hist(xtm_fire$area)
20+
summary(xtm_fire$area)
21+
22+
par(mfrow=c(2,2))
23+
plot(lm3, labels.id = NULL)
24+
par(mfrow=c(1,1))
25+
26+
######################
27+
#
28+
# Model Comparison
29+
#
30+
#####################
31+
32+
# AIC - smaller is better
33+
AIC(lm1) # 834.4967
34+
AIC(lm3) # 848.3075
35+
36+
# BIC - smaller is better
37+
AIC(lm1,k=log(nrow(xtm_fire))) # 840.9733
38+
AIC(lm3,k=log(nrow(xtm_fire))) # 872.0552
39+
40+
# adj-R2 - closer to 1 is better
41+
summary(lm1)$adj.r.squared
42+
summary(lm3)$adj.r.squared
43+
44+
45+
######################
46+
#
47+
# Transformations and Modeling
48+
#
49+
#####################
50+
51+
# Stepwise
52+
lm.cas2.step<-step(lm.cas2)
53+
54+
# Box-cox
55+
library(MASS)
56+
boxcox(lm1)
57+
# Value near 0 thus we need to perform a log transform
58+
59+
#The best lambda
60+
L<-boxcox(accdmg.lm1, plotit = F)$x[which.max(boxcox(accdmg.lm1, plotit = F)$y)]
61+
L
62+
63+
# Box-cox transform
64+
xdmgnd.lm1.boxcox<-lm(ACCDMG^L ~TEMP + TRNSPD + TONS + CARS + HEADEND1,data=xdmgnd)
65+
summary(xdmgnd.lm1.boxcox)

0 commit comments

Comments
 (0)