Skip to content

Commit ec84645

Browse files
author
LProcopi15
committed
Added log transform
1 parent 588b4fb commit ec84645

File tree

1 file changed

+19
-2
lines changed

1 file changed

+19
-2
lines changed

Week 5 - Introduction to Modeling/Lecture5.R

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ View(fire)
1111
summary(fire)
1212
# No NA's, all numeric data is normalized
1313

14+
1415
# Check class of each attribute
1516
for (i in 1:ncol(fire)){
1617
print(paste(colnames(fire[i]), ": ", class(fire[,i]),sep = ""))
@@ -23,6 +24,12 @@ upperwhisk <- areabox$stats[5,]
2324

2425
xtm_fire <- subset(fire, area >= upperwhisk, select = c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain', 'area'))
2526

27+
# Transforming the data
28+
hist(fire$area) # highly right-skewed (mass near zero, long right tail), hence the log transform below
29+
# Plot the same transform that is stored in area_log: bare log() maps any
# zero area to -Inf, and hist() silently drops non-finite values.
hist(log(fire$area + 0.1))
30+
31+
fire$area_log <- log(fire$area + 0.1)
32+
2633
####################
2734
#
2835
# Basic modeling - Linear Regression
@@ -38,7 +45,16 @@ abline(lm1, col = "orange")
3845
# Statistical information about this lm
3946
summary(lm1)
4047

41-
# Can add many more factors to this lm
48+
# Plot two variables
49+
plot(fire$temp, fire$area_log)
50+
# Create linear regression model between the two
51+
lm1_log <- lm(area_log~temp, data = fire)
52+
# Add regression line to plot
53+
abline(lm1_log, col = "orange")
54+
# Statistical information about this lm
55+
summary(lm1_log)
56+
57+
# Can add many more factors to any lm
4258
lm2 <- lm(area~temp+FFMC+wind, data = xtm_fire)
4359
summary(lm2)
4460

@@ -51,7 +67,8 @@ lm3 <- lm(area~temp+FFMC+wind+(DC+DMC)^2+(ISI+FFMC)^2+(temp+FFMC)^2, data = xtm_
5167
summary(lm3)
5268

5369
# compare models
54-
anova(lm1, lm2)
70+
# lm1 and lm1_log are fitted to different responses (lm1_log uses area_log),
# so anova() cannot compare them: it drops the model whose response differs
# from the first one and emits a warning. Judge the effect of the log
# transform from each fit's residual diagnostics instead
# (which = 1: residuals vs fitted, which = 2: normal Q-Q).
plot(lm1, which = 1:2)
plot(lm1_log, which = 1:2)
71+
anova(lm1, lm2)
5572
# A large p-value means there is no significant evidence that the additional factors improve the prediction of the response
5673
anova(lm1, lm3)
5774
anova(lm2, lm3)

0 commit comments

Comments
 (0)