@@ -11,6 +11,7 @@ View(fire)
1111summary(fire )
1212# No NA's, all numeric data is normalized
1313
14+
1415# Check class of each attribute
1516for (i in 1 : ncol(fire )){
1617 print(paste(colnames(fire [i ]), " : " , class(fire [,i ]),sep = " " ))
@@ -23,6 +24,12 @@ upperwhisk <- areabox$stats[5,]
2324
# Extreme fires only: keep the rows whose burned area is at or above the
# boxplot's upper whisker (upperwhisk), restricted to the columns listed in
# `select`.
xtm_fire <- subset(
  fire,
  area >= upperwhisk,
  select = c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain", "area")
)
2526
# Transforming the data
# Raw burned area: strongly right-skewed (mass near zero, long upper tail)
hist(fire$area)

# Log-transform with a small offset so that an area of 0 stays finite
# (log(0) is -Inf).
fire$area_log <- log(fire$area + 0.1)

# Plot the stored column rather than log(fire$area): hist() silently discards
# non-finite values, so the un-offset version would hide every zero-area row.
hist(fire$area_log)
32+
2633# ###################
2734#
2835# Basic modeling - Linear Regression
@@ -38,7 +45,16 @@ abline(lm1, col = "orange")
3845# Statistical information about this lm
3946summary(lm1 )
4047
41- # Can add many more factors to this lm
# Scatter plot of the log-transformed burned area against temperature
plot(fire$temp, fire$area_log)
# Simple linear regression of area_log on temp, fitted on the full data set
lm1_log <- lm(area_log ~ temp, data = fire)
# Overlay the fitted regression line on the scatter plot
abline(lm1_log, col = "orange")
# Coefficient table and fit statistics for this model
summary(lm1_log)

# Can add many more factors to any lm
# Note that lm2 is fitted on the extreme-fire subset (xtm_fire), not on `fire`.
lm2 <- lm(area ~ temp + FFMC + wind, data = xtm_fire)
summary(lm2)
4460
@@ -51,7 +67,8 @@ lm3 <- lm(area~temp+FFMC+wind+(DC+DMC)^2+(ISI+FFMC)^2+(temp+FFMC)^2, data = xtm_
5167summary(lm3 )
5268
# compare models
# anova() across several fits is only meaningful for nested models that share
# the same response and were fitted to the same rows. lm1_log models area_log,
# so anova(lm1, lm1_log) would drop it with a warning whenever lm1 has a
# different response (presumably area - confirm) and report lm1 on its own.
# Judge the effect of the log transform from the residual Q-Q plots instead.
old_par <- par(mfrow = c(1, 2))
plot(lm1, which = 2, main = "lm1")
plot(lm1_log, which = 2, main = "lm1_log")
par(old_par) # restore the previous plotting layout

# NOTE(review): the comparisons below assume lm1 was fitted on xtm_fire, like
# lm2 and lm3 - confirm; anova() stops if the fits used data sets of
# different sizes.
anova(lm1, lm2)
# A large p-value means there is no evidence that the additional terms improve
# the fit over the smaller model.
anova(lm1, lm3)
anova(lm2, lm3)
0 commit comments