# Load the forest-fires data set used throughout this script.
# NOTE(review): this is an absolute, machine-specific location; point it at
# wherever forestfires.csv lives on your machine.
fire <- read.csv(file.path(
  "C:/Users/Student/Documents/UVA 2016-2017/RWorkshop",
  "Week 5 - Introduction to Modeling",
  "forestfires.csv"
))
# Open the data in the spreadsheet-style viewer for a quick visual check
View(fire)
33
44# ####################
@@ -9,7 +9,7 @@ View(fire)
99
# Get summary information for every column of the data set
summary(fire)
# No NA's
1313
1414# Check class of each attribute
1515for (i in 1 : ncol(fire )){
@@ -56,6 +56,9 @@ anova(lm1, lm2)
# Compare the previously fitted models pairwise with anova()
# (lm1, lm2 and lm3 are created earlier in the script)
anova(lm1, lm3)
anova(lm2, lm3)

# Look at the distribution of the response on the raw and on the log scale
hist(fire$area)
# NOTE(review): log(0) is -Inf and hist() discards non-finite values, so any
# rows with area == 0 are left out of this histogram. Confirm that is
# intended; log1p(fire$area) would keep them.
hist(log(fire$area))
# Coefficient table and fit statistics for lm1
summary(lm1)
5962
6063# ####################
6164#
@@ -64,21 +67,30 @@ anova(lm2, lm3)
6467# ##################
6568
# 1. Create a new subset that includes only rows with an ISI (initial spread
#    index) greater than the median
# median() returns the same value as the middle row of boxplot()$stats, but
# without drawing a plot as a side effect.
median_ISI <- median(fire$ISI, na.rm = TRUE)
# NOTE(review): `>=` also keeps rows whose ISI equals the median, while the
# task wording says "greater than". Confirm which is wanted; switching to `>`
# changes the subset and every result computed from it.
high_ISI <- subset(fire, ISI >= median_ISI)
6773
# 2. Plot the relationship between area and wind
# Scatter plot of the high-ISI subset: wind on the x-axis, area on the y-axis
plot(high_ISI$wind, high_ISI$area)
6976
# 3. Add a linear regression line to the plot
# Fit area ~ wind on the high-ISI subset and overlay the fitted line on the
# current plot
abline(lm(area ~ wind, data = high_ISI), col = "Orange")
7179
# 4. Create a linear model with wind as your predictor, and area as your
#    response. Call this model lm1_ISI
lm1_ISI <- lm(area ~ wind, data = high_ISI)
7483
# 5. Determine if there are any correlated attributes
# Pairwise correlations between the selected columns, printed by symnum() as
# a symbol-coded matrix so strong correlations stand out
symnum(cor(high_ISI[
  c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain")
]))
7686
# 6. If there are any correlated attributes add the interaction between them
#    to a new model. Call this model lm2_ISI - RH and temp
# In a formula, (RH + temp)^2 expands to RH + temp + RH:temp, i.e. both main
# effects plus their interaction.
lm2_ISI <- lm(area ~ wind + (RH + temp)^2, data = high_ISI)
7990
# 7. Compare these two models and determine which model is better at
#    predicting the size of the area burned
anova(lm1_ISI, lm2_ISI)
# p-value = 0.3004 (not significant), so the added RH/temp terms do not
# improve the fit; therefore the simpler lm1_ISI is better
8294
8395# ####################
8496#
0 commit comments