# Load the forest fires data set into a data frame.
# NOTE(review): the path is machine-specific; point it at wherever
# forestfires.csv lives on your system.
fire <- read.csv("C:/Users/Student/Documents/forestfires.csv")

# View() opens a spreadsheet-style data viewer, which is only useful in an
# interactive session; skip it when the script runs in batch mode.
if (interactive()) {
  View(fire)
}
3+
4+ # ####################
5+ #
6+ # Explore Data
7+ #
8+ # ###################
9+
# Get summary information
summary(fire)
# Author's observation from the summary output: no NA's, all numeric data is
# normalized

# Check class of each attribute.
# class() can return more than one value for a column, so each result is
# collapsed into a single string to keep exactly one output line per column.
# Iterating over the column names (instead of 1:ncol) is also safe when the
# data frame has no columns.
col_classes <- vapply(
  fire,
  function(column) paste(class(column), collapse = "/"),
  character(1)
)
for (col_name in names(col_classes)) {
  print(paste0(col_name, ": ", col_classes[[col_name]]))
}
18+
# Getting a subset of the data: the fires with an extreme burned area
summary(fire$area)

# boxplot() draws the plot and also returns its statistics. For a single
# vector, `stats` is a 5 x 1 matrix whose fifth row is the extreme of the
# upper whisker.
areabox <- boxplot(fire$area)
upperwhisk <- areabox$stats[5, 1]

# Keep the rows whose area is at or above the upper whisker, restricted to
# the columns listed below. Names must match the CSV header exactly.
model_cols <- c(
  "FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain", "area"
)
xtm_fire <- subset(fire, area >= upperwhisk, select = model_cols)
25+
26+ # ###################
27+ #
28+ # Basic modeling - Linear Regression
29+ #
30+ # ###################
31+
# Plot two variables: temperature on the x axis, burned area on the y axis
plot(xtm_fire$temp, xtm_fire$area)
# Create linear regression model between the two (area explained by temp)
lm1 <- lm(area ~ temp, data = xtm_fire)
# Add regression line to the plot drawn above.
# The colour name must match exactly; stray whitespace makes it invalid.
abline(lm1, col = "orange")
# Statistical information about this lm
summary(lm1)

# Can add many more factors to this lm
lm2 <- lm(area ~ temp + FFMC + wind, data = xtm_fire)
summary(lm2)
44+
# Could interactions between variables help us?
# Inspect pairwise scatter plots and a symbolic view of the correlation
# matrix for the columns below (names must match the data frame exactly).
corr_cols <- c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "area")
pairs(xtm_fire[corr_cols])
symnum(cor(xtm_fire[corr_cols]))
# Highly correlated: DC and DMC, ISI and FFMC, temp and FFMC
# Add these correlated attributes to a model.
# In a model formula, (a + b)^2 expands to a + b + a:b, so each squared
# group contributes both main effects plus their pairwise interaction.
lm3 <- lm(
  area ~ temp + FFMC + wind + (DC + DMC)^2 + (ISI + FFMC)^2 + (temp + FFMC)^2,
  data = xtm_fire
)
summary(lm3)

# compare models
# The models are nested (lm1 within lm2 within lm3), so each anova() call
# tests whether the extra terms of the larger model improve the fit.
anova(lm1, lm2)
anova(lm1, lm3)
anova(lm2, lm3)
57+
58+ # ####################
59+ #
60+ # Practice Problems
61+ #
62+ # ###################
63+
# 1. Create a new subset that includes only the rows with an ISI (initial spread index) greater than the median
65+
# 2. Create a linear model with temp and wind as your predictors, and area as your response
67+ # Call this model lm1_ISI
68+
69+ # 3. Determine if there are any correlated attributes
70+
71+ # 4. If there are any correlated attributes add the interaction between them to a new model
72+ # Call this model lm2_ISI
73+
# 5. Compare these two models and determine which model is better at predicting the size of the area burned
0 commit comments