Skip to content

Commit 007cb6f

Browse files
author
LProcopi15
authored
Code for Lecture 5
1 parent 023e2dd commit 007cb6f

File tree

1 file changed

+74
-0
lines changed

1 file changed

+74
-0
lines changed

Lecture5.R

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Load the forest fires data set from the local course machine
fire <- read.csv("C:/Users/Student/Documents/forestfires.csv")
# View() opens a GUI data viewer, so only call it in an interactive session
if (interactive()) {
  View(fire)
}
3+
4+
#####################
5+
#
6+
# Explore Data
7+
#
8+
####################
9+
10+
# Get summary information
summary(fire)
# No NA's, all numeric data is normalized

# Check class of each attribute
# seq_len() yields an empty sequence for a zero-column data frame, whereas
# 1:ncol(fire) would iterate over c(1, 0)
for (i in seq_len(ncol(fire))) {
  # A column can carry several classes; collapse them so each attribute
  # prints as exactly one "name: class" line
  print(paste0(colnames(fire)[i], ": ", paste(class(fire[[i]]), collapse = "/")))
}
18+
19+
# Keep only the fires whose burned area reaches the boxplot's upper whisker
summary(fire[["area"]])
# boxplot() draws the plot and also returns the statistics it was drawn from
areabox <- boxplot(fire$area)
# Row 5 of the stats matrix holds the extreme of the upper whisker
upperwhisk <- areabox$stats[5, ]

# which() drops rows where the comparison is NA, matching subset()
xtm_fire <- fire[
  which(fire[["area"]] >= upperwhisk),
  c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain", "area"),
  drop = FALSE
]
25+
26+
####################
27+
#
28+
# Basic modeling - Linear Regression
29+
#
30+
####################
31+
32+
# Scatterplot of burned area against temperature
plot(x = xtm_fire$temp, y = xtm_fire$area)
# Fit a simple linear regression of area on temp
lm1 <- lm(formula = area ~ temp, data = xtm_fire)
# Overlay the fitted regression line on the scatterplot
abline(reg = lm1, col = "orange")
# Coefficients, residuals and fit statistics for this model
summary(object = lm1)
40+
41+
# The same model extended with more predictors
lm2 <- lm(formula = area ~ temp + FFMC + wind, data = xtm_fire)
summary(object = lm2)
44+
45+
# Could interactions between variables help us?
# Scatterplot matrix, then a symbolic view of the correlation matrix
pairs(
  xtm_fire[, c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "area")]
)
symnum(
  x = cor(
    x = xtm_fire[, c("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "area")]
  )
)
# Highly correlated: DC and DMC, ISI and FFMC, temp and FFMC
# Add these correlated attributes to a model
# (a + b)^2 expands to a + b + a:b, i.e. both main effects plus the interaction
lm3 <- lm(
  formula = area ~ temp + FFMC + wind + (DC + DMC)^2 + (ISI + FFMC)^2 +
    (temp + FFMC)^2,
  data = xtm_fire
)
summary(object = lm3)
52+
53+
# Compare the nested models pairwise with analysis-of-variance tables
# (lm2 adds terms to lm1, and lm3 adds terms to lm2)
54+
anova(lm1, lm2)
55+
anova(lm1, lm3)
56+
anova(lm2, lm3)
57+
58+
#####################
59+
#
60+
# Practice Problems
61+
#
62+
####################
63+
64+
# 1. Create a new subset that includes only the rows with an ISI (initial spread index) greater than the median
65+
66+
# 2. Create a linear model with temp and wind as your predictors, and area as your response
67+
# Call this model lm1_ISI
68+
69+
# 3. Determine if there are any correlated attributes
70+
71+
# 4. If there are any correlated attributes add the interaction between them to a new model
72+
# Call this model lm2_ISI
73+
74+
# Compare these two models and determine which model is better at predicting the size of the area burned

0 commit comments

Comments
 (0)