Skip to content

Commit f695ccf

Browse files
author
LProcopi15
committed
Finished lecture 6 material
1 parent c0adab1 commit f695ccf

File tree

3 files changed

+188
-12
lines changed

3 files changed

+188
-12
lines changed

Week 6 - Assumptions and Transformations/Lecture6.R

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,28 +38,85 @@ AIC(lm1,k=log(nrow(xtm_fire))) # 840.9733
3838
AIC(lm3,k=log(nrow(xtm_fire))) # 872.0552
3939

4040
# adj-R2 - closer to 1 is better
41-
summary(lm1)$adj.r.squared
42-
summary(lm3)$adj.r.squared
41+
summary(lm1)$adj.r.squared # 0.04932201
42+
summary(lm3)$adj.r.squared # -0.05481489
4343

4444

4545
######################
4646
#
4747
# Transformations and Modeling
4848
#
4949
#####################
50-
51-
# Stepwise
52-
lm.cas2.step<-step(lm.cas2)
53-
54-
# Box-cox
50+
# Box-Cox transformation: a power transform of the response used to make the model residuals closer to normally distributed
5551
library(MASS)
56-
boxcox(lm1)
57-
# Value near 0 thus we need to perform a log transform
5852

5953
#The best lambda
60-
L<-boxcox(accdmg.lm1, plotit = F)$x[which.max(boxcox(accdmg.lm1, plotit = F)$y)]
54+
L <- with(boxcox(lm3, plotit = FALSE), x[which.max(y)]) # lambda at the peak of the Box-Cox profile log-likelihood; boxcox() is evaluated only once
6155
L
6256

6357
# Box-cox transform
64-
xdmgnd.lm1.boxcox<-lm(ACCDMG^L ~TEMP + TRNSPD + TONS + CARS + HEADEND1,data=xdmgnd)
65-
summary(xdmgnd.lm1.boxcox)
58+
fire.lm1.boxcox <- lm(area^L~., data = xtm_fire)
59+
summary(fire.lm1.boxcox)
60+
61+
# Look at new diagnostics
62+
par(mfrow=c(2,2))
63+
plot(fire.lm1.boxcox, labels.id = NULL)
64+
par(mfrow=c(1,1))
65+
66+
# Stepwise regression
67+
lm1.step <-step(lm3)
68+
summary(lm1.step)
69+
70+
# Again we can compare these models using multiple aproaches
71+
AIC(fire.lm1.boxcox)
72+
AIC(lm1)
73+
AIC(lm1.step)
74+
75+
# Logistic regression
76+
fert <- read.csv("C:/Users/Student/Documents/UVA 2016-2017/RWorkshop/Week 6 - Assumptions and Transformations/fertility_Diagnosis.txt", header = FALSE)
77+
View(fert)
78+
79+
colnames(fert) <- c("season", "age", "childish.disease", "accident", "surgical", "fevers", "alcohol", "smoking", "hours.sitting", "output")
80+
81+
# Recode the response
82+
class(fert$output)
83+
fert$output <- as.character(fert$output)
84+
fert$output[fert$output == "N"] <- "0" # "N" (normal diagnosis) becomes 0
85+
fert$output[fert$output == "O"] <- "1" # "O" (altered diagnosis) becomes 1
86+
fert$output <- as.integer(fert$output)
87+
88+
# Binary logistic regression model: the fitted linear predictor gives the log-odds of the event (output == 1)
89+
fert.glm <- glm(output~., family = binomial(link = "logit"), data = fert)
90+
summary(fert.glm)
91+
92+
######################
93+
#
94+
# Practice Problems
95+
#
96+
#####################
97+
# Use the fertility data for these
98+
99+
# 1. Split the data into a training set (70%) and testing set (30%)
100+
101+
# Use the training set to build your models
102+
# 2. Create a general linear regression model with output as the response and all the other attributes as predictors
103+
104+
# 3. Determine correlation between attributes
105+
106+
# 4. Add an interaction term with hours.sitting and age
107+
108+
# 5. Perform a step-wise regression on the model from Q3
109+
110+
# 6. Create a new logistic regression model using the training data
111+
112+
# 7. Create diagnostic plots for all models
113+
114+
# 8. Compare the models using AIC
115+
116+
# 9. Compare the models using BIC
117+
118+
# 10. Compare the models using adj-R2 (won't work for logistic)
119+
120+
# 11. Use the first linear regression model and the logistic regression model to predict test values
121+
122+
# 12. Compare these two models using MSE
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Season in which the analysis was performed. 1) winter, 2) spring, 3) summer, 4) fall. (-1, -0.33, 0.33, 1)
2+
3+
Age at the time of analysis. 18-36 (0, 1)
4+
5+
Childhood diseases (i.e., chicken pox, measles, mumps, polio) 1) yes, 2) no. (0, 1)
6+
7+
Accident or serious trauma 1) yes, 2) no. (0, 1)
8+
9+
Surgical intervention 1) yes, 2) no. (0, 1)
10+
11+
High fevers in the last year 1) less than three months ago, 2) more than three months ago, 3) no. (-1, 0, 1)
12+
13+
Frequency of alcohol consumption 1) several times a day, 2) every day, 3) several times a week, 4) once a week, 5) hardly ever or never (0, 1)
14+
15+
Smoking habit 1) never, 2) occasional 3) daily. (-1, 0, 1)
16+
17+
Number of hours spent sitting per day 1-16 (0, 1)
18+
19+
Output: Diagnosis normal (N), altered (O)
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
-0.33,0.69,0,1,1,0,0.8,0,0.88,N
2+
-0.33,0.94,1,0,1,0,0.8,1,0.31,O
3+
-0.33,0.5,1,0,0,0,1,-1,0.5,N
4+
-0.33,0.75,0,1,1,0,1,-1,0.38,N
5+
-0.33,0.67,1,1,0,0,0.8,-1,0.5,O
6+
-0.33,0.67,1,0,1,0,0.8,0,0.5,N
7+
-0.33,0.67,0,0,0,-1,0.8,-1,0.44,N
8+
-0.33,1,1,1,1,0,0.6,-1,0.38,N
9+
1,0.64,0,0,1,0,0.8,-1,0.25,N
10+
1,0.61,1,0,0,0,1,-1,0.25,N
11+
1,0.67,1,1,0,-1,0.8,0,0.31,N
12+
1,0.78,1,1,1,0,0.6,0,0.13,N
13+
1,0.75,1,1,1,0,0.8,1,0.25,N
14+
1,0.81,1,0,0,0,1,-1,0.38,N
15+
1,0.94,1,1,1,0,0.2,-1,0.25,N
16+
1,0.81,1,1,0,0,1,1,0.5,N
17+
1,0.64,1,0,1,0,1,-1,0.38,N
18+
1,0.69,1,0,1,0,0.8,-1,0.25,O
19+
1,0.75,1,1,1,0,1,1,0.25,N
20+
1,0.67,1,0,0,0,0.8,1,0.38,O
21+
1,0.67,0,0,1,0,0.8,-1,0.25,N
22+
1,0.75,1,0,0,0,0.6,0,0.25,N
23+
1,0.67,1,1,0,0,0.8,-1,0.25,N
24+
1,0.69,1,0,1,-1,1,-1,0.44,O
25+
1,0.56,1,0,1,0,1,-1,0.63,N
26+
1,0.67,1,0,0,0,1,-1,0.25,N
27+
1,0.67,1,0,1,0,0.6,-1,0.38,O
28+
1,0.78,1,1,0,1,0.6,-1,0.38,O
29+
1,0.58,0,0,1,0,1,-1,0.19,N
30+
1,0.67,0,0,1,0,0.6,0,0.5,O
31+
1,0.61,1,0,1,0,1,-1,0.63,N
32+
1,0.56,1,0,0,0,1,-1,0.44,N
33+
1,0.64,0,0,0,0,1,-1,0.63,N
34+
1,0.58,1,1,1,0,0.8,0,0.44,N
35+
1,0.56,1,1,1,0,1,-1,0.63,N
36+
-1,0.78,1,1,0,1,0.6,-1,0.38,N
37+
-1,0.78,1,0,1,0,1,-1,0.25,N
38+
-1,0.56,1,0,1,0,1,-1,0.63,N
39+
-1,0.67,0,0,1,0,0.6,0,0.5,O
40+
-1,0.69,1,0,0,0,1,-1,0.31,N
41+
-1,0.53,1,1,1,0,0.8,1,0.5,N
42+
-1,0.56,1,1,0,0,0.8,1,0.5,N
43+
-1,0.58,1,0,1,-1,0.8,1,0.5,N
44+
-1,0.56,1,0,0,0,1,-1,0.44,N
45+
-1,0.53,1,1,0,1,1,0,0.31,N
46+
-1,0.53,1,0,0,1,1,0,0.44,N
47+
-0.33,0.56,1,0,0,0,1,-1,0.63,N
48+
-0.33,0.72,1,1,0,0,0.6,1,0.19,N
49+
-0.33,0.64,1,1,1,0,0.8,-1,0.31,N
50+
-0.33,0.75,1,1,1,0,0.6,-1,0.19,N
51+
-0.33,0.67,1,0,1,0,0.8,-1,0.19,N
52+
-0.33,0.53,1,1,0,1,1,-1,0.75,N
53+
-0.33,0.53,1,1,0,0,0.8,0,0.5,N
54+
-0.33,0.58,1,1,1,-1,0.8,0,0.19,N
55+
-0.33,0.61,1,0,1,0,1,-1,0.63,N
56+
-0.33,0.58,1,0,1,0,0.8,1,0.19,N
57+
-0.33,0.53,1,1,0,0,0.8,0,0.75,N
58+
-0.33,0.69,1,1,1,-1,1,-1,0.75,N
59+
-0.33,0.56,1,1,0,0,0.4,1,0.63,N
60+
1,0.58,0,0,0,1,0.8,1,0.44,N
61+
1,0.56,0,0,0,1,0.8,0,1,N
62+
-1,0.64,1,0,0,1,1,1,0.25,N
63+
-1,0.61,1,1,1,0,0.6,-1,0.38,N
64+
-1,0.56,1,0,0,1,1,-1,0.5,N
65+
-1,0.53,1,0,0,1,0.8,-1,0.31,N
66+
-0.33,0.56,0,0,1,0,1,-1,0.56,N
67+
-0.33,0.5,1,1,0,-1,0.8,0,0.88,N
68+
-0.33,0.5,1,0,0,1,1,-1,0.47,N
69+
-0.33,0.5,1,0,0,1,0.8,0,0.31,N
70+
-0.33,0.5,1,0,1,-1,0.8,-1,0.5,N
71+
-0.33,0.5,1,1,0,-1,0.8,0,0.88,O
72+
0.33,0.69,1,0,0,1,1,-1,0.31,N
73+
1,0.56,1,0,0,1,0.6,0,0.5,N
74+
-1,0.5,1,0,0,1,0.8,-1,0.44,N
75+
-1,0.53,1,0,0,1,0.8,-1,0.63,N
76+
-1,0.78,1,0,1,1,1,1,0.25,N
77+
-1,0.75,1,0,1,1,0.6,0,0.56,N
78+
-1,0.72,1,1,1,1,0.8,-1,0.19,N
79+
-1,0.53,1,1,0,1,0.8,-1,0.38,N
80+
-1,1,1,0,1,1,0.6,0,0.25,N
81+
-0.33,0.92,1,1,0,1,1,-1,0.63,N
82+
-1,0.81,1,1,1,1,0.8,0,0.19,N
83+
-0.33,0.92,1,0,0,1,0.6,-1,0.19,N
84+
-0.33,0.86,1,1,1,1,1,-1,0.25,N
85+
-0.33,0.78,1,0,0,1,1,1,0.06,O
86+
-0.33,0.89,1,1,0,0,0.6,1,0.31,N
87+
-0.33,0.75,1,1,1,0,0.6,1,0.25,N
88+
-0.33,0.75,1,1,1,1,0.8,1,0.25,N
89+
-0.33,0.83,1,1,1,0,1,-1,0.31,N
90+
-0.33,0.81,1,1,1,0,1,1,0.38,N
91+
-0.33,0.81,1,1,1,1,0.8,-1,0.38,N
92+
0.33,0.78,1,0,0,0,1,1,0.06,N
93+
0.33,0.75,1,1,0,0,0.8,-1,0.38,N
94+
0.33,0.75,1,0,1,0,0.8,-1,0.44,O
95+
1,0.58,1,0,0,0,0.6,1,0.5,N
96+
-1,0.67,1,0,0,0,1,-1,0.5,N
97+
-1,0.61,1,0,0,0,0.8,0,0.5,N
98+
-1,0.67,1,1,1,0,1,-1,0.31,N
99+
-1,0.64,1,0,1,0,1,0,0.19,N
100+
-1,0.69,0,1,1,0,0.6,-1,0.19,N

0 commit comments

Comments
 (0)