Skip to content

Commit 2baa4a8

Browse files
author
LProcopi15
committed
Reorganization
1 parent 618a75a commit 2baa4a8

File tree

4 files changed

+82
-78
lines changed

4 files changed

+82
-78
lines changed

Week 4 - Case Study/Week4_mortality.R

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
#############Medical decision making: What contribute(s) to mortality?############
33
#############Step 1: Understand the dataset###########
44
#1.1 Load the data
5-
5+
# Read MortData_medical.csv into the data frame MD_medical.
# NOTE(review): the path is absolute and user-specific; it only resolves on a
# machine that has this exact directory layout.
MD_medical <- read.csv("C:/Users/Student/Documents/UVA 2016-2017/RWorkshop/Week 4 - Case Study/MortData_medical.csv")
6+
# Read MortData_personal.csv into the data frame MD_personal.
# NOTE(review): the path is absolute and user-specific; it only resolves on a
# machine that has this exact directory layout.
MD_personal <- read.csv("C:/Users/Student/Documents/UVA 2016-2017/RWorkshop/Week 4 - Case Study/MortData_personal.csv")
67

78
#Merge two datasets
89
# Rename the first column of MD_personal to "Patient_id"
names(MD_personal)[1] <- "Patient_id"
@@ -13,6 +14,7 @@ View(MD)
1314

1415
#1.2 Read the description files and understand what each attribute is.
1516
#Rename attributes if you find it helpful
17+
# Give the columns of MD descriptive names.
# NOTE(review): the 19 names are assigned by position, so MD must have exactly
# 19 columns in this order — confirm against the merged data.
colnames(MD) <- c('Length.Of.Stay','Disease.Class','Comorbidities','Coma.Score','Care.Intensity','Mean.BP','WBC','Heart.Rate','Temperature','Blood.Gases','Albumin','Bilirubin','Creatinine','Sodium','Adjusted.Shock.Index','Age','Sex','Race','Death')
1618

1719

1820
#1.3 Check the class of each variable.
@@ -34,8 +36,7 @@ View(MD)
3436

3537

3638
#2.2.1 Temperature: mean imputation
37-
38-
39+
# Mean imputation: compute the mean of the observed temperatures, show it,
# and write it into every missing Temperature entry.
temp_mean <- mean(MD$Temperature, na.rm = TRUE)
print(temp_mean)
MD$Temperature[is.na(MD$Temperature)] <- temp_mean
3940

4041

4142
#Additional question: why can we use mean imputation for temperature? Consider the distribution of the temp attribute.
@@ -49,7 +50,10 @@ View(MD)
4950
#2.2.3 pafi: Blood Gases
5051
#Consider the real meaning of the attributes. Let's convert the attribute into a factor with two levels, ventilator patients and non-ventilator patients
5152
#Hint: you can use the cut function. Type ?cut to see the usage of the function
52-
53+
# ventilator patients non-ventilator patients
54+
# Overwrite missing Blood.Gases entries with 0 (logical mask, no which() needed)
MD$Blood.Gases[is.na(MD$Blood.Gases)] <- 0
55+
# Print the largest Blood.Gases value.
# NOTE(review): presumably used to choose the upper break passed to cut() — confirm.
max(MD$Blood.Gases)
56+
# Convert Blood.Gases into a two-level factor with bins [0, 32] and (32, 870].
# include.lowest = TRUE closes the first bin on the left; without it the bin is
# (0, 32], so entries equal to 0 (the value written over missing data) fall
# outside the breaks and are turned back into NA.
MD$Blood.Gases <- cut(MD$Blood.Gases, c(0, 32, 870), include.lowest = TRUE)
5357

5458

5559
#In medical world, bili and albi are measured in the same test.

FireInfo renamed to Week 5 - Introduction to Modeling/FireInfo

File renamed without changes.

Lecture5.R renamed to Week 5 - Introduction to Modeling/Lecture5.R

Lines changed: 74 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,74 @@
1-
fire <- read.csv("C:/Users/Student/Documents/forestfires.csv")
2-
View(fire)
3-
4-
#####################
5-
#
6-
# Explore Data
7-
#
8-
####################
9-
10-
# Get summary information
11-
summary(fire)
12-
# No NA's, all numeric data is normalized
13-
14-
# Check class of each attribute
15-
for (i in 1:ncol(fire)){
16-
print(paste(colnames(fire[i]), ": ", class(fire[,i]),sep = ""))
17-
}
18-
19-
# Getting a subset of the data
20-
summary(fire$area)
21-
areabox <- boxplot(fire$area)
22-
upperwhisk <- areabox$stats[5,]
23-
24-
xtm_fire <- subset(fire, area >= upperwhisk, select = c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain', 'area'))
25-
26-
####################
27-
#
28-
# Basic modeling - Linear Regression
29-
#
30-
####################
31-
32-
# Plot two variables
33-
plot(xtm_fire$temp, xtm_fire$area)
34-
# Create linear regression model between the two
35-
lm1 <- lm(area~temp, data = xtm_fire)
36-
# Add regression line to plot
37-
abline(lm1, col = "orange")
38-
# Statistical information about this lm
39-
summary(lm1)
40-
41-
# Can add many more factors to this lm
42-
lm2 <- lm(area~temp+FFMC+wind, data = xtm_fire)
43-
summary(lm2)
44-
45-
# Could interactions between variables help us?
46-
pairs(xtm_fire[c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'area')])
47-
symnum(cor(xtm_fire[c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'area')]))
48-
# Highly correlated: DC and DMC, ISI and FFMC, temp and FFMC
49-
# Add these correlated attributes to a model
50-
lm3 <- lm(area~temp+FFMC+wind+(DC+DMC)^2+(ISI+FFMC)^2+(temp+FFMC)^2, data = xtm_fire)
51-
summary(lm3)
52-
53-
# compare models
54-
anova(lm1, lm2)
55-
anova(lm1, lm3)
56-
anova(lm2, lm3)
57-
58-
#####################
59-
#
60-
# Practice Problems
61-
#
62-
####################
63-
64-
# 1. Create a new subset that includes only with an ISI (inital spread index) greater than the median
65-
66-
# 2. Create a linear model with area and wind as your predictors, and area as your response
67-
# Call this model lm1_ISI
68-
69-
# 3. Determine if there are any correlated attributes
70-
71-
# 4. If there are any correlated attributes add the interaction between them to a new model
72-
# Call this model lm2_ISI
73-
74-
# 5. Compare these two models and determine which model is better at predicting the size of the area burned
1+
fire <- read.csv("C:/Users/Student/Documents/forestfires.csv")
2+
View(fire)
3+
4+
#####################
5+
#
6+
# Explore Data
7+
#
8+
####################
9+
10+
# Get summary information
11+
summary(fire)
12+
# No NA's, all numeric data is normalized
13+
14+
# Check class of each attribute
15+
# Print "<column name>: <class>" for every column of fire.
# seq_len() gives an empty sequence when fire has no columns, whereas
# 1:ncol(fire) would iterate over 1 and 0.
for (i in seq_len(ncol(fire))) {
  print(paste0(colnames(fire)[i], ": ", class(fire[, i])))
}
18+
19+
# Getting a subset of the data
20+
summary(fire$area)
21+
areabox <- boxplot(fire$area)
22+
upperwhisk <- areabox$stats[5,]
23+
24+
xtm_fire <- subset(fire, area >= upperwhisk, select = c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain', 'area'))
25+
26+
####################
27+
#
28+
# Basic modeling - Linear Regression
29+
#
30+
####################
31+
32+
# Plot two variables
33+
plot(xtm_fire$temp, xtm_fire$area)
34+
# Create linear regression model between the two
35+
lm1 <- lm(area~temp, data = xtm_fire)
36+
# Add regression line to plot
37+
abline(lm1, col = "orange")
38+
# Statistical information about this lm
39+
summary(lm1)
40+
41+
# Can add many more factors to this lm
42+
lm2 <- lm(area~temp+FFMC+wind, data = xtm_fire)
43+
summary(lm2)
44+
45+
# Could interactions between variables help us?
46+
pairs(xtm_fire[c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'area')])
47+
symnum(cor(xtm_fire[c('FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'area')]))
48+
# Highly correlated: DC and DMC, ISI and FFMC, temp and FFMC
49+
# Add these correlated attributes to a model
50+
lm3 <- lm(area~temp+FFMC+wind+(DC+DMC)^2+(ISI+FFMC)^2+(temp+FFMC)^2, data = xtm_fire)
51+
summary(lm3)
52+
53+
# compare models
54+
anova(lm1, lm2)
55+
anova(lm1, lm3)
56+
anova(lm2, lm3)
57+
58+
#####################
59+
#
60+
# Practice Problems
61+
#
62+
####################
63+
64+
# 1. Create a new subset that includes only rows with an ISI (initial spread index) greater than the median
65+
66+
# 2. Create a linear model with temp and wind as your predictors, and area as your response
67+
# Call this model lm1_ISI
68+
69+
# 3. Determine if there are any correlated attributes
70+
71+
# 4. If there are any correlated attributes add the interaction between them to a new model
72+
# Call this model lm2_ISI
73+
74+
# 5. Compare these two models and determine which model is better at predicting the size of the area burned

forestfires.csv renamed to Week 5 - Introduction to Modeling/forestfires.csv

File renamed without changes.

0 commit comments

Comments
 (0)