---
title: "Untitled"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(tidyverse)
library(scales)
theme_set(theme_light())
maryland_bridges <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018-11-27/baltimore_bridges.csv") %>%
select(-vehicles) %>%
mutate(inspection_yr = inspection_yr + 2000,
decade = 10 * (yr_built %/% 10),
responsibility = fct_lump(responsibility, 4),
county = str_to_title(county))
```
```{r}
maryland_bridges %>%
filter(yr_built >= 1900) %>%
count(decade = 10 * (yr_built %/% 10), sort = TRUE) %>%
ggplot(aes(decade, n)) +
geom_line() +
expand_limits(y = 0) +
labs(y = "# of bridges in Baltimore built this decade")
```
How does the condition of bridges depend on how old it is?
```{r}
maryland_bridges %>%
filter(yr_built >= 1900) %>%
group_by(decade) %>%
summarize(pct_good = mean(bridge_condition == "Good"),
total = n()) %>%
ggplot(aes(decade, pct_good)) +
geom_line() +
scale_y_continuous(labels = percent_format()) +
expand_limits(y = 0)
```
Most bridges built before 1970 we would not consider in "Good" condition (though it doesn't matter how long before 1970 they were built), while a vast majority of bridges built since 2000 are in Good condition.
```{r}
maryland_bridges %>%
replace_na(list(responsibility = "Other")) %>%
count(responsibility = fct_lump(responsibility, 4), sort = TRUE) %>%
mutate(responsibility = fct_reorder(responsibility, n)) %>%
ggplot(aes(responsibility, n)) +
geom_col() +
coord_flip()
```
```{r}
maryland_bridges %>%
filter(yr_built >= 1900) %>%
group_by(responsibility = fct_lump(responsibility, 4),
decade) %>%
summarize(pct_good = mean(bridge_condition == "Good"),
total = n()) %>%
filter(responsibility != "Other") %>%
ggplot(aes(decade, pct_good, color = responsibility)) +
geom_line() +
scale_y_continuous(labels = percent_format()) +
expand_limits(y = 0) +
labs(y = "% of bridges rated 'Good'")
```
The County Highway Agency consistently has the most bridges rated as "Good" from before 1970. State Toll Authority is underperforming in bridges built in the 1990s.
```{r}
maryland_bridges %>%
ggplot(aes(avg_daily_traffic)) +
geom_histogram() +
scale_x_log10(labels = comma_format())
```
```{r}
maryland_bridges %>%
filter(yr_built >= 1990) %>%
group_by(traffic_category = cut(avg_daily_traffic, c(0, 1000, 10000, Inf),
labels = c("<1000", "1000-10,000", "10,000+"))) %>%
summarize(pct_good = mean(bridge_condition == "Good"),
total = n())
```
What does the traffic look like geographically?
```{r}
maryland_bridges %>%
ggplot(aes(long, lat, color = avg_daily_traffic)) +
borders("state", regions = "Maryland") +
geom_point() +
scale_color_gradient2(low = "blue",
high = "red",
midpoint = log10(median(maryland_bridges$avg_daily_traffic)),
trans = "log10",
labels = comma_format()) +
coord_map() +
theme_void()
```
```{r}
maryland_bridges %>%
ggplot(aes(long, lat, color = bridge_condition)) +
borders("state", regions = "Maryland") +
geom_point(size = 1) +
coord_map() +
theme_void()
```
```{r}
maryland_bridges %>%
filter(yr_built >= 1900) %>%
ggplot(aes(long, lat, color = county)) +
borders("state", regions = "Maryland") +
geom_point(size = 1) +
coord_map() +
theme_void()
```
```{r}
maryland_bridges %>%
filter(yr_built >= 1900) %>%
group_by(county, decade) %>%
summarize(pct_good = mean(bridge_condition == "Good"),
total = n()) %>%
arrange(county, decade) %>%
ggplot(aes(decade, pct_good, color = county)) +
geom_line() +
scale_y_continuous(labels = percent_format()) +
expand_limits(y = 0) +
labs(y = "% of bridges rated 'Good'")
```
### Effect of county, responsibility, traffic and time on bridge condition
```{r}
# fit a logistic model
bridges <- maryland_bridges %>%
filter(yr_built >= 1900)
library(broom)
library(splines)
simple_model <- bridges %>%
mutate(good = bridge_condition == "Good") %>%
glm(good ~ ns(yr_built, 4), data = ., family = "binomial")
model <- bridges %>%
mutate(good = bridge_condition == "Good") %>%
glm(good ~ ns(yr_built, 4) + responsibility + county, data = ., family = "binomial")
augment(simple_model, bridges, type.predict = "response") %>%
ggplot(aes(yr_built, .fitted)) +
geom_line() +
expand_limits(y = 0) +
scale_y_continuous(labels = percent_format()) +
labs(y = "Predicted probability a bridge is rated 'Good'")
augment(model, bridges, type.predict = "response") %>%
ggplot(aes(yr_built, .fitted, color = responsibility)) +
geom_line() +
expand_limits(y = 0) +
facet_wrap(~ county) +
scale_y_continuous(labels = percent_format()) +
labs(y = "Predicted probability a bridge is rated 'Good'")
```
Controlled for the overall trend in change over time, what's the effect of being in a particular county or responsibility?
```{r}
model %>%
tidy(conf.int = TRUE) %>%
filter(str_detect(term, "responsibility|county")) %>%
mutate(term = reorder(term, estimate)) %>%
ggplot(aes(estimate, term)) +
geom_point() +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high)) +
geom_vline(xintercept = 0, color = "red", lty = 2)
```
We haven't found evidence of an effect of geography or ownership on bridge condition, once we control for time.