---
title: "Pizza"
output: html_document
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
# Set the document-wide knitr chunk default: echo = TRUE for every chunk
# that does not override it.
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
```{r}
# Attach the tidyverse packages used throughout this document and make
# theme_light() the default ggplot2 theme for every plot below.
library(tidyverse)
theme_set(theme_light())
```
```{r}
# Read the three pizza CSV files published in the tidytuesday repository
# (2019-10-01 folder). Each call downloads one file over HTTPS.

# Poll results: one row per place / answer with a vote count.
pizza_jared <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_jared.csv"
)

# Barstool review statistics per pizza place.
pizza_barstool <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_barstool.csv"
)

# Datafiniti data set.
pizza_datafiniti <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-10-01/pizza_datafiniti.csv"
)
```
```{r}
# Answers listed from worst to best. fct_relevel() moves these levels to the
# front in this order, so as.integer(answer) below yields 1-5 for them.
# NOTE(review): any answer not listed here (the first plot filters out "Fair")
# keeps a level after these five - confirm it never carries votes.
answer_orders <- c("Never Again", "Poor", "Average", "Good", "Excellent")

# One row per place / answer with vote counts, plus per-place totals, the
# share of each answer, and the vote-weighted average score.
by_place_answer <- pizza_jared %>%
  mutate(
    # time and date are computed here but are not carried through the
    # summarise() step below.
    time = as.POSIXct(time, origin = "1970-01-01"),
    date = as.Date(time),
    answer = fct_relevel(answer, answer_orders)
  ) %>%
  group_by(place, answer) %>%
  # summarise() drops the last grouping level, so the mutate() that follows
  # still runs once per place.
  summarise(votes = sum(votes)) %>%
  mutate(
    total = sum(votes),
    percent = votes / total,
    answer_integer = as.integer(answer),
    average = sum(answer_integer * percent)
  ) %>%
  ungroup()

# One row per place: total number of votes and average score.
by_place <- by_place_answer %>%
  distinct(place, total, average)
```
```{r}
# Faceted bar chart: share of each answer for the 16 places with the most
# votes, facets ordered by average score.
by_place_answer %>%
  # Order places by total (largest first) and keep the first 16 levels.
  filter(as.integer(fct_reorder(place, total, .desc = TRUE)) <= 16) %>%
  filter(answer != "Fair") %>%
  # Put the vote total into the facet label, then order facets by average.
  mutate(place = glue::glue("{ place } ({ total })")) %>%
  mutate(place = fct_reorder(place, average)) %>%
  ggplot(aes(x = answer, y = percent)) +
  geom_col() +
  facet_wrap(~ place) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "",
    y = "% of respondents",
    title = "What is the most popular pizza place in Open Stats meetup?",
    subtitle = "Only the 16 pizza places with the most respondents. # respondents shown in parentheses."
  )
```
```{r}
library(broom)
# THIS TRICK DOESN'T WORK
# tidy(lm(c(1, 2, 3, 4, 5) ~ 1, weights = c(100, 300, 100, 200, 150)), conf.int = TRUE)
#' One-sample t-test on frequency-weighted values
#'
#' Expands `x` so that each value appears `frequency` times, runs `t.test()`
#' on the expanded vector, and returns the tidied result.
#'
#' @param x Vector of values (the pipeline below passes the integer scores).
#' @param frequency Repetition counts, passed to `rep()` as `times`.
#' @return The output of `tidy()` applied to the `t.test()` result.
t_test_repeated <- function(x, frequency) {
  expanded <- rep(x, times = frequency)
  tidy(t.test(expanded))
}
# Point-and-interval plot: t-test estimate and confidence interval of the
# score for the 16 places with the most votes (places with >= 3 votes only).
by_place_answer %>%
  filter(total >= 3) %>%
  group_by(place, total) %>%
  # One tidied t-test result per place, stored as a list column.
  summarise(t_test_result = list(t_test_repeated(answer_integer, votes))) %>%
  ungroup() %>%
  unnest(t_test_result) %>%
  select(
    place,
    total,
    average = estimate,
    low = conf.low,
    high = conf.high
  ) %>%
  top_n(n = 16, wt = total) %>%
  mutate(place = fct_reorder(place, average)) %>%
  ggplot(aes(x = average, y = place)) +
  geom_point(aes(size = total)) +
  geom_errorbarh(aes(xmin = low, xmax = high)) +
  labs(
    x = "Average score (1-5 Likert Scale)",
    y = "",
    title = "What is the most popular pizza place in Open Stats meetup?",
    subtitle = "Only the 16 pizza places with the most respondents.",
    size = "# of respondents"
  )
```
```{r}
# Scatter plot of the meetup average against the Barstool average for places
# present in both data sets.
# Author's note: don't bother comparing them, this is a bad graph.
pizza_barstool %>%
  select(
    place = name,
    barstool_total = review_stats_all_count,
    barstool_average = review_stats_all_average_score
  ) %>%
  inner_join(by_place, by = "place") %>%
  # Keep only places whose name occurs exactly once after the join.
  group_by(place) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  # Require at least 5 reviews / votes on both sides.
  filter(barstool_total >= 5) %>%
  filter(total >= 5) %>%
  ggplot(aes(x = average, y = barstool_average)) +
  geom_point() +
  labs(
    x = "Meetup",
    y = "Barstool"
  )
```
```{r}
# Box plot of the overall average score per price level, restricted to the
# 50 places with the highest review counts (top_n() keeps ties).
pizza_barstool %>%
  top_n(n = 50, wt = review_stats_all_count) %>%
  ggplot(
    aes(
      x = price_level,
      y = review_stats_all_average_score,
      group = price_level
    )
  ) +
  geom_boxplot()
# Dot plot of the overall average rating for places with at least 50 reviews,
# ordered by rating; point size shows the number of reviews.
pizza_barstool %>%
  filter(review_stats_all_count >= 50) %>%
  mutate(name = fct_reorder(name, review_stats_all_average_score)) %>%
  ggplot(
    aes(
      x = review_stats_all_average_score,
      y = name,
      size = review_stats_all_count
    )
  ) +
  geom_point() +
  labs(
    x = "Average rating",
    y = "",
    size = "# of reviews",
    title = "Barstool Sports ratings of pizza places",
    subtitle = "Only places with at least 50 reviews"
  )
```
```{r}
# Box plot of average scores by city for places with at least 20 reviews.
pizza_barstool %>%
  filter(review_stats_all_count >= 20) %>%
  # Keep the 3 most common cities; fct_lump() pools the rest together.
  mutate(city = fct_lump(city, n = 3)) %>%
  # add_count() adds column n (rows per city), used in the axis label.
  add_count(city) %>%
  mutate(city = glue::glue("{ city } ({ n })")) %>%
  ggplot(aes(x = city, y = review_stats_all_average_score)) +
  geom_boxplot() +
  labs(
    title = "Do pizza ratings differ across cities?",
    subtitle = "Only pizza places with at least 20 reviews"
  )
```
```{r}
# Slimmed-down Barstool table: place name, price level, and every column
# whose name contains "review", with the "review_stats_" part removed from
# the column names and the "provider" columns dropped.
pizza_cleaned <- pizza_barstool %>%
  select(place = name, price_level, contains("review")) %>%
  rename_all(function(column_name) str_remove(column_name, "review_stats_")) %>%
  select(-contains("provider"))
# Scatter plot of Dave's score against the critic average for places with at
# least one critic review. The red line is geom_abline()'s default line
# (slope 1, intercept 0); the smoother is a linear fit.
pizza_cleaned %>%
  filter(critic_count > 0) %>%
  ggplot(aes(x = critic_average_score, y = dave_average_score)) +
  geom_point() +
  geom_abline(color = "red") +
  geom_smooth(method = "lm") +
  labs(
    title = "Does Barstool Sports' Dave agree with the critics?",
    x = "Critic average score",
    y = "Dave score"
  )
```
```{r}
# Scatter plot of Dave's score against the community average for places with
# at least 20 community reviews; point size shows the community review count.
# The red line is geom_abline()'s default line; the smoother is a linear fit.
pizza_cleaned %>%
  filter(community_count >= 20) %>%
  ggplot(aes(x = community_average_score, y = dave_average_score)) +
  geom_point(aes(size = community_count)) +
  geom_abline(color = "red") +
  geom_smooth(method = "lm") +
  labs(
    size = "# of community reviews",
    x = "Community score",
    y = "Dave score"
  )
```