Distribution of team ratings by NFL division since 2002.
In my last post, 2019 NFL Team Ratings, I created an interactive visualization to display the NFL team ratings from Pro Football Reference website for the 2019 season.
After spending time looking at the 2019 ratings, I was curious to see how each NFL team rated over the past ~20 years and how they compared to other teams within their division.
In order to do this, I pulled the team ratings for every year since 2002 from Pro Football Reference and plotted the data using the highly versatile ggplot2
package.
The final plots for each division are located at the end of the article under the Visualization section.
The data I needed to gather was located on 18 different pages from Pro Football Reference’s site (one page for each year from 2002 to 2019). Here is the link for the 2019 team ratings from Pro Football Reference. If you want to view the ratings for any of the prior years, you just need to change the year at the end of the link. In order to pull the data for all 18 years at once, I wrote a function to scrape each year and bind the data together to form one datatset.
I then did some minor cleaning/tidying on the data so that it could be easily used for creating the data visualizations.
library(rvest)
library(janitor)
library(tidyverse)
### Pull data from pro-football-reference -----------------------
### AFC tables
get_AFC_tables <- function(year) {
cat("Getting", year, "AFC Standings\n")
url <- paste0("https://www.pro-football-reference.com/years/", year)
AFC <- url %>%
xml2::read_html() %>%
html_nodes(xpath = '//*[@id="AFC"]') %>%
html_table()
AFC <- AFC[[1]]
AFC <- AFC %>%
mutate(Year = year)
}
### NFC tables
get_NFC_tables <- function(year) {
cat("Getting", year, "NFC Standings\n")
url <- paste0("https://www.pro-football-reference.com/years/", year)
NFC <- url %>%
xml2::read_html() %>%
html_nodes(xpath = '//*[@id="NFC"]') %>%
html_table()
NFC <- NFC[[1]]
NFC <- NFC %>%
mutate(Year = year)
}
AFC_standings_past18_years <- lapply(2002:2019, get_AFC_tables) %>%
bind_rows()
NFC_standings_past18_years <- lapply(2002:2019, get_NFC_tables) %>%
bind_rows()
### Clean dataset -----------------------
NFL_table_clean <- AFC_standings_past18_years %>%
rbind(NFC_standings_past18_years) %>%
### Create NFL divisions column
mutate(Division = ifelse(str_detect(Tm, "FC"), Tm, NA)) %>%
fill(Division, .direction = "down") %>%
### Create NFL conferences column
mutate(Conference = ifelse(str_detect(Division, "NFC"), "NFC", "AFC")) %>%
### Remove team division names from Tm column
filter(str_detect(Tm, "FC", negate = TRUE)) %>%
### Add column to say if team made playoffs based off of pre-existing asterisks
mutate(Playoffs = ifelse(str_detect(Tm, "[*+]"), "Yes", "No")) %>%
### Remove asterisks and plus signs next to team names
mutate(Tm = gsub("[*+]", "", Tm)) %>%
mutate(
Team = case_when(
Tm == "Oakland Raiders" ~ "Las Vegas Raiders",
Tm == "Washington Redskins" ~ "Washington Football Team",
Tm == "San Diego Chargers" ~ "Los Angeles Chargers",
Tm == "St. Louis Rams" ~ "Los Angeles Rams",
TRUE ~ Tm
)
) %>%
### Convert columns containing numbers from character to numeric
mutate_at(c("SRS", "OSRS", "DSRS"),
as.numeric) %>%
group_by(Division) %>%
mutate(
div_median_SRS = median(SRS)
) %>%
ungroup() %>%
select(
Division,
Conference,
Team,
Playoffs,
OSRS,
DSRS,
SRS,
Year,
div_median_SRS
) %>%
arrange(SRS)
The visualizations above shows the distribution of SRS by team and division since 2002 and whether or not the team made the playoffs for each year.
There are a couple key takeaways that stand out:
library(extrafont)
extrafont::loadfonts(device = "win")
library(ggtext)
library(colorspace)
library(nflfastR)
### Set theme for ggplot -----------------------
theme_set(theme_minimal(base_size = 18, base_family = "Lucida Console"))
theme_update(
panel.grid.major = element_line(color = "grey92", size = .4),
panel.grid.minor = element_blank(),
axis.title.x = element_text(
size = 15,
color = "grey30",
margin = margin(t = 7)
),
axis.title.y = element_blank(),
axis.text = element_text(color = "grey60"),
axis.ticks = element_line(color = "grey92", size = .4),
axis.ticks.length = unit(.6, "lines"),
legend.position = "top",
plot.title = element_text(
hjust = 0,
color = "black",
family = "Lucida Sans Unicode",
size = 21,
margin = margin(t = 10)
),
plot.subtitle = element_text(
hjust = 0,
face = "bold",
color = "grey30",
family = "Lucida Sans Unicode",
size = 16,
margin = margin(0, 0, 10, 0)
),
plot.title.position = "plot",
plot.caption = element_text(
color = "grey50",
size = 12,
hjust = 1,
family = "Lucida Sans Unicode",
lineheight = 1.05,
margin = margin(20, 0, 0, 0)
),
plot.caption.position = "plot",
plot.margin = margin(rep(20, 4))
)
### Create chart for each division -----------------------
division <- unique(NFL_table_clean$Division)
for (i in division) {
data <- NFL_table_clean %>%
filter(Division == i)
data <- data %>%
group_by(Team) %>%
mutate(
median = median(SRS),
q25 = quantile(SRS, probs = .25),
q75 = quantile(SRS, probs = .75),
n = n()
) %>%
ungroup() %>%
mutate(Team_num = as.numeric(fct_rev(Team))) %>%
arrange(Team)
teams_colors_logos <- teams_colors_logos %>%
filter(team_abbr != "LA")
team_colors <- data %>%
distinct(Team) %>%
inner_join(teams_colors_logos, by=c("Team"="team_name")) %>%
arrange(Team)
pal <- team_colors$team_color
abbrv <- team_colors$team_abbr
ggplot(data, aes(SRS, Team_num - .2)) +
geom_linerange(
data = data %>%
group_by(Team, Team_num) %>%
summarize(m = unique(median)) %>%
ungroup(),
aes(
xmin = -Inf,
xmax = m,
y = Team_num,
color = Team
),
inherit.aes = F,
linetype = "dotted",
size = .5
) +
geom_boxplot(
aes(
y = Team_num - .15,
color = Team,
color = after_scale(darken(color, .1, space = "HLS"))
),
width = 0,
size = .5,
outlier.shape = 8
) +
geom_rect(
aes(
xmin = q25,
xmax = median,
ymin = Team_num - .07,
ymax = Team_num - .22
),
fill = "grey95",
color = "grey80"
) +
geom_rect(
aes(
xmin = q75,
xmax = median,
ymin = Team_num - .07,
ymax = Team_num - .22
),
fill = "grey85",
color = "grey80"
) +
geom_point(
aes(y = Team_num - .15),
color = ifelse(data$Playoffs == "Yes", "darkgreen", "red"),
shape = 20,
size = 5,
alpha = .4
) +
ggdist::stat_halfeye(
aes(
y = Team_num,
color = Team,
fill = after_scale(lighten(color, .5))
),
alpha = .7,
shape = 18,
point_size = 3,
interval_size = 1.8,
adjust = .5,
.width = c(0, 1)
) +
geom_text(
data = data %>%
group_by(Team, Team_num) %>%
summarize(m = unique(median)) %>%
ungroup(),
aes(
x = m,
y = Team_num + .1,
label = sprintf("%+.1f", m)
),
inherit.aes = F,
color = "white",
fontface = "bold",
family = "Lucida Console",
size = 4.5
) +
geom_text(
data = data %>%
filter(Team_num == 4) %>%
summarize(max = max(SRS)) %>%
ungroup(),
aes(x = max + 1,
y = 4 + .05,
label = "Playoff Pct."),
inherit.aes = F,
family = "Lucida Sans Unicode",
color = "grey20",
fontface = "bold",
size = 3.5,
hjust = 0
) +
geom_text(
data = data %>%
mutate(playoff_num = ifelse(Playoffs == "Yes", 1, 0)) %>%
group_by(Team, Team_num) %>%
summarize(max = max(SRS),
playoff_pct = round(100 * sum(playoff_num) / 18, 0)) %>%
ungroup(),
aes(
x = max + 1,
y = Team_num - .15,
label = glue::glue("{playoff_pct}%"),
color = Team
),
inherit.aes = F,
family = "Lucida Console",
fontface = "bold",
size = 4,
hjust = 0
) +
scale_x_continuous(
labels = function(x)
sprintf("%+.0f", x),
limits = c(-18, 21.5),
breaks = c(-20, -15, -10, -5, 0, 5, 10, 15, 20)
) +
scale_y_continuous(
limits = c(.55, NA),
breaks = 1:4,
labels = rev(team_colors$team_abbr),
expand = c(0, 0.05)
) +
scale_color_manual(values = pal,
guide = F) +
scale_fill_manual(values = pal,
guide = F) +
labs(
x = "SRS",
title = i,
subtitle = "Distribution of SRS since 2002",
caption = 'Note: Simple Rating System (SRS) is a measurement of team quality relative to average (0.0)\nVisualization: Kyle Cuilla • Data: Pro-Football-Reference'
) +
theme(
panel.grid.major.y = element_blank(),
axis.text.y = element_text(
family = "Lucida Console",
color = rev(pal),
size = 20,
lineheight = .9
),
axis.ticks.length = unit(0, "lines")
) +
geom_vline(
xintercept = data$div_median_SRS,
linetype = "dashed",
size = 0.5,
alpha = 0.5
) +
geom_text(
data = data %>%
filter(Team_num == 4),
aes(
x = div_median_SRS + 0.4,
y = 4 + 1,
label = paste0("Division median: ", sprintf("%+.1f", div_median_SRS))
),
inherit.aes = F,
family = "Lucida Sans Unicode",
color = "grey20",
fontface = "bold.italic",
size = 3.8,
hjust = 0
) +
annotate(
"text",
x = 17.5,
y = 5,
label = "Playoffs",
family = "Lucida Sans Unicode",
color = "grey20",
fontface = "bold",
size = 4,
lineheight = .9
) +
geom_point(
x = 16.5,
y = 4.7,
color = "darkgreen",
shape = 20,
size = 5,
alpha = .5
) +
annotate(
"text",
x = 16.5,
y = 4.85,
label = "Yes",
family = "Lucida Sans Unicode",
color = "grey20",
size = 3.2,
lineheight = .9
) +
geom_point(
x = 18,
y = 4.7,
color = "red",
shape = 20,
size = 5,
alpha = .5
) +
annotate(
"text",
x = 18,
y = 4.85,
label = "No",
family = "Lucida Sans Unicode",
color = "grey20",
size = 3.2,
lineheight = .9
)
ggsave(file = paste0(i, " Division.png"), height = 4, width = 4, device = "png", type = "cairo")
}
For attribution, please cite this work as
Cuilla (2020, Sept. 13). UNCHARTED DATA: Visualizing NFL Division Ratings. Retrieved from https://uncharteddata.netlify.app/posts/2021-03-10-nfldivisionratings/
BibTeX citation
@misc{cuilla2020visualizing, author = {Cuilla, Kyle}, title = {UNCHARTED DATA: Visualizing NFL Division Ratings}, url = {https://uncharteddata.netlify.app/posts/2021-03-10-nfldivisionratings/}, year = {2020} }