-
Notifications
You must be signed in to change notification settings - Fork 132
/
hex_chart.R
152 lines (129 loc) · 4.85 KB
/
hex_chart.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# from ggplot2 hexbin.R: https://github.com/hadley/ggplot2/blob/master/R/hexbin.R
hex_bounds <- function(x, binwidth) {
c(
plyr::round_any(min(x), binwidth, floor) - 1e-6,
plyr::round_any(max(x), binwidth, ceiling) + 1e-6
)
}
calculate_hex_coords = function(shots, binwidths) {
xbnds = hex_bounds(shots$loc_x, binwidths[1])
xbins = diff(xbnds) / binwidths[1]
ybnds = hex_bounds(shots$loc_y, binwidths[2])
ybins = diff(ybnds) / binwidths[2]
hb = hexbin(
x = shots$loc_x,
y = shots$loc_y,
xbins = xbins,
xbnds = xbnds,
ybnds = ybnds,
shape = ybins / xbins,
IDs = TRUE
)
shots = mutate(shots, hexbin_id = hb@cID)
hexbin_stats = shots %>%
group_by(hexbin_id) %>%
summarize(
hex_attempts = n(),
hex_pct = mean(shot_made_numeric),
hex_points_scored = sum(shot_made_numeric * shot_value),
hex_points_per_shot = mean(shot_made_numeric * shot_value),
.groups = "drop"
)
hexbin_ids_to_zones = shots %>%
group_by(hexbin_id, shot_zone_range, shot_zone_area) %>%
summarize(attempts = n(), .groups = "drop") %>%
arrange(hexbin_id, desc(attempts)) %>%
group_by(hexbin_id) %>%
filter(row_number() == 1) %>%
select(hexbin_id, shot_zone_range, shot_zone_area)
hexbin_stats = inner_join(hexbin_stats, hexbin_ids_to_zones, by = "hexbin_id")
# from hexbin package, see: https://github.com/edzer/hexbin
sx = hb@xbins / diff(hb@xbnds)
sy = (hb@xbins * hb@shape) / diff(hb@ybnds)
dx = 1 / (2 * sx)
dy = 1 / (2 * sqrt(3) * sy)
origin_coords = hexcoords(dx, dy)
hex_centers = hcell2xy(hb)
hexbin_coords = bind_rows(lapply(1:hb@ncells, function(i) {
tibble(
x = origin_coords$x + hex_centers$x[i],
y = origin_coords$y + hex_centers$y[i],
center_x = hex_centers$x[i],
center_y = hex_centers$y[i],
hexbin_id = hb@cell[i]
)
}))
inner_join(hexbin_coords, hexbin_stats, by = "hexbin_id")
}
calculate_hexbins_from_shots = function(shots, league_averages, binwidths = c(1, 1), min_radius_factor = 0.6, fg_diff_limits = c(-0.12, 0.12), fg_pct_limits = c(0.2, 0.7), pps_limits = c(0.5, 1.5)) {
if (nrow(shots) == 0) {
return(list())
}
grouped_shots = group_by(shots, shot_zone_range, shot_zone_area)
zone_stats = grouped_shots %>%
summarize(
zone_attempts = n(),
zone_pct = mean(shot_made_numeric),
zone_points_scored = sum(shot_made_numeric * shot_value),
zone_points_per_shot = mean(shot_made_numeric * shot_value),
.groups = "drop"
)
league_zone_stats = league_averages %>%
group_by(shot_zone_range, shot_zone_area) %>%
summarize(league_pct = sum(fgm) / sum(fga), .groups = "drop")
hex_data = calculate_hex_coords(shots, binwidths = binwidths)
join_keys = c("shot_zone_area", "shot_zone_range")
hex_data = hex_data %>%
inner_join(zone_stats, by = join_keys) %>%
inner_join(league_zone_stats, by = join_keys)
max_hex_attempts = max(hex_data$hex_attempts)
hex_data = mutate(hex_data,
radius_factor = min_radius_factor + (1 - min_radius_factor) * log(hex_attempts + 1) / log(max_hex_attempts + 1),
adj_x = center_x + radius_factor * (x - center_x),
adj_y = center_y + radius_factor * (y - center_y),
bounded_fg_diff = pmin(pmax(zone_pct - league_pct, fg_diff_limits[1]), fg_diff_limits[2]),
bounded_fg_pct = pmin(pmax(zone_pct, fg_pct_limits[1]), fg_pct_limits[2]),
bounded_points_per_shot = pmin(pmax(zone_points_per_shot, pps_limits[1]), pps_limits[2]))
list(hex_data = hex_data, fg_diff_limits = fg_diff_limits, fg_pct_limits = fg_pct_limits, pps_limits = pps_limits)
}
generate_hex_chart = function(hex_data, base_court, court_theme = court_themes$dark, metric = sym(bounded_fg_diff), alpha_range = c(0.85, 0.98)) {
if (length(hex_data) == 0) {
return(base_court)
}
if (metric == "bounded_fg_diff") {
fill_limit = hex_data$fg_diff_limits
fill_label = "FG% vs. League Avg"
label_formatter = percent_formatter
} else if (metric == "bounded_fg_pct") {
fill_limit = hex_data$fg_pct_limits
fill_label = "FG%"
label_formatter = percent_formatter
} else if (metric == "bounded_points_per_shot") {
fill_limit = hex_data$pps_limits
fill_label = "Points Per Shot"
label_formatter = points_formatter
} else {
stop("invalid metric")
}
base_court +
geom_polygon(
data = hex_data$hex_data,
aes(
x = adj_x,
y = adj_y,
group = hexbin_id,
fill = !!metric,
alpha = hex_attempts
),
size = court_theme$hex_border_size,
color = court_theme$hex_border_color
) +
scale_fill_viridis_c(
paste0(fill_label, " "),
limit = fill_limit,
labels = label_formatter,
guide = guide_colorbar(barwidth = 15)
) +
scale_alpha_continuous(guide = FALSE, range = alpha_range, trans = "sqrt") +
theme(legend.text = element_text(size = rel(0.6)))
}