Skip to content

Commit

Permalink
Updates to internal id usage
Browse files Browse the repository at this point in the history
  • Loading branch information
atungate committed Aug 17, 2023
1 parent ec825a7 commit eedb544
Show file tree
Hide file tree
Showing 4 changed files with 377 additions and 57 deletions.
125 changes: 80 additions & 45 deletions R/helper_funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,30 @@ get_mfl_id = function(id_col = NULL, player_name = NULL, first = NULL,
team = team,
id = NA
)
max_len = max(lengths(l_p_info))
length_1 = lengths(l_p_info) == 1

l_p_info[length_1] = lapply(l_p_info[length_1], function(x) {
rep(x, max_len)
})

if(!is.null(player_name)) {
if(is.null(first)) {
l_p_info$first = sub("\\s+.*$", "", player_name)
}

if(is.null(last)) {
l_p_info$last = sub(".*?\\s+", "", player_name)
}
}


l_p_info = Filter(Negate(is.null), l_p_info)
l_p_info = lapply(l_p_info, function(x) {
x = rename_vec(x, unlist(pos_corrections))
x = rename_vec(toupper(x), unlist(pos_corrections))
x = rename_vec(x, unlist(team_corrections))
x = gsub("[[:punct:]]+", "", x)
x = gsub("\\s+(?i)(defense|jr|sr|[iv]+)$", "", x)
x = gsub("\\s+(defense|jr|sr|[iv]+)\\.?$", "", tolower(x))
x = gsub("[[:punct:]]+|\\s+", "", x)
x
})

Expand All @@ -34,57 +51,64 @@ get_mfl_id = function(id_col = NULL, player_name = NULL, first = NULL,
}

ref_table = player_table %>%
mutate(across(where(is.character), tolower)) %>%
transmute(id = id,
player_name = paste(first_name, last_name),
player_name = gsub("\\s+(?i)(defense|jr|sr|[iv]+)$", "", player_name),
player_name = gsub("[[:punct:]]|\\s+", "", tolower(player_name)),
last = gsub("\\s+(?i)(defense|jr|sr|[iv]+)$", "", last_name),
last = gsub("[[:punct:]]|\\s+", "", tolower(last)),
pos = rename_vec(position, unlist(pos_corrections)),
team = rename_vec(team, unlist(team_corrections)))


df_p_info = as.data.frame(l_p_info)
player_name = gsub("\\s+(defense|jr|sr|[iv]+)\\.?$", "", player_name),
player_name = gsub("[[:punct:]]|\\s+", "", player_name),
last = gsub("\\s+(defense|jr|sr|[iv]+)\\.?$", "", last_name),
last = gsub("[[:punct:]]|\\s+", "", last),
first = gsub("\\s+(defense|jr|sr|[iv]+)\\.?$", "", first_name),
first = gsub("[[:punct:]]|\\s+", "", first),
pos = rename_vec(toupper(position), unlist(pos_corrections)),
pos = tolower(pos),
team = rename_vec(toupper(team), unlist(team_corrections)),
team = tolower(team))

# If pos = DST, replace by team name
if("pos" %in% names(df_p_info)) {
df_p_info$id = ifelse(
df_p_info$pos == "DST",
ref_table$id[match(df_p_info$team, ref_table$team)],
df_p_info$id
if("pos" %in% names(l_p_info)) {
l_p_info$id = ifelse(
l_p_info$pos == "DST",
ref_table$id[match(l_p_info$team, ref_table$team)],
l_p_info$id
)
if(all(df_p_info$pos == "DST")) {
return(df_p_info$id)
}
}

if(!"player_name" %in% names(df_p_info)) {
df_p_info$player_name = tolower(paste0(df_p_info$first, df_p_info$last))
} else {
df_p_info$player_name = gsub("\\s+", "", tolower(df_p_info$player_name))
}
col_combos = list(
c("player_name", "pos", "team"),
c("last", "pos", "team"),
c("player_name", "team"),
c("player_name", "pos"),
c("last", "team"),
c("first", "pos", "team")
)
combo_idx = vapply(col_combos, function(x) {
all(x %in% names(l_p_info))
}, logical(1L))

df_p_info = df_p_info %>%
mutate(
id = dplyr::case_when(
!is.na(id) ~ id,
paste0(player_name, pos, team) %in% do.call(paste0, ref_table[c("player_name", "pos", "team")]) ~
ref_table$id[match(paste0(player_name, pos, team), do.call(paste0, ref_table[c("player_name", "pos", "team")]))],
paste0(player_name, pos, team) %in% do.call(paste0, ref_table[c("last", "pos", "team")]) ~
ref_table$id[match(paste0(player_name, pos, team), do.call(paste0, ref_table[c("last", "pos", "team")]))],
paste0(player_name, pos) %in% do.call(paste0, ref_table[c("player_name", "pos")]) ~
ref_table$id[match(paste0(player_name, pos), do.call(paste0, ref_table[c("player_name", "pos")]))],
paste0(player_name, pos) %in% do.call(paste0, ref_table[c("last", "pos")]) ~
ref_table$id[match(paste0(player_name, pos), do.call(paste0, ref_table[c("last", "pos")]))],
paste0(player_name, team) %in% do.call(paste0, ref_table[c("player_name", "team")]) ~
ref_table$id[match(paste0(player_name, team), do.call(paste0, ref_table[c("player_name", "team")]))],
paste0(player_name, team) %in% do.call(paste0, ref_table[c("last", "team")]) ~
ref_table$id[match(paste0(player_name, team), do.call(paste0, ref_table[c("last", "team")]))],
TRUE ~ NA_character_
)
)
for(combo in col_combos[combo_idx]) {
id_idx = is.na(l_p_info$id)

l_p_info_vec = do.call(paste0, l_p_info[combo])[id_idx]
ref_table_vec = do.call(paste0, ref_table[combo])

# Removing dups from reftable
ref_dups = ref_table_vec[duplicated(ref_table_vec)]
keep_in_ref = !ref_table_vec %in% ref_dups
ref_table_vec = ref_table_vec[keep_in_ref]


l = lapply(l_p_info_vec, function(y) {
which(ref_table_vec %in% y)
})
l[lengths(l) != 1] = NA_integer_
match_vec = unlist(l)

l_p_info$id[id_idx] = ref_table$id[keep_in_ref][match_vec]

}
l_p_info$id

df_p_info$id
}

get_scrape_year <- function(date) {
Expand Down Expand Up @@ -144,6 +168,17 @@ row_sd = function(x, na.rm = FALSE) {
r_sd
}

impute_and_score_sources = function(data_result, scoring_rules) {
scoring_objs = make_scoring_tables(scoring_rules)

data_result = impute_via_rates_and_mean(data_result, scoring_objs)
data_result = impute_bonus_cols(data_result, scoring_objs$scoring_tables)

data_result[] = source_points(data_result, scoring_rules, return_data_result = TRUE)
data_result
}


# Returns new player_id table
update_player_id_table = function(player_id_table = NULL, id_column, value) {

Expand Down
42 changes: 30 additions & 12 deletions R/source_scrapes.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ scrape_cbs = function(pos = c("QB", "RB", "WR", "TE", "K", "DST"), season = NULL

# Get PID
if(pos == "DST") {
site_id = html_page %>%
cbs_id = html_page %>%
rvest::html_elements("span.TeamName a") %>%
rvest::html_attr("href") %>%
sub(".*?([A-Z]{2,3}).*", "\\1", .)
} else {
site_id = html_page %>%
cbs_id = html_page %>%
rvest::html_elements("table > tbody > tr > td:nth-child(1) > span.CellPlayerName--long > span > a") %>%
rvest::html_attr("href") %>%
sub(".*?([0-9]+).*", "\\1", .)
Expand All @@ -65,20 +65,21 @@ scrape_cbs = function(pos = c("QB", "RB", "WR", "TE", "K", "DST"), season = NULL
out_df = out_df %>%
tidyr::extract(player, c("player", "pos", "team"),
".*?\\s{2,}[A-Z]{1,3}\\s{2,}[A-Z]{2,3}\\s{2,}(.*?)\\s{2,}(.*?)\\s{2,}(.*)") %>%
dplyr::mutate(src_id = site_id,
dplyr::mutate(src_id = cbs_id,
data_src = "CBS",
id = player_ids$id[match(src_id, player_ids$cbs_id)])
out_df$id = get_mfl_id(
id_col = cbs_id,
player_name = out_df$player,
pos = out_df$pos,
team = out_df$team
)
} else {
out_df$team = site_id
out_df$team = cbs_id
out_df$data_src = "CBS"
dst_ids = ff_player_data[ff_player_data$position == "Def", c("id", "team")]
dst_ids$team[dst_ids$team == "OAK"] = "LV"
out_df$id = dst_ids$id[match(site_id, dst_ids$team)]
out_df$id = dst_ids$id[match(cbs_id, dst_ids$team)]
out_df$src_id = player_ids$cbs_id[match(out_df$id, player_ids$id)]
}

Expand Down Expand Up @@ -546,7 +547,14 @@ scrape_walterfootball <- function(pos = c("QB", "RB", "WR", "TE", "K"),

# FleaFlicker ----
scrape_fleaflicker <- function(pos = c("QB", "RB", "WR", "TE", "K", "DST", "DL", "LB", "DB"),
season = 2022, week = NULL, draft = FALSE, weekly = TRUE) {
season = NULL, week = NULL, draft = FALSE, weekly = TRUE) {

if(is.null(season)) {
season = get_scrape_year()
}
if(is.null(week)) {
week = get_scrape_week()
}

# IDP positions
if("DL" %in% pos) {
Expand Down Expand Up @@ -609,8 +617,9 @@ scrape_fleaflicker <- function(pos = c("QB", "RB", "WR", "TE", "K", "DST", "DL",
position, "&tableOffset=", offset)


Sys.sleep(1L)

if(i != 1L) {
Sys.sleep(2L)
}

# 20 rows of player data by position
html_page <- site_session %>%
Expand Down Expand Up @@ -672,13 +681,21 @@ scrape_fleaflicker <- function(pos = c("QB", "RB", "WR", "TE", "K", "DST", "DL",
mutate(player = gsub("^Q(?=[A-Z])", "", player, perl = TRUE)) %>%
extract(player, into = c("first_name", "last_name", "pos_temp", "team", "bye"),
regex = "(.*?)\\s+(.*?)\\s+(.*?)\\s+(.*?)\\s+.*(\\d+)\\)$", convert = TRUE) %>%
unite("player", first_name:last_name, sep = " ") %>%
tidyr::unite("player", first_name:last_name, sep = " ", remove = FALSE) %>%
mutate(data_src = "FleaFlicker") %>%
# rename for now so while loop works
mutate(src_id = fleaflicker_id,
id = get_mfl_id(fleaflicker_id, pos = pos, player_name = player),
id = get_mfl_id(
id_col = fleaflicker_id,
player_name = player,
first = first_name,
last = last_name,
pos = pos,
team = team
),
pos_temp = pos) %>%
rename(pos = pos_temp)
rename(pos = pos_temp) %>%
select(-first_name, -last_name)
}
)

Expand Down Expand Up @@ -1196,7 +1213,8 @@ scrape_espn = function(pos = c("QB", "RB", "WR", "TE", "K", "DST"), season = NUL
out_df$id = ffanalytics:::get_mfl_id(
out_df$espn_id,
player_name = out_df$player_name,
pos = out_df$position
pos = out_df$position,
team = out_df$team
)
}

Expand Down
Binary file modified R/sysdata.rda
Binary file not shown.
Loading

0 comments on commit eedb544

Please sign in to comment.