This repository contains code that is used to benchmark torch. A job
runs daily and saves the results in the results
directory.
Benchmarks are run in a Docker container on a system equipped with:
- Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz
- NVIDIA Corporation GP102 [GeForce GTX 1080 Ti]
You can reproduce the benchmark by running:
./tools/setup.sh
Then, in an R session, run:
benchmark::run_benchmarks()
Results are published to the results
directory and can be parsed with
basic tidyverse commands, for example:
library(tidymodels)
library(jsonlite)
# Convert one parsed benchmark record into a one-record tibble.
# The "config" element is wrapped in a list before the conversion
# (presumably so it ends up as a single list-column entry that can be
# unnested later -- confirm against the results file layout).
prepare_line <- function(x) {
  wrapped <- modify_at(x, "config", list)
  as_tibble(wrapped)
}
# Read one results file, turn every record into a tibble row via
# prepare_line(), spread the config entries into character columns, then
# coerce the column types and express `time` per iteration.
results <- read_json("results/2022-10-21/5a19157-results.json") %>%
  map_dfr(prepare_line) %>%
  unnest_wider(config, transform = as.character) %>%
  mutate(
    across(c(name, version, platform), as.character),
    across(c(BATCH_SIZE, ITER), as.numeric),
    # mutate() evaluates its arguments in order, so ITER is numeric here.
    time = readr::parse_number(as.character(time)) / ITER
  )
# Python baseline: for every (name, BATCH_SIZE, DEVICE) combination, the
# mean per-iteration time with 20% of the observations trimmed from each
# end and missing values ignored.
py_reference <- results %>%
  filter(LANGUAGE == "py") %>%
  group_by(name, BATCH_SIZE, DEVICE) %>%
  summarise(
    .groups = "drop",
    time_py = mean(time, trim = 0.2, na.rm = TRUE)
  )
# R timings: trimmed mean per (VERSION, name, BATCH_SIZE, DEVICE,
# VECTORIZED_DS), matched with the Python baseline for the same
# name / BATCH_SIZE / DEVICE. `time_rel` is the R time as a multiple of the
# Python time, so a value of 1 means both take equally long.
r_results <- results %>%
  filter(LANGUAGE == "r") %>%
  group_by(VERSION, name, BATCH_SIZE, DEVICE, VECTORIZED_DS) %>%
  summarise(
    .groups = "drop",
    time_r = mean(time, trim = 0.2, na.rm = TRUE)
  ) %>%
  left_join(py_reference, by = c("name", "BATCH_SIZE", "DEVICE")) %>%
  mutate(time_rel = time_r / time_py)
# Plot the relative time (R / Python) on CUDA against batch size, one panel
# per benchmark and one colour per VERSION. Missing VECTORIZED_DS values are
# replaced by "" so they can be mapped to a shape and a line type.
r_results %>%
  filter(DEVICE == "cuda") %>%
  replace_na(list(VECTORIZED_DS = "")) %>%
  ggplot(aes(x = BATCH_SIZE, y = time_rel, color = VERSION)) +
  geom_point(aes(shape = VECTORIZED_DS)) +
  geom_line(aes(linetype = VECTORIZED_DS)) +
  facet_wrap(~name, ncol = 3, scales = "free") +
  # Reference line at 1, i.e. the Python time. `yintercept` has to be part
  # of aes(): when it is passed as a plain argument, geom_hline() discards
  # the mapping with a warning and the "python" colour/legend entry is lost.
  geom_hline(aes(yintercept = 1, color = "python"), linetype = "dashed") +
  scale_shape(guide = "none")