From fecdf62764d1245578a114616ab013cf4c77912e Mon Sep 17 00:00:00 2001
From: Magdalen
Date: Fri, 1 Nov 2024 10:22:16 -0700
Subject: [PATCH 1/3] added gt for wiki runbooks

---
 benchmark/runner.py                             |  2 +-
 data_export.py                                  | 28 +++++++++++--------
 .../runbooks/gen_expiration_time_runbook.py     | 10 ++++---
 ...xpiration_time_replace_delete_runbook.yaml   |  1 +
 ..._expiration_time_replace_only_runbook.yaml   |  1 +
 ...xpiration_time_replace_delete_runbook.yaml   |  1 +
 ..._expiration_time_replace_only_runbook.yaml   |  1 +
 neurips23/streaming/diskann/config.yaml         | 26 +++++++++++++++++
 8 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/benchmark/runner.py b/benchmark/runner.py
index 6cc9a66a4..93ed65d5e 100644
--- a/benchmark/runner.py
+++ b/benchmark/runner.py
@@ -294,7 +294,7 @@ def run_docker(definition, dataset, count, runs, timeout, rebuild,
     # set/override container timeout based on competition flag
     if neurips23track!='none':
         # 1 hour for streaming and 12 hours for other tracks
-        timeout = 60 * 60 if neurips23track == 'streaming' else 12 * 60 * 60
+        timeout = 12 * 60 * 60 if neurips23track == 'streaming' else 12 * 60 * 60
         print("Setting container wait timeout to %d seconds" % timeout)

     elif not timeout:
diff --git a/data_export.py b/data_export.py
index 79abe2a5a..6063f979a 100644
--- a/data_export.py
+++ b/data_export.py
@@ -96,18 +96,22 @@ def cleaned_run_metric(run_metrics):
         dataset = DATASETS[dataset_name]()
         runbook_paths = [None]
         if track == 'streaming':
-            runbook_paths = ['neurips23/runbooks/streaming/simple_runbook.yaml',
-                'neurips23/runbooks/streaming/simple_replace_runbook.yaml',
-                'neurips23/runbooks/streaming/random_replace_runbook.yaml',
-                'neurips23/runbooks/streaming/clustered_replace_runbook.yaml',
-                'neurips23/runbooks/streaming/clustered_runbook.yaml',
-                'neurips23/runbooks/streaming/clustered_runbook.yaml',
-                'neurips23/runbooks/streaming/delete_runbook.yaml',
-                'neurips23/runbooks/streaming/final_runbook.yaml',
-                'neurips23/runbooks/streaming/msturing-10M_slidingwindow_runbook.yaml',
-                'neurips23/runbooks/streaming/wikipedia-35M_expirationtime_runbook.yaml',
-                'neurips23/runbooks/streaming/wikipedia-35M_expiration_time_replace_runbook.yaml',
-                'neurips23/runbooks/streaming/msmarco-100M_expirationtime_runbook.yaml']
+            runbook_paths = ['neurips23/runbooks/simple_runbook.yaml',
+                'neurips23/runbooks/simple_replace_runbook.yaml',
+                'neurips23/runbooks/random_replace_runbook.yaml',
+                'neurips23/runbooks/clustered_replace_runbook.yaml',
+                'neurips23/runbooks/clustered_runbook.yaml',
+                'neurips23/runbooks/clustered_runbook.yaml',
+                'neurips23/runbooks/delete_runbook.yaml',
+                'neurips23/runbooks/final_runbook.yaml',
+                'neurips23/runbooks/msturing-10M_slidingwindow_runbook.yaml',
+                'neurips23/runbooks/wikipedia-35M_expirationtime_runbook.yaml',
+                'neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml',
+                'neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml',
+                'neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml',
+                'neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml',
+                'neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml',
+                'neurips23/runbooks/msmarco-100M_expirationtime_runbook.yaml']
         for runbook_path in runbook_paths:
             print("Looking for runbook ", runbook_path)
             results = load_all_results(dataset_name, neurips23track=track, runbook_path=runbook_path)
diff --git a/neurips23/runbooks/gen_expiration_time_runbook.py b/neurips23/runbooks/gen_expiration_time_runbook.py
index bb5996bc2..63078c058 100644
--- a/neurips23/runbooks/gen_expiration_time_runbook.py
+++ b/neurips23/runbooks/gen_expiration_time_runbook.py
@@ -134,7 +134,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
 dataset_name = 'wikipedia-35M'
 dataset_size = 8000000 #only use a prefix of the dataset
 max_t = 80
-gt_url = None
+gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_only_runbook.yaml"
 gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False)

 ratios = (0, 4, 18)
@@ -144,7 +144,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
 dataset_name = 'wikipedia-1M'
 dataset_size = 1000000
 max_t = 100
-gt_url = None
+gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_only_runbook.yaml"
 gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False)

 ratios = (3, 8, 18)
@@ -154,7 +154,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
 dataset_name = 'wikipedia-35M'
 dataset_size = 35000000
 max_t = 350
-gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None)
+gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_delete_runbook.yaml"
+gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url)

 ratios = (1, 8, 18)
 timesteps = (0, 100, 20)
@@ -163,7 +164,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
 dataset_name = 'wikipedia-1M'
 dataset_size = 1000000
 max_t = 100
-gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None)
+gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_delete_runbook.yaml"
+gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url)

 ratios = (0, 6, 25)
 timesteps = (0, 200, 50)
diff --git a/neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml b/neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml
index b12159b07..322e43264 100644
--- a/neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml
+++ b/neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml
@@ -1208,3 +1208,4 @@ wikipedia-1M:
   316:
     operation: search
   max_pts: 293233
+  gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_delete_runbook.yaml"
diff --git a/neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml b/neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml
index d036dc57d..a26fbe274 100644
--- a/neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml
+++ b/neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml
@@ -1068,3 +1068,4 @@ wikipedia-1M:
   278:
     operation: search
   max_pts: 698369
+  gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_only_runbook.yaml"
diff --git a/neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml b/neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml
index 499c6fab3..a9b3aa49e 100644
--- a/neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml
+++ b/neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml
@@ -4436,3 +4436,4 @@ wikipedia-35M:
   1150:
     operation: search
   max_pts: 6682767
+  gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_delete_runbook.yaml"
diff --git a/neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml b/neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml
index f4fdcdd1e..ad5a38be0 100644
--- a/neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml
+++ b/neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml
@@ -852,3 +852,4 @@ wikipedia-35M:
   222:
     operation: search
   max_pts: 5548955
+  gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_only_runbook.yaml"
diff --git a/neurips23/streaming/diskann/config.yaml b/neurips23/streaming/diskann/config.yaml
index 5f7d765b2..a0d2b9ab6 100644
--- a/neurips23/streaming/diskann/config.yaml
+++ b/neurips23/streaming/diskann/config.yaml
@@ -48,6 +48,32 @@ msturing-1M:
         query-args: |
           [{"Ls":300, "T":16},
            {"Ls":100, "T":16}]
+wikipedia-1M:
+  diskann:
+    docker-tag: neurips23-streaming-diskann
+    module: neurips23.streaming.diskann.diskann-str
+    constructor: diskann
+    base-args: ["@metric"]
+    run-groups:
+      base:
+        args: |
+          [{"R":32, "L":100, "insert_threads":32, "consolidate_threads":32}]
+        query-args: |
+          [
+           {"Ls":100, "T":32}]
+wikipedia-35M:
+  diskann:
+    docker-tag: neurips23-streaming-diskann
+    module: neurips23.streaming.diskann.diskann-str
+    constructor: diskann
+    base-args: ["@metric"]
+    run-groups:
+      base:
+        args: |
+          [{"R":32, "L":100, "insert_threads":32, "consolidate_threads":32}]
+        query-args: |
+          [
+           {"Ls":100, "T":32}]
 msspacev-10M:
   diskann:
     docker-tag: neurips23-streaming-diskann

From 00fc2788ecfa30b47a36428657895193996031f7 Mon Sep 17 00:00:00 2001
From: Magdalen
Date: Fri, 1 Nov 2024 10:23:39 -0700
Subject: [PATCH 2/3] undo local change

---
 benchmark/runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmark/runner.py b/benchmark/runner.py
index 93ed65d5e..6cc9a66a4 100644
--- a/benchmark/runner.py
+++ b/benchmark/runner.py
@@ -294,7 +294,7 @@ def run_docker(definition, dataset, count, runs, timeout, rebuild,
     # set/override container timeout based on competition flag
     if neurips23track!='none':
         # 1 hour for streaming and 12 hours for other tracks
-        timeout = 12 * 60 * 60 if neurips23track == 'streaming' else 12 * 60 * 60
+        timeout = 60 * 60 if neurips23track == 'streaming' else 12 * 60 * 60
         print("Setting container wait timeout to %d seconds" % timeout)

     elif not timeout:

From c35fcb0be89486f137914b3aa47e849f78414c65 Mon Sep 17 00:00:00 2001
From: Magdalen
Date: Fri, 1 Nov 2024 13:42:57 -0700
Subject: [PATCH 3/3] fixed url for wiki-1M runbook

---
 neurips23/runbooks/gen_expiration_time_runbook.py            | 2 +-
 neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/neurips23/runbooks/gen_expiration_time_runbook.py b/neurips23/runbooks/gen_expiration_time_runbook.py
index 63078c058..23eb50014 100644
--- a/neurips23/runbooks/gen_expiration_time_runbook.py
+++ b/neurips23/runbooks/gen_expiration_time_runbook.py
@@ -124,7 +124,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
 dataset_name = 'wikipedia-1M'
 dataset_size = 1000000
 max_t = 100
-gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml/"
+gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml"
 gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, gt_url)

 ratios = (0, 4, 18)
diff --git a/neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml b/neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml
index 0875cf409..d40f4b42e 100644
--- a/neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml
+++ b/neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml
@@ -840,4 +840,4 @@ wikipedia-1M:
   260:
     operation: search
   max_pts: 410000
-  gt_url: https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml/
+  gt_url: https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml