Skip to content

Commit

Permalink
added gt for wiki runbooks
Browse files Browse the repository at this point in the history
  • Loading branch information
magdalendobson committed Nov 1, 2024
1 parent 3c35f54 commit fecdf62
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 17 deletions.
2 changes: 1 addition & 1 deletion benchmark/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def run_docker(definition, dataset, count, runs, timeout, rebuild,
# set/override container timeout based on competition flag
if neurips23track!='none':
# 12 hours for every track; streaming no longer gets a shorter 1-hour limit
timeout = 60 * 60 if neurips23track == 'streaming' else 12 * 60 * 60
timeout = 12 * 60 * 60 if neurips23track == 'streaming' else 12 * 60 * 60
print("Setting container wait timeout to %d seconds" % timeout)

elif not timeout:
Expand Down
28 changes: 16 additions & 12 deletions data_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,22 @@ def cleaned_run_metric(run_metrics):
dataset = DATASETS[dataset_name]()
runbook_paths = [None]
if track == 'streaming':
runbook_paths = ['neurips23/runbooks/streaming/simple_runbook.yaml',
'neurips23/runbooks/streaming/simple_replace_runbook.yaml',
'neurips23/runbooks/streaming/random_replace_runbook.yaml',
'neurips23/runbooks/streaming/clustered_replace_runbook.yaml',
'neurips23/runbooks/streaming/clustered_runbook.yaml',
'neurips23/runbooks/streaming/clustered_runbook.yaml',
'neurips23/runbooks/streaming/delete_runbook.yaml',
'neurips23/runbooks/streaming/final_runbook.yaml',
'neurips23/runbooks/streaming/msturing-10M_slidingwindow_runbook.yaml',
'neurips23/runbooks/streaming/wikipedia-35M_expirationtime_runbook.yaml',
'neurips23/runbooks/streaming/wikipedia-35M_expiration_time_replace_runbook.yaml',
'neurips23/runbooks/streaming/msmarco-100M_expirationtime_runbook.yaml']
runbook_paths = ['neurips23/runbooks/simple_runbook.yaml',
'neurips23/runbooks/simple_replace_runbook.yaml',
'neurips23/runbooks/random_replace_runbook.yaml',
'neurips23/runbooks/clustered_replace_runbook.yaml',
'neurips23/runbooks/clustered_runbook.yaml',
'neurips23/runbooks/clustered_runbook.yaml',
'neurips23/runbooks/delete_runbook.yaml',
'neurips23/runbooks/final_runbook.yaml',
'neurips23/runbooks/msturing-10M_slidingwindow_runbook.yaml',
'neurips23/runbooks/wikipedia-35M_expirationtime_runbook.yaml',
'neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml',
'neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml',
'neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml',
'neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml',
'neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml',
'neurips23/runbooks/msmarco-100M_expirationtime_runbook.yaml']
for runbook_path in runbook_paths:
print("Looking for runbook ", runbook_path)
results = load_all_results(dataset_name, neurips23track=track, runbook_path=runbook_path)
Expand Down
10 changes: 6 additions & 4 deletions neurips23/runbooks/gen_expiration_time_runbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-35M'
dataset_size = 8000000 #only use a prefix of the dataset
max_t = 80
gt_url = None
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_only_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False)

ratios = (0, 4, 18)
Expand All @@ -144,7 +144,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-1M'
dataset_size = 1000000
max_t = 100
gt_url = None
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_only_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False)

ratios = (3, 8, 18)
Expand All @@ -154,7 +154,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-35M'
dataset_size = 35000000
max_t = 350
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None)
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_delete_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url)

ratios = (1, 8, 18)
timesteps = (0, 100, 20)
Expand All @@ -163,7 +164,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-1M'
dataset_size = 1000000
max_t = 100
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None)
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_delete_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url)

ratios = (0, 6, 25)
timesteps = (0, 200, 50)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1208,3 +1208,4 @@ wikipedia-1M:
316:
operation: search
max_pts: 293233
gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_delete_runbook.yaml"
Original file line number Diff line number Diff line change
Expand Up @@ -1068,3 +1068,4 @@ wikipedia-1M:
278:
operation: search
max_pts: 698369
gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_only_runbook.yaml"
Original file line number Diff line number Diff line change
Expand Up @@ -4436,3 +4436,4 @@ wikipedia-35M:
1150:
operation: search
max_pts: 6682767
gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_delete_runbook.yaml"
Original file line number Diff line number Diff line change
Expand Up @@ -852,3 +852,4 @@ wikipedia-35M:
222:
operation: search
max_pts: 5548955
gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_only_runbook.yaml"
26 changes: 26 additions & 0 deletions neurips23/streaming/diskann/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,32 @@ msturing-1M:
query-args: |
[{"Ls":300, "T":16},
{"Ls":100, "T":16}]
wikipedia-1M:
# Streaming-track diskann entry for the wikipedia-1M dataset.
# Declares one run group ("base") with a single build-argument set and a
# single query-argument set.
# NOTE(review): the meanings of R, L, Ls and T are not defined in this file;
# confirm them against the module named below before tuning.
diskann:
docker-tag: neurips23-streaming-diskann
module: neurips23.streaming.diskann.diskann-str
constructor: diskann
base-args: ["@metric"]
run-groups:
base:
args: |
[{"R":32, "L":100, "insert_threads":32, "consolidate_threads":32}]
query-args: |
[
{"Ls":100, "T":32}]
wikipedia-35M:
# Streaming-track diskann entry for the wikipedia-35M dataset.
# Uses the same docker tag, module, constructor, build args and query args
# as the wikipedia-1M entry in this file.
# NOTE(review): the meanings of R, L, Ls and T are not defined in this file;
# confirm them against the module named below before tuning.
diskann:
docker-tag: neurips23-streaming-diskann
module: neurips23.streaming.diskann.diskann-str
constructor: diskann
base-args: ["@metric"]
run-groups:
base:
args: |
[{"R":32, "L":100, "insert_threads":32, "consolidate_threads":32}]
query-args: |
[
{"Ls":100, "T":32}]
msspacev-10M:
diskann:
docker-tag: neurips23-streaming-diskann
Expand Down

0 comments on commit fecdf62

Please sign in to comment.