Skip to content

Commit

Permalink
more tolerance tweaks, removed dead code.
Browse files Browse the repository at this point in the history
  • Loading branch information
lintool committed Sep 13, 2024
1 parent 7db41f5 commit 5996a8e
Show file tree
Hide file tree
Showing 20 changed files with 46 additions and 273 deletions.
231 changes: 2 additions & 229 deletions src/main/python/run_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import tarfile
import time
import yaml
from collections import defaultdict
from multiprocessing import Pool
from subprocess import call, Popen, PIPE
from tqdm import tqdm
Expand Down Expand Up @@ -144,10 +143,10 @@ def construct_indexing_command(yaml_data, args):


def construct_runfile_path(index, id, model_name):
    """Return the run file path under 'runs/' for an index, id, and model name.

    The second '/'-separated component of ``index`` (the index directory name)
    is split once on its first '-', and everything after that dash is kept.
    Only the first dash is consumed, so later dashes survive:

      'indexes/lucene-inverted.msmarco-passage-ca/'
          -> 'inverted.msmarco-passage-ca'
      'indexes/lucene-hnsw-int8.msmarco-v1-passage.cos-dpr-distil/'
          -> 'hnsw-int8.msmarco-v1-passage.cos-dpr-distil'
      'indexes/lucene-hnsw.msmarco-v1-passage.cos-dpr-distil/'
          -> 'hnsw.msmarco-v1-passage.cos-dpr-distil'
      'indexes/lucene-inverted.mrtydi-v1.1-arabic/'
          -> 'inverted.mrtydi-v1.1-arabic'

    The result has the form 'runs/run.<index part>.<id>.<model_name>'.

    Raises:
        IndexError: if ``index`` contains no '/' or its second component
            contains no '-'.
    """
    # maxsplit=1 matters: names such as 'v1.1-arabic' contain further dashes
    # that must stay part of the extracted index name.
    index_part = index.split('/')[1].split('-', 1)[1]
    return os.path.join('runs/', f'run.{index_part}.{id}.{model_name}')

Expand Down Expand Up @@ -183,184 +182,6 @@ def construct_convert_commands(yaml_data):
return converting_commands


# beir_flat_int8_onnx = defaultdict(lambda: 0.004)
# beir_flat_int8_onnx['ArguAna'] = 0.03
# beir_flat_int8_onnx['NFCorpus'] = 0.007
# beir_flat_int8_onnx['Signal-1M'] = 0.006
# beir_flat_int8_onnx['TREC-NEWS'] = 0.01
# beir_flat_int8_onnx['Webis-Touche2020'] = 0.007
#
# beir_flat_int8_cached = defaultdict(lambda: 0.004)
# beir_flat_int8_cached['BioASQ'] = 0.005
# beir_flat_int8_cached['NFCorpus'] = 0.006
# beir_flat_int8_cached['Signal-1M'] = 0.007
# beir_flat_int8_cached['TREC-NEWS'] = 0.01
# beir_flat_int8_cached['Webis-Touche2020'] = 0.007
#
# beir_flat_onnx = defaultdict(lambda: 0.001)
# beir_flat_onnx['ArguAna'] = 0.02
# beir_flat_onnx['CQADupStack-wordpress'] = 0.002
# beir_flat_onnx['Quora'] = 0.002
# beir_flat_onnx['Robust04'] = 0.004
#
# beir_flat_cached = defaultdict(lambda: 1e-9)
#
# beir_flat_tolerance = {
# 'flat-int8-onnx': beir_flat_int8_onnx,
# 'flat-int8-cached': beir_flat_int8_cached,
# 'flat-onnx': beir_flat_onnx,
# 'flat-cached': beir_flat_cached,
# }

# beir_hnsw_int8_onnx = defaultdict(lambda: 0.005)
# beir_hnsw_int8_onnx['ArguAna'] = 0.03
# beir_hnsw_int8_onnx['BioASQ'] = 0.02
# beir_hnsw_int8_onnx['DBPedia'] = 0.007
# beir_hnsw_int8_onnx['FiQA-2018'] = 0.007
# beir_hnsw_int8_onnx['HotpotQA'] = 0.008
# beir_hnsw_int8_onnx['NFCorpus'] = 0.006
# beir_hnsw_int8_onnx['Robust04'] = 0.006
# beir_hnsw_int8_onnx['Signal-1M'] = 0.04
# beir_hnsw_int8_onnx['TREC-NEWS'] = 0.02
# beir_hnsw_int8_onnx['Webis-Touche2020'] = 0.01
#
# beir_hnsw_int8_cached = defaultdict(lambda: 0.005)
# beir_hnsw_int8_cached['BioASQ'] = 0.02
# beir_hnsw_int8_cached['FiQA-2018'] = 0.007
# beir_hnsw_int8_cached['HotpotQA'] = 0.007
# beir_hnsw_int8_cached['Signal-1M'] = 0.04
# beir_hnsw_int8_cached['TREC-NEWS'] = 0.02
# beir_hnsw_int8_cached['Webis-Touche2020'] = 0.006
#
# beir_hnsw_onnx = defaultdict(lambda: 0.003)
# beir_hnsw_onnx['ArguAna'] = 0.02
# beir_hnsw_onnx['BioASQ'] = 0.01
# beir_hnsw_onnx['CQADupStack-wordpress'] = 0.004
# beir_hnsw_onnx['DBPedia'] = 0.006
# beir_hnsw_onnx['FEVER'] = 0.007
# beir_hnsw_onnx['FiQA-2018'] = 0.007
# beir_hnsw_onnx['HotpotQA'] = 0.007
# beir_hnsw_onnx['Robust04'] = 0.004
# beir_hnsw_onnx['Signal-1M'] = 0.05
# beir_hnsw_onnx['TREC-NEWS'] = 0.02
#
# beir_hnsw_cached = defaultdict(lambda: 0.003)
# beir_hnsw_cached['BioASQ'] = 0.01
# beir_hnsw_cached['DBPedia'] = 0.006
# beir_hnsw_cached['FEVER'] = 0.008
# beir_hnsw_cached['FiQA-2018'] = 0.008
# beir_hnsw_cached['HotpotQA'] = 0.007
# beir_hnsw_cached['Signal-1M'] = 0.05
# beir_hnsw_cached['TREC-NEWS'] = 0.025
#
# beir_hnsw_tolerance = {
# 'hnsw-int8-onnx': beir_hnsw_int8_onnx,
# 'hnsw-int8-cached': beir_hnsw_int8_cached,
# 'hnsw-onnx': beir_hnsw_onnx,
# 'hnsw-cached': beir_hnsw_cached,
# }

#flat_model_type_pattern = re.compile(r'(flat-int8-onnx|flat-int8-cached|flat-onnx|flat-cached)$')
# hnsw_model_type_pattern = re.compile(r'(hnsw-int8-onnx|hnsw-int8-cached|hnsw-onnx|hnsw-cached)$')
#
# beir_dataset_pattern = re.compile(r'BEIR \(v1.0.0\): (.*)$')

# msmarco_v1_flat_int8_onnx = defaultdict(lambda: 0.002)
# msmarco_v1_flat_int8_cached = defaultdict(lambda: 0.002)
# msmarco_v1_flat_int8_cached['openai-ada2-flat-int8-cached'] = 0.008
# msmarco_v1_flat_onnx = defaultdict(lambda: 0.0001)
# msmarco_v1_flat_cached = defaultdict(lambda: 1e-9)
#
# msmarco_v1_flat_tolerance = {
# 'flat-int8-onnx': msmarco_v1_flat_int8_onnx,
# 'flat-int8-cached': msmarco_v1_flat_int8_cached,
# 'flat-onnx': msmarco_v1_flat_onnx,
# 'flat-cached': msmarco_v1_flat_cached,
# }
#
# dl19_flat_int8_onnx = defaultdict(lambda: 0.002)
# dl19_flat_int8_onnx['bge-flat-int8-onnx'] = 0.008
# dl19_flat_int8_cached = defaultdict(lambda: 0.002)
# dl19_flat_int8_cached['bge-flat-int8-cached'] = 0.005
# dl19_flat_int8_cached['openai-ada2-flat-int8-cached'] = 0.008
# dl19_flat_onnx = defaultdict(lambda: 0.0001)
# dl19_flat_onnx['bge-flat-onnx'] = 0.008
# dl19_flat_cached = defaultdict(lambda: 1e-9)
#
# dl19_flat_tolerance = {
# 'flat-int8-onnx': dl19_flat_int8_onnx,
# 'flat-int8-cached': dl19_flat_int8_cached,
# 'flat-onnx': dl19_flat_onnx,
# 'flat-cached': dl19_flat_cached,
# }
#
# dl20_flat_int8_onnx = defaultdict(lambda: 0.002)
# dl20_flat_int8_onnx['bge-flat-int8-onnx'] = 0.004
# dl20_flat_int8_onnx['cos-dpr-distil-flat-int8-onnx'] = 0.004
# dl20_flat_int8_cached = defaultdict(lambda: 0.002)
# dl20_flat_int8_cached['bge-flat-int8-cached'] = 0.005
# dl20_flat_int8_cached['cos-dpr-distil-flat-int8-cached'] = 0.004
# dl20_flat_int8_cached['cohere-embed-english-v3.0-flat-int8-cached'] = 0.004
# dl20_flat_int8_cached['openai-ada2-flat-int8-cached'] = 0.003
# dl20_flat_onnx = defaultdict(lambda: 0.0001)
# dl20_flat_onnx['bge-flat-onnx'] = 0.005
# dl20_flat_cached = defaultdict(lambda: 1e-9)
#
# dl20_flat_tolerance = {
# 'flat-int8-onnx': dl20_flat_int8_onnx,
# 'flat-int8-cached': dl20_flat_int8_cached,
# 'flat-onnx': dl20_flat_onnx,
# 'flat-cached': dl20_flat_cached,
# }

# msmarco_v1_hnsw_int8_onnx = defaultdict(lambda: 0.01)
# msmarco_v1_hnsw_int8_cached = defaultdict(lambda: 0.01)
# msmarco_v1_hnsw_onnx = defaultdict(lambda: 0.01)
# msmarco_v1_hnsw_onnx['cos-dpr-distil-hnsw-onnx'] = 0.015
# msmarco_v1_hnsw_cached = defaultdict(lambda: 0.01)
# msmarco_v1_hnsw_cached['cos-dpr-distil-hnsw-cached'] = 0.015
#
# msmarco_v1_hnsw_tolerance = {
# 'hnsw-int8-onnx': msmarco_v1_hnsw_int8_onnx,
# 'hnsw-int8-cached': msmarco_v1_hnsw_int8_cached,
# 'hnsw-onnx': msmarco_v1_hnsw_onnx,
# 'hnsw-cached': msmarco_v1_hnsw_cached,
# }
#
# dl19_hnsw_int8_onnx = defaultdict(lambda: 0.01)
# dl19_hnsw_int8_onnx['bge-hnsw-int8-onnx'] = 0.025
# dl19_hnsw_int8_onnx['cos-dpr-distil-hnsw-int8-onnx'] = 0.025
# dl19_hnsw_int8_cached = defaultdict(lambda: 0.01)
# dl19_hnsw_int8_cached['bge-hnsw-int8-cached'] = 0.02
# dl19_hnsw_int8_cached['cohere-embed-english-v3.0-hnsw-int8-cached'] = 0.02
# dl19_hnsw_int8_cached['cos-dpr-distil-hnsw-int8-cached'] = 0.025
# dl19_hnsw_int8_cached['openai-ada2-hnsw-int8-cached'] = 0.015
# dl19_hnsw_onnx = defaultdict(lambda: 0.015)
# dl19_hnsw_onnx['bge-hnsw-onnx'] = 0.02
# dl19_hnsw_cached = defaultdict(lambda: 0.015)
# dl19_hnsw_cached['cohere-embed-english-v3.0-hnsw-cached'] = 0.02
#
# dl19_hnsw_tolerance = {
# 'hnsw-int8-onnx': dl19_hnsw_int8_onnx,
# 'hnsw-int8-cached': dl19_hnsw_int8_cached,
# 'hnsw-onnx': dl19_hnsw_onnx,
# 'hnsw-cached': dl19_hnsw_cached,
# }
#
# dl20_hnsw_int8_onnx = defaultdict(lambda: 0.02)
# dl20_hnsw_int8_cached = defaultdict(lambda: 0.02)
# dl20_hnsw_onnx = defaultdict(lambda: 0.015)
# dl20_hnsw_cached = defaultdict(lambda: 0.015)
# dl20_hnsw_cached['cohere-embed-english-v3.0-hnsw-cached'] = 0.025
#
# dl20_hnsw_tolerance = {
# 'hnsw-int8-onnx': dl20_hnsw_int8_onnx,
# 'hnsw-int8-cached': dl20_hnsw_int8_cached,
# 'hnsw-onnx': dl20_hnsw_onnx,
# 'hnsw-cached': dl20_hnsw_cached,
# }


def evaluate_and_verify(yaml_data, dry_run):
fail_str = '\033[91m[FAIL]\033[0m '
ok_str = ' [OK] '
Expand Down Expand Up @@ -397,54 +218,6 @@ def evaluate_and_verify(yaml_data, dry_run):
else:
tolerance_ok = 0

# if using_flat:
# if 'tolerance' in model:
# #print(model['tolerance'])
# #print(metric)
# tolerance_ok = model['tolerance'][metric['metric']][i]
# else:
# tolerance_ok = 0
# else:
# # Extract model
# match = flat_model_type_pattern.search(model['name'])
# model_type = match.group(1)
#
# if 'BEIR' in topic_set['name']:
# # Extract BEIR dataset
# match = beir_dataset_pattern.search(topic_set['name'])
# beir_dataset = match.group(1)
#
# tolerance_ok = beir_flat_tolerance[model_type][beir_dataset]
# elif 'MS MARCO Passage' in topic_set['name']:
# tolerance_ok = msmarco_v1_flat_tolerance[model_type][model['name']]
# elif 'DL19' in topic_set['name']:
# tolerance_ok = dl19_flat_tolerance[model_type][model['name']]
# elif using_flat and 'DL20' in topic_set['name']:
# tolerance_ok = dl20_flat_tolerance[model_type][model['name']]

# if using_hnsw:
# if 'tolerance' in model:
# tolerance_ok = model['tolerance'][metric['metric']][i]
# else:
# tolerance_ok = 0
# else:
# # Extract model
# match = hnsw_model_type_pattern.search(model['name'])
# model_type = match.group(1)
#
# if 'BEIR' in topic_set['name']:
# # Extract BEIR dataset
# match = beir_dataset_pattern.search(topic_set['name'])
# beir_dataset = match.group(1)
#
# tolerance_ok = beir_hnsw_tolerance[model_type][beir_dataset]
# elif 'MS MARCO Passage' in topic_set['name']:
# tolerance_ok = msmarco_v1_hnsw_tolerance[model_type][model['name']]
# elif 'DL19' in topic_set['name']:
# tolerance_ok = dl19_hnsw_tolerance[model_type][model['name']]
# elif 'DL20' in topic_set['name']:
# tolerance_ok = dl20_hnsw_tolerance[model_type][model['name']]

if using_flat or using_hnsw:
result_str = (f'expected: {expected:.4f} actual: {actual:.4f} '
f'(delta={expected-actual:.4f}, tolerance={tolerance_ok:.4f}) - '
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ models:
- 0.8472
tolerance:
AP@1000:
- 0.006
- 0.008
nDCG@10:
- 0.015
R@100:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ models:
- 0.8630
tolerance:
AP@1000:
- 0.001
- 0.015
nDCG@10:
- 0.003
- 0.015
R@100:
- 0.003
R@1000:
- 0.015
R@1000:
- 0.025
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ models:
- 0.8630
tolerance:
AP@1000:
- 0.001
- 0.007
nDCG@10:
- 0.004
- 0.015
R@100:
- 0.006
- 0.007
R@1000:
- 0.02
- 0.025
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ models:
- 0.8201
tolerance:
AP@1000:
- 0.005
- 0.003
nDCG@10:
- 0.005
- 0.015
R@100:
- 0.005
- 0.009
R@1000:
- 0.005
- 0.015
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ models:
AP@1000:
- 0.007
nDCG@10:
- 0.015
- 0.025
R@100:
- 0.009
R@1000:
- 0.015
R@1000:
- 0.02
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ models:
AP@1000:
- 0.007
nDCG@10:
- 0.015
- 0.025
R@100:
- 0.008
R@1000:
- 0.015
R@1000:
- 0.02
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ models:
- 0.8629
tolerance:
AP@1000:
- 0.003
- 0.005
nDCG@10:
- 0.003
- 0.015
R@100:
- 0.01
R@1000:
- 0.01
- 0.015
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ models:
R@100:
- 0.002
R@1000:
- 0.005
- 0.006
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ models:
AP@1000:
- 0.007
nDCG@10:
- 0.005
- 0.008
R@100:
- 0.015
R@1000:
- 0.02
- 0.025
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ models:
R@100:
- 0.015
R@1000:
- 0.015
- 0.02
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ models:
- 0.8682
tolerance:
AP@1000:
- 0.002
- 0.004
nDCG@10:
- 0.001
R@100:
- 0.01
R@1000:
- 0.015
R@1000:
- 0.02
Loading

0 comments on commit 5996a8e

Please sign in to comment.