From 14e0a2b7d0148a2356ea6917e37a550a47c1f611 Mon Sep 17 00:00:00 2001 From: lintool Date: Sun, 17 Sep 2023 19:23:38 -0400 Subject: [PATCH] Fix broken code and params in 'Neural Hype' experiments / clarify documentation --- docs/experiments-forum2018.md | 34 +++++++++++-------- .../reconstruct_robus04_tuned_run.py | 13 ++++--- ...ams.map.robust04-paper1-folds.bm25+ax.json | 4 +-- ...ms.map.robust04-paper1-folds.bm25+rm3.json | 4 +-- ...params.map.robust04-paper1-folds.bm25.json | 4 +-- .../params.map.robust04-paper1-folds.ql.json | 4 +-- ...ams.map.robust04-paper2-folds.bm25+ax.json | 10 +++--- ...ms.map.robust04-paper2-folds.bm25+rm3.json | 10 +++--- ...params.map.robust04-paper2-folds.bm25.json | 10 +++--- .../params.map.robust04-paper2-folds.ql.json | 10 +++--- 10 files changed, 56 insertions(+), 47 deletions(-) diff --git a/docs/experiments-forum2018.md b/docs/experiments-forum2018.md index 210d33e5f..e9fdbc983 100644 --- a/docs/experiments-forum2018.md +++ b/docs/experiments-forum2018.md @@ -28,17 +28,19 @@ Retrieval models are tuned with respect to following fold definitions: Here are expected results for various retrieval models: -AP | Paper 1 | Paper 2 | -:------------------|---------|---------| -BM25 (default) | 0.2531 | 0.2531 | -BM25 (tuned) | 0.2539 | 0.2531 | -QL (default) | 0.2467 | 0.2467 | -QL (tuned) | 0.2520 | 0.2499 | -BM25+RM3 (default) | 0.2903 | 0.2903 | -BM25+RM3 (tuned) | 0.3043 | 0.3021 | -BM25+Ax (default) | 0.2896 | 0.2896 | -BM25+Ax (tuned) | 0.2940 | 0.2950 | - +| AP | Paper 1 | Paper 2 | +|:-------------------|---------|---------| +| BM25 (default) | 0.2531 | 0.2531 | +| BM25 (tuned) | 0.2539 | 0.2531 | +| QL (default) | 0.2467 | 0.2467 | +| QL (tuned) | 0.2520 | 0.2499 | +| BM25+RM3 (default) | 0.2903 | 0.2903 | +| BM25+RM3 (tuned) | 0.3043 | 0.3021 | +| BM25+Ax (default) | 0.2896 | 0.2896 | +| BM25+Ax (tuned) | 0.2940 | 0.2950 | + +(Clarification, 2023/09): Note that these effectiveness figures are from our papers, which may not be what 
the code currently produces. +See notes about differences in regression results above. ## Parameter Tuning @@ -94,10 +96,10 @@ The following script will reconstruct the tuned runs for BM25+RM3: ``` python src/main/python/fine_tuning/reconstruct_robus04_tuned_run.py \ - --index lucene-index.robust04.pos+docvectors+rawdocs \ - --folds src/main/resources/fine_tuning/robust04-paper1-folds.json \ - --params src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+rm3.json \ - --output run.robust04.bm25+rm3.paper1.txt + --index indexes/lucene-index.disk45 \ + --folds src/main/resources/fine_tuning/robust04-paper1-folds.json \ + --params src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+rm3.json \ + --output run.robust04.bm25+rm3.paper1.txt ``` Change `paper1` to `paper2` to reconstruct using the folds in paper 2. @@ -107,6 +109,8 @@ To reconstruct runs from other retrieval models, use the parameter definitions i Note that applying `trec_eval` to these reconstructed runs might yield AP that is a tiny bit different from the values reported above (difference of 0.0001 at the most). This difference arises from rounding when averaging across the folds. +(Clarification, 2023/09): Note that the commands above reconstruct runs based on the tuned parameters from our papers. +The effectiveness results may differ from those reported in our papers due to the regression differences described above. ## History diff --git a/src/main/python/fine_tuning/reconstruct_robus04_tuned_run.py b/src/main/python/fine_tuning/reconstruct_robus04_tuned_run.py index 81537bda7..8b5416ac8 100644 --- a/src/main/python/fine_tuning/reconstruct_robus04_tuned_run.py +++ b/src/main/python/fine_tuning/reconstruct_robus04_tuned_run.py @@ -21,6 +21,7 @@ """ import argparse +import glob import json import os import re @@ -38,7 +39,7 @@ params_file = args.params # This can be hard coded. 
- topics_file = 'src/main/resources/topics-and-qrels/topics.robust04.txt' + topics_file = 'tools/topics-and-qrels/topics.robust04.txt' # Load folds. with open(folds_file) as f: @@ -68,15 +69,19 @@ out.close() # Generate run for each fold using tuned parameters. - folds_run_files = [] for i in range(len(folds)): + #print(f'target/appassembler/bin/SearchCollection -topicreader Trec -index {index} ' + # f'-topics topics.robust04.fold{i} -output {args.output}.fold{i} -hits 1000 {params[i]}') os.system(f'target/appassembler/bin/SearchCollection -topicreader Trec -index {index} ' f'-topics topics.robust04.fold{i} -output {args.output}.fold{i} -hits 1000 {params[i]}') - folds_run_files.append(f'{args.output}.fold{i}') # Concatenate all partial run files together. + print('Concatenating the following files:') with open(args.output, 'w') as outfile: - for fname in folds_run_files: + for fname in glob.glob(f'{args.output}.fold*'): + print(f' - {fname}') + #if fname.for fname in folds_run_files: with open(fname) as infile: outfile.write(infile.read()) + print(f'Done! 
Finished writing {args.output}') diff --git a/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+ax.json b/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+ax.json index 69feab747..ac92ee89f 100644 --- a/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+ax.json +++ b/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+ax.json @@ -1,2 +1,2 @@ -[ "-bm25 -axiom -k1 0.8 -b 0.55 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.35 -axiom.top 50", - "-bm25 -axiom -k1 0.8 -b 0.50 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 25" ] +[ "-bm25 -axiom -bm25.k1 0.8 -bm25.b 0.55 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.35 -axiom.top 50", + "-bm25 -axiom -bm25.k1 0.8 -bm25.b 0.50 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 25" ] diff --git a/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+rm3.json b/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+rm3.json index ab29f90f8..22c11d775 100644 --- a/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+rm3.json +++ b/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25+rm3.json @@ -1,2 +1,2 @@ -[ "-bm25 -rm3 -k1 0.6 -b 0.65 -rm3.fbTerms 95 -rm3.fbDocs 15 -rm3.originalQueryWeight 0.20", - "-bm25 -rm3 -k1 0.7 -b 0.65 -rm3.fbTerms 65 -rm3.fbDocs 5 -rm3.originalQueryWeight 0.25" ] +[ "-bm25 -rm3 -bm25.k1 0.6 -bm25.b 0.65 -rm3.fbTerms 95 -rm3.fbDocs 15 -rm3.originalQueryWeight 0.20", + "-bm25 -rm3 -bm25.k1 0.7 -bm25.b 0.65 -rm3.fbTerms 65 -rm3.fbDocs 5 -rm3.originalQueryWeight 0.25" ] diff --git a/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25.json 
b/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25.json index 44258b935..cdb5c1188 100644 --- a/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25.json +++ b/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.bm25.json @@ -1,2 +1,2 @@ -[ "-bm25 -k1 0.7 -b 0.4", - "-bm25 -k1 0.6 -b 0.3" ] +[ "-bm25 -bm25.k1 0.7 -bm25.b 0.4", + "-bm25 -bm25.k1 0.6 -bm25.b 0.3" ] diff --git a/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.ql.json b/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.ql.json index 69d3ab6e7..d973b6b7b 100644 --- a/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.ql.json +++ b/src/main/resources/fine_tuning/params/params.map.robust04-paper1-folds.ql.json @@ -1,2 +1,2 @@ -[ "-ql -mu 300", - "-ql -mu 300" ] +[ "-qld -qld.mu 300", + "-qld -qld.mu 300" ] diff --git a/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25+ax.json b/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25+ax.json index 87ff92cd8..7ae63d34f 100644 --- a/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25+ax.json +++ b/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25+ax.json @@ -1,5 +1,5 @@ -[ "-bm25 -axiom -k1 0.80 -b 0.50 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 25", - "-bm25 -axiom -k1 0.80 -b 0.50 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 25", - "-bm25 -axiom -k1 0.80 -b 0.50 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 25", - "-bm25 -axiom -k1 0.80 -b 0.60 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 15 -axiom.n 30 -axiom.beta 0.35 -axiom.top 45", - "-bm25 -axiom -k1 0.80 -b 0.55 -rerankCutoff 50 
-axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 50" ] +[ "-bm25 -axiom -bm25.k1 0.80 -bm25.b 0.50 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 25", + "-bm25 -axiom -bm25.k1 0.80 -bm25.b 0.50 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 25", + "-bm25 -axiom -bm25.k1 0.80 -bm25.b 0.50 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 25", + "-bm25 -axiom -bm25.k1 0.80 -bm25.b 0.60 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 15 -axiom.n 30 -axiom.beta 0.35 -axiom.top 45", + "-bm25 -axiom -bm25.k1 0.80 -bm25.b 0.55 -rerankCutoff 50 -axiom.deterministic -axiom.n 30 -axiom.seed 42 -axiom.r 10 -axiom.n 30 -axiom.beta 0.30 -axiom.top 50" ] diff --git a/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25+rm3.json b/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25+rm3.json index d5e22338c..86b089234 100644 --- a/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25+rm3.json +++ b/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25+rm3.json @@ -1,5 +1,5 @@ -[ "-bm25 -rm3 -k1 0.75 -b 0.70 -rm3.fbTerms 95 -rm3.fbDocs 10 -rm3.originalQueryWeight 0.20", - "-bm25 -rm3 -k1 0.65 -b 0.65 -rm3.fbTerms 70 -rm3.fbDocs 5 -rm3.originalQueryWeight 0.25", - "-bm25 -rm3 -k1 0.70 -b 0.65 -rm3.fbTerms 65 -rm3.fbDocs 5 -rm3.originalQueryWeight 0.25", - "-bm25 -rm3 -k1 0.70 -b 0.60 -rm3.fbTerms 95 -rm3.fbDocs 15 -rm3.originalQueryWeight 0.20", - "-bm25 -rm3 -k1 0.70 -b 0.60 -rm3.fbTerms 100 -rm3.fbDocs 15 -rm3.originalQueryWeight 0.20" ] +[ "-bm25 -rm3 -bm25.k1 0.75 -bm25.b 0.70 -rm3.fbTerms 95 -rm3.fbDocs 10 -rm3.originalQueryWeight 0.20", + "-bm25 -rm3 -bm25.k1 0.65 -bm25.b 0.65 -rm3.fbTerms 70 -rm3.fbDocs 5 
-rm3.originalQueryWeight 0.25", + "-bm25 -rm3 -bm25.k1 0.70 -bm25.b 0.65 -rm3.fbTerms 65 -rm3.fbDocs 5 -rm3.originalQueryWeight 0.25", + "-bm25 -rm3 -bm25.k1 0.70 -bm25.b 0.60 -rm3.fbTerms 95 -rm3.fbDocs 15 -rm3.originalQueryWeight 0.20", + "-bm25 -rm3 -bm25.k1 0.70 -bm25.b 0.60 -rm3.fbTerms 100 -rm3.fbDocs 15 -rm3.originalQueryWeight 0.20" ] diff --git a/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25.json b/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25.json index 40a101b9e..85bb6edc9 100644 --- a/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25.json +++ b/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.bm25.json @@ -1,5 +1,5 @@ -[ "-bm25 -k1 0.7 -b 0.3", - "-bm25 -k1 0.7 -b 0.4", - "-bm25 -k1 0.6 -b 0.3", - "-bm25 -k1 0.7 -b 0.4", - "-bm25 -k1 0.7 -b 0.4" ] +[ "-bm25 -bm25.k1 0.7 -bm25.b 0.3", + "-bm25 -bm25.k1 0.7 -bm25.b 0.4", + "-bm25 -bm25.k1 0.6 -bm25.b 0.3", + "-bm25 -bm25.k1 0.7 -bm25.b 0.4", + "-bm25 -bm25.k1 0.7 -bm25.b 0.4" ] diff --git a/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.ql.json b/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.ql.json index 6293cbe86..0f2bd55f3 100644 --- a/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.ql.json +++ b/src/main/resources/fine_tuning/params/params.map.robust04-paper2-folds.ql.json @@ -1,5 +1,5 @@ -[ "-ql -mu 300", - "-ql -mu 400", - "-ql -mu 500", - "-ql -mu 300", - "-ql -mu 300" ] +[ "-qld -qld.mu 300", + "-qld -qld.mu 400", + "-qld -qld.mu 500", + "-qld -qld.mu 300", + "-qld -qld.mu 300" ]