diff --git a/src/manysearch.rs b/src/manysearch.rs
index 26a4b967..861598d8 100644
--- a/src/manysearch.rs
+++ b/src/manysearch.rs
@@ -98,8 +98,7 @@ pub fn manysearch(
                             .downsample_scaled(scaled)
                             .expect("cannot downsample search minhash to requested scaled");
                         for query in query_sketchlist.iter() {
-                            // avoid calculating details unless there is overlap
-
+                            // be paranoid and confirm scaled match.
                             if query.minhash.scaled() != scaled {
                                 panic!("different query scaled");
                             }
diff --git a/src/multisearch.rs b/src/multisearch.rs
index 2eb79107..d672cc24 100644
--- a/src/multisearch.rs
+++ b/src/multisearch.rs
@@ -44,6 +44,7 @@ pub fn multisearch(
     };
 
     let ksize = selection.ksize().unwrap() as f64;
+    let expected_scaled = scaled; // nicer name.
 
     let mut new_selection = selection;
     new_selection.set_scaled(scaled as u32);
@@ -86,8 +87,13 @@ pub fn multisearch(
                     eprintln!("Processed {} comparisons", i);
                 }
 
-                if query.minhash.scaled() != against.minhash.scaled() {
-                    panic!("different scaled");
+                // be paranoid and check both sketches against the requested scaled.
+                if query.minhash.scaled() != expected_scaled {
+                    panic!("different scaled for query");
+                }
+
+                if against.minhash.scaled() != expected_scaled {
+                    panic!("different scaled for against");
                 }
 
                 let overlap = query
diff --git a/src/pairwise.rs b/src/pairwise.rs
index f09beba3..0b903fc7 100644
--- a/src/pairwise.rs
+++ b/src/pairwise.rs
@@ -22,6 +22,8 @@ pub fn pairwise(
     write_all: bool,
     output: Option,
 ) -> Result<(), Box> {
+    // @CTB test for heterogeneous scaled.
+
     // Load all sigs into memory at once.
     let collection = load_collection(
         &siglist,
diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py
index bf4019ad..350d3407 100644
--- a/src/python/tests/test_manysearch.py
+++ b/src/python/tests/test_manysearch.py
@@ -423,7 +423,8 @@ def test_simple_scaled_fail(runtmp, capfd, indexed, zip_query):
     make_file_list(against_list, [against])
 
     if indexed:
-        against_list = index_siglist(runtmp, against_list, runtmp.output("db"))
+        against_list = index_siglist(runtmp, against_list, runtmp.output("db"),
+                                     scaled=100_000)
 
     if zip_query:
         query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip"))