From 32f66ae94736275ce27a3f20009c0e97c084a939 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Mon, 20 May 2024 08:15:47 +0100 Subject: [PATCH 1/2] fix url printing; add test --- src/directsketch.rs | 8 +++++++- tests/test_gbsketch.py | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/directsketch.rs b/src/directsketch.rs index 25ca6f3..e7ea700 100644 --- a/src/directsketch.rs +++ b/src/directsketch.rs @@ -527,7 +527,13 @@ pub fn failures_handle( url, }) = recv_failed.recv().await { - let record = format!("{},{},{},{:?}\n", accession, name, moltype, url); + let record = format!( + "{},{},{},{:?}\n", + accession, + name, + moltype, + url.expect("Can't convert url").as_str() + ); // Attempt to write each record if let Err(e) = writer.write_all(record.as_bytes()).await { let error = Error::new(e).context("Failed to write record"); diff --git a/tests/test_gbsketch.py b/tests/test_gbsketch.py index c3648b9..9d66a0c 100644 --- a/tests/test_gbsketch.py +++ b/tests/test_gbsketch.py @@ -55,6 +55,15 @@ def test_gbsketch_simple(runtmp): assert sig.md5sum() == ss2.md5sum() else: assert sig.md5sum() == ss3.md5sum() + assert os.path.exists(failed) + with open(failed, 'r') as failF: + next(failF) # skip header line + for line in failF: + acc, name, moltype, url = line.strip().split(',') + assert acc == "GCA_000175535.1" + assert name == "GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14" + assert moltype == "protein" + assert url == '"https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_protein.faa.gz"' def test_gbsketch_simple_url(runtmp): @@ -273,7 +282,7 @@ def test_gbsketch_download_only(runtmp, capfd): assert set(fa_files) == set(['GCA_000175535.1_genomic.fna.gz', 'GCA_000961135.2_protein.faa.gz', 'GCA_000961135.2_genomic.fna.gz']) captured = capfd.readouterr() assert "Failed to send signatures: channel closed" not in captured.err - + def test_gbsketch_bad_acc(runtmp): acc_csv = get_test_data('acc.csv') @@ -335,7 +344,7 @@ def test_gbsketch_bad_acc(runtmp): assert sig.md5sum() == ss2.md5sum() else: assert sig.md5sum() == ss3.md5sum() - + def test_gbsketch_missing_accfile(runtmp, capfd): acc_csv = runtmp.output('acc1.csv') @@ -394,7 +403,7 @@ def test_gbsketch_bad_acc_fail(runtmp, capfd): print(captured.out) print(captured.err) assert "Error: No signatures written, exiting." in captured.err - + def test_gbsketch_version_bug(runtmp): acc_csv = get_test_data('acc-version.csv') From d0a3a74ad7f6cceb8a15a7bc18b2e6de07d3ef51 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Mon, 20 May 2024 08:32:58 +0100 Subject: [PATCH 2/2] use empty string if no url --- src/directsketch.rs | 2 +- tests/test_gbsketch.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/directsketch.rs b/src/directsketch.rs index e7ea700..916070b 100644 --- a/src/directsketch.rs +++ b/src/directsketch.rs @@ -532,7 +532,7 @@ pub fn failures_handle( accession, name, moltype, - url.expect("Can't convert url").as_str() + url.map(|u| u.to_string()).unwrap_or("".to_string()) ); // Attempt to write each record if let Err(e) = writer.write_all(record.as_bytes()).await { diff --git a/tests/test_gbsketch.py b/tests/test_gbsketch.py index 9d66a0c..ecfc085 100644 --- a/tests/test_gbsketch.py +++ b/tests/test_gbsketch.py @@ -403,9 +403,10 @@ def test_gbsketch_bad_acc_fail(runtmp, capfd): print(captured.out) print(captured.err) assert "Error: No signatures written, exiting." in captured.err - + def test_gbsketch_version_bug(runtmp): + # test for bug where we didn't check version correctly acc_csv = get_test_data('acc-version.csv') output = runtmp.output('simple.zip') failed = runtmp.output('failed.csv')