Skip to content

Commit

Permalink
Merge pull request #36 from sourmash-bio/fix-fail
Browse files Browse the repository at this point in the history
- Fixes #35. To finish, enable reading the failure file as `urlsketch` input in #34.

Changes:
- Convert the URL to a string (`url.as_str()`) prior to printing it
- Test the output format of the failed file
  • Loading branch information
bluegenes authored May 20, 2024
2 parents 7d41de8 + d0a3a74 commit 5db17ff
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
8 changes: 7 additions & 1 deletion src/directsketch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,13 @@ pub fn failures_handle(
url,
}) = recv_failed.recv().await
{
let record = format!("{},{},{},{:?}\n", accession, name, moltype, url);
let record = format!(
"{},{},{},{:?}\n",
accession,
name,
moltype,
url.map(|u| u.to_string()).unwrap_or("".to_string())
);
// Attempt to write each record
if let Err(e) = writer.write_all(record.as_bytes()).await {
let error = Error::new(e).context("Failed to write record");
Expand Down
14 changes: 12 additions & 2 deletions tests/test_gbsketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ def test_gbsketch_simple(runtmp):
assert sig.md5sum() == ss2.md5sum()
else:
assert sig.md5sum() == ss3.md5sum()
assert os.path.exists(failed)
with open(failed, 'r') as failF:
next(failF) # skip header line
for line in failF:
acc, name, moltype, url = line.strip().split(',')
assert acc == "GCA_000175535.1"
assert name == "GCA_000175535.1 Chlamydia muridarum MopnTet14 (agent of mouse pneumonitis) strain=MopnTet14"
assert moltype == "protein"
assert url == '"https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/175/535/GCA_000175535.1_ASM17553v1/GCA_000175535.1_ASM17553v1_protein.faa.gz"'


def test_gbsketch_simple_url(runtmp):
Expand Down Expand Up @@ -273,7 +282,7 @@ def test_gbsketch_download_only(runtmp, capfd):
assert set(fa_files) == set(['GCA_000175535.1_genomic.fna.gz', 'GCA_000961135.2_protein.faa.gz', 'GCA_000961135.2_genomic.fna.gz'])
captured = capfd.readouterr()
assert "Failed to send signatures: channel closed" not in captured.err


def test_gbsketch_bad_acc(runtmp):
acc_csv = get_test_data('acc.csv')
Expand Down Expand Up @@ -335,7 +344,7 @@ def test_gbsketch_bad_acc(runtmp):
assert sig.md5sum() == ss2.md5sum()
else:
assert sig.md5sum() == ss3.md5sum()


def test_gbsketch_missing_accfile(runtmp, capfd):
acc_csv = runtmp.output('acc1.csv')
Expand Down Expand Up @@ -397,6 +406,7 @@ def test_gbsketch_bad_acc_fail(runtmp, capfd):


def test_gbsketch_version_bug(runtmp):
# test for bug where we didn't check version correctly
acc_csv = get_test_data('acc-version.csv')
output = runtmp.output('simple.zip')
failed = runtmp.output('failed.csv')
Expand Down

0 comments on commit 5db17ff

Please sign in to comment.