Skip to content

Commit

Permalink
test url input
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes committed May 13, 2024
1 parent 3f64ab3 commit de35a4f
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,13 @@ pub fn load_gbassembly_info(input_csv: String) -> Result<(Vec<GBAssemblyData>, u
.get(1)
.ok_or_else(|| anyhow!("Missing 'name' field"))?
.to_string();

// optionally get url
let url = record.get(3).and_then(|s| {
let url = record.get(2).and_then(|s| {
if s.is_empty() {
None
} else {
reqwest::Url::parse(s).map_err(|_| ()).ok()
let trimmed_s = s.trim_end_matches("/");
reqwest::Url::parse(trimmed_s).map_err(|_| ()).ok()
}
});

Expand Down
37 changes: 37 additions & 0 deletions tests/test_gbsketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,43 @@ def test_gbsketch_simple(runtmp):
else:
assert sig.md5sum() == ss3.md5sum()


def test_gbsketch_simple_url(runtmp):
    """Run ``gbsketch`` on 'acc-with-url.csv' and compare against stored sigs.

    The command sketches DNA (k=31, scaled=1000) and protein (k=10,
    scaled=200) into a zip file. The test then checks that exactly three
    signatures were written and that each one matches the name and md5sum
    of the corresponding reference signature from the test data directory.
    NOTE(review): the CSV presumably carries an explicit download URL per
    row -- confirm against the test data file.
    """
    csv_path = get_test_data('acc-with-url.csv')
    zip_path = runtmp.output('simple.zip')
    failed_path = runtmp.output('failed.csv')

    # Reference signatures that the freshly built sketches must reproduce.
    ref_path_a = get_test_data('GCA_000175535.1.sig.gz')
    ref_path_b_dna = get_test_data('GCA_000961135.2.sig.gz')
    ref_path_b_prot = get_test_data('GCA_000961135.2.protein.sig.gz')
    ref_a = sourmash.load_one_signature(ref_path_a, ksize=31)
    ref_b_dna = sourmash.load_one_signature(ref_path_b_dna, ksize=31)
    # The protein reference is selected with ksize=30 although the sketch is
    # requested with k=10 (open question carried over from the original
    # author; presumably the stored protein ksize is 3*k -- TODO confirm).
    ref_b_prot = sourmash.load_one_signature(ref_path_b_prot, ksize=30,
                                             select_moltype='protein')

    argv = [
        'scripts', 'gbsketch', csv_path,
        '-o', zip_path,
        '--failed', failed_path,
        '-r', '1',
        '--param-str', "dna,k=31,scaled=1000",
        '-p', "protein,k=10,scaled=200",
    ]
    runtmp.sourmash(*argv)

    assert os.path.exists(zip_path)
    assert not runtmp.last_result.out  # stdout should be empty

    produced = list(sourmash.load_file_as_index(zip_path).signatures())
    assert len(produced) == 3

    for found in produced:
        if 'GCA_000175535.1' in found.name:
            assert found.name == ref_a.name
            assert found.md5sum() == ref_a.md5sum()
            continue
        if 'GCA_000961135.2' not in found.name:
            # Signatures for any other accession are not checked here.
            continue
        assert found.name == ref_b_dna.name
        # The second accession yields both a DNA and a protein sketch.
        if found.minhash.moltype == 'DNA':
            expected = ref_b_dna
        else:
            expected = ref_b_prot
        assert found.md5sum() == expected.md5sum()


def test_gbsketch_genomes_only(runtmp):
acc_csv = get_test_data('acc.csv')
output = runtmp.output('simple.zip')
Expand Down

0 comments on commit de35a4f

Please sign in to comment.