diff --git a/src/python/tests/test_sketch.py b/src/python/tests/test_sketch.py
index fc5f73af..29b1450e 100644
--- a/src/python/tests/test_sketch.py
+++ b/src/python/tests/test_sketch.py
@@ -3,6 +3,7 @@
 import csv
 import pandas
 import sourmash
+import subprocess
 from sourmash import index
 import io
 from . import sourmash_tst_utils as utils
@@ -1159,7 +1160,7 @@ def test_singlesketch_simple(runtmp):
     output = runtmp.output("short.sig")
 
     # Run the singlesketch command
-    runtmp.sourmash("scripts", "singlesketch", fa1, "-o", output)
+    runtmp.sourmash("scripts", "singlesketch", fa1, "-o", output, "-p", "scaled=10")
 
     # Check if the output exists and contains the expected data
     assert os.path.exists(output)
@@ -1168,11 +1169,12 @@ def test_singlesketch_simple(runtmp):
     assert sig.name == "short.fa"
     assert sig.minhash.ksize == 31
     assert sig.minhash.is_dna
-    assert sig.minhash.scaled == 1000
+    assert sig.minhash.scaled == 10
+    print("HASHES", sig.minhash.hashes)
 
     # validate against sourmash sketch
     output2 = runtmp.output("short2.sig")
-    runtmp.sourmash("sketch", "dna", fa1, "-o", output2)
+    runtmp.sourmash("sketch", "dna", fa1, "-o", output2, "-p", "scaled=10")
     sig2 = sourmash.load_one_signature(output2)
     assert sig.minhash.hashes == sig2.minhash.hashes
 
@@ -1301,6 +1303,7 @@ def test_singlesketch_protein_moltype(runtmp):
     assert sig.minhash.ksize == 10
     assert sig.minhash.is_protein
     assert sig.minhash.scaled == 100
+    print("HASHES:", sig.minhash.hashes)
 
     # validate against sourmash sketch
     output2 = runtmp.output("short2.sig")
@@ -1381,7 +1384,7 @@ def test_singlesketch_gzipped_output(runtmp):
     output = runtmp.output("short.sig.gz")
 
     # Run the singlesketch command
-    runtmp.sourmash("scripts", "singlesketch", fa1, "-o", output)
+    runtmp.sourmash("scripts", "singlesketch", fa1, "-o", output, "-p", "scaled=10")
 
     # Check if the output exists and contains the expected data
     assert os.path.exists(output)
@@ -1397,15 +1400,16 @@ def test_singlesketch_gzipped_output(runtmp):
 
     # check the signatures
     sig = sourmash.load_one_signature(output)
+    print("HASHES:", sig.minhash.hashes)
 
     assert sig.name == "short.fa"
     assert sig.minhash.ksize == 31
     assert sig.minhash.is_dna
-    assert sig.minhash.scaled == 1000
+    assert sig.minhash.scaled == 10
 
     # validate against sourmash sketch
     output2 = runtmp.output("short2.sig")
-    runtmp.sourmash("sketch", "dna", fa1, "-o", output2)
+    runtmp.sourmash("sketch", "dna", fa1, "-o", output2, "-p", "scaled=10")
     sig2 = sourmash.load_one_signature(output2)
     assert sig.minhash.hashes == sig2.minhash.hashes
 
@@ -1416,7 +1420,7 @@ def test_singlesketch_zip_output(runtmp):
     output = runtmp.output("short.zip")
 
     # Run the singlesketch command
-    runtmp.sourmash("scripts", "singlesketch", fa1, "-o", output)
+    runtmp.sourmash("scripts", "singlesketch", fa1, "-o", output, "-p", "scaled=10")
 
     # Check if the output exists and contains the expected data
     assert os.path.exists(output)
@@ -1425,15 +1429,16 @@ def test_singlesketch_zip_output(runtmp):
     assert len(sigs) == 1
     print(sigs)
     sig = sigs[0]
+    print("HASHES:", sig.minhash.hashes)
 
     assert sig.name == "short.fa"
     assert sig.minhash.ksize == 31
     assert sig.minhash.is_dna
-    assert sig.minhash.scaled == 1000
+    assert sig.minhash.scaled == 10
 
     # validate against sourmash sketch
     output2 = runtmp.output("short2.sig")
-    runtmp.sourmash("sketch", "dna", fa1, "-o", output2)
+    runtmp.sourmash("sketch", "dna", fa1, "-o", output2, "-p", "scaled=10")
     sig2 = sourmash.load_one_signature(output2)
     assert sig.minhash.hashes == sig2.minhash.hashes
 
@@ -1614,3 +1619,38 @@ def test_singlesketch_skipm2n3(runtmp):
     assert (
         data[0]["name"] == expected["name"]
     ), f"Unexpected name: {data[0]['name']}"
+
+
+def test_singlesketch_stdin(runtmp):
+    """Test that singlesketch can sketch FASTA data piped in on stdin ("-")."""
+    fa1 = get_test_data("short.fa")
+    output = runtmp.output("short.sig")
+
+    # Run the singlesketch command in a subprocess, feeding the FASTA file
+    # through a pipe on stdin. An argument list (no shell, no `cat`) keeps
+    # the test working for paths containing spaces or shell metacharacters.
+    cmd = ["sourmash", "scripts", "singlesketch", "-", "--name", "short"]
+    cmd += ["-o", output, "-p", "dna,scaled=10"]
+    with open(fa1, "rb") as fp:
+        fasta_data = fp.read()
+    result = subprocess.run(cmd, input=fasta_data, capture_output=True)
+
+    # Check if the command succeeded
+    stderr = result.stderr.decode(errors="replace")
+    assert result.returncode == 0, f"Command failed: {stderr}"
+
+    # Check if the output exists and contains the expected data
+    assert os.path.exists(output)
+    sig = sourmash.load_one_signature(output)
+
+    assert sig.name == "short"
+    assert sig.minhash.ksize == 31
+    assert sig.minhash.is_dna
+    assert sig.minhash.scaled == 10
+    print("HASHES:", sig.minhash.hashes)
+
+    # validate against sourmash sketch
+    output2 = runtmp.output("short2.sig")
+    runtmp.sourmash("sketch", "dna", fa1, "-o", output2, "-p", "dna,scaled=10")
+    sig2 = sourmash.load_one_signature(output2)
+    assert sig.minhash.hashes == sig2.minhash.hashes
diff --git a/src/utils/buildutils.rs b/src/utils/buildutils.rs
index cdf26a5e..01a08491 100644
--- a/src/utils/buildutils.rs
+++ b/src/utils/buildutils.rs
@@ -4,7 +4,7 @@ use anyhow::{anyhow, Context, Result};
 use camino::Utf8PathBuf;
 use getset::{Getters, Setters};
 use needletail::parser::SequenceRecord;
-use needletail::{parse_fastx_file, parse_fastx_reader};
+use needletail::{parse_fastx_file, parse_fastx_reader, parse_fastx_stdin};
 use serde::Serialize;
 use sourmash::cmd::ComputeParameters;
 use sourmash::encodings::{HashFunctions, Idx};
@@ -836,8 +836,7 @@ impl BuildCollection {
     ) -> Result {
         // Create a FASTX reader from the file or stdin
         let mut fastx_reader = if filename == "-" {
-            let stdin = std::io::stdin();
-            parse_fastx_reader(stdin).context("Failed to parse FASTA/FASTQ data from stdin")?
+            parse_fastx_stdin().context("Failed to parse FASTA/FASTQ data from stdin")?
         } else {
             parse_fastx_file(&filename).context("Failed to open file for FASTA/FASTQ data")?
         };