Unit testing #53

Draft · wants to merge 2 commits into main
36 changes: 36 additions & 0 deletions testing/README.md
@@ -0,0 +1,36 @@
# Testing the workflow

## Generate testing datasets

Two scripts each take a tar archive of tiff files and produce a smaller version to test with. Both should be run from within the testing directory.

1. create_test_dataset

This script takes in a larger dataset and produces a subset of it: the user can specify a slice step, and a tile step in the x and y directions.

2. create_downsampled_dataset

This script takes a dataset of any size and downsamples it across x, y, and z. It can be run after create_test_dataset, but the user must then pass in the slice step that was used in that first script. Example invocations for both scripts follow.
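
For illustration, a typical sequence might look like this (a sketch: the flag names for create_test_dataset are assumptions, while the create_downsampled_dataset options follow the typer signature in create_downsampled_dataset.py):

```
# hypothetical flag names for create_test_dataset
python create_test_dataset.py dataset.tar test_dataset.tar --slice-step 4 --tile-step-x 2 --tile-step-y 2

# options mirror the typer arguments of complete_tiff_downsampling
python create_downsampled_dataset.py test_dataset.tar ds_dataset.tar --ds-x 2 --ds-y 2 --ds-z 2 --slice-step 4
```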

## Creating the tests

Once the test datasets are created, the user can generate the unit tests for the workflow by following these steps:

1. Change the following relative paths to absolute paths:

1. The path to the config file from within the snakefile

2. The path to the datasets.tsv file from within the config file

3. The path to the test dataset from within the datasets.tsv file

Making these changes ensures the unit tests have all the relevant context. An illustration of these edits appears after this list.

2. Run the generate_test Python script from the spimprep directory with:

```
python testing/generate_test.py
```

This will run the snakemake workflow, generate the unit tests, and then copy in the modified test scripts to make sure the tests are correct.
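
For illustration, the absolute-path edits from step 1 might look like the following (every path here is hypothetical, and the config key and tsv column names are assumptions; use the names your snakefile, config file, and datasets.tsv actually contain):

```
# snakefile: absolute path to the config file
configfile: "/home/user/spimprep/config/config.yml"

# config.yml: absolute path to the datasets table (key name assumed)
datasets: "/home/user/spimprep/config/datasets.tsv"

# datasets.tsv: absolute path to the test dataset (column name assumed, tab-separated)
dataset_path    /home/user/spimprep/testing/test_dataset.tar
```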

120 changes: 120 additions & 0 deletions testing/create_downsampled_dataset.py
@@ -0,0 +1,120 @@
import numpy as np
from zarrnii import ZarrNii
import tarfile
import tifffile
import xmltodict
import dask.array as da
import os
import typer
from typing_extensions import Annotated
app = typer.Typer()

def downsample_tiff(source_dir, ds_x, ds_y, ds_z, slice_step):
"""
Take in the original tiff data and put into zarnii to handle the downsampling
"""
member_names = []
members_list = []
za = None
with tarfile.open(source_dir, 'r') as tar:
members = tar.getmembers()
for member in members:
tar.extract(member)
member_names.append(member.name)
members_list.append(member)
for member in member_names:
with tifffile.TiffFile(member) as tif:
data = tif.series[0].asarray()
data = da.from_array(data)
            member_slice = int(member.split("Z")[1][:4])
            channel = int(member.split("C")[1][:2])
            if za is None:
                # the first tile initializes the array and supplies the OME metadata
                za = ZarrNii.from_darr(data)
                meta = xmltodict.parse(tif.ome_metadata)
            elif data.shape == (2560, 2160):  # a full-size 2D tile (tile shape is hard-coded)
                # integer division keeps the slice index an int
                za.darr[channel, member_slice // slice_step] = np.array(data)
            else:
                za.darr[channel, member_slice // slice_step] = np.array(data[channel][member_slice])
za = za.downsample(along_x=ds_x, along_y=ds_y, along_z=ds_z)
za.darr = da.from_array(np.array(za.darr).astype(np.uint16))
return meta, za, members_list


def basic_meta_update(meta, za, ds_x=1, ds_y=1, ds_z=1):
"""
Update the simple metadata including pixel size and the size of the array
"""
pixel = meta['OME']['Image']['Pixels']
pixel['@SizeX'] = f'{za.darr.shape[3]}'
pixel['@SizeY'] = f'{za.darr.shape[2]}'
pixel['@SizeZ'] = f'{za.darr.shape[1]}'
pixel['@PhysicalSizeX'] = f"{float(pixel['@PhysicalSizeX'])*ds_x}"
pixel['@PhysicalSizeY'] = f"{float(pixel['@PhysicalSizeY'])*ds_y}"
pixel['@PhysicalSizeZ'] = f"{float(pixel['@PhysicalSizeZ'])*ds_z}"
meta['OME']['Image']['Pixels'] = pixel
return meta

def advanced_meta(meta, za, slice_step):
"""
Update the tiffdata tile configuration data to ensure
data is read and processed properly
"""
tiff_data = meta['OME']['Image']['Pixels']['TiffData']
new_tiff_data = []
for single_data in tiff_data:
slice_num = int(single_data["@FirstZ"])
if slice_num < za.darr.shape[1]:
new_tiff_data.append(single_data)
meta['OME']['Image']['Pixels']['TiffData'] = new_tiff_data

new_config = "4"
for tile in meta['OME']['Image']['ca:CustomAttributes']['TileConfiguration']['@TileConfiguration'].split(" ")[1:]:
print(tile.split("Z")[1][:4])
slice_num = int(tile.split("Z")[1][:4])/slice_step
if(slice_num < za.darr.shape[1]):
new_config += " " + tile
meta['OME']['Image']['ca:CustomAttributes']['TileConfiguration']['@TileConfiguration'] = new_config
return meta


def output_downsampled_tiff(output, members_list, za, meta, slice_step):
"""
Create the new tiff files with the downsampled data and updated
metadata
"""
    # materialize the downsampled volume once rather than per output file
    volume = np.array(za.darr)
    with tarfile.open(output, 'w') as tar:
        for member in members_list:
            member_slice = int(member.name.split("Z")[1][:4]) // slice_step
            channel = int(member.name.split("C")[1][:2])
            if member_slice < za.darr.shape[1]:
                new_data = volume[channel, member_slice, :, :]
                with tifffile.TiffWriter(member.name) as tw:
                    if member_slice == 0:
                        # only the first slice carries the updated OME-XML description
                        new_description = xmltodict.unparse(meta).encode("UTF-8")
                        tw.write(new_data, description=new_description, metadata=None, planarconfig="CONTIG")
                    else:
                        tw.write(new_data, metadata=None, planarconfig="CONTIG")
                tar.add(member.name, arcname=member.name)
                os.remove(member.name)

@app.command()
def complete_tiff_downsampling(path_to_source_tar: Annotated[str, typer.Argument(help="ex: dir1/dir2/dataset.tar")],
                               path_to_output_tar: Annotated[str, typer.Argument(help="ex: dir1/dir2/test_dataset.tar")],
                               ds_x: int = 1, ds_y: int = 1, ds_z: int = 1, slice_step: int = 1):
    """
    Downsample a tar of tiff files end to end: read, downsample, update
    the metadata, and write the output tar. Exposed on the command line
    via typer.
    """
    meta, data, member_list = downsample_tiff(path_to_source_tar, ds_x, ds_y, ds_z, slice_step)
    meta = basic_meta_update(meta, data, ds_x, ds_y, ds_z)
    meta = advanced_meta(meta, data, slice_step)
    output_downsampled_tiff(path_to_output_tar, member_list, data, meta, slice_step)
return meta



if __name__ == "__main__":
app()
40 changes: 40 additions & 0 deletions testing/generate_test.py
@@ -0,0 +1,40 @@
import subprocess as sp
import os
from pathlib import Path
import shutil

# Run the snakemake workflow with --notemp so intermediate files are kept for generating the tests
sp.run([
"python",
"-m",
"snakemake",
"-c",
"all",
"--use-singularity",
"--notemp"
])

# Generate the unit tests
sp.run([
"python",
"-m",
"snakemake",
"--generate-unit-tests"
])

# Path to testing scripts
directory = Path("testing/testing_scripts")
# output the tests in the unit test folder
output_directory = Path(".tests/unit")
# get all the test files
files = os.listdir(directory)

# Copy all the test scripts into the unit test directory
for file in files:
full_name = directory / file
full_output_name = output_directory / file
shutil.copy(full_name, full_output_name)




70 changes: 70 additions & 0 deletions testing/testing_scripts/common.py
@@ -0,0 +1,70 @@
"""
Common code for unit testing of rules generated with Snakemake 7.32.4.
"""

from pathlib import Path
import subprocess as sp
import os
import difflib

class OutputChecker:
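    """
    Compare a workflow run against a reference: walk the working
    directory and check every generated file against its expected
    counterpart, byte for byte.
    """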
def __init__(self, data_path, expected_path, workdir):
self.data_path = data_path
self.expected_path = expected_path
self.workdir = workdir

def check(self):
input_files = set(
(Path(path) / f).relative_to(self.data_path)
for path, subdirs, files in os.walk(self.data_path)
for f in files
)
expected_files = set(
(Path(path) / f).relative_to(self.expected_path)
for path, subdirs, files in os.walk(self.expected_path)
for f in files
)
unexpected_files = set()
for path, subdirs, files in os.walk(self.workdir):
for f in files:
f = (Path(path) / f).relative_to(self.workdir)
if str(f).startswith(".snakemake"):
continue
if f in expected_files:
self.compare_files(self.workdir / f, self.expected_path / f)
elif f in input_files:
# ignore input files
pass
elif str(f).startswith("logs/") or str(f).startswith("benchmarks") or str(f).startswith(".java/") or str(f).startswith("qc/resources") or str(f) == "qc/qc_report.html":
pass
else:
unexpected_files.add(f)
if unexpected_files:
raise ValueError(
"Unexpected files:\n{}".format(
"\n".join(sorted(map(str, unexpected_files)))
)
)

def compare_files(self, generated_file, expected_file):
sp.check_output(["cmp", generated_file, expected_file])


class ImperfectOutputChecker(OutputChecker):
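    """
    Checker for outputs that are not byte-identical between runs:
    compares the files in 1024-byte chunks and requires an average
    difflib similarity ratio of at least 0.995.
    """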
    def compare_files(self, generated_file, expected_file):
        if os.path.getsize(generated_file) and os.path.getsize(expected_file):
            with open(generated_file, 'rb') as gen, open(expected_file, 'rb') as exp:
                total_similarity = []
                while True:
                    gen_content = gen.read(1024)
                    exp_content = exp.read(1024)
                    if not gen_content and not exp_content:
                        break
                    # a chunk missing from one file scores low, penalizing length mismatches
                    similarity_ratio = difflib.SequenceMatcher(None, gen_content, exp_content).ratio()
                    total_similarity.append(similarity_ratio)
                final_sim_score = sum(total_similarity) / len(total_similarity)
                assert final_sim_score >= 0.995, final_sim_score
        elif os.path.getsize(generated_file) != os.path.getsize(expected_file):
            raise ValueError("Files not equal")

46 changes: 46 additions & 0 deletions testing/testing_scripts/test_apply_basic_flatfield_corr.py
@@ -0,0 +1,46 @@
import os
import sys

import subprocess as sp
from tempfile import TemporaryDirectory
import shutil
from pathlib import Path, PurePosixPath

sys.path.insert(0, os.path.dirname(__file__))

import common


def test_apply_basic_flatfield_corr():

with TemporaryDirectory() as tmpdir:
workdir = Path(tmpdir) / "workdir"
data_path = PurePosixPath(".tests/unit/apply_basic_flatfield_corr/data")
expected_path = PurePosixPath(".tests/unit/apply_basic_flatfield_corr/expected")

# Copy data to the temporary workdir.
shutil.copytree(data_path, workdir)

# dbg
print("work/sub-mouse1/micr/sub-mouse1_sample-brain_acq-blaze1x_desc-flatcorr_SPIM.zarr", file=sys.stderr)

# Run the test job.
sp.check_output([
"python",
"-m",
"snakemake",
"work/sub-mouse1/micr/sub-mouse1_sample-brain_acq-blaze1x_desc-flatcorr_SPIM.zarr",
"-f",
"-j1",
"--target-files-omit-workdir-adjustment",
"--use-singularity",

"--directory",
workdir,
])

# Check the output byte by byte using cmp.
        # To modify this behavior, you can inherit from common.OutputChecker in here
        # and overwrite the method `compare_files(generated_file, expected_file)`;
        # also see common.py.
common.OutputChecker(data_path, expected_path, workdir).check()
46 changes: 46 additions & 0 deletions testing/testing_scripts/test_bids_readme.py
@@ -0,0 +1,46 @@
import os
import sys

import subprocess as sp
from tempfile import TemporaryDirectory
import shutil
from pathlib import Path, PurePosixPath

sys.path.insert(0, os.path.dirname(__file__))

import common


def test_bids_readme():

with TemporaryDirectory() as tmpdir:
workdir = Path(tmpdir) / "workdir"
data_path = PurePosixPath(".tests/unit/bids_readme/data")
expected_path = PurePosixPath(".tests/unit/bids_readme/expected")

# Copy data to the temporary workdir.
shutil.copytree(data_path, workdir)

# dbg
print("bids/README.md", file=sys.stderr)

# Run the test job.
sp.check_output([
"python",
"-m",
"snakemake",
"bids/README.md",
"-f",
"-j1",
"--target-files-omit-workdir-adjustment",
"--use-singularity",

"--directory",
workdir,
])
# Check the output byte by byte using cmp.
        # To modify this behavior, you can inherit from common.OutputChecker in here
        # and overwrite the method `compare_files(generated_file, expected_file)`;
        # also see common.py.
common.OutputChecker(data_path, expected_path, workdir).check()