Unit testing #53

Draft · wants to merge 2 commits into main
36 changes: 36 additions & 0 deletions testing/README.md
@@ -0,0 +1,36 @@
# Testing the workflow

## Generate testing datasets

Two scripts each take a tar archive of tiff files and produce a smaller version to test with. Both should be run from within the testing directory.

1. create_test_dataset

This script takes in a larger dataset and produces a subset of it: the user can specify a slice step, and a tile step in the x and y directions.

2. create_downsampled_dataset

This script takes a dataset of any size and downsamples it across x, y, and z. It can be run after create_test_dataset, but the user must then pass in the slice step that was used in that first script. Example invocations for both scripts follow.
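
For illustration, a typical sequence might look like this (a sketch: the flag names for create_test_dataset are assumptions, while the create_downsampled_dataset options follow the typer signature in create_downsampled_dataset.py):

```
# hypothetical flag names for create_test_dataset
python create_test_dataset.py dataset.tar test_dataset.tar --slice-step 4 --tile-step-x 2 --tile-step-y 2

# options mirror the typer arguments of complete_tiff_downsampling
python create_downsampled_dataset.py test_dataset.tar ds_dataset.tar --ds-x 2 --ds-y 2 --ds-z 2 --slice-step 4
```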

## Creating the tests

Once the test datasets are created, the user can generate the unit tests for the workflow by following these steps:

1. Change the following relative paths to absolute paths:

1. The path to the config file from within the snakefile

2. The path to the datasets.tsv file from within the config file

3. The path to the test dataset from within the datasets.tsv file

Making these changes ensures the unit tests have all the relevant context. An illustration of these edits appears after this list.

2. Run the generate_test Python script from the spimprep directory with:

```
python testing/generate_test.py
```

This will run the snakemake workflow, generate the unit tests, and then copy in the modified test scripts to make sure the tests are correct.
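
For illustration, the absolute-path edits from step 1 might look like the following (every path here is hypothetical, and the config key and tsv column names are assumptions; use the names your snakefile, config file, and datasets.tsv actually contain):

```
# snakefile: absolute path to the config file
configfile: "/home/user/spimprep/config/config.yml"

# config.yml: absolute path to the datasets table (key name assumed)
datasets: "/home/user/spimprep/config/datasets.tsv"

# datasets.tsv: absolute path to the test dataset (column name assumed, tab-separated)
dataset_path    /home/user/spimprep/testing/test_dataset.tar
```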

120 changes: 120 additions & 0 deletions testing/create_downsampled_dataset.py
@@ -0,0 +1,120 @@
import numpy as np
from zarrnii import ZarrNii
import tarfile
import tifffile
import xmltodict
import dask.array as da
import os
import typer
from typing_extensions import Annotated
app = typer.Typer()

def downsample_tiff(source_dir, ds_x, ds_y, ds_z, slice_step):
"""
Take in the original tiff data and put into zarnii to handle the downsampling
"""
member_names = []
members_list = []
za = None
with tarfile.open(source_dir, 'r') as tar:
members = tar.getmembers()
for member in members:
tar.extract(member)
member_names.append(member.name)
members_list.append(member)
for member in member_names:
with tifffile.TiffFile(member) as tif:
data = tif.series[0].asarray()
data = da.from_array(data)
            member_slice = int(member.split("Z")[1][:4])
            channel = int(member.split("C")[1][:2])
            if za is None:
                # the first tile initializes the array and supplies the OME metadata
                za = ZarrNii.from_darr(data)
                meta = xmltodict.parse(tif.ome_metadata)
            elif data.shape == (2560, 2160):  # a full-size 2D tile (tile shape is hard-coded)
                # integer division keeps the slice index an int
                za.darr[channel, member_slice // slice_step] = np.array(data)
            else:
                za.darr[channel, member_slice // slice_step] = np.array(data[channel][member_slice])
za = za.downsample(along_x=ds_x, along_y=ds_y, along_z=ds_z)
za.darr = da.from_array(np.array(za.darr).astype(np.uint16))
return meta, za, members_list


def basic_meta_update(meta, za, ds_x=1, ds_y=1, ds_z=1):
"""
Update the simple metadata including pixel size and the size of the array
"""
pixel = meta['OME']['Image']['Pixels']
pixel['@SizeX'] = f'{za.darr.shape[3]}'
pixel['@SizeY'] = f'{za.darr.shape[2]}'
pixel['@SizeZ'] = f'{za.darr.shape[1]}'
pixel['@PhysicalSizeX'] = f"{float(pixel['@PhysicalSizeX'])*ds_x}"
pixel['@PhysicalSizeY'] = f"{float(pixel['@PhysicalSizeY'])*ds_y}"
pixel['@PhysicalSizeZ'] = f"{float(pixel['@PhysicalSizeZ'])*ds_z}"
meta['OME']['Image']['Pixels'] = pixel
return meta

def advanced_meta(meta, za, slice_step):
"""
Update the tiffdata tile configuration data to ensure
data is read and processed properly
"""
tiff_data = meta['OME']['Image']['Pixels']['TiffData']
new_tiff_data = []
for single_data in tiff_data:
slice_num = int(single_data["@FirstZ"])
if slice_num < za.darr.shape[1]:
new_tiff_data.append(single_data)
meta['OME']['Image']['Pixels']['TiffData'] = new_tiff_data

new_config = "4"
for tile in meta['OME']['Image']['ca:CustomAttributes']['TileConfiguration']['@TileConfiguration'].split(" ")[1:]:
print(tile.split("Z")[1][:4])
slice_num = int(tile.split("Z")[1][:4])/slice_step
if(slice_num < za.darr.shape[1]):
new_config += " " + tile
meta['OME']['Image']['ca:CustomAttributes']['TileConfiguration']['@TileConfiguration'] = new_config
return meta


def output_downsampled_tiff(output, members_list, za, meta, slice_step):
"""
Create the new tiff files with the downsampled data and updated
metadata
"""
    # materialize the downsampled volume once rather than per output file
    volume = np.array(za.darr)
    with tarfile.open(output, 'w') as tar:
        for member in members_list:
            member_slice = int(member.name.split("Z")[1][:4]) // slice_step
            channel = int(member.name.split("C")[1][:2])
            if member_slice < za.darr.shape[1]:
                new_data = volume[channel, member_slice, :, :]
                with tifffile.TiffWriter(member.name) as tw:
                    if member_slice == 0:
                        # only the first slice carries the updated OME-XML description
                        new_description = xmltodict.unparse(meta).encode("UTF-8")
                        tw.write(new_data, description=new_description, metadata=None, planarconfig="CONTIG")
                    else:
                        tw.write(new_data, metadata=None, planarconfig="CONTIG")
                tar.add(member.name, arcname=member.name)
                os.remove(member.name)

@app.command()
def complete_tiff_downsampling(path_to_source_tar: Annotated[str, typer.Argument(help="ex: dir1/dir2/dataset.tar")],
                               path_to_output_tar: Annotated[str, typer.Argument(help="ex: dir1/dir2/test_dataset.tar")],
                               ds_x: int = 1, ds_y: int = 1, ds_z: int = 1, slice_step: int = 1):
    """
    Downsample a tar of tiff files end to end: read, downsample, update
    the metadata, and write the output tar. Exposed on the command line
    via typer.
    """
    meta, data, member_list = downsample_tiff(path_to_source_tar, ds_x, ds_y, ds_z, slice_step)
    meta = basic_meta_update(meta, data, ds_x, ds_y, ds_z)
    meta = advanced_meta(meta, data, slice_step)
    output_downsampled_tiff(path_to_output_tar, member_list, data, meta, slice_step)
return meta



if __name__ == "__main__":
app()
40 changes: 40 additions & 0 deletions testing/generate_test.py
@@ -0,0 +1,40 @@
import subprocess as sp
import os
from pathlib import Path
import shutil

# Run the snakemake workflow with --notemp so intermediate files are kept for generating the tests
sp.run([
"python",
"-m",
"snakemake",
"-c",
"all",
"--use-singularity",
"--notemp"
])

# Generate the unit tests
sp.run([
"python",
"-m",
"snakemake",
"--generate-unit-tests"
])

# Path to testing scripts
directory = Path("testing/testing_scripts")
# output the tests in the unit test folder
output_directory = Path(".tests/unit")
# get all the test files
files = os.listdir(directory)

# Copy all the test scripts into the unit test directory
for file in files:
full_name = directory / file
full_output_name = output_directory / file
shutil.copy(full_name, full_output_name)




70 changes: 70 additions & 0 deletions testing/testing_scripts/common.py
@@ -0,0 +1,70 @@
"""
Common code for unit testing of rules generated with Snakemake 7.32.4.
"""

from pathlib import Path
import subprocess as sp
import os
import difflib

class OutputChecker:
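    """
    Compare a workflow run against a reference: walk the working
    directory and check every generated file against its expected
    counterpart, byte for byte.
    """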
def __init__(self, data_path, expected_path, workdir):
self.data_path = data_path
self.expected_path = expected_path
self.workdir = workdir

def check(self):
input_files = set(
(Path(path) / f).relative_to(self.data_path)
for path, subdirs, files in os.walk(self.data_path)
for f in files
)
expected_files = set(
(Path(path) / f).relative_to(self.expected_path)
for path, subdirs, files in os.walk(self.expected_path)
for f in files
)
unexpected_files = set()
for path, subdirs, files in os.walk(self.workdir):
for f in files:
f = (Path(path) / f).relative_to(self.workdir)
if str(f).startswith(".snakemake"):
continue
if f in expected_files:
self.compare_files(self.workdir / f, self.expected_path / f)
elif f in input_files:
# ignore input files
pass
elif str(f).startswith("logs/") or str(f).startswith("benchmarks") or str(f).startswith(".java/") or str(f).startswith("qc/resources") or str(f) == "qc/qc_report.html":
pass
else:
unexpected_files.add(f)
if unexpected_files:
raise ValueError(
"Unexpected files:\n{}".format(
"\n".join(sorted(map(str, unexpected_files)))
)
)

def compare_files(self, generated_file, expected_file):
sp.check_output(["cmp", generated_file, expected_file])


class ImperfectOutputChecker(OutputChecker):
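    """
    Checker for outputs that are not byte-identical between runs:
    compares the files in 1024-byte chunks and requires an average
    difflib similarity ratio of at least 0.995.
    """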
    def compare_files(self, generated_file, expected_file):
        if os.path.getsize(generated_file) and os.path.getsize(expected_file):
            with open(generated_file, 'rb') as gen, open(expected_file, 'rb') as exp:
                total_similarity = []
                while True:
                    gen_content = gen.read(1024)
                    exp_content = exp.read(1024)
                    if not gen_content and not exp_content:
                        break
                    # a chunk missing from one file scores low, penalizing length mismatches
                    similarity_ratio = difflib.SequenceMatcher(None, gen_content, exp_content).ratio()
                    total_similarity.append(similarity_ratio)
                final_sim_score = sum(total_similarity) / len(total_similarity)
                assert final_sim_score >= 0.995, final_sim_score
        elif os.path.getsize(generated_file) != os.path.getsize(expected_file):
            raise ValueError("Files not equal")

46 changes: 46 additions & 0 deletions testing/testing_scripts/test_apply_basic_flatfield_corr.py
@@ -0,0 +1,46 @@
import os
import sys

import subprocess as sp
from tempfile import TemporaryDirectory
import shutil
from pathlib import Path, PurePosixPath

sys.path.insert(0, os.path.dirname(__file__))

import common


def test_apply_basic_flatfield_corr():

with TemporaryDirectory() as tmpdir:
workdir = Path(tmpdir) / "workdir"
data_path = PurePosixPath(".tests/unit/apply_basic_flatfield_corr/data")
expected_path = PurePosixPath(".tests/unit/apply_basic_flatfield_corr/expected")

# Copy data to the temporary workdir.
shutil.copytree(data_path, workdir)

# dbg
print("work/sub-mouse1/micr/sub-mouse1_sample-brain_acq-blaze1x_desc-flatcorr_SPIM.zarr", file=sys.stderr)

# Run the test job.
sp.check_output([
"python",
"-m",
"snakemake",
"work/sub-mouse1/micr/sub-mouse1_sample-brain_acq-blaze1x_desc-flatcorr_SPIM.zarr",
"-f",
"-j1",
"--target-files-omit-workdir-adjustment",
"--use-singularity",

"--directory",
workdir,
])

# Check the output byte by byte using cmp.
        # To modify this behavior, you can inherit from common.OutputChecker in here
        # and overwrite the method `compare_files(generated_file, expected_file)`;
        # also see common.py.
common.OutputChecker(data_path, expected_path, workdir).check()
46 changes: 46 additions & 0 deletions testing/testing_scripts/test_bids_readme.py
@@ -0,0 +1,46 @@
import os
import sys

import subprocess as sp
from tempfile import TemporaryDirectory
import shutil
from pathlib import Path, PurePosixPath

sys.path.insert(0, os.path.dirname(__file__))

import common


def test_bids_readme():

with TemporaryDirectory() as tmpdir:
workdir = Path(tmpdir) / "workdir"
data_path = PurePosixPath(".tests/unit/bids_readme/data")
expected_path = PurePosixPath(".tests/unit/bids_readme/expected")

# Copy data to the temporary workdir.
shutil.copytree(data_path, workdir)

# dbg
print("bids/README.md", file=sys.stderr)

# Run the test job.
sp.check_output([
"python",
"-m",
"snakemake",
"bids/README.md",
"-f",
"-j1",
"--target-files-omit-workdir-adjustment",
"--use-singularity",

"--directory",
workdir,
])
# Check the output byte by byte using cmp.
        # To modify this behavior, you can inherit from common.OutputChecker in here
        # and overwrite the method `compare_files(generated_file, expected_file)`;
        # also see common.py.
common.OutputChecker(data_path, expected_path, workdir).check()