diff --git a/lib/galaxy/dependencies/pinned-requirements.txt b/lib/galaxy/dependencies/pinned-requirements.txt
index 58fc8f5a775e..9b51bb5490a2 100644
--- a/lib/galaxy/dependencies/pinned-requirements.txt
+++ b/lib/galaxy/dependencies/pinned-requirements.txt
@@ -126,6 +126,7 @@ parsley==1.3 ; python_version >= "3.8" and python_version < "3.13"
 paste==3.7.1 ; python_version >= "3.8" and python_version < "3.13"
 pastedeploy==3.1.0 ; python_version >= "3.8" and python_version < "3.13"
 pebble==5.0.6 ; python_version >= "3.8" and python_version < "3.13"
+pillow==10.2.0 ; python_version >= "3.8" and python_version < "3.13"
 pkgutil-resolve-name==1.3.10 ; python_version >= "3.8" and python_version < "3.9"
 promise==2.3 ; python_version >= "3.8" and python_version < "3.13"
 prompt-toolkit==3.0.43 ; python_version >= "3.8" and python_version < "3.13"
diff --git a/lib/galaxy/tool_util/linters/tests.py b/lib/galaxy/tool_util/linters/tests.py
index 43f409edf401..cb0cb7f29da7 100644
--- a/lib/galaxy/tool_util/linters/tests.py
+++ b/lib/galaxy/tool_util/linters/tests.py
@@ -307,6 +307,8 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
             "decompress": ["diff"],
             "delta": ["sim_size"],
             "delta_frac": ["sim_size"],
+            "metric": ["image_diff"],
+            "eps": ["image_diff"],
         }
         for test_idx, test in enumerate(tests, start=1):
             for output in test.xpath(".//*[self::output or self::element or self::discovered_dataset]"):
diff --git a/lib/galaxy/tool_util/parser/util.py b/lib/galaxy/tool_util/parser/util.py
index 9ecec25559f6..228312b1af46 100644
--- a/lib/galaxy/tool_util/parser/util.py
+++ b/lib/galaxy/tool_util/parser/util.py
@@ -3,6 +3,9 @@
 DEFAULT_DELTA = 10000
 DEFAULT_DELTA_FRAC = None
 
+DEFAULT_METRIC = "mae"  # default distance metric for compare="image_diff" (mean absolute error)
+DEFAULT_EPS = 0.01  # default maximum distance tolerated by compare="image_diff"
+
 
 def is_dict(item):
     return isinstance(item, dict) or isinstance(item, OrderedDict)
diff --git a/lib/galaxy/tool_util/parser/xml.py b/lib/galaxy/tool_util/parser/xml.py
index 7d879a25b474..7da01570ff81 100644
--- a/lib/galaxy/tool_util/parser/xml.py
+++ b/lib/galaxy/tool_util/parser/xml.py
@@ -19,6 +19,8 @@
 from galaxy.tool_util.parser.util import (
     DEFAULT_DELTA,
     DEFAULT_DELTA_FRAC,
+    DEFAULT_EPS,
+    DEFAULT_METRIC,
 )
 from galaxy.util import (
     Element,
@@ -788,6 +790,9 @@ def __parse_test_attributes(output_elem, attrib, parse_elements=False, parse_dis
     attributes["decompress"] = string_as_bool(attrib.pop("decompress", False))
     # `location` may contain an URL to a remote file that will be used to download `file` (if not already present on disk).
     location = attrib.get("location")
+    # Parameters for "image_diff" comparison
+    attributes["metric"] = attrib.pop("metric", DEFAULT_METRIC)
+    attributes["eps"] = float(attrib.pop("eps", DEFAULT_EPS))
     if location and file is None:
         file = os.path.basename(location)  # If no file specified, try to get filename from URL last component
     attributes["location"] = location
diff --git a/lib/galaxy/tool_util/verify/__init__.py b/lib/galaxy/tool_util/verify/__init__.py
index c989355d8bce..976c8706b684 100644
--- a/lib/galaxy/tool_util/verify/__init__.py
+++ b/lib/galaxy/tool_util/verify/__init__.py
@@ -5,6 +5,7 @@
 import hashlib
 import json
 import logging
+import math
 import os
 import os.path
 import re
@@ -14,23 +15,38 @@
     Any,
     Callable,
     Dict,
+    List,
     Optional,
+    TYPE_CHECKING,
 )
 
+try:
+    import numpy
+except ImportError:
+    pass
 try:
     import pysam
 except ImportError:
-    pysam = None  # type: ignore[assignment]
+    pass
+try:
+    from PIL import Image
+except ImportError:
+    pass
 
 from galaxy.tool_util.parser.util import (
     DEFAULT_DELTA,
     DEFAULT_DELTA_FRAC,
+    DEFAULT_EPS,
+    DEFAULT_METRIC,
 )
 from galaxy.util import unicodify
 from galaxy.util.compression_utils import get_fileobj
 from .asserts import verify_assertions
 from .test_data import TestDataResolver
 
+if TYPE_CHECKING:
+    import numpy.typing
+
 log = logging.getLogger(__name__)
 
 DEFAULT_TEST_DATA_RESOLVER = TestDataResolver()
@@ -171,6 +187,8 @@ def get_filename(filename: str) -> str:
                 files_delta(local_name, temp_name, attributes=attributes)
             elif compare == "contains":
                 files_contains(local_name, temp_name, attributes=attributes)
+            elif compare == "image_diff":
+                files_image_diff(local_name, temp_name, attributes=attributes)
             else:
                 raise Exception(f"Unimplemented Compare type: {compare}")
         except AssertionError as err:
@@ -432,3 +450,68 @@ def files_contains(file1, file2, attributes=None):
             line_diff_count += 1
         if line_diff_count > lines_diff:
             raise AssertionError(f"Failed to find '{contains}' in history data. (lines_diff={lines_diff}).")
+
+
+def _multiobject_intersection_over_union(
+    mask1: "numpy.typing.NDArray", mask2: "numpy.typing.NDArray", repeat_reverse: bool = True
+) -> List["numpy.floating"]:
+    """For each label of ``mask1``, collect its best IoU with an overlapping label of ``mask2``."""
+    best_ious = []
+    for label in numpy.unique(mask1):
+        region = mask1 == label
+        # Only labels of mask2 that occur inside the region can overlap it.
+        overlapping = numpy.unique(mask2[region])
+        best_ious.append(max(intersection_over_union(region, mask2 == other) for other in overlapping))
+    if repeat_reverse:
+        # Score the labels of mask2 against mask1 as well, without recursing any further.
+        best_ious += _multiobject_intersection_over_union(mask2, mask1, repeat_reverse=False)
+    return best_ious
+
+
+def intersection_over_union(mask1: "numpy.typing.NDArray", mask2: "numpy.typing.NDArray") -> "numpy.floating":
+    """Return the IoU of two 2-D masks; for non-boolean label maps, the worst IoU over matched objects."""
+    if mask1.dtype != mask2.dtype or mask1.shape != mask2.shape or mask1.ndim != 2:
+        # Raised explicitly rather than via ``assert`` so the check survives ``python -O`` and has a message.
+        raise AssertionError(f"IoU requires 2-D masks of equal dtype and shape, got {mask1.shape} and {mask2.shape}.")
+    if mask1.dtype == bool:
+        return numpy.logical_and(mask1, mask2).sum() / numpy.logical_or(mask1, mask2).sum()
+    return min(_multiobject_intersection_over_union(mask1, mask2))
+
+
+def get_image_metric(
+    attributes: Dict[str, Any]
+) -> Callable[["numpy.typing.NDArray", "numpy.typing.NDArray"], "numpy.floating"]:
+    """Return the distance function named by ``attributes["metric"]`` (``DEFAULT_METRIC`` if unset)."""
+    metric_name = attributes.get("metric", DEFAULT_METRIC)
+    # Cast to float *before* subtracting: unsigned dtypes wrap around and bool arrays do not support "-".
+    metrics = {
+        "mae": lambda arr1, arr2: numpy.abs(arr1.astype(float) - arr2.astype(float)).mean(),
+        "mse": lambda arr1, arr2: numpy.square(arr1.astype(float) - arr2.astype(float)).mean(),
+        "rms": lambda arr1, arr2: math.sqrt(numpy.square(arr1.astype(float) - arr2.astype(float)).mean()),
+        "fro": lambda arr1, arr2: numpy.linalg.norm((arr1.astype(float) - arr2.astype(float)).reshape(1, -1), "fro"),
+        "iou": lambda arr1, arr2: 1 - intersection_over_union(arr1, arr2),
+    }
+    if metric_name not in metrics:
+        raise ValueError(f'No such metric: "{metric_name}"')
+    return metrics[metric_name]
+
+
+def files_image_diff(file1: str, file2: str, attributes: Optional[Dict[str, Any]] = None) -> None:
+    """Check that two image files hold (nearly) the same pixel data.
+
+    Raises ``AssertionError`` on differing dtype or shape, or if the metric distance exceeds ``eps``.
+    """
+    attributes = attributes or {}
+    pixel_data = []
+    for path in (file1, file2):
+        with Image.open(path) as im:
+            pixel_data.append(numpy.array(im))
+    arr1, arr2 = pixel_data
+    if arr1.dtype != arr2.dtype:
+        raise AssertionError(f"Image data types did not match ({arr1.dtype}, {arr2.dtype}).")
+    if arr1.shape != arr2.shape:
+        raise AssertionError(f"Image dimensions did not match ({arr1.shape}, {arr2.shape}).")
+    distance_eps = attributes.get("eps", DEFAULT_EPS)
+    distance = get_image_metric(attributes)(arr1, arr2)
+    if distance > distance_eps:
+        raise AssertionError(f"Image difference {distance} exceeds eps={distance_eps}.")
diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
index e14bbd8a05fe..34361b426d83 100644
--- a/lib/galaxy/tool_util/xsd/galaxy.xsd
+++ b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -1660,7 +1660,7 @@ Different methods can be chosen for the comparison with the local file specified
 by ``file`` using the ``compare`` attribute:
 
 - ``diff``: uses diff to compare the history data set and the file provided by
-  ``file``. Compressed files are decompressed before the compariopm if
+  ``file``. Compressed files are decompressed before the comparison if
   ``decompress`` is set to ``true``. BAM files are converted to SAM before the
   comparision and for pdf some special rules are implemented. The number of
   allowed differences can be set with ``lines_diff``.  If ``sort="true"`` history
@@ -1678,6 +1678,10 @@ by ``file`` using the ``compare`` attribute:
 - ``sim_size``: compares the size of the history dataset and the ``file`` subject to
   the values of the ``delta`` and ``delta_frac`` attributes. Note that a ``has_size``
   content assertion should be preferred, because this avoids storing the test file.
+- ``image_diff``: compares the pixel data of the history data set and the file
+  provided by ``file``. The difference of the images is quantified according to their
+  pixel-wise distance with respect to a specific ``metric``. The check passes if the
+  distance is not larger than the value set for ``eps``. Only 2-D images can be used.
 
         ]]></xs:documentation>
     </xs:annotation>
@@ -1814,6 +1818,13 @@ will be infered from the last component of the location URL. For example, `locat
 If you specify a `checksum`, it will be also used to check the integrity of the download.</xs:documentation>
       </xs:annotation>
     </xs:attribute>
+    <xs:attribute name="metric" type="TestOutputMetricType" default="mae">
+    </xs:attribute>
+    <xs:attribute name="eps" type="xs:float" default="0.01">
+      <xs:annotation>
+        <xs:documentation xml:lang="en">If ``compare`` is set to ``image_diff``, this is the maximum allowed distance between the data set that is generated in the test and the file in ``test-data/`` that is referenced by the ``file`` attribute, with distances computed with respect to the specified ``metric``. Default value is 0.01.</xs:documentation>
+      </xs:annotation>
+    </xs:attribute>
   </xs:complexType>
   <xs:group name="TestOutputElement">
     <xs:choice>
@@ -7465,8 +7476,9 @@ and ``bibtex`` are the only supported options.</xs:documentation>
     <xs:annotation>
       <xs:documentation xml:lang="en">Type of comparison to use when comparing
 test generated output files to expected output files. Currently valid value are
-``diff`` (the default), ``re_match``, ``re_match_multiline``,
-and ``contains``. In addition there is ``sim_size`` which is discouraged in favour of a ``has_size`` assertion.</xs:documentation>
+``diff`` (the default), ``re_match``, ``re_match_multiline``, ``contains``,
+and ``image_diff``. In addition there is ``sim_size`` which is discouraged in
+favour of a ``has_size`` assertion.</xs:documentation>
     </xs:annotation>
     <xs:restriction base="xs:string">
       <xs:enumeration value="diff"/>
@@ -7474,6 +7486,19 @@ and ``contains``. In addition there is ``sim_size`` which is discouraged in favo
       <xs:enumeration value="sim_size"/>
       <xs:enumeration value="re_match_multiline"/>
       <xs:enumeration value="contains"/>
+      <xs:enumeration value="image_diff"/>
+    </xs:restriction>
+  </xs:simpleType>
+  <xs:simpleType name="TestOutputMetricType">
+    <xs:annotation>
+      <xs:documentation xml:lang="en">If ``compare`` is set to ``image_diff``, this is the metric used to compute the distance between images for quantification of their difference. For intensity images, possible metrics are *mean absolute error* (``mae``, the default), *mean squared error* (``mse``), *root mean squared error* (``rms``), and the *Frobenius norm* (``fro``). In addition, for binary images and label maps (with multiple objects), ``iou`` can be used to compute *one minus* the *intersection over the union* (IoU). Object correspondences are established by taking the pair of objects for which the IoU is highest, and the distance of the images is the worst value determined for any pair of corresponding objects.</xs:documentation>
+    </xs:annotation>
+    <xs:restriction base="xs:string">
+      <xs:enumeration value="mae"/>
+      <xs:enumeration value="mse"/>
+      <xs:enumeration value="rms"/>
+      <xs:enumeration value="fro"/>
+      <xs:enumeration value="iou"/>
     </xs:restriction>
   </xs:simpleType>
   <xs:simpleType name="PermissiveBoolean">
diff --git a/lib/galaxy/util/checkers.py b/lib/galaxy/util/checkers.py
index a7bd132925c4..59ad67cba078 100644
--- a/lib/galaxy/util/checkers.py
+++ b/lib/galaxy/util/checkers.py
@@ -213,11 +213,9 @@ def iter_zip(file_path: str):
             yield (z.open(f), f)
 
 
-def check_image(file_path: str):
+def check_image(file_path: str) -> bool:
     """Simple wrapper around image_type to yield a True/False verdict"""
-    if image_type(file_path):
-        return True
-    return False
+    return bool(image_type(file_path))  # any truthy result of image_type() counts as "is an image"
 
 
 COMPRESSION_CHECK_FUNCTIONS: Dict[str, CompressionChecker] = {
diff --git a/lib/galaxy/util/image_util.py b/lib/galaxy/util/image_util.py
index d24a75f10725..1b9bf7d99bb5 100644
--- a/lib/galaxy/util/image_util.py
+++ b/lib/galaxy/util/image_util.py
@@ -2,25 +2,25 @@
 
 import imghdr
 import logging
+from typing import (
+    List,
+    Optional,
+)
 
 try:
-    import Image as PIL
+    from PIL import Image
 except ImportError:
-    try:
-        from PIL import Image as PIL
-    except ImportError:
-        PIL = None
+    Image = None  # type: ignore[assignment]  # the name image_type() tests against None
 
 log = logging.getLogger(__name__)
 
 
-def image_type(filename):
+def image_type(filename: str) -> Optional[str]:
     fmt = None
-    if PIL is not None:
+    if Image is not None:
         try:
-            im = PIL.open(filename)
-            fmt = im.format
-            im.close()
+            with Image.open(filename) as im:
+                fmt = im.format
         except Exception:
             # We continue to try with imghdr, so this is a rare case of an
             # exception we expect to happen frequently, so we're not logging
@@ -30,10 +30,10 @@ def image_type(filename):
     if fmt:
         return fmt.upper()
     else:
-        return False
+        return None
 
 
-def check_image_type(filename, types):
+def check_image_type(filename: str, types: List[str]) -> bool:
     fmt = image_type(filename)
     if fmt in types:
         return True
diff --git a/packages/data/setup.cfg b/packages/data/setup.cfg
index 5e97372d47ec..157324e102a8 100644
--- a/packages/data/setup.cfg
+++ b/packages/data/setup.cfg
@@ -34,6 +34,7 @@ include_package_data = True
 install_requires =
     galaxy-files
     galaxy-objectstore
+    galaxy-tool-util
     galaxy-util[template]
     alembic
     alembic-utils
diff --git a/packages/test.sh b/packages/test.sh
index 5a64061bd51d..ae69d9b20516 100755
--- a/packages/test.sh
+++ b/packages/test.sh
@@ -51,7 +51,7 @@ while read -r package_dir || [ -n "$package_dir" ]; do  # https://stackoverflow.
     if [ "$package_dir" = "util" ]; then
         pip install -e '.[template,jstree]'
     elif [ "$package_dir" = "tool_util" ]; then
-        pip install -e '.[cwl,mulled,edam]'
+        pip install -e '.[cwl,mulled,edam,extended-assertions]'
     else
         pip install -e '.'
     fi
diff --git a/packages/tool_util/setup.cfg b/packages/tool_util/setup.cfg
index e0753f1af3f0..5d6b9b9a378d 100644
--- a/packages/tool_util/setup.cfg
+++ b/packages/tool_util/setup.cfg
@@ -66,6 +66,10 @@ mulled =
     Whoosh
 edam =
     edam-ontology
+extended-assertions =
+    numpy
+    pysam
+    pillow
 
 [options.packages.find]
 exclude =
diff --git a/pyproject.toml b/pyproject.toml
index 96354870985b..a1b0be2fda3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,6 +85,7 @@ paramiko = "!=2.9.0, !=2.9.1"  # https://github.com/paramiko/paramiko/issues/196
 Parsley = "*"
 Paste = "*"
 pebble = "*"
+pillow = "*"
 psutil = "*"
 pulsar-galaxy-lib = ">=0.15.0.dev0"
 pycryptodome = "*"
diff --git a/test-data/im1_uint8.png b/test-data/im1_uint8.png
new file mode 100644
index 000000000000..c629ee397615
Binary files /dev/null and b/test-data/im1_uint8.png differ
diff --git a/test-data/im1_uint8.tif b/test-data/im1_uint8.tif
new file mode 100644
index 000000000000..35b5052b90e0
Binary files /dev/null and b/test-data/im1_uint8.tif differ
diff --git a/test-data/im2_a.png b/test-data/im2_a.png
new file mode 100644
index 000000000000..166cdb5b319b
Binary files /dev/null and b/test-data/im2_a.png differ
diff --git a/test-data/im2_b.png b/test-data/im2_b.png
new file mode 100644
index 000000000000..be2e85f25f80
Binary files /dev/null and b/test-data/im2_b.png differ
diff --git a/test-data/im3_a.png b/test-data/im3_a.png
new file mode 100644
index 000000000000..8c1870d99166
Binary files /dev/null and b/test-data/im3_a.png differ
diff --git a/test-data/im3_b.tif b/test-data/im3_b.tif
new file mode 100644
index 000000000000..4dd9dac50556
Binary files /dev/null and b/test-data/im3_b.tif differ
diff --git a/test/functional/tools/image_diff.xml b/test/functional/tools/image_diff.xml
new file mode 100644
index 000000000000..a079992e5fe5
--- /dev/null
+++ b/test/functional/tools/image_diff.xml
@@ -0,0 +1,36 @@
+<tool id="image_diff" name="image_diff" version="0.1.0">
+  <command><![CDATA[
+    cp '$in' '$out'
+  ]]></command>
+  <inputs>
+    <param name="in" type="data" format="data"/>
+  </inputs>
+  <outputs>
+    <data name="out" format="data"/>
+  </outputs>
+  <tests>
+    <!-- test pair of equal images (but different formats) -->
+    <test>
+      <param name="in" value="im1_uint8.png" />
+      <output name="out" value="im1_uint8.png" compare="image_diff" metric="mae" />
+    </test>
+    <test>
+      <param name="in" value="im1_uint8.tif" />
+      <output name="out" value="im1_uint8.png" compare="image_diff" metric="mse" eps="0" />
+    </test>
+    <test>
+      <param name="in" value="im1_uint8.png" />
+      <output name="out" value="im1_uint8.png" compare="image_diff" metric="rms" eps="0" />
+    </test>
+    <!-- test pair of different images -->
+    <test>
+      <param name="in" value="im2_a.png" />
+      <output name="out" value="im2_b.png" compare="image_diff" metric="mae" eps="0.25" />
+    </test>
+    <!-- test RGB data -->
+    <test>
+      <param name="in" value="im3_a.png" />
+      <output name="out" value="im3_b.tif" compare="image_diff" metric="fro" eps="256" />
+    </test>
+  </tests>
+</tool>
diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml
index f05e6574cb53..e1fa711e0df1 100644
--- a/test/functional/tools/sample_tool_conf.xml
+++ b/test/functional/tools/sample_tool_conf.xml
@@ -8,6 +8,7 @@
     <tool file="param_text_option.xml" />
     <tool file="column_param.xml" />
   </section>
+  <tool file="image_diff.xml"/>
   <tool file="output_format_input.xml"/>
   <tool file="ucsc_tablebrowser.xml"/>
   <tool file="test_data_source.xml"/>
diff --git a/test/unit/tool_util/test_verify.py b/test/unit/tool_util/test_verify.py
index f03da0857cfb..db6955e08d69 100644
--- a/test/unit/tool_util/test_verify.py
+++ b/test/unit/tool_util/test_verify.py
@@ -1,5 +1,7 @@
 import collections
 import gzip
+import io
+import math
 import tempfile
 from typing import (
     Any,
@@ -10,12 +12,15 @@
     Type,
 )
 
+import numpy
 import pytest
+from PIL import Image
 
 from galaxy.tool_util.verify import (
     files_contains,
     files_delta,
     files_diff,
+    files_image_diff,
     files_re_match,
     files_re_match_multiline,
 )
@@ -30,9 +35,74 @@
 TestDef = Tuple[bytes, bytes, Optional[Dict[str, Any]], Optional[Type[AssertionError]]]
 
 
+def _encode_image(im, **kwargs):
+    """Serialize the array ``im`` to image file bytes; ``kwargs`` are forwarded to the PIL ``save`` call."""
+    buf = io.BytesIO()
+    Image.fromarray(im).save(buf, **kwargs)
+    return buf.getvalue()
+
+
+F6 = _encode_image(  # 3x3 uint8 PNG; center pixel is 200
+    numpy.array(
+        [
+            [255, 255, 255],
+            [255, 200, 255],
+            [255, 255, 255],
+        ],
+        dtype=numpy.uint8,
+    ),
+    format="PNG",
+)
+F7 = _encode_image(  # 3x3 uint8 TIFF; differs from F6 by 100 in the center pixel only
+    numpy.array(
+        [
+            [255, 255, 255],
+            [255, 100, 255],
+            [255, 255, 255],
+        ],
+        dtype=numpy.uint8,
+    ),
+    format="TIFF",
+)
+F8 = _encode_image(  # same values as F7, but as floats divided by 255
+    numpy.array(
+        [
+            [255, 255, 255],
+            [255, 100, 255],
+            [255, 255, 255],
+        ],
+        dtype=float,
+    )
+    / 0xFF,
+    format="TIFF",
+)
+F9 = _encode_image(  # 3x3 uint8 PNG used as a label map with labels 0, 1 and 2
+    numpy.array(
+        [
+            [0, 0, 0],
+            [0, 1, 0],
+            [0, 1, 2],
+        ],
+        dtype=numpy.uint8,
+    ),
+    format="PNG",
+)
+
+
 def _test_file_list():
     files = []
-    for b, ext in [(F1, ".txt"), (F2, ".txt"), (F3, ".pdf"), (F4, ".txt"), (MULTILINE_MATCH, ".txt"), (F1, ".txt.gz")]:
+    for b, ext in [
+        (F1, ".txt"),
+        (F2, ".txt"),
+        (F3, ".pdf"),
+        (F4, ".txt"),
+        (MULTILINE_MATCH, ".txt"),
+        (F1, ".txt.gz"),
+        (F6, ".png"),
+        (F7, ".tiff"),
+        (F8, ".tiff"),
+        (F9, ".png"),
+    ]:
         with tempfile.NamedTemporaryFile(mode="wb", suffix=ext, delete=False) as out:
             if ext == ".txt.gz":
                 b = gzip.compress(b)
@@ -42,7 +112,7 @@ def _test_file_list():
 
 
 def generate_tests(multiline=False):
-    f1, f2, f3, f4, multiline_match, f5 = _test_file_list()
+    f1, f2, f3, f4, multiline_match, f5, f6, f7, f8, f9 = _test_file_list()
     tests: List[TestDef]
     if multiline:
         tests = [(multiline_match, f1, {"lines_diff": 0, "sort": True}, None)]
@@ -60,7 +130,7 @@ def generate_tests(multiline=False):
 
 
 def generate_tests_sim_size():
-    f1, f2, f3, f4, multiline_match, f5 = _test_file_list()
+    f1, f2, f3, f4, multiline_match, f5, f6, f7, f8, f9 = _test_file_list()
     # tests for equal files
     tests: List[TestDef] = [
         (f1, f1, None, None),  # pass default values
@@ -85,6 +155,34 @@ def generate_tests_sim_size():
     return tests
 
 
+def generate_tests_image_diff():
+    f1, f2, f3, f4, multiline_match, f5, f6, f7, f8, f9 = _test_file_list()
+    metrics = ["mae", "mse", "rms", "fro", "iou"]
+    # tests for equal files (uint8, PNG); no "eps" given, so the default tolerance applies
+    tests: List[TestDef] = [(f6, f6, {"metric": metric}, None) for metric in metrics]
+    # tests for equal files (uint8, TIFF)
+    tests += [(f7, f7, {"metric": metric}, None) for metric in metrics]
+    # tests for equal files (float, TIFF)
+    tests += [(f8, f8, {"metric": metric}, None) for metric in metrics]
+    # tests for pairs of different files; a dtype mismatch must fail whatever the metric
+    tests += [(f6, f8, {"metric": metric}, AssertionError) for metric in metrics]  # uint8 vs float
+    tests += [(f7, f8, {"metric": metric}, AssertionError) for metric in metrics]  # uint8 vs float
+    tests += [
+        (f6, f7, {"metric": "iou"}, None),  # same objects under different labels: IoU is 1
+        (f6, f7, {"metric": "mae", "eps": 100 / 9 + 1e-4}, None),
+        (f6, f7, {"metric": "mae", "eps": 100 / 9 - 1e-4}, AssertionError),
+        (f6, f7, {"metric": "mse", "eps": (100**2) / 9 + 1e-4}, None),
+        (f6, f7, {"metric": "mse", "eps": (100**2) / 9 - 1e-4}, AssertionError),
+        (f6, f7, {"metric": "rms", "eps": math.sqrt((100**2) / 9) + 1e-4}, None),
+        (f6, f7, {"metric": "rms", "eps": math.sqrt((100**2) / 9) - 1e-4}, AssertionError),
+        (f6, f7, {"metric": "fro", "eps": 100 + 1e-4}, None),
+        (f6, f7, {"metric": "fro", "eps": 100 - 1e-4}, AssertionError),
+        (f6, f9, {"metric": "iou", "eps": (1 - 1 / 8) + 1e-4}, None),  # worst object pair: IoU 1/8
+        (f6, f9, {"metric": "iou", "eps": (1 - 1 / 8) - 1e-4}, AssertionError),
+    ]
+    return tests
+
+
 @pytest.mark.parametrize("file1,file2,attributes,expect", generate_tests())
 def test_files_contains(file1, file2, attributes, expect):
     if expect is not None:
@@ -128,3 +226,12 @@ def test_files_re_match_multiline(file1, file2, attributes, expect):
             files_re_match_multiline(file1.path, file2.path, attributes)
     else:
         files_re_match_multiline(file1.path, file2.path, attributes)
+
+
+@pytest.mark.parametrize("file1,file2,attributes,expect", generate_tests_image_diff())
+def test_files_image_diff(file1, file2, attributes, expect):
+    if expect is None:  # images are expected to be within tolerance: the call must not raise
+        files_image_diff(file1.path, file2.path, attributes)
+    else:
+        with pytest.raises(expect):
+            files_image_diff(file1.path, file2.path, attributes)