Tiled VAE: fix bug with pathologic size (tile size - overlap + 1)

This fixes the error: "The size of tensor a (128) must match the size of tensor b (0) at non-singleton dimension 0"
finegrain-ai · Nov 28, 2024 · f67717a · f67717a
1 parent e708c31
commit f67717a
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 2 deletions.
diff --git a/src/refiners/foundationals/latent_diffusion/auto_encoder.py b/src/refiners/foundationals/latent_diffusion/auto_encoder.py
@@ -415,8 +415,8 @@ def _generate_latent_tiles(size: _ImageSize, tile_size: _ImageSize, overlap: int
         """
         tiles: list[_Tile] = []
 
-        for x in range(0, size.width, tile_size.width - overlap):
-            for y in range(0, size.height, tile_size.height - overlap):
+        for x in range(0, max(size.width - overlap, 1), tile_size.width - overlap):
+            for y in range(0, max(size.height - overlap, 1), tile_size.height - overlap):
                 tile = _Tile(
                     top=max(0, y),
                     left=max(0, x),

diff --git a/tests/foundationals/latent_diffusion/test_autoencoders.py b/tests/foundationals/latent_diffusion/test_autoencoders.py
@@ -107,6 +107,29 @@ def test_tiled_autoencoder_rectangular_image(autoencoder: LatentDiffusionAutoenc
     ensure_similar_images(sample_image, result, min_psnr=37, min_ssim=0.985)
 
 
+@no_grad()
+@pytest.mark.parametrize("tile_w", [240, 242, 244, 254, 256, 258])
+def test_tiled_autoencoder_pathologic_sizes(
+    refiners_sd15_autoencoder: SD1Autoencoder,
+    sample_image: Image.Image,
+    test_device: torch.device,
+    tile_w: int,
+):
+    # 242 is the true pathologic case, a tile just larger than (tile size - overlap).
+    # 242 * 4 = 968 = (128 - 8 + 1) * 8
+
+    autoencoder = refiners_sd15_autoencoder.to(device=test_device, dtype=torch.float32)
+
+    sample_image = sample_image.crop((0, 0, tile_w, 400))
+    sample_image = sample_image.resize((sample_image.width * 4, sample_image.height * 4))
+
+    with autoencoder.tiled_inference(sample_image, tile_size=(1024, 1024)):
+        encoded = autoencoder.tiled_image_to_latents(sample_image)
+        result = autoencoder.tiled_latents_to_image(encoded)
+
+    ensure_similar_images(sample_image, result, min_psnr=37, min_ssim=0.985)
+
+
 def test_value_error_tile_encode_no_context(autoencoder: LatentDiffusionAutoencoder, sample_image: Image.Image) -> None:
     with pytest.raises(ValueError):
         autoencoder.tiled_image_to_latents(sample_image)