Merge branch 'master' into execution_model_inversion
guill committed Aug 7, 2024
2 parents 887ceb3 + b334605 commit 655548d
Showing 33 changed files with 325 additions and 141 deletions.
92 changes: 43 additions & 49 deletions .github/workflows/stable-release.yml
@@ -2,9 +2,28 @@
name: "Release Stable Version"

on:
push:
tags:
- 'v*'
workflow_dispatch:
inputs:
git_tag:
description: 'Git tag'
required: true
type: string
cu:
description: 'CUDA version'
required: true
type: string
default: "121"
python_minor:
description: 'Python minor version'
required: true
type: string
default: "11"
python_patch:
description: 'Python patch version'
required: true
type: string
default: "9"


jobs:
package_comfy_windows:
@@ -13,69 +32,44 @@ jobs:
packages: "write"
pull-requests: "read"
runs-on: windows-latest
strategy:
matrix:
python_version: [3.11.8]
cuda_version: [121]
steps:
- name: Calculate Minor Version
shell: bash
run: |
# Extract the minor version from the Python version
MINOR_VERSION=$(echo "${{ matrix.python_version }}" | cut -d'.' -f2)
echo "MINOR_VERSION=$MINOR_VERSION" >> $GITHUB_ENV
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python_version }}

- uses: actions/checkout@v4
with:
ref: ${{ inputs.git_tag }}
fetch-depth: 0
persist-credentials: false
- uses: actions/cache/restore@v4
id: cache
with:
path: |
cu${{ inputs.cu }}_python_deps.tar
update_comfyui_and_python_dependencies.bat
key: ${{ runner.os }}-build-cu${{ inputs.cu }}-${{ inputs.python_minor }}
- shell: bash
run: |
echo "@echo off
call update_comfyui.bat nopause
echo -
echo This will try to update pytorch and all python dependencies.
echo -
echo If you just want to update normally, close this and run update_comfyui.bat instead.
echo -
pause
..\python_embeded\python.exe -s -m pip install --upgrade torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu${{ matrix.cuda_version }} -r ../ComfyUI/requirements.txt pygit2
pause" > update_comfyui_and_python_dependencies.bat
python -m pip wheel --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu${{ matrix.cuda_version }} -r requirements.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
ls -lah temp_wheel_dir
mv temp_wheel_dir cu${{ matrix.cuda_version }}_python_deps
mv cu${{ matrix.cuda_version }}_python_deps ../
mv cu${{ inputs.cu }}_python_deps.tar ../
mv update_comfyui_and_python_dependencies.bat ../
cd ..
tar xf cu${{ inputs.cu }}_python_deps.tar
pwd
ls
- shell: bash
run: |
cd ..
cp -r ComfyUI ComfyUI_copy
curl https://www.python.org/ftp/python/${{ matrix.python_version }}/python-${{ matrix.python_version }}-embed-amd64.zip -o python_embeded.zip
curl https://www.python.org/ftp/python/3.${{ inputs.python_minor }}.${{ inputs.python_patch }}/python-3.${{ inputs.python_minor }}.${{ inputs.python_patch }}-embed-amd64.zip -o python_embeded.zip
unzip python_embeded.zip -d python_embeded
cd python_embeded
echo ${{ env.MINOR_VERSION }}
echo 'import site' >> ./python3${{ env.MINOR_VERSION }}._pth
echo 'import site' >> ./python3${{ inputs.python_minor }}._pth
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
./python.exe get-pip.py
./python.exe --version
echo "Pip version:"
./python.exe -m pip --version
set PATH=$PWD/Scripts:$PATH
echo $PATH
./python.exe -s -m pip install ../cu${{ matrix.cuda_version }}_python_deps/*
sed -i '1i../ComfyUI' ./python3${{ env.MINOR_VERSION }}._pth
cd ..
./python.exe -s -m pip install ../cu${{ inputs.cu }}_python_deps/*
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
cd ..
git clone https://github.com/comfyanonymous/taesd
git clone --depth 1 https://github.com/comfyanonymous/taesd
cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/
mkdir ComfyUI_windows_portable
@@ -104,7 +98,7 @@ jobs:
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: ComfyUI_windows_portable_nvidia.7z
tag: ${{ github.ref }}
tag: ${{ inputs.git_tag }}
overwrite: true
prerelease: true
make_latest: false
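
Note: the release workflow now runs on `workflow_dispatch` instead of tag pushes, with the CUDA and Python versions supplied as inputs rather than a build matrix. A small sketch of how those inputs thread through the later steps, using the declared default values (illustration only, not part of the commit):

```python
# Illustration: how the workflow_dispatch inputs feed the cache key and the
# embeddable-Python download used during packaging (default input values).
cu, python_minor, python_patch = "121", "11", "9"

cache_key = f"Windows-build-cu{cu}-{python_minor}"   # runner.os is "Windows" on windows-latest
deps_archive = f"cu{cu}_python_deps.tar"             # cached wheel bundle restored above
embed_zip = (
    "https://www.python.org/ftp/python/"
    f"3.{python_minor}.{python_patch}/python-3.{python_minor}.{python_patch}-embed-amd64.zip"
)
print(cache_key, deps_archive, embed_zip, sep="\n")
```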
2 changes: 1 addition & 1 deletion .github/workflows/test-browser.yml
@@ -32,7 +32,7 @@ jobs:
node-version: lts/*
- uses: actions/setup-python@v4
with:
python-version: '3.10'
python-version: '3.8'
- name: Install requirements
run: |
python -m pip install --upgrade pip
2 changes: 1 addition & 1 deletion .github/workflows/windows_release_nightly_pytorch.yml
@@ -55,7 +55,7 @@ jobs:
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
cd ..
git clone https://github.com/comfyanonymous/taesd
git clone --depth 1 https://github.com/comfyanonymous/taesd
cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/
mkdir ComfyUI_windows_portable_nightly_pytorch
2 changes: 1 addition & 1 deletion .github/workflows/windows_release_package.yml
@@ -66,7 +66,7 @@ jobs:
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
cd ..
git clone https://github.com/comfyanonymous/taesd
git clone --depth 1 https://github.com/comfyanonymous/taesd
cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/
mkdir ComfyUI_windows_portable
14 changes: 0 additions & 14 deletions README.md
@@ -165,20 +165,6 @@ You can install ComfyUI in Apple Mac silicon (M1 or M2) with any recent macOS ve

```pip install torch-directml``` Then you can launch ComfyUI with: ```python main.py --directml```

### I already have another UI for Stable Diffusion installed do I really have to install all of these dependencies?

You don't. If you have another UI installed and working with its own python venv you can use that venv to run ComfyUI. You can open up your favorite terminal and activate it:

```source path_to_other_sd_gui/venv/bin/activate```

or on Windows:

With Powershell: ```"path_to_other_sd_gui\venv\Scripts\Activate.ps1"```

With cmd.exe: ```"path_to_other_sd_gui\venv\Scripts\activate.bat"```

And then you can use that terminal to run ComfyUI without installing any dependencies. Note that the venv folder might be called something else depending on the SD UI.

# Running

```python main.py```
6 changes: 2 additions & 4 deletions comfy/ldm/aura/mmdit.py
@@ -9,6 +9,7 @@

from comfy.ldm.modules.attention import optimized_attention
import comfy.ops
import comfy.ldm.common_dit

def modulate(x, shift, scale):
return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)
@@ -407,10 +408,7 @@ def unpatchify(self, x, h, w):

def patchify(self, x):
B, C, H, W = x.size()
pad_h = (self.patch_size - H % self.patch_size) % self.patch_size
pad_w = (self.patch_size - W % self.patch_size) % self.patch_size

x = torch.nn.functional.pad(x, (0, pad_w, 0, pad_h), mode='circular')
x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size))
x = x.view(
B,
C,
8 changes: 8 additions & 0 deletions comfy/ldm/common_dit.py
@@ -0,0 +1,8 @@
import torch

def pad_to_patch_size(img, patch_size=(2, 2), padding_mode="circular"):
if padding_mode == "circular" and torch.jit.is_tracing() or torch.jit.is_scripting():
padding_mode = "reflect"
pad_h = (patch_size[0] - img.shape[-2] % patch_size[0]) % patch_size[0]
pad_w = (patch_size[1] - img.shape[-1] % patch_size[1]) % patch_size[1]
return torch.nn.functional.pad(img, (0, pad_w, 0, pad_h), mode=padding_mode)
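
A quick sketch of what the new helper does: it pads the bottom and right edges so both spatial dimensions become multiples of the patch size, falling back from circular to reflect padding under torch.jit tracing/scripting. The example shapes below are assumptions, not taken from the commit:

```python
# Sketch of pad_to_patch_size behaviour (assumed example shapes).
import torch
import comfy.ldm.common_dit  # requires ComfyUI on the Python path

x = torch.randn(1, 16, 31, 45)                              # H, W not multiples of 2
padded = comfy.ldm.common_dit.pad_to_patch_size(x, (2, 2))  # pads right/bottom edges
print(padded.shape)                                          # torch.Size([1, 16, 32, 46])
```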
2 changes: 1 addition & 1 deletion comfy/ldm/flux/math.py
@@ -14,7 +14,7 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:

def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
assert dim % 2 == 0
if comfy.model_management.is_device_mps(pos.device):
if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu():
device = torch.device("cpu")
else:
device = pos.device
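
The `rope` change extends the existing MPS workaround to Intel XPU: the rotary-embedding frequencies are computed on the CPU for backends that mishandle the required ops, then moved back. A minimal sketch of that device-fallback pattern (not the full Flux rope; the math is simplified):

```python
# Simplified sketch of the CPU-fallback pattern; the real rope() also builds
# a cos/sin rotation tensor, which is omitted here.
import torch

def rope_freqs(pos: torch.Tensor, dim: int, theta: int = 10000) -> torch.Tensor:
    assert dim % 2 == 0
    # Compute on CPU for backends with incomplete float64/op support.
    device = torch.device("cpu") if pos.device.type in ("mps", "xpu") else pos.device
    scale = torch.arange(0, dim, 2, dtype=torch.float64, device=device) / dim
    omega = 1.0 / (theta ** scale)
    out = torch.einsum("...n,d->...nd", pos.to(device=device, dtype=torch.float64), omega)
    return out.to(pos.device)  # move the result back to the caller's device

print(rope_freqs(torch.arange(4, dtype=torch.float32), dim=8).shape)  # torch.Size([4, 4])
```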
8 changes: 3 additions & 5 deletions comfy/ldm/flux/model.py
@@ -15,6 +15,7 @@
)

from einops import rearrange, repeat
import comfy.ldm.common_dit

@dataclass
class FluxParams:
@@ -42,7 +43,7 @@ def __init__(self, image_model=None, dtype=None, device=None, operations=None, *
self.dtype = dtype
params = FluxParams(**kwargs)
self.params = params
self.in_channels = params.in_channels
self.in_channels = params.in_channels * 2 * 2
self.out_channels = self.in_channels
if params.hidden_size % params.num_heads != 0:
raise ValueError(
@@ -125,10 +126,7 @@ def forward_orig(
def forward(self, x, timestep, context, y, guidance, **kwargs):
bs, c, h, w = x.shape
patch_size = 2
pad_h = (patch_size - h % 2) % patch_size
pad_w = (patch_size - w % 2) % patch_size

x = torch.nn.functional.pad(x, (0, pad_w, 0, pad_h), mode='circular')
x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))

img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)

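
In `Flux.forward`, the inline padding is replaced by the shared helper, after which the latent is patchified by folding each 2×2 patch into the channel axis; that is why `in_channels` above becomes `params.in_channels * 2 * 2`. A small sketch of the shape bookkeeping with assumed sizes:

```python
# Assumed latent sizes; shows why 16 latent channels become 64 model inputs.
import torch
from einops import rearrange

x = torch.randn(1, 16, 31, 45)          # (batch, latent channels, H, W)
patch_size = 2
pad_h = (patch_size - x.shape[-2] % patch_size) % patch_size
pad_w = (patch_size - x.shape[-1] % patch_size) % patch_size
x = torch.nn.functional.pad(x, (0, pad_w, 0, pad_h), mode="circular")

img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
print(img.shape)                         # torch.Size([1, 368, 64]): 16 * 2 * 2 = 64 per token
```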
5 changes: 2 additions & 3 deletions comfy/ldm/modules/diffusionmodules/mmdit.py
@@ -9,6 +9,7 @@
from einops import rearrange, repeat
from .util import timestep_embedding
import comfy.ops
import comfy.ldm.common_dit

def default(x, y):
if x is not None:
@@ -111,9 +112,7 @@ def forward(self, x):
# f"Input width ({W}) should be divisible by patch size ({self.patch_size[1]})."
# )
if self.dynamic_img_pad:
pad_h = (self.patch_size[0] - H % self.patch_size[0]) % self.patch_size[0]
pad_w = (self.patch_size[1] - W % self.patch_size[1]) % self.patch_size[1]
x = torch.nn.functional.pad(x, (0, pad_w, 0, pad_h), mode=self.padding_mode)
x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size, padding_mode=self.padding_mode)
x = self.proj(x)
if self.flatten:
x = x.flatten(2).transpose(1, 2) # NCHW -> NLC
8 changes: 8 additions & 0 deletions comfy/lora.py
@@ -288,4 +288,12 @@ def model_lora_keys_unet(model, key_map={}):
key_lora = k[len("diffusion_model."):-len(".weight")]
key_map["base_model.model.{}".format(key_lora)] = k #official hunyuan lora format

if isinstance(model, comfy.model_base.Flux): #Diffusers lora Flux
diffusers_keys = comfy.utils.flux_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.")
for k in diffusers_keys:
if k.endswith(".weight"):
to = diffusers_keys[k]
key_lora = "transformer.{}".format(k[:-len(".weight")]) #simpletrainer and probably regular diffusers flux lora format
key_map[key_lora] = to

return key_map
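
The new block maps Diffusers-style Flux LoRA keys (as produced by simpletrainer and similar trainers) onto ComfyUI's `diffusion_model.*` weight names via `comfy.utils.flux_to_diffusers`. A hypothetical illustration of the resulting mapping shape; the key names below are made up for clarity and not taken from `flux_to_diffusers`:

```python
# Hypothetical key names; only the "transformer." prefixing pattern is real.
diffusers_keys = {
    "transformer_blocks.0.attn.to_q.weight": "diffusion_model.double_blocks.0.img_attn.qkv.weight",
}

key_map = {}
for k, to in diffusers_keys.items():
    if k.endswith(".weight"):
        key_lora = "transformer.{}".format(k[: -len(".weight")])
        key_map[key_lora] = to

print(key_map)
# {'transformer.transformer_blocks.0.attn.to_q': 'diffusion_model.double_blocks.0.img_attn.qkv.weight'}
```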
31 changes: 6 additions & 25 deletions comfy/model_base.py
@@ -74,6 +74,7 @@ def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_mod
self.latent_format = model_config.latent_format
self.model_config = model_config
self.manual_cast_dtype = model_config.manual_cast_dtype
self.device = device

if not unet_config.get("disable_unet_model_creation", False):
if self.manual_cast_dtype is not None:
@@ -84,6 +85,7 @@ def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_mod
if comfy.model_management.force_channels_last():
self.diffusion_model.to(memory_format=torch.channels_last)
logging.debug("using channels last mode for diffusion model")
logging.info("model weight dtype {}, manual cast: {}".format(self.get_dtype(), self.manual_cast_dtype))
self.model_type = model_type
self.model_sampling = model_sampling(model_config, model_type)

@@ -94,6 +96,7 @@ def __init__(self, model_config, model_type=ModelType.EPS, device=None, unet_mod
self.concat_keys = ()
logging.info("model_type {}".format(model_type.name))
logging.debug("adm {}".format(self.adm_channels))
self.memory_usage_factor = model_config.memory_usage_factor

def apply_model(self, x, t, c_concat=None, c_crossattn=None, control=None, transformer_options={}, **kwargs):
sigma = t
@@ -252,11 +255,11 @@ def memory_required(self, input_shape):
dtype = self.manual_cast_dtype
#TODO: this needs to be tweaked
area = input_shape[0] * math.prod(input_shape[2:])
return (area * comfy.model_management.dtype_size(dtype) / 50) * (1024 * 1024)
return (area * comfy.model_management.dtype_size(dtype) * 0.01 * self.memory_usage_factor) * (1024 * 1024)
else:
#TODO: this formula might be too aggressive since I tweaked the sub-quad and split algorithms to use less memory.
area = input_shape[0] * math.prod(input_shape[2:])
return (area * 0.3) * (1024 * 1024)
return (area * 0.15 * self.memory_usage_factor) * (1024 * 1024)


def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0, seed=None):
@@ -354,6 +357,7 @@ def encode_adm(self, **kwargs):
flat = torch.flatten(torch.cat(out)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1)
return torch.cat((clip_pooled.to(flat.device), flat), dim=1)


class SVD_img2vid(BaseModel):
def __init__(self, model_config, model_type=ModelType.V_PREDICTION_EDM, device=None):
super().__init__(model_config, model_type, device=device)
@@ -594,17 +598,6 @@ def extra_conds(self, **kwargs):
out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
return out

def memory_required(self, input_shape):
if comfy.model_management.xformers_enabled() or comfy.model_management.pytorch_attention_flash_attention():
dtype = self.get_dtype()
if self.manual_cast_dtype is not None:
dtype = self.manual_cast_dtype
#TODO: this probably needs to be tweaked
area = input_shape[0] * input_shape[2] * input_shape[3]
return (area * comfy.model_management.dtype_size(dtype) * 0.012) * (1024 * 1024)
else:
area = input_shape[0] * input_shape[2] * input_shape[3]
return (area * 0.3) * (1024 * 1024)

class AuraFlow(BaseModel):
def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
@@ -702,15 +695,3 @@ def extra_conds(self, **kwargs):
out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([kwargs.get("guidance", 3.5)]))
return out

def memory_required(self, input_shape):
if comfy.model_management.xformers_enabled() or comfy.model_management.pytorch_attention_flash_attention():
dtype = self.get_dtype()
if self.manual_cast_dtype is not None:
dtype = self.manual_cast_dtype
#TODO: this probably needs to be tweaked
area = input_shape[0] * input_shape[2] * input_shape[3]
return (area * comfy.model_management.dtype_size(dtype) * 0.020) * (1024 * 1024)
else:
area = input_shape[0] * input_shape[2] * input_shape[3]
return (area * 0.3) * (1024 * 1024)
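
With the base `memory_required` now scaled by a per-model `memory_usage_factor`, the Flux- and AuraFlow-specific overrides above become redundant and are removed. A rough worked example of the revised flash-attention branch, using assumed values (a 128×128 latent, fp16 weights, and a made-up usage factor):

```python
# Assumed values for illustration; memory_usage_factor comes from the model config.
import math

input_shape = (1, 16, 128, 128)      # (batch, channels, latent height, latent width)
dtype_size = 2                        # bytes per element for fp16
memory_usage_factor = 2.8             # hypothetical per-model constant

area = input_shape[0] * math.prod(input_shape[2:])
estimate = (area * dtype_size * 0.01 * memory_usage_factor) * (1024 * 1024)
print(f"{estimate / 1024**3:.2f} GiB")  # ~0.90 GiB for this example
```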
2 changes: 1 addition & 1 deletion comfy/model_detection.py
@@ -131,7 +131,7 @@ def detect_unet_config(state_dict, key_prefix):
if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys: #Flux
dit_config = {}
dit_config["image_model"] = "flux"
dit_config["in_channels"] = 64
dit_config["in_channels"] = 16
dit_config["vec_in_dim"] = 768
dit_config["context_in_dim"] = 4096
dit_config["hidden_size"] = 3072
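
The detected `in_channels` for Flux drops from 64 to 16 because it now records the latent channel count; the model itself (see `comfy/ldm/flux/model.py` above) multiplies by the 2×2 patch area to recover the 64 input features. A one-line sanity check under those assumptions:

```python
# Assumed relationship between the two changed values in this commit.
latent_channels = 16                 # dit_config["in_channels"] after the change
patch_size = 2
print(latent_channels * patch_size * patch_size)  # 64, the previously hard-coded value
```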