Merge pull request #135 from cloneofsimo/develop

v0.1.2
cloneofsimo · Jan 29, 2023 · 1707928 · 1707928
2 parents e19f6ae + 3519982
commit 1707928
Show file tree

Hide file tree

Showing 22 changed files with 1,244 additions and 408 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,5 +5,7 @@ __pycache__
 __test*
 merged_lora*
 wandb
-exps
-.vscode
+exps*
+.vscode
+build
+lora_diffusion.egg-info
diff --git a/README.md b/README.md
@@ -34,7 +34,7 @@
 
 ## Main Features
 
-- Fine-tune Stable diffusion models twice as faster than dreambooth method, by Low-rank Adaptation
+- Fine-tune Stable Diffusion models twice as fast as the dreambooth method, by Low-rank Adaptation
 - Get insanely small end result (1MB ~ 6MB), easy to share and download.
 - Compatible with `diffusers`
 - Sometimes _even better performance_ than full fine-tuning (but left as future work for extensive comparisons)
@@ -50,6 +50,14 @@
 
 # UPDATES & Notes
 
+### 2023/01/29
+
+- Dataset pipelines
+- LoRA Applied to Resnet as well, use `--use_extended_lora` to use it.
+- SVD distillation now supports resnet-lora as well.
+- Compvis format conversion script now works with safetensors, and for PTI it will also return the Textual Inversion format, so you can use it in the embeddings folder.
+- 🥳🥳, LoRA is now officially integrated into the amazing Huggingface 🤗 `diffusers` library! Check out the [Blog](https://huggingface.co/blog/lora) and [examples](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image#training-with-lora)! (NOTE: it CURRENTLY uses a DIFFERENT FILE FORMAT)
+
 ### 2023/01/09
 
 - Pivotal Tuning Inversion with extended latent
@@ -225,7 +233,7 @@ FLAGS
 ### Merging full model with LoRA
 
 ```bash
-$ lora_add --path_1 PATH_TO_DIFFUSER_FORMAT_MODEL --path_2 PATH_TO_LORA.PT --mode upl --alpha 1.0 --output_path OUTPUT_PATH
+$ lora_add PATH_TO_DIFFUSER_FORMAT_MODEL PATH_TO_LORA.safetensors OUTPUT_PATH ALPHA --mode upl
 ```
 
 `path_1` can be either a local path or a Hugging Face model name. When adding LoRA to unet, alpha is the constant as below:
@@ -239,47 +247,37 @@ So, set alpha to 1.0 to fully add LoRA. If the LoRA seems to have too much effec
 **Example**
 
 ```bash
-$ lora_add --path_1 stabilityai/stable-diffusion-2-base --path_2 lora_illust.pt --mode upl --alpha 1.0 --output_path merged_model
+$ lora_add runwayml/stable-diffusion-v1-5 ./example_loras/lora_krk.safetensors ./output_merged 0.8 --mode upl
 ```
 
 ### Merging Full model with LoRA and changing to original CKPT format
 
-_TESTED WITH V2, V2.1 ONLY!_
-
 Everything same as above, but with mode `upl-ckpt-v2` instead of `upl`.
 
 ```bash
-$ lora_add --path_1 stabilityai/stable-diffusion-2-base --path_2 lora_illust.pt --mode upl-ckpt-v2 --alpha 1.2 --output_path merged_model.ckpt
+$ lora_add runwayml/stable-diffusion-v1-5 ./example_loras/lora_krk.safetensors ./output_merged.ckpt 0.7 --mode upl-ckpt-v2
 ```
 
 ### Merging LoRA with LoRA
 
 ```bash
-$ lora_add --path_1 PATH_TO_LORA.PT --path_2 PATH_TO_LORA.PT --mode lpl --alpha 0.5 --output_path OUTPUT_PATH.PT
+$ lora_add PATH_TO_LORA1.safetensors PATH_TO_LORA2.safetensors OUTPUT_PATH.safetensors ALPHA_1 ALPHA_2
 ```
 
 $\alpha_1$ and $\alpha_2$ are the weights applied to the first and the second model, respectively, i.e.,
 
 $$
-\Delta W = (\alpha A_1 + (1 - \alpha) A_2) (\alpha B_1 + (1 - \alpha) B_2)^T
+\Delta W = (\alpha_1 A_1 + \alpha_2 A_2) (\alpha_1 B_1 + \alpha_2 B_2)^T
 $$
 
-Set alpha to 0.5 to get the average of the two models. Set alpha close to 1.0 to get more effect of the first model, and set alpha close to 0.0 to get more effect of the second model.
+Set $\alpha_1 = \alpha_2 = 0.5$ to get the average of the two models. Set $\alpha_1$ close to 1.0 to get more effect of the first model, and set $\alpha_2$ close to 1.0 to get more effect of the second model.
 
 **Example**
 
 ```bash
-$ lora_add --path_1 lora_illust.pt --path_2 lora_pop.pt --alpha 0.3 --output_path lora_merged.pt
-```
-
-### More bash examples with Text Encoder Lora:
-
-```bash
-$ lora_add --path_1 stabilityai/stable-diffusion-2-base --path_2 lora_kiriko.pt --mode upl-ckpt-v2 --alpha 1.2 --with_text_lora --output_path merged_model.ckpt
+$ lora_add ./example_loras/analog_svd_rank4.safetensors ./example_loras/lora_krk.safetensors ./krk_analog.safetensors 2.0 0.7
 ```
 
-: This will build a `merged_model.ckpt` with LoRA merged with $\alpha=1.2$ and text encoder LoRA.
-
 ### Making Text2Img Inference with trained LoRA
 
 Check out `scripts/run_inference.ipynb` for an example of how to make inference with LoRA.

diff --git a/contents/disney_lora.jpg b/contents/disney_lora.jpg
diff --git a/contents/lion_illust.jpg b/contents/lion_illust.jpg
diff --git a/contents/lora_pti_example.jpg b/contents/lora_pti_example.jpg
diff --git a/contents/pop_art.jpg b/contents/pop_art.jpg
diff --git a/example_loras/analog_svd_distill.pt b/example_loras/analog_svd_distill.pt
diff --git a/example_loras/analog_svd_distill.text_encoder.pt b/example_loras/analog_svd_distill.text_encoder.pt
diff --git a/example_loras/analog_svd_rank4.safetensors b/example_loras/analog_svd_rank4.safetensors
diff --git a/example_loras/analog_svd_rank8.safetensors b/example_loras/analog_svd_rank8.safetensors
diff --git a/example_loras/modern_disney_svd.safetensors b/example_loras/modern_disney_svd.safetensors
diff --git a/lora_diffusion/__init__.py b/lora_diffusion/__init__.py
@@ -1,3 +1,4 @@
 from .lora import *
 from .dataset import *
 from .utils import *
+from .preprocess_files import *
diff --git a/lora_diffusion/cli_lora_add.py b/lora_diffusion/cli_lora_add.py
@@ -3,9 +3,15 @@
 import shutil
 import fire
 from diffusers import StableDiffusionPipeline
+from safetensors.torch import safe_open, save_file
 
 import torch
-from .lora import tune_lora_scale, weight_apply_lora
+from .lora import (
+    tune_lora_scale,
+    patch_pipe,
+    collapse_lora,
+    monkeypatch_remove_lora,
+)
 from .to_ckpt_v2 import convert_to_ckpt
 
 
@@ -18,7 +24,8 @@ def add(
     path_1: str,
     path_2: str,
     output_path: str,
-    alpha: float = 0.5,
+    alpha_1: float = 0.5,
+    alpha_2: float = 0.5,
     mode: Literal[
         "lpl",
         "upl",
@@ -28,79 +35,116 @@ def add(
 ):
     print("Lora Add, mode " + mode)
     if mode == "lpl":
-        for _path_1, _path_2, opt in [(path_1, path_2, "unet")] + (
-            [(_text_lora_path(path_1), _text_lora_path(path_2), "text_encoder")]
-            if with_text_lora
-            else []
-        ):
-            print("Loading", _path_1, _path_2)
-            out_list = []
-            if opt == "text_encoder":
-                if not os.path.exists(_path_1):
-                    print(f"No text encoder found in {_path_1}, skipping...")
-                    continue
-                if not os.path.exists(_path_2):
-                    print(f"No text encoder found in {_path_1}, skipping...")
-                    continue
-
-            l1 = torch.load(_path_1)
-            l2 = torch.load(_path_2)
-
-            l1pairs = zip(l1[::2], l1[1::2])
-            l2pairs = zip(l2[::2], l2[1::2])
-
-            for (x1, y1), (x2, y2) in zip(l1pairs, l2pairs):
-                # print("Merging", x1.shape, y1.shape, x2.shape, y2.shape)
-                x1.data = alpha * x1.data + (1 - alpha) * x2.data
-                y1.data = alpha * y1.data + (1 - alpha) * y2.data
-
-                out_list.append(x1)
-                out_list.append(y1)
-
-            if opt == "unet":
-
-                print("Saving merged UNET to", output_path)
-                torch.save(out_list, output_path)
-
-            elif opt == "text_encoder":
-                print("Saving merged text encoder to", _text_lora_path(output_path))
-                torch.save(
-                    out_list,
-                    _text_lora_path(output_path),
-                )
+        if path_1.endswith(".pt") and path_2.endswith(".pt"):
+            for _path_1, _path_2, opt in [(path_1, path_2, "unet")] + (
+                [(_text_lora_path(path_1), _text_lora_path(path_2), "text_encoder")]
+                if with_text_lora
+                else []
+            ):
+                print("Loading", _path_1, _path_2)
+                out_list = []
+                if opt == "text_encoder":
+                    if not os.path.exists(_path_1):
+                        print(f"No text encoder found in {_path_1}, skipping...")
+                        continue
+                    if not os.path.exists(_path_2):
+                        print(f"No text encoder found in {_path_1}, skipping...")
+                        continue
+
+                l1 = torch.load(_path_1)
+                l2 = torch.load(_path_2)
+
+                l1pairs = zip(l1[::2], l1[1::2])
+                l2pairs = zip(l2[::2], l2[1::2])
+
+                for (x1, y1), (x2, y2) in zip(l1pairs, l2pairs):
+                    # print("Merging", x1.shape, y1.shape, x2.shape, y2.shape)
+                    x1.data = alpha_1 * x1.data + alpha_2 * x2.data
+                    y1.data = alpha_1 * y1.data + alpha_2 * y2.data
+
+                    out_list.append(x1)
+                    out_list.append(y1)
+
+                if opt == "unet":
+
+                    print("Saving merged UNET to", output_path)
+                    torch.save(out_list, output_path)
+
+                elif opt == "text_encoder":
+                    print("Saving merged text encoder to", _text_lora_path(output_path))
+                    torch.save(
+                        out_list,
+                        _text_lora_path(output_path),
+                    )
+
+        elif path_1.endswith(".safetensors") and path_2.endswith(".safetensors"):
+            safeloras_1 = safe_open(path_1, framework="pt", device="cpu")
+            safeloras_2 = safe_open(path_2, framework="pt", device="cpu")
+
+            metadata = dict(safeloras_1.metadata())
+            metadata.update(dict(safeloras_2.metadata()))
+
+            ret_tensor = {}
+
+            for keys in set(list(safeloras_1.keys()) + list(safeloras_2.keys())):
+                if keys.startswith("text_encoder") or keys.startswith("unet"):
+
+                    tens1 = safeloras_1.get_tensor(keys)
+                    tens2 = safeloras_2.get_tensor(keys)
+
+                    tens = alpha_1 * tens1 + alpha_2 * tens2
+                    ret_tensor[keys] = tens
+                else:
+                    if keys in safeloras_1.keys():
+
+                        tens1 = safeloras_1.get_tensor(keys)
+                    else:
+                        tens1 = safeloras_2.get_tensor(keys)
+
+                    ret_tensor[keys] = tens1
+
+            save_file(ret_tensor, output_path, metadata)
 
     elif mode == "upl":
 
+        print(
+            f"Merging UNET/CLIP from {path_1} with LoRA from {path_2} to {output_path}. Merging ratio : {alpha_1}."
+        )
+
         loaded_pipeline = StableDiffusionPipeline.from_pretrained(
             path_1,
         ).to("cpu")
 
-        weight_apply_lora(loaded_pipeline.unet, torch.load(path_2), alpha=alpha)
-        if with_text_lora:
+        patch_pipe(loaded_pipeline, path_2)
+
+        collapse_lora(loaded_pipeline.unet, alpha_1)
+        collapse_lora(loaded_pipeline.text_encoder, alpha_1)
 
-            weight_apply_lora(
-                loaded_pipeline.text_encoder,
-                torch.load(_text_lora_path(path_2)),
-                alpha=alpha,
-                target_replace_module=["CLIPAttention"],
-            )
+        monkeypatch_remove_lora(loaded_pipeline.unet)
+        monkeypatch_remove_lora(loaded_pipeline.text_encoder)
 
         loaded_pipeline.save_pretrained(output_path)
 
     elif mode == "upl-ckpt-v2":
 
+        assert output_path.endswith(".ckpt"), "Only .ckpt files are supported"
+        name = os.path.basename(output_path)[0:-5]
+
+        print(
+            f"You will be using {name} as the token in A1111 webui. Make sure {name} is unique enough token."
+        )
+
         loaded_pipeline = StableDiffusionPipeline.from_pretrained(
             path_1,
         ).to("cpu")
 
-        weight_apply_lora(loaded_pipeline.unet, torch.load(path_2), alpha=alpha)
-        if with_text_lora:
-            weight_apply_lora(
-                loaded_pipeline.text_encoder,
-                torch.load(_text_lora_path(path_2)),
-                alpha=alpha,
-                target_replace_module=["CLIPAttention"],
-            )
+        tok_dict = patch_pipe(loaded_pipeline, path_2, patch_ti=False)
+
+        collapse_lora(loaded_pipeline.unet, alpha_1)
+        collapse_lora(loaded_pipeline.text_encoder, alpha_1)
+
+        monkeypatch_remove_lora(loaded_pipeline.unet)
+        monkeypatch_remove_lora(loaded_pipeline.text_encoder)
 
         _tmp_output = output_path + ".tmp"
 
@@ -109,6 +153,19 @@ def add(
         # remove the tmp_output folder
         shutil.rmtree(_tmp_output)
 
+        keys = sorted(tok_dict.keys())
+        tok_catted = torch.stack([tok_dict[k] for k in keys])
+        ret = {
+            "string_to_token": {"*": torch.tensor(265)},
+            "string_to_param": {"*": tok_catted},
+            "name": name,
+        }
+
+        torch.save(ret, output_path[:-5] + ".pt")
+        print(
+            f"Textual embedding saved as {output_path[:-5]}.pt, put it in the embedding folder and use it as {name} in A1111 repo, "
+        )
+
     else:
         print("Unknown mode", mode)
         raise ValueError(f"Unknown mode {mode}")
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,5 +5,7 @@ __pycache__ @@
     __test*
     merged_lora*
     wandb
-    exps
-    .vscode
+    exps*
+    .vscode
+    build
+    lora_diffusion.egg-info