From 5b0a9d868620e5932fa17d3f96b344e46eff484b Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 27 Aug 2024 17:14:09 +0900 Subject: [PATCH 01/17] Implement of accelerate and long token (under 231). --- modules/model/StableDiffusionModel.py | 63 +++++++++++++---- modules/model/StableDiffusionXLModel.py | 94 ++++++++++++++++--------- modules/trainer/GenericTrainer.py | 13 +++- requirements-global.txt | 5 +- 4 files changed, 127 insertions(+), 48 deletions(-) diff --git a/modules/model/StableDiffusionModel.py b/modules/model/StableDiffusionModel.py index d704a778..867633a9 100644 --- a/modules/model/StableDiffusionModel.py +++ b/modules/model/StableDiffusionModel.py @@ -222,25 +222,64 @@ def encode_text( text_encoder_layer_skip: int = 0, text_encoder_output: Tensor | None = None, ): + chunk_length = 75 + max_embeddings_multiples = 3 + + def __process_tokens(tokens): + if tokens is None or tokens.numel() == 0: + return None + + chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] + chunk_embeddings = [] + + for chunk in chunks: + if chunk.numel() == 0: + continue + + if chunk.shape[1] < chunk_length: + padding = torch.full((chunk.shape[0], chunk_length - chunk.shape[1]), self.tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([chunk, padding], dim=1) + + bos_tokens = torch.full((chunk.shape[0], 1), self.tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) + eos_tokens = torch.full((chunk.shape[0], 1), self.tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) + + with self.autocast_context: + embedding, _ = encode_clip( + text_encoder=self.text_encoder, + tokens=chunk, + default_layer=-1, + layer_skip=text_encoder_layer_skip, + text_encoder_output=None, + add_pooled_output=False, + use_attention_mask=False, + add_layer_norm=True, + ) + + chunk_embeddings.append(embedding) + + if not chunk_embeddings: + return None + + if len(chunk_embeddings) > max_embeddings_multiples: + chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + + combined_embedding = torch.cat(chunk_embeddings, dim=1) + + return combined_embedding + if tokens is None: tokenizer_output = self.tokenizer( text, padding='max_length', - truncation=True, - max_length=77, + truncation=False, return_tensors="pt", ) tokens = tokenizer_output.input_ids.to(self.text_encoder.device) - text_encoder_output, _ = encode_clip( - text_encoder=self.text_encoder, - tokens=tokens, - default_layer=-1, - layer_skip=text_encoder_layer_skip, - text_encoder_output=text_encoder_output, - add_pooled_output=False, - use_attention_mask=False, - add_layer_norm=True, - ) + text_encoder_output = __process_tokens(tokens) + + if text_encoder_output is None: + print("Text encoder output is None. 
Check your input text or tokens.") return text_encoder_output diff --git a/modules/model/StableDiffusionXLModel.py b/modules/model/StableDiffusionXLModel.py index 09384f68..b44bd9c5 100644 --- a/modules/model/StableDiffusionXLModel.py +++ b/modules/model/StableDiffusionXLModel.py @@ -203,49 +203,79 @@ def encode_text( text_encoder_2_output: Tensor = None, pooled_text_encoder_2_output: Tensor = None, ): + chunk_length = 75 + max_embeddings_multiples = 3 + + def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): + if tokens is None or tokens.numel() == 0: + return None, None + + chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] + chunk_embeddings = [] + pooled_outputs = [] + + for chunk in chunks: + if chunk.numel() == 0: + continue + + if chunk.shape[1] < chunk_length: + padding = torch.full((chunk.shape[0], chunk_length - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([chunk, padding], dim=1) + + bos_tokens = torch.full((chunk.shape[0], 1), tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) + eos_tokens = torch.full((chunk.shape[0], 1), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) + + with self.autocast_context: + outputs = text_encoder( + chunk, + output_hidden_states=True, + return_dict=True, + ) + embedding = outputs.hidden_states[-(2 + layer_skip)] + if hasattr(outputs, 'text_embeds'): + pooled_outputs.append(outputs.text_embeds) + + chunk_embeddings.append(embedding) + + if not chunk_embeddings: + return None, None + + if len(chunk_embeddings) > max_embeddings_multiples: + chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + if pooled_outputs: + pooled_outputs = pooled_outputs[:max_embeddings_multiples] + + combined_embedding = torch.cat(chunk_embeddings, dim=1) + pooled_output = pooled_outputs[0] if pooled_outputs else None + + return combined_embedding, pooled_output + if tokens_1 is None and text is not None: - tokenizer_output = self.tokenizer_1( + tokens_1 = self.tokenizer_1( text, padding='max_length', - truncation=True, - max_length=77, + truncation=False, return_tensors="pt", - ) - tokens_1 = tokenizer_output.input_ids.to(self.text_encoder_1.device) + ).input_ids.to(self.text_encoder_1.device) if tokens_2 is None and text is not None: - tokenizer_output = self.tokenizer_2( + tokens_2 = self.tokenizer_2( text, padding='max_length', - truncation=True, - max_length=77, + truncation=False, return_tensors="pt", - ) - tokens_2 = tokenizer_output.input_ids.to(self.text_encoder_2.device) + ).input_ids.to(self.text_encoder_2.device) - text_encoder_1_output, _ = encode_clip( - text_encoder=self.text_encoder_1, - tokens=tokens_1, - default_layer=-2, - layer_skip=text_encoder_1_layer_skip, - text_encoder_output=text_encoder_1_output, - add_pooled_output=False, - use_attention_mask=False, - add_layer_norm=False, - ) + if text_encoder_1_output is None: + text_encoder_1_output, _ = __process_tokens(tokens_1, self.tokenizer_1, self.text_encoder_1, text_encoder_1_layer_skip) - text_encoder_2_output, pooled_text_encoder_2_output = encode_clip( - text_encoder=self.text_encoder_2, - tokens=tokens_2, - default_layer=-2, - layer_skip=text_encoder_2_layer_skip, - text_encoder_output=text_encoder_2_output, - add_pooled_output=True, - pooled_text_encoder_output=pooled_text_encoder_2_output, - use_attention_mask=False, - add_layer_norm=False, - ) + if text_encoder_2_output is None or 
pooled_text_encoder_2_output is None: + text_encoder_2_output, pooled_text_encoder_2_output = __process_tokens(tokens_2, self.tokenizer_2, self.text_encoder_2, text_encoder_2_layer_skip) + + if text_encoder_1_output is None or text_encoder_2_output is None: + print("Both text encoder outputs are None. Check your input text or tokens.") - text_encoder_output = torch.concat([text_encoder_1_output, text_encoder_2_output], dim=-1) + text_encoder_output = torch.cat([text_encoder_1_output, text_encoder_2_output], dim=-1) return text_encoder_output, pooled_text_encoder_2_output diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py index 78ebe641..5ff41e36 100644 --- a/modules/trainer/GenericTrainer.py +++ b/modules/trainer/GenericTrainer.py @@ -29,6 +29,8 @@ from modules.util.torch_util import torch_gc from modules.util.TrainProgress import TrainProgress +from accelerate import Accelerator + import torch from torch import Tensor, nn from torch.nn import Parameter @@ -61,6 +63,8 @@ class GenericTrainer(BaseTrainer): def __init__(self, config: TrainConfig, callbacks: TrainCallbacks, commands: TrainCommands): super(GenericTrainer, self).__init__(config, callbacks, commands) + self.accelerator = Accelerator() + tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard") os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True) self.tensorboard = SummaryWriter(os.path.join(tensorboard_log_dir, get_string_timestamp())) @@ -137,6 +141,11 @@ def start(self): self.data_loader = self.create_data_loader( self.model, self.model.train_progress ) + + self.model, self.data_loader = self.accelerator.prepare( + self.model, self.data_loader + ) + self.model_saver = self.create_model_saver() self.model_sampler = self.create_model_sampler(self.model) @@ -578,9 +587,9 @@ def sample_commands_fun(): loss = loss / self.config.gradient_accumulation_steps if scaler: - scaler.scale(loss).backward() + self.accelerator.backward(scaler.scale(loss)) else: - loss.backward() + self.accelerator.backward(loss) has_gradient = True accumulated_loss += loss.item() diff --git a/requirements-global.txt b/requirements-global.txt index 60f28732..0df71008 100644 --- a/requirements-global.txt +++ b/requirements-global.txt @@ -11,11 +11,12 @@ matplotlib==3.9.0 # pytorch accelerate==0.30.1 safetensors==0.4.3 -tensorboard==2.17.0 +tensorboard==2.17.1 pytorch-lightning==2.2.5 # stable diffusion --e git+https://github.com/huggingface/diffusers.git@dd4b731#egg=diffusers +# -e git+https://github.com/huggingface/diffusers.git@dd4b731#egg=diffusers +diffusers==0.30.0 transformers==4.42.3 omegaconf==2.3.0 # needed to load stable diffusion from single ckpt files invisible-watermark==0.2.0 # needed for the SDXL pipeline From 4c7052884df55871c54866f8ecd3d37fa35daf81 Mon Sep 17 00:00:00 2001 From: celll1 Date: Fri, 30 Aug 2024 21:27:03 +0900 Subject: [PATCH 02/17] implementation of attention mask --- modules/model/StableDiffusionXLModel.py | 26 ++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/modules/model/StableDiffusionXLModel.py b/modules/model/StableDiffusionXLModel.py index b44bd9c5..ba610147 100644 --- a/modules/model/StableDiffusionXLModel.py +++ b/modules/model/StableDiffusionXLModel.py @@ -213,22 +213,36 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] chunk_embeddings = [] pooled_outputs = [] + attention_masks = [] - for chunk in chunks: + for i, 
chunk in enumerate(chunks): if chunk.numel() == 0: continue - if chunk.shape[1] < chunk_length: - padding = torch.full((chunk.shape[0], chunk_length - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([chunk, padding], dim=1) + # アテンションマスクの作成(1がマスクしない、0がマスクする) + attention_mask = torch.ones_like(chunk, dtype=torch.bool) + # まず、BOSとEOSを追加 bos_tokens = torch.full((chunk.shape[0], 1), tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) eos_tokens = torch.full((chunk.shape[0], 1), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) + attention_mask = torch.cat([torch.zeros_like(bos_tokens, dtype=torch.bool) if i > 0 else torch.ones_like(bos_tokens, dtype=torch.bool), + attention_mask, + torch.zeros_like(eos_tokens, dtype=torch.bool) if i < len(chunks) - 1 else torch.ones_like(eos_tokens, dtype=torch.bool)], + dim=1) + + # パディングで埋める + if chunk.shape[1] < chunk_length + 2: # +2 はBOSとEOSのため + padding = torch.full((chunk.shape[0], chunk_length + 2 - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([chunk, padding], dim=1) + attention_mask = torch.cat([attention_mask, torch.zeros_like(padding, dtype=torch.bool)], dim=1) + + attention_masks.append(attention_mask) with self.autocast_context: outputs = text_encoder( chunk, + attention_mask=attention_mask, output_hidden_states=True, return_dict=True, ) @@ -243,10 +257,12 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): if len(chunk_embeddings) > max_embeddings_multiples: chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + attention_masks = attention_masks[:max_embeddings_multiples] if pooled_outputs: pooled_outputs = pooled_outputs[:max_embeddings_multiples] combined_embedding = torch.cat(chunk_embeddings, dim=1) + # combined_attention_mask = torch.cat(attention_masks, dim=1) pooled_output = pooled_outputs[0] if pooled_outputs else None return combined_embedding, pooled_output @@ -274,7 +290,7 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): text_encoder_2_output, pooled_text_encoder_2_output = __process_tokens(tokens_2, self.tokenizer_2, self.text_encoder_2, text_encoder_2_layer_skip) if text_encoder_1_output is None or text_encoder_2_output is None: - print("Both text encoder outputs are None. 
Check your input text or tokens.") + print("両方のテキストエンコーダーの出力がNoneです。入力テキストまたはトークンを確認してください。") text_encoder_output = torch.cat([text_encoder_1_output, text_encoder_2_output], dim=-1) From 089b8f373068e6c163701e3aa41a812b860e2bf2 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sat, 31 Aug 2024 01:19:29 +0900 Subject: [PATCH 03/17] Adam-mini --- modules/util/create.py | 17 +++++++++++++++++ modules/util/enum/Optimizer.py | 3 +++ modules/util/optimizer_util.py | 10 ++++++++++ requirements-global.txt | 1 + 4 files changed, 31 insertions(+) diff --git a/modules/util/create.py b/modules/util/create.py index ec11a8ad..faed4941 100644 --- a/modules/util/create.py +++ b/modules/util/create.py @@ -815,6 +815,23 @@ def create_optimizer( eps=optimizer_config.eps if optimizer_config.eps is not None else 1e-8, ) + # Adam-mini Optimizer + case Optimizer.ADAM_MINI: + from adam_mini import Adam_mini + optimizer = Adam_mini( + named_parameters=parameters, + lr=config.learning_rate, + betas=(optimizer_config.beta1 if optimizer_config.beta1 is not None else 0.9, + optimizer_config.beta2 if optimizer_config.beta2 is not None else 0.999), + eps=optimizer_config.eps if optimizer_config.eps is not None else 1e-8, + weight_decay=optimizer_config.weight_decay if optimizer_config.weight_decay is not None else 0.0, + # model_shardingは未実装(マルチGPUの場合はTrueにする) + model_sharding=optimizer_config.model_sharding if optimizer_config.model_sharding is not None else False, + # dim=model_config.dim, + # n_heads=model_config.n_heads, + # n_kv_heads=model_config.n_kv_heads, + ) + if state_dict is not None and optimizer is not None: if 'param_group_mapping' not in state_dict: # Old method of loading the optimizer state. This only works if the param groups did not change. diff --git a/modules/util/enum/Optimizer.py b/modules/util/enum/Optimizer.py index e21bb045..063cca6d 100644 --- a/modules/util/enum/Optimizer.py +++ b/modules/util/enum/Optimizer.py @@ -59,6 +59,9 @@ class Optimizer(Enum): TIGER = 'TIGER' AIDA = 'AIDA' + # ADAM_MINI + ADAM_MINI = 'ADAM_MINI' + @property def is_adaptive(self): return self in [ diff --git a/modules/util/optimizer_util.py b/modules/util/optimizer_util.py index 93af8e12..18896dbf 100644 --- a/modules/util/optimizer_util.py +++ b/modules/util/optimizer_util.py @@ -368,4 +368,14 @@ def init_model_parameters( "adam_debias": False, "eps": 1e-8, }, + Optimizer.ADAM_MINI: { + "beta1": 0.9, + "beta2": 0.999, + "eps": 1e-8, + "weight_decay": 0.0, + "model_sharding": False, + # "dim": None, + # "n_heads": None, + # "n_kv_heads": None, + }, } diff --git a/requirements-global.txt b/requirements-global.txt index 0df71008..21dcd816 100644 --- a/requirements-global.txt +++ b/requirements-global.txt @@ -34,6 +34,7 @@ lion-pytorch==0.1.4 # lion optimizer prodigyopt==1.0 # prodigy optimizer schedulefree==1.2.5 # schedule-free optimizers pytorch_optimizer==3.0.2 # pytorch optimizers +adam_mini # Profiling scalene==1.5.41 From 29dd300f76b9798613e150eb8e0d82bfcb06a473 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sat, 31 Aug 2024 01:32:41 +0900 Subject: [PATCH 04/17] Fix: Adam-mini --- modules/util/config/TrainConfig.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/util/config/TrainConfig.py b/modules/util/config/TrainConfig.py index d844500d..9f9b6b6d 100644 --- a/modules/util/config/TrainConfig.py +++ b/modules/util/config/TrainConfig.py @@ -87,6 +87,7 @@ class TrainOptimizerConfig(BaseConfig): r: float adanorm: bool adam_debias: bool + model_sharding: bool def __init__(self, data: list[(str, Any, type, bool)]): 
super(TrainOptimizerConfig, self).__init__(data) @@ -154,6 +155,7 @@ def default_values(): data.append(("r", None, float, True)) data.append(("adanorm", False, bool, False)) data.append(("adam_debias", False, bool, False)) + data.append(("model_sharding", False, bool, False)) return TrainOptimizerConfig(data) From 43f72c19d3fc3dc43bf04da6b16991f81a59abf7 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sat, 31 Aug 2024 01:38:14 +0900 Subject: [PATCH 05/17] Fix: Adam-mini 2 --- modules/ui/OptimizerParamsWindow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui/OptimizerParamsWindow.py b/modules/ui/OptimizerParamsWindow.py index 9f1be17b..4c2be345 100644 --- a/modules/ui/OptimizerParamsWindow.py +++ b/modules/ui/OptimizerParamsWindow.py @@ -144,7 +144,7 @@ def create_dynamic_ui( 'r': {'title': 'R', 'tooltip': 'EMA factor.', 'type': 'float'}, 'adanorm': {'title': 'AdaNorm', 'tooltip': 'Whether to use the AdaNorm variant', 'type': 'bool'}, 'adam_debias': {'title': 'Adam Debias', 'tooltip': 'Only correct the denominator to avoid inflating step sizes early in training.', 'type': 'bool'}, - + 'model_sharding': {'title': 'Model Sharding', 'tooltip': 'Whether to use model sharding for distributed training.', 'type': 'bool'}, } # @formatter:on From 87727c11ad056172e150a373747539a8a5109847 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sat, 31 Aug 2024 02:05:25 +0900 Subject: [PATCH 06/17] Fix: Adam-mini 3 --- modules/util/create.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/util/create.py b/modules/util/create.py index faed4941..43cf16bc 100644 --- a/modules/util/create.py +++ b/modules/util/create.py @@ -818,8 +818,11 @@ def create_optimizer( # Adam-mini Optimizer case Optimizer.ADAM_MINI: from adam_mini import Adam_mini + named_parameters = [(f'group_{i}.param_{j}', param) + for i, group in enumerate(parameters) + for j, param in enumerate(group['params'])] optimizer = Adam_mini( - named_parameters=parameters, + named_parameters=named_parameters, lr=config.learning_rate, betas=(optimizer_config.beta1 if optimizer_config.beta1 is not None else 0.9, optimizer_config.beta2 if optimizer_config.beta2 is not None else 0.999), From 6c224b772dc64e17dd1c3d8b297ab919b3fe825e Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 3 Sep 2024 18:01:42 +0900 Subject: [PATCH 07/17] Jupyter notebook --- modules/modelSampler/FluxSampler.py | 2 +- modules/modelSampler/PixArtAlphaSampler.py | 2 +- .../modelSampler/StableDiffusion3Sampler.py | 2 +- .../modelSampler/StableDiffusionSampler.py | 2 +- .../modelSampler/StableDiffusionXLSampler.py | 2 +- modules/modelSampler/WuerstchenSampler.py | 2 +- modules/module/BaseImageCaptionModel.py | 2 +- modules/module/BaseImageMaskModel.py | 2 +- modules/module/GenerateLossesModel.py | 2 +- modules/trainer/GenericTrainer.py | 3 +- train.ipynb | 147 ++++++++++++++++++ 11 files changed, 157 insertions(+), 11 deletions(-) create mode 100644 train.ipynb diff --git a/modules/modelSampler/FluxSampler.py b/modules/modelSampler/FluxSampler.py index 94a8eae7..9064bfff 100644 --- a/modules/modelSampler/FluxSampler.py +++ b/modules/modelSampler/FluxSampler.py @@ -15,7 +15,7 @@ import torch from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class FluxSampler(BaseModelSampler): diff --git a/modules/modelSampler/PixArtAlphaSampler.py b/modules/modelSampler/PixArtAlphaSampler.py index eda47672..9fc36633 100644 --- a/modules/modelSampler/PixArtAlphaSampler.py +++ b/modules/modelSampler/PixArtAlphaSampler.py @@ -15,7 +15,7 
@@ import torch from PIL.Image import Image -from tqdm import tqdm +from tqdm.auto import tqdm class PixArtAlphaSampler(BaseModelSampler): diff --git a/modules/modelSampler/StableDiffusion3Sampler.py b/modules/modelSampler/StableDiffusion3Sampler.py index 4c39c60e..6cd021af 100644 --- a/modules/modelSampler/StableDiffusion3Sampler.py +++ b/modules/modelSampler/StableDiffusion3Sampler.py @@ -15,7 +15,7 @@ import torch from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class StableDiffusion3Sampler(BaseModelSampler): diff --git a/modules/modelSampler/StableDiffusionSampler.py b/modules/modelSampler/StableDiffusionSampler.py index 1bc2d987..57f217fa 100644 --- a/modules/modelSampler/StableDiffusionSampler.py +++ b/modules/modelSampler/StableDiffusionSampler.py @@ -17,7 +17,7 @@ from torchvision.transforms import transforms from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class StableDiffusionSampler(BaseModelSampler): diff --git a/modules/modelSampler/StableDiffusionXLSampler.py b/modules/modelSampler/StableDiffusionXLSampler.py index cc1cd3d5..73128378 100644 --- a/modules/modelSampler/StableDiffusionXLSampler.py +++ b/modules/modelSampler/StableDiffusionXLSampler.py @@ -17,7 +17,7 @@ from torchvision.transforms import transforms from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class StableDiffusionXLSampler(BaseModelSampler): diff --git a/modules/modelSampler/WuerstchenSampler.py b/modules/modelSampler/WuerstchenSampler.py index 93280926..f5910219 100644 --- a/modules/modelSampler/WuerstchenSampler.py +++ b/modules/modelSampler/WuerstchenSampler.py @@ -14,7 +14,7 @@ import torch from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class WuerstchenSampler(BaseModelSampler): diff --git a/modules/module/BaseImageCaptionModel.py b/modules/module/BaseImageCaptionModel.py index 7fd6ac79..5a9c7312 100644 --- a/modules/module/BaseImageCaptionModel.py +++ b/modules/module/BaseImageCaptionModel.py @@ -5,7 +5,7 @@ from modules.util import path_util from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class CaptionSample: diff --git a/modules/module/BaseImageMaskModel.py b/modules/module/BaseImageMaskModel.py index 1e37a574..768f7d22 100644 --- a/modules/module/BaseImageMaskModel.py +++ b/modules/module/BaseImageMaskModel.py @@ -9,7 +9,7 @@ from torchvision.transforms import transforms from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class MaskSample: diff --git a/modules/module/GenerateLossesModel.py b/modules/module/GenerateLossesModel.py index a734b91d..4fceac7e 100644 --- a/modules/module/GenerateLossesModel.py +++ b/modules/module/GenerateLossesModel.py @@ -11,7 +11,7 @@ import torch -from tqdm import tqdm +from tqdm.auto import tqdm class GenerateLossesModel: diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py index d9f31eee..9fc5c8c7 100644 --- a/modules/trainer/GenericTrainer.py +++ b/modules/trainer/GenericTrainer.py @@ -40,8 +40,7 @@ from torchvision.transforms.functional import pil_to_tensor from PIL.Image import Image -from tqdm import tqdm - +from tqdm.auto import tqdm class GenericTrainer(BaseTrainer): model_loader: BaseModelLoader diff --git a/train.ipynb b/train.ipynb new file mode 100644 index 00000000..7ab81b45 --- /dev/null +++ b/train.ipynb @@ -0,0 +1,147 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_path = './config/test.json'" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-24T11:02:30.546988Z", + "iopub.status.busy": "2024-03-24T11:02:30.546730Z", + "iopub.status.idle": "2024-03-24T11:04:12.062739Z", + "shell.execute_reply": "2024-03-24T11:04:12.061431Z", + "shell.execute_reply.started": "2024-03-24T11:02:30.546962Z" + } + }, + "outputs": [], + "source": [ + "# 依存関係のインストール\n", + "!pip -r requirements.txt\n", + "\n", + "# CUDAの確認\n", + "import torch\n", + "cuda_available = torch.cuda.is_available()\n", + "print(f\"CUDA is {'available' if cuda_available else 'not available'}\")\n", + "\n", + "if not cuda_available:\n", + " use_zluda = input(\"CUDAが利用できません。WindowsでAMD GPUを使用していますか? (y/n) \")\n", + " if use_zluda.lower() == 'y':\n", + " print(\"ZLUDAのインストールを続行します\")\n", + " %run scripts/install_zluda.py\n", + " else:\n", + " print(\"エラー:インストール中に問題が発生しました\")\n", + "else:\n", + " print(\"インストールが完了しました\")\n", + "\n", + "# 注意:この環境では%pipを使用していますが、\n", + "# 通常のコマンドラインでは!pipを使用することに注意してください" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-24T11:10:36.477241Z", + "iopub.status.busy": "2024-03-24T11:10:36.476649Z", + "iopub.status.idle": "2024-03-24T11:10:42.359595Z", + "shell.execute_reply": "2024-03-24T11:10:42.359022Z", + "shell.execute_reply.started": "2024-03-24T11:10:36.477212Z" + } + }, + "outputs": [], + "source": [ + "# 必要なライブラリのインポート\n", + "import os\n", + "\n", + "# mgdsをリポジトリにcloneした場合\n", + "# import sys\n", + "# sys.path.append('mgds/src')\n", + "\n", + "import json\n", + "from modules.util.config.TrainConfig import TrainConfig\n", + "from modules.util.callbacks.TrainCallbacks import TrainCallbacks\n", + "from modules.util.commands.TrainCommands import TrainCommands\n", + "from modules.trainer.GenericTrainer import GenericTrainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-24T11:27:13.229538Z", + "iopub.status.busy": "2024-03-24T11:27:13.229338Z" + } + }, + "outputs": [], + "source": [ + "# TrainConfigのインスタンスを作成\n", + "train_config = TrainConfig.default_values()\n", + "with open(config_path, \"r\") as f:\n", + " train_config.from_dict(json.load(f))\n", + "\n", + "# userwarningを表示しない\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# TrainConfigで読み込んだ\"debug_dir\",\"workspace_dir\",cache_dir\"が存在しなければ作成\n", + "for dir_path in [train_config.debug_dir, train_config.workspace_dir, train_config.cache_dir]:\n", + " if not os.path.exists(dir_path):\n", + " os.makedirs(dir_path)\n", + "\n", + "# コールバックとコマンドの設定\n", + "callbacks = TrainCallbacks()\n", + "commands = TrainCommands()\n", + "\n", + "# トレーニングプロセスの開始\n", + "print(\"Destination_path: \", train_config.output_model_destination)\n", + "print(\"Workspace_path: \", train_config.workspace_dir)\n", + "print(\"Debug_path: \", train_config.debug_dir)\n", + "print(\"Cache_path: \", train_config.cache_dir)\n", + "\n", + "trainer = GenericTrainer(train_config, callbacks, commands)\n", + "trainer.start()\n", + "\n", + "try:\n", + " # トレーニングの実行\n", + " trainer.train()\n", + "except Exception as e:\n", + " print(f\"トレーニング中にエラーが発生しました: {e}\")\n", + "finally:\n", + " # トレーニング終了時の処理\n", + " trainer.end()\n", + " print(\"トレーニングが終了しました\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From c43c05ecfca649cf74c216b8b272ceeac8154b1d Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 3 Sep 2024 19:32:46 +0900 Subject: [PATCH 08/17] Remove log file. --- h --force origin master | 854 ---------------------------------------- 1 file changed, 854 deletions(-) delete mode 100644 h --force origin master diff --git a/h --force origin master b/h --force origin master deleted file mode 100644 index 044a277c..00000000 --- a/h --force origin master +++ /dev/null @@ -1,854 +0,0 @@ -d968558 (HEAD -> master, upstream/master) disable gradient checkpointing if OFF is selected -b727db2 fix gradient checkpointing for SC models -aa18c65 move model loading functions out of FluxModelLoader -98af3f1 deactivate int8 quantization -51a9e6f hide fine tune option for flux models -196e1c8 flux lora preset -f63b188 respect the continue_last_backup setting in the sampling tool -6847457 fix resuming from backup when training a DoRA -5da5ddb set flux guidance scale to 1.0 during training -50cf940 lazy initialization for the dora_scale parameter -d90cdf7 add a new cpu offloaded gradient checkpointing option -bac7d5e include flux as a model with multiple text encoders -f957d97 Merge branch 'refs/heads/master' into flux -4743314 fix embedding training for models with a single text encoder -e891861 add flux embedding training support -d027b4f fix saving of included metadata when running from cli -16b73e6 fix loading of unquantized flux models -2925dc8 add support for DoRA training on quantized base models -529c9fd experimental int8 quantization support. will need fixes in bnb to fully work -abfc72f add flux model saver -be4d3f3 flux lora support -b95329b initial Flux sampling and quantization support -0f66d0f add an unsqueeze call to fix SC training without text encoder training -5f19791 deactivate 300 token limit for PixArt Sigma -d931d6c move checkpointing_util.py out of stableDiffusion directory -1ee5a13 make display_name optional -81d5283 combine many common data loader functions to reduce code duplication -e847aa8 fix a call to encode_text() -64de057 reduce code duplication for embedding parameter groups -7d7a463 extract clip and t5 calls to a single function -4c0a624 split out Pixart text encoding to its own function -66e9715 split out SC text encoding to its own function -70fb48a split out SDXL text encoding to its own function -29dd5cb split out SD text encoding to its own function -87ae56d split out SD3 text encoding to its own function to reduce code duplication -abb6aa3 Merge pull request #429 from mx/ruff -96e3770 remove unused return values -955dee7 add line breaks to a long line -ce6ce9c Automated ruff fixes. -1158af5 Remove some lints that make the code less readable. -26bd9da Add missing comma to attn-mlp preset. -99a9f17 Ruff fix include order and sorting. -0412584 Add ruff config. -6187518 Revert "Merge pull request #419 from hameerabbasi/pre-commit" -caf6921 Merge pull request #419 from hameerabbasi/pre-commit -cb16854 Add `requirements-dev.txt`. -f43f6f2 Remove redundant line. -764995e some code cleanup -43fc10a Remove irrelevant stuff from `pyproject.toml`. -1b32d0d only load as backup if meta.json exists -5fec095 Propagate the train/eval state to lora modules. (#424) -e8554a9 Remove redundant `None`s. 
-48575a9 Fix the unneeded `_epoch_step`. -9fc4513 Introduce `pre-commit` and `ruff`. -ce11529 Introduce `pre-commit` and `ruff`. -f2d78c8 update mgds -ce13283 Merge pull request #423 from mx/eps -7c937d8 Remove the command switch for norm epsilon. -385b1f5 Optional epsilon for weight decompose. -33498b6 add a missing import -80ba9c3 Downgrade matplotlib to 3.9.0. -e34efac add "DoRA" to the decompose label -8ac2693 fix variable declarations in LoHaModule -5b21774 add a config option for SD3 attention masks -e59ec6e experimental attention mask for SD3 -d8d3f16 LoRA Refactor (#403) -ffa1f8a Merge pull request #409 from seniorsolt/Fix_errors_with_float16 -952a5c8 Set dtype in alphas_cumprod in debug process sd 1.5 -259215b Set dtype in __create_erode_kernel, eroded_mask and latent_mask sd 1.5 -cc76f38 Optimizers - ADABELIEF, TIGER, AIDA (#373) -9c0275d update mgds -889ddd2 Merge branch 'refs/heads/sd3_improvements' -5553e1d fix an exception when loading a backup in the sampling tool -3311aac add missing input fields -9ad1c5e revert onnxruntime to 1.18.0 -e2cc711 update pytorch and xformers -aca6afa add a timestep distribution visualization -23006f0 Merge pull request #387 from mx/a11y -1918055 add non uniform timestep sampling for SD3 -f1fb0e3 Undo the a11y changes to the masks / caption UI. -a446419 fix SD3 debug output -68046e0 Merge branch 'refs/heads/master' into sd3_improvements -474dd2c replace the vae distribution from the cache with a single sample to reduce space -d06a650 enable partial embedding training for all SD3 and SDXL training modes -fa6a08c update mgds -d7d47ea disable checkpointing during inference -4cf80e2 remove unused code -a66f705 update tensorboard to fix a protobuf version incompatibility -d0a3c6c rework SD3 embedding training to use a new train_embedding switch -0ae3129 fix SD3 embedding loading and saving -12f623e fixed SD3 loss calculation -a5a06c8 add SD3 embedding training -f78a74a fix a vae model conversion bug for SD models -c5911a4 readme update -a63bc9f fix a dtype exception when not using autocast -9102e12 add SD3 presets -4d745e5 update mgds -a33abce disable embedding training for SD3, it's not supported yet -8344cb1 don't create LoRA layers for untrained text encoders for SD3 -c8cf156 Merge branch 'refs/heads/master' into sd3 -8dc3a3d Merge pull request #370 from adriangaro/master -a93aaac Merge pull request #359 from mx/note -6cf5eb0 fix SD3 text encoder dropout when training text encoders -ff7c801 fix model config loading for sd and sdxl -a13d4cf fix embedding setup to use vpred if configured (#369) -beff1d7 better SD3 prompt dropout -36a7503 fix an exception when saving a backup from an exported config file -8add0f1 add a disable_fp16_autocast_context to SD3 T5 -e4a392a add the vae shift factor during SD3 training -be0db64 fix T5 quantization to fp16 when loading from a single safetensors file -06d5e3a add support for saving SD3 models as a single safetensors file -e4a9731 Don't create TE layers that won't be trained or used. -edc20b7 fix an indexing error -aaa7649 don't enable gradient checkpointing for unloaded text encoders -18e594a enable text encoder dropout for SD3 -fddd2f4 use uniform sampling when selecting a SD3 training timestep -4d9de10 ad options to include only some text encoders during SD3 training -14cb1fd Revert "Make only the sample settings scrollable, with a fixed width." -bd8cf23 Make only the sample settings scrollable, with a fixed width. -c2b14d5 Revert a11y fix for sample tool, it breaks stuff. 
-fbb86d6 SD3 LoRA training -fe7c4b7 add PixArt-Sigma to the readme file -51c7a84 Accessibility fixes (#340) -a56d63c fix a dtype bug -e5b00d1 support for loading single file SD3 models and more fixes -ec182e9 add model saving for SD3 -cf1246a fix the timestep selection during SD3 training -a0486b7 fix a few loss calculation bugs for SD3 -793fce7 fixes and improvements for SD3 -4c1d3b2 remove all mentions of T5TokenizerFast -e4affae initial SD3 model setup and data loader -454f46a pass add_special_tokens=True to T5 -5bb7743 sampling fixes for SD3 -9332a56 update dependencies for SD3 support -5bb2bbb StableDiffusion3Sampler. Based on StableDiffusionXLSampler. -2b01266 add SD3 model loading -bab1441 fix crop jitter -9b790cd clarify a fused_back_pass error message -3ce9746 reduce memory usage after stopping a training run -fceb76c add a save now button to the backup tab -946836c fix black SDXL sample images -839c2a6 add fused back pass support for adam -dc66fa7 add fused back pass support for adamw -d86ea35 Implement Pixart Sigma (#317) -64f6c1a Merge remote-tracking branch 'refs/remotes/origin/dependency_update' -221420a fix a grad scalar bug when using fused back pass -6ecec2d dependency updates -f914d0d remove outdated information from the quick start guide -91b1570 better support for loading and converting SDXL inpainting models from a single file -b532606 fix SDXL inpainting sampling for noise schedulers other than DDIM -c8a1549 add support for SDXL inpainting samples, and general sampling fixes -c53ec6e enable inpainting samples for SD models -f0bdfa6 Rewrite ZLUDA installer. (#276) -a5111ca fix install.bat for ZLUDA installs -9767a4e move inpainting augmentations to the concept window -13f570f Merge pull request #309 from Calamdor/patch-1 -d83a7da Update QuickStartGuide.md -8406119 update mgds to allow custom resolutions -95c2a8d Allow the user to specify a custom learning rate scheduler (#283) -e9db8d8 fix an exception when resuming from a backup using the prodigy optimizer -eb87d60 Merge pull request #302 from Calamdor/Steps-Calculation-Bug-Fix -65d08d4 Merge pull request #300 from vladmandic/patch-1 -9832f5a Update create.py -d62634e Optionally bundle joint-trained embeddings into LoRAs (#278) -bfdf06f update safety checker skip in load -2e910ce fix sampling fom the sampling tool and script -338908d fix progress reporting in the console when resuming from a backup -21023d3 fix SDXL caching -9a9328b Merge branch 'inline_bucketing' -4e32bce Merge pull request #296 from vladmandic/patch-1 -5e3cfbc add missing import os -3e5c3cf inline bucketing fixes -f9b034d Merge branch 'master' into inline_bucketing -d0d2939 clarify the clip skip settings in the UI -fd25ad3 Merge branch 'optimizer_state' -5494c97 Implement Facebook's new schedule-free optimizers. 
(#242) -80238e9 better handling of embedding optimizer states -2e28621 add additional information to the backup optimizer state to enable config changes without manually clearing the optimizer state -76c2df2 Merge pull request #277 from xirvian/master -0ea4e86 fix the yaml config loading without an internet connection -b48ed22 Add wraplength support to labels, fixing UI behavior for long concept names in concept tab -524e420 Merge pull request #269 from SirTrippsalot/master -75422b0 add a todo comment to __apply_fused_back_pass -6151f23 Fix Fused Backpass + stop training -5118af0 fixed model conversion of SDXL and stable cascade models -fa89fcb add a missing util file -dced13a add an option to force the padding mode of conv layers to circular -f3210d9 add a second concept balancing strategy, where an exact number of samples can be specified -4805e11 remove an unused config migration -ac26661 add an option to preserve embedding norms during training -e965b7a fix backup loading for additional embeddings -ece35d4 fix model loading in the sample tool -2b9b170 fix backup saving when training embeddings -b64198c Reduce default dataloader threads to 2. -ba20789 Merge pull request #247 from mx/parallel -0a78c44 update mgds -761d1dc Add caption prefix and postfix (#256) -52520c6 Merge branch 'universal_embeddings' -c14e231 Add profiling section for CPU profiles and stack dump (#259) -e7466d4 option to disable latent caching for all remaining models and training modes -42c3ed1 add an experimental option do completely disable latent caching to Stable Diffusion -ac62b48 Fix typo PipelineConfig -> PipelineState. -fed167a Merge pull request #225 from mx/came -6955092 Restrict dataloader to user-configurable thread number. -152f285 add fused back pass support to CAME -bb16722 Merge pull request #243 from mx/sgd -706367f Load a PipelineState in MGDS. -1bcf966 Fix SGD foreach option. -c959547 Vae UI update and Tooltip update (#237) -4c413f7 Merge branch 'master' into universal_embeddings -560d1d4 loading and saving fixes for model conversion -8f5d715 Fix prediction type change while training Stable Diffusion (#235) -bcd8804 disable relative_step and scale_parameter in adafactor defaults -a4647fa pixart alpha embedding training and pivotal tuning -c3bdfe1 Add ZLUDA support (#196) -4926cc7 Vendor CAME rather than just implement extensions. -9a35e7f hide the triton warning on startup -d7899d9 wuerstchen and stable cascade pivotal tuning -5aabada SDXL pivotal tuning -055fac7 Merge pull request #222 from hameerabbasi/fix-model-type -5f63653 Change default loss_weight_strength to float. -46b48e2 Fix preset training bug. -2199821 remove StableDiffusionFineTuneDataLoader -b10c569 Merge branch 'master' into universal_embeddings -07f8198 Debiased estimation loss. (#212) -fd50ee6 code cleanup and fixes for pivotal tuning -edc098a fix adafactor training with disabled stochastic rounding -d46df19 Add stochastic rounding for CAME. -85360b8 Enable the CAME optimizer. 
-ff26890 pivotal tuning for stable diffusion LoRA -fd53dd3 add a UI example to the readme -5cb551d Merge branch 'master' into universal_embeddings -a75bde0 ui bugfix for pivotal tuning -c009c30 fix adam/adamw fused and foreach implementations -245f9b6 fix loading of yml files for stable diffusion models -493a01d Merge branch 'master' into universal_embeddings -68224ce pivotal tuning fixes for stable diffusion fine tune -6c7af74 fixes for pivotal tuning model loading and sampling -7c0b7b5 Merge branch 'master' into universal_embeddings -3242026 option to disable the autocast cache for reduced VRAM usage -f51e6af refactoring of optimizer patches -9c09bad GradScaler support for fused back pass -97c3452 Merge branch 'master' into fused_back_pass -7b8e509 fix else branch in install.sh and update.sh -db6c6c8 Merge pull request #193 from hameerabbasi/moar-backends -a1e4d23 Modify `install.sh` and `update.sh` to be more generic. -1f42d5d update mgds -7c5f983 Preserve back-compat by checking for `nvcc`. -ef26ec3 Preserve back-compat by adding shim `requirements.txt`. -d9ac606 experimental fused back pass for adafactor -3c18a91 fix gradient checkpointing for stable cascade -eb19d84 Auto-detect default device. -7be0a84 Merge remote-tracking branch 'origin/master' into moar-backends -a6722bf update diffusers to support the final stable cascade format -739cb0f additional embeddings tab -9d16726 refactor stable diffusion lora loading -c86cfd9 Merge pull request #185 from mx/tensorboard -4f565ac fix a naming inconsistency -5357f9f Add required env var. -81606a8 Fix GC to not use too much memory. -03fd726 Get training working on MPS. -9f63018 Attempt to make compatible with ROCm, MPS and CUDA. -369b023 custom prefix for saves during training -70c4c06 model loading for SD pivotal tuning -43772ca update mgds -90cfc88 Remove code that was duped into BaseStableDiffusionXL. -0745001 Remove some code that got accidentally left behind in the move. -b595ae1 Have the tensorboard recording done in the model setup. -1812d46 fix bf16 sampling on SD1.5 models -740d0a5 Group both losses under the same header. -efc2193 Show the correct learning rates on tensorboard. -ebf32bb Merge branch 'master' into universal_embeddings -e36edb9 fix multiple dtype issues during caching and sampling -76873db fix stable cascade lora key mapping -d191f85 Merge pull request #171 from mx/dropout -a0520f2 move dropout setting to the LoRA tab -eae7314 Merge branch 'master' into fork/dropout -84f15e6 cleanup LoRA dropout -4cf6d97 Merge branch 'stochastic_rounding' -7363a21 add a disclaimer to the modified optimizer step functions -a729c31 only enable stochastic rounding for bf16 weights -3f3acd6 fix loading of samples and concepts when loading a packed config file -379d75f add stochastic rounding as an optimizer setting to adam, adamw and adafactor -3cab0b2 Remove dropout setting from the constructor. 
-fd78334 improve the efficiency of the stochastic copy function -91d3027 fix presets -7fcee19 dropout_pct -> dropout_probability -5a13493 Merge pull request #179 from hameerabbasi/issue-template -add26bf replace feature with enhancement label -995d08f Merge branch 'master' into stochastic_rounding -35e6eff fix a ui exception when switching to a different preset -0f40b32 fix early stopping of text encoder training -763299d Merge branch 'master' into stochastic_rounding -82c76a9 fix a model loader bug when training embeddings -05dc257 fix SD lora training -3d2d7a9 experimental stochastic rounding for adafactor and bf16 weights -f854625 remove train_*_epochs settings from presets -5a1c573 fix ui labels -2c1b1df better "stop_training_after" default settings -8b15e9a Add issue templates. -e669400 fix early stopping of model part training -939f9e5 Merge branch 'master' into model_part_config -031077a Fix with less typos... -9074726 Fix dropout in state dict (forgot everything had to be a tensor). -3e163eb Add dropout for lora training. -970e9a9 fix stable cascade attention lora keys -a90d108 model spec header for stable cascade lora and embedding files -50618b0 change the stable cascade safetensors saving logic to save stage c and the text encoder -e928d6c fix stable cascade min snr gamma for batch sizes over 1 -130b5e8 add Stable Cascade to the readme -2cf3ab1 disable prior override for wuerstchen v2 and pixart -be7c935 enable min snr gamma for stable cascade -5583cf0 Merge branch 'master' into stable_cascade -98f6cec fix DEFAULT attention processor for stable cascade -be9caec add a mapping function for stable cascade lora keys -5298aaf fix config export -ab3484a Merge pull request #155 from mx/minsnrgamma -aa631d0 change min snr gamme from int to float -08facee fix loading stable cascade backups -16f70ec LoRA and embedding training for stable cascade -c38d425 add an option to save stable cascade models in safetensors format -b17d1a6 fix stable cascade tokenizer saving -28be8be support for stable cascade stage C 1B -6b9c011 disable relative step and scale parameter in presets -3b4134d stable cascade sample and train fixes -e919e50 stable diffusion pivotal tuning tests -113572e initial stable cascade fine tuning support -ae3a969 stable cascade sampling support -e937b79 option to include training config in the output model -3215295 Add timestep information to the model_output_data. -578ec7b Merge branch 'master' into model_part_config -f56297c always call component commands after the value is changed -7d6b538 No min snr gamma for alignprop. -3da1e31 Fix training tab putting the options in the wrong place. -336339c Fix warning over is/==. -0c05e27 Support v-prediction in minimum SNR gamma calculation. -b86192f Implement minimum SNR gamma. 
-1b1817c unify model part config -7c4531d correctly handle utf-8 in CaptionUI.py -396037e #.json is not a built in preset -ebbc158 always assume built-in configs are saved in the most recent version -0b2b8d7 fix loading of built in presets -dc0c7b0 fix migration for optimizer settings -769078e fix optimizer updates when switching to a different preset -80ed56f Merge pull request #153 from Sayat1/ADAM8bit-fix -475e95e adam8bit betas fix -8aed720 fix optimizer creation -a35b407 clean up data types in BaseConfig.py -510f739 add config output to create_train_files.py -cb3910b Merge branch 'master' into config_rework -6448733 update CLI documentation -26fc46e migrations to load old config format -a7ad71c load config as unpacked config when loading from ui -ec2f4b9 change export function to generate single json file -7528e70 integrate optimizer pref settings into the main config json -e501932 rename Param to Config -4aa41b2 unify param and arg classes, remove train by args script -2ad2af1 Merge pull request #147 from mx/script -137691a Allow python and venv paths to be specified. -ed2eea1 fix grad scaling -ff8ee55 fix an exception when loading pixart models -608083a experimental fp8 support -1160505 fixed a transparency bug when using screen scaling -9560751 Merge pull request #140 from hameerabbasi/fix-prodigy -38c0d2e Prodigy is not Adafactor anymore. -5ef6c90 disable delete when pressing ctrl-d in the mask editing UI -4c5e6db fill tool for mask editing -cd6bd4b create full white mask when creating an initial mask with right click -d6168fb allow access to the main window when some tool windows are open -c3be7f0 PixArt LoRA training -6db9516 Merge pull request #133 from prog0111/step-bias -a9db641 add noise related settings to the ui for all models -3dad4bf Fix random number generation and tooltips -23494db Merge remote-tracking branch 'upstream/master' into step-bias -f1b1f80 re-enable gradient checkpointing for T5 -701d784 disable gradient checkpointing for T5, it seems to be broken -0b0d74a Add global step as parameter for timesteps -e499d89 Merge remote-tracking branch 'upstream/master' into step-bias -ea1a354 Updating for upstream changes -acdc2f5 add option to add generated captions as new line -b7eafb2 fix autocast context creation -0ca6f39 align dtype conversions of _add_noise functions -448388e fix autocast for wuerstechen training -aa4da9a fix number of scalars in tensorboard to reduce summarization -9eef5ee move noise generation of continuous schedulers -a12c063 fix fp16 noise generation on PixArt -a197257 Add min noising strength -83dfb8b Added numpy imports back in -6c99056 Split noise distribution into two parameters -2e4f9b9 Merged with upstream -ead910e fix dtype conversion in the dataloader when autocast is disabled -cf02f54 Merge branch 'pixart' -7c26783 remove vb loss option for non variational models -b6c6040 remove unused parameter from the pixart preset -0dfb1fc Fixed step bias to make its seed based on global step -1061f4a Initial step-bias implementation -8c405fb fix loading SD inpainting models from ckpt and safetensors -fe054ee fix loading of the NAI model -4e849a1 vae override for PixArt models -d3fa74e add rembg human model -88637ae print "Model converted" after converting a model -89ff874 Merge branch 'master' into pixart -c0adb0d vae override option -980596d Merge pull request #127 from aplio/feature/add-include-subfolder-toggle-and-feat-to-captioning -1b079a5 add subdir parameter to the caption ui script -ad9a9ed move subdir button to the right -98c5305 pixart 
diffusers to ckpt converter -1b54133 fix AlignProp for all models -91d27e5 feature. allow captionUI to include subdirs -d763ced use switch instead of checkbox for subdirectories -2a732cc Merge pull request #128 from aplio/feature/use-gpu-for-onnxruntime -118d40c add GPU support for RembgModel -03df5dd fix dtype exception for text encoders during caching -cad974e Merge pull request #126 from orcinus/tb-smooth-loss -54cfe65 nits. make ext check symmetrical -5dbee97 fix. sub-folders -> sub-directories -a92bbfe nits. window was bit narrow -4c94ce1 Nits. add include_subdirectories cmnt -5642ac3 feature. add include_subdirectories to batch masking -5da4125 Rename include_subfolders to include_subdirectories -f5d4013 Update WDModel to support GPU execution -e242691 Update onnxruntime version to onnxruntime-gpu -b452988 feature. enable image captioning to include subfolders -e70f9b2 Add smooth loss to tensorboard -b4e699c fix other models -73173f5 Merge pull request #124 from captin411/master -466103d bug fixes and niceties -425d38e Merge pull request #125 from aplio/bugfix/wdmodel-was-not-present-in-scripts-aka-cli-captioning -36ac910 Add WDModel to generate_captions.py -1d4d19d Set alpha for your mask brush in manual edit mode -8c86a41 enable xformers for PixArt and improve caching vram usage -54f4563 fix pixart tokenizer saving after deleting the starting file -2c6f34e fix default values in the optimizer window -4363bc2 fix relative step for AdaFactor -04b290f add prompt output to PixArt debug mode -870c613 fix PixArt text and batch handling -d102d2e PixArt Alpha fine tuning -c2be0c6 add device settings to the ui -9e61dab fix some debug mode bugs and add prompt output -abca656 rename backup folders from "step" to "backup" -acb522e add train progress to backup names -ca125a6 fix learning rate schedulers not working -46c091d Merge pull request #107 from magnusviri/add-shabang -bceffc7 Add ! 
to the start of all unix scripts -33248cc fix manual sampling when using gradient accumulation -27a6763 update the quick start guide to remove a reference to the "Latent Caching Epochs" setting -ef8c721 Merge branch 'repeats' -f506066 fix exception when loading older concept files -be1f774 concept loss weight -5f229f3 Merge pull request #98 from orcinus/sdxl_latent2rgb -93ecb7c fix formatting -95da510 Merge pull request #102 from SirTrippsalot/master -d572bcf fix bool args -bb0c2eb Merge pull request #99 from orcinus/expose_tensorboard -8149723 fix ui and arg handling -5a62e52 add shuffling when aspect ratio bucketing is disabled -27f6ccd resolution override without aspect ratio bucketing -303477d resolution override with aspect ratio bucketing -dfcf55e fixed image augmentations -cf5681f Fix arg calls -490454c UI Fixes and fixed missing close -38db43d UI code for sample logic -41833e9 Setup trainargs for new sample logic -b4f63ff Add conditional logic for samples -8d93b24 update mgds to fix concept name dependency while caching -2da2960 Add option to bind Tensorboard to all interfaces -368f2ff Add quick and dirty SDXL latent2rgb conversion -1605554 display folder name if no name is specified in concept -41166a6 switch to disable concepts -250e891 randomize seed when cloning concept -b84aee3 fix ctk exception for file and dir entries -ad0a7de repeats for Wuerstchen and fixes -dc9f262 repeats for SDXL, optimized caching -c78e4e4 fix TrainArgs argument names for exported scripts -49e3ad3 initial version of concept variations and repeats -ba5dc09 Merge pull request #95 -5dcccc5 Detail installation process for linux -f548b7e Merge pull request #74 -c6d406f Merge pull request #86 from dougbtv/dockerfile -d9ce759 use create_param_groups in all setup classes -3f45752 remove file from gitignore -d6f7b5d completed base integration of lr scaler -0744b25 Merge remote-tracking branch 'Nerogar/master' -0639eff integrating in base -2af10cb Merge remote-tracking branch 'origin/master' -5d536d6 add -e to git dependencies, trying to fix update issues -64b8323 Provides a Dockerfile and example build instructions -a0348cc update resolution tooltip -21801cc Merge pull request #75 from hameerabbasi/calc-loss -5ab53e7 multi resolution training -e78700d Make fix default kwarg value. -0fc97cd compute mask mean only once -725bf0c fix diffusion loss for masked training of inpainting models -c2983c1 small device fixes for GenerateLossesModel -1f57913 embedding refactoring -a3b5ecc slightly reduce the height of the CaptionUI window -e35ef68 Fix timestep to be torch.Tensor. -ea2f581 Small backcompat fix. -d178f57 Merge branch 'master' into calc-loss -8b9c3a4 Use deterministic timestep during image loss calculation. -abe153a fixed missing param group appends -81264f3 fix pause between epochs when no caching is needed -f5f02d4 remove kandinsky -cb7dd2b Use less VRAM. -3d00e90 Attempt at compaction -aeca2f4 fix formulae errors -c246f5f enable text encoder training for wuerstchen fine tunes -5d671a4 Cache the right epoch. -bdb882f First attempt at script that calculates loss. 
-17aa446 LR calculation code, final fixes for PR -0f3dfd5 Add missing imports -49cd2e4 Wuerstchen implementation of loss, moved declarations in diffusion -44ad32e DiffusionMixin Implementation of loss code -73224d8 Bug Fixes and Unifying arg naming -ad28579 Setup UI for loss and scaling -6ad1b4f Setup Enums for Loss and LR -a00e245 Setup Trainargs for Loss and LR scaling -2dcd1e6 fix alignprop for stable sd and sdxl -5e21895 fix an exception when converting a model -8b436ff fix a missing output value in the SDXL data loader -d5264e8 Merge branch 'master' into wuerstchen -339fe39 wuerstchen embedding training -bdfb1fa sampling tool to sample the model without training -9a79c5f fix continue from backup when fine tuning wuerstchen -ad0181d fix normalize_masked_area_loss with disabled masked training -341f72c wuerstchen presets -18e003d simplify preset definitions -089a2ba saving of wuerstchen fine tunes -1625b43 saving of wuerstchen LoRAs -14f89cc Merge branch 'master' into wuerstchen -6834aa0 wuerstchen specific model settings -4a46550 update dependencies to enable gradient checkpointing -595a408 fix embedding training -1ead531 fixed the continue from last backup function -9b722bf Merge branch 'master' into wuerstchen -455bfb9 fixed text encoder output handling for wuerstchen v2 -651aa86 fixed text encoder output norm when training SD 1.5 without text encoder -60743a4 removed align prop and masked training sections for wuerstchen -74af2b6 Merge branch 'master' into wuerstchen -28abae3 fixes for wuerstchen training -e5555a9 Merge pull request #63 from Lolzen/patch_linux -ef3080c fix native python check and also check for VENV -f769ac9 fix an exception when training SDXL embeddings with disabled text encoder 2 training -02e4818 fix an exception when trying to open optimizer settings -e876dbe rename "text encoder" settings to "text encoder 1" for SDXL in the ui -b1ed003 additional fixes after a rebase -e3ff9b7 rebase fixup -ed34545 simplify noise setup for wuerstchen -e1b1e10 initial wuerstchen-v2 support -8600242 update SDXL presets -d44e706 separate text encoder settings for SDXL -b9307ad train ui modules based on selected model -1c504cc fix some left usages of extra_model_name -5eda7d1 separation of model stage settings -60ce7e6 fix the backup now button -5d6065d remove 'break' from install.sh; these are meant for loops -ee35203 add scipy as Linux dependency -0470e19 fix sampling in png format -f1c8175 fix dataloader bugs for vae fine tuning -0a99509 fix exception when using create_train_files.py -25a1fe2 actually let native python execute the install cmd -5f1e229 remove whitespaces; add native python venv commands, fix typos -e756f5c forgot to remove echo -8ee740b add rudimentary start-ui.sh file -8ce8940 add rudimentary update.sh file -ea39c4b fix typos -e18fb6a add rudimentary install.sh file -da026c4 backup now button -e22b040 fix exception when training embeddings with text encoder training disabled -06a858b re-enable safety checks when deleting the cache directory -49480b7 update mgds -0bbadc8 big fixes for vram optimizations -4169426 further vram optimizations for training and caching -6fffe2e vram optimization during sampling -a4100e4 fix SDXL sampling when rescale noise scheduler is enabled -7dcb642 update mgds -d1ac15f Merge pull request #59 from hameerabbasi/fix-align-prop-dtype -20201d8 load HPSv2 with the correct precision -f090681 Merge pull request #58 -3a273c0 Fix HPS as well. -4519715 Fix align-prop dtype to match data. 
-860f32c Revert to CUDA 11.8 as CUDA 12.1 doesn't work yet in a lot of environments. -b6c81a6 fix KeyError: 'loss_type' -29f819a Merge pull request #56 from hameerabbasi/xformers-compat -2891735 Ensure PyTorch compatible with xformers is fetched. -d14d85f invert masks for better add and subtract support on color masks -731dd0d AlignProp support for SDXL -a2efbdd HPSv2 support for AlignProp -ea25079 remove initial images from alignProp calculations -3aa0055 initial work in progress AlignProp implementation -a59f3be add train_from_config script for training from a json config file instead of parameters -f81ae60 rename factor to alpha -962db50 Merge pull request #53 from hameerabbasi/adaschedule -dd4c3bc Pass initial_lr to AdafactorScheduler. -03e04d4 Merge remote-tracking branch 'origin/master' into adaschedule -32f72e9 Merge pull request #54 from SirTrippsalot/Optimizers -e72d1ca fixed eps tuple wrong var -3010307 Merge remote-tracking branch 'origin/master' into Optimizers -de6cc05 Merge remote-tracking branch 'origin/master' into adaschedule -036ca9e Merge pull request #51 from hameerabbasi/non-binary-mask -8a08629 Weight model by alpha everywhere. -8baff6e Fix blend mode and scale original mask by alpha. -ad83194 Add Adafactor Scheduler. -cc412b8 Add new mode instead of mixing add/subtract and blending behaviour. -76d370c Update UI. -1e6de42 Clarify intention of --alpha during mask generation. -943c9e0 Update docstrings. -a47393f show model options in mask and caption scripts -00f5ea5 Reduce memory usage. -beb474d Fix some math. -bee0da5 Enable generation of non-binary masks. -16250b9 Merge pull request #50 from SirTrippsalot/Optimizers -ecc733d Bugfix #Errors handling Bool in conditional optimizers logic -40e4de4 Merge pull request #49 from SirTrippsalot/Optimizers -2013663 Refactor or logic with if else -2c3065e Merge pull request #48 from SirTrippsalot/Optimizers -9683499 Merge remote-tracking branch 'SirTrippsalot/Optimizers' into Optimizers -8299ffc bugfix for saving on optimizers window close -fcdb289 call the command for options_adv at least once -66d8036 fix nullable bool values for cli arguments (again) -1e0b9a9 fix exception when loading from default values -b68dfc5 Merge pull request #45 from SirTrippsalot/Optimizers -24cce91 remove default values for optimizer settings -3e2e1a2 actually restore defaults when clicking on "Load Defaults" in the advanced optimizer settings -eb926ac fix inf handling for command line arguments -b2540b9 Fixed inf bug, added loading prefs/defaults to trainui -76aaeb2 Refactoring optimizer prefs -3397eae fix optimizer default issues -c4498ce use fused optimizers by default -c2e1e3e remove duplicated argument -bd8fe8a code formatting -32b1cea Support for default arguments in case of CLI with no arg provided -2f7e089 Merge remote-tracking branch 'SirTrippsalot/Optimizers' into Optimizers -455aa69 Added handling of defaults and user prefs per optimizer -613dc08 fix tuple creation -9a3586b replace _ by - in optimizer cli arguments -5f0d37c Added Parser Args -6c78329 fixed missing beta3 from dadapt_adan case -2f4e879 Restored None defaults -68b14db Remove Deprecated tuple handling -11f566a Update Lion handling -3b3f770 Refactor args and tuple handling -891fb18 Merge branch 'master' into Optimizers -3437bfd fix command line parsing -5df3f44 Add new trainargs, refactor dynamic_ui code -64d130c Added and enhanced torch optimizers -2a939fb Added more optimizer defaults, updated optimizer calls for dadapt -a4f28cb Expanded Lookup for initial testing -319c1bf 
Load Defaults Functionality -10c75d9 Updating components entry and switch to accept Override -4bdb3ed remove added print -abc185c Merge remote-tracking branch 'origin/master' into Optimizers -a46e262 Fixed bug with none handling causing TKINTER error -d5af4dc args rework -77349e4 Updated Args and UI -bbd5de0 refactor model loading classes, add model spec to all sd models -22c48d1 sdxl clip skip support -e22afc9 update tensorboard and remove explicit six deptendency -8b62f31 fix v prediction sampling -eaadeb2 force utf-8 encoding for saved text files -76537a1 update PIL to fix CVE-2023-4863 -0926c57 revert change to sample and concept file names when saving backups -1ab22cb Hotfix -- args -2a1fc3e Implement new options_adv component type -a339ff4 fixed casing on tuple -5ba6d52 Enable some disabled deatures for adafactor -77f229c Adding Arg datatypes, handling of inf, enhanced None, Tuples -892feac Optimizer Train Args -7d307db Add New/enhanced Optimizers to system -28d1ff8 update gitignore (.bak) -6a88fa2 Optimizers UI Changes -4320b5c save config when creating a backup -6f9fc34 fix window scaling issues when editing masks -898a78d tag shuffling -71a2bbf perturbation noise -10f606a rolling backup options -c3ed854 new option to create a mask by hex color values -83d76b9 option to disable gradient checkpointing -0650480 Merge pull request #42 from Janca/master -435db3f more backup loading feedback and presets -650a587 Merge pull request #1 from Nerogar/master -6b70d96 Added zero-config continue training -3d5e49d clear memory after each training run -7d0cfd4 print loss information in the console -c20e62e add kohya headers to fix version detection in the A1111 webui -385e015 temporary fix for tensorboard -b6cc3ed replace rembg dependency with custom code to reduce dependency hell -e3e833b update bits and bytes to 0.41.1 -1b50931 option to disable samples in the sampling tab -5828d15 embedding weight dtype setting in the ui -6966a0c improved dtype handling for reduced embedding vram usage -2bb867f readme update -e1bca5b improved sampling ui -d037f6a custom sampling progress bar -daa55f8 auto resize the sample image in the sample window -0b91b02 simple manual sampling -569f661 update presets to never automatically save -2366606 random seed option for sampling -17e1962 deterministic training -eefc94f fix preview of RGBA images -82a91e6 option to regularly save the model during training -6980c15 sdxl inpainting LoRA preset -a815545 initial sdxl inpainting support -8639264 button on the sample tab for immediate sampling -bf1311c configurable image format, deterministic sampling -7bcefa0 more schedulers for sampling -abbdf83 fix initial noise sigma to enable Euler scheduler -33583e1 ui fixes, sampler setting -e517fae more sampling options for SD/SDXL models -cff7a26 add current ema decay value to tensorboard -677efb5 fix lora loading with partially trained models -b57063a fix dtype issues and sdxl embeddings when text encoder training is disabled -3d61eae readme update -12a8753 wd14 tagging in caption ui -c8f13bc Merge pull request #33 from SirTrippsalot/master -946cf77 adding optimizers: adagrad, rmsprop (with 8 bit) -d2c1aeb sdxl embedding training -e6ad155 update mgds -18410dc fix lr schedulers with higher batch sizes -b96ead0 fix lr schedulers with active gradient accumulation -e0572a9 REX lr scheduler -80bb176 new "open in explorer" option in caption ui -2eab5c6 more dtype fixes -0cc6215 add weight dtypes to presets -7717a1b mixed dtype sampling -ee08fec override dtypes for model stages -0990595 
better error message when backups can not be saved -ddb27b0 fix bfloat16 conversion -0a18e5b allow output of bfloat16 models -bb53f29 update bitsandbytes -622746e fix vae fine tuning -d211707 readme update -4e9246c enable loading of embeddings without loading a base model -0187ba5 model conversion ui -8306924 improvements for LoRA training and model conversions -2f383fe reduce memory usage when using the SGD optimizer -858c58c Merge pull request #31 from allenbenz/less-ram-on-save -e6ddf16 Revert train.py -356c7c2 set the default output format to safetensors for command line scripts -68f3185 set the default output format to safetensors for all models -84e9b98 better model spec support -f5a5b13 include model spec when saving safetensors SDXL files -b36bb50 Save the model if backup_before_save is disabled. -1a8fe51 Put values changed by converted_state_dict on the cpu to avoid vram just before saving. -e2438ce manual gc every 5 minutes during training -ccbc17f rename sdxl presets from 0.9 to 1.0 -6afccb8 fixed for SDXL text encoder fine tuning -bac029e fix lora alpha scaling during training -7aea872 prevent crashes when trying to save backups -8db4e64 convert state dict to contiguous before saving model files -fba946e fix dependency errors during installation -69dc4cd remove fixed dtype when loading SD from a single safetensors file -f50eaaa remove fixed dtype when loading SDXL from a single safetensors file -060e9a8 fix TrainArgs typo -955c3e9 documentation fix -8426b91 caption UI fixes -2969a1c more options for the masking and captioning scripts -03bc3f6 captioning documentation -c23214d fix reloading of masking models -1816a25 rembg support for generating masks -3cc42d4 remove duplicated doc comments -ac44034 simple captioning and masking ui -5e22bb6 fix for text encoder SDXL LoRA weight names -dc48429 replace sd scheduler with DDIM during loading -94f20f3 fix for cache clearing on empty caches -0f4742b automatically clear the cache before starting -732a6a0 new script to create default files -8357e99 update mgds -f9664e3 Merge pull request #24 from FinFanFin/master -3bafe36 fix sdsampler bug -2c4ec62 Merge pull request #23 from FinFanFin/master -61dbb4e add support for the Prodigy optimizer -b16c13c fix tensorboard bug -ba39e2b Merge pull request #21 from FinFanFin/master -7e8105b rename optimizer enum values -b010b26 docs fix -cee296f add dadaptation and lion optimizers -57daa06 update mgds -069b9ab replace transformers rep dependency with pypi version -07b225f Merge pull request #18 from float-trip/patch-1 -4cde07f replace debug dir dialog with a dir_entry -6ff50be Use list of arguments instead of a string for Popen() -1eba125 fix exception when starting an embedding training -5f6e64d embedding training documentation -8bb0ab6 fix a parameter issue when loading ckpt models -12012f6 update diffusers to solve issues when loading ckpt models -38939e8 fix missing weight_dtype parameters when loading lora models -2ed9298 fix being unable to click the prompt source button -48a751c remove debug stacktrace messages -b9c4876 add an additional state_dict wrapper around exported ckpt files -f4138c1 fix possibly high memory consumption during caching -c9cf1f4 various SDXL fixes -195128a positional encodings for SDXL training -31c2c4d caching improvements for SDXL -cd18f92 training on different weight data types -6165de3 support for loading any SDXL model -f4f3a46 support for saving single file SDXL checkpoints -59a325c work in progress on SDXL LoRA training -10261e6 fix double . 
character when saving state dict of sd 1.5 text encoders -8b224eb fix for training on prompts from filenames -dbd75bf quick start guide -9b79e8a update mgds -3d0fb23 Merge branch 'ema' -573f5c1 sampling code cleanup -fc37ced fix for the batch size label -96571f9 EMA support for all training methods -9244bb4 kandinsky saving -65ebd2e ui tooltips -46eb760 kandinsky lora training -eb72179 support for loading kandinsky models -9956128 bitsandbytes 8 bit optimizers -e78fd28 support for nested datasets -7466cbd trim leading and trailing whitespace from paths -a5c818e xformers update -31c71cc support for stable diffusion v2 -d178840 preset cleanup -344b4ae readme update -2db57c6 typo -471cca8 custom sampling code for more control over different parts of the calculations -26321e5 refactorings and more data type/attention options -bdc80a0 refactorings and more data type/attention options -f94c224 fixed missing samples and concepts error on initial startup -8904e4e LoRA and Embedding training support in the UI -dfcf15f fixed an issue with incorrectly scaled conditioning images in the data loader -d5f4f7f clip skip support -2efbeae support for different learning rate schedulers -b48474d more image augmentation options -e42185d fixed being unable to restart training after stopping -c940c2b fixed tensorboard when running through start-ui.bat -801bfff one click scripts for installing, updating and starting the ui -0f1027d support for multiple prompts per sample, more data augmentation options, UI improvements and bug fixes -8db7fd2 update to pytorch 2.0 and bug fixes -35fe5d1 usable ui for fine tuning -f6e0c2b updated readme and docs for contributions -afb84fe ui for concept management -8179f6f more UI stuff -60888cd some initial UI work -3c7aa2b tensorboard integration for loss and sample tracking -1c941d4 discord link -c5fc803 lora training -4acd7a7 different learning rates for unet and text encoder, cache only option -42f0fc4 add a license -e6a696c readme update -73e52e7 embedding training -ab0c165 fix gradient accumulation (for real this time) -9c08419 fix gradient accumulation -977cdc6 mgds update -5325fc5 basic vae training -89e6388 add a backup, restore and continue functionality -57c57d2 update mgds -5004964 add proper support for masked training -6d52aa9 readme -bbce76a many fixes -5130394 initial commit From c41077e67c9c3e233f10297c59b0dcdc5e2456c7 Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 3 Sep 2024 19:37:31 +0900 Subject: [PATCH 09/17] Translate to English. 
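The translated notebook cells drive a training run straight from a JSON config. For reference, a minimal script-form sketch of the same flow is below; the module import paths and the config path are assumptions based on the names used in the notebook, and dependencies are assumed to be installed beforehand with pip install -r requirements.txt.

import json
import os

# Import paths assumed from OneTrainer's module layout; adjust if they differ.
from modules.trainer.GenericTrainer import GenericTrainer
from modules.util.callbacks.TrainCallbacks import TrainCallbacks
from modules.util.commands.TrainCommands import TrainCommands
from modules.util.config.TrainConfig import TrainConfig

config_path = "config/train.json"  # placeholder; any exported TrainConfig JSON

# Load the training configuration the same way the notebook cell does.
train_config = TrainConfig.default_values()
with open(config_path, "r") as f:
    train_config.from_dict(json.load(f))

# Create the directories referenced by the config if they do not exist yet.
for dir_path in [train_config.debug_dir, train_config.workspace_dir, train_config.cache_dir]:
    os.makedirs(dir_path, exist_ok=True)

trainer = GenericTrainer(train_config, TrainCallbacks(), TrainCommands())
trainer.start()
try:
    trainer.train()
finally:
    trainer.end()
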
--- modules/model/StableDiffusionXLModel.py | 10 +++---- train.ipynb | 38 ++++++++++++------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/modules/model/StableDiffusionXLModel.py b/modules/model/StableDiffusionXLModel.py index ba610147..6af4380d 100644 --- a/modules/model/StableDiffusionXLModel.py +++ b/modules/model/StableDiffusionXLModel.py @@ -219,10 +219,10 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): if chunk.numel() == 0: continue - # アテンションマスクの作成(1がマスクしない、0がマスクする) + # Create attention mask (1 for non-masked, 0 for masked) attention_mask = torch.ones_like(chunk, dtype=torch.bool) - # まず、BOSとEOSを追加 + # First, add BOS and EOS tokens bos_tokens = torch.full((chunk.shape[0], 1), tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) eos_tokens = torch.full((chunk.shape[0], 1), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) @@ -231,8 +231,8 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): torch.zeros_like(eos_tokens, dtype=torch.bool) if i < len(chunks) - 1 else torch.ones_like(eos_tokens, dtype=torch.bool)], dim=1) - # パディングで埋める - if chunk.shape[1] < chunk_length + 2: # +2 はBOSとEOSのため + # Fill with padding + if chunk.shape[1] < chunk_length + 2: # +2 is for BOS and EOS padding = torch.full((chunk.shape[0], chunk_length + 2 - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) chunk = torch.cat([chunk, padding], dim=1) attention_mask = torch.cat([attention_mask, torch.zeros_like(padding, dtype=torch.bool)], dim=1) @@ -290,7 +290,7 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): text_encoder_2_output, pooled_text_encoder_2_output = __process_tokens(tokens_2, self.tokenizer_2, self.text_encoder_2, text_encoder_2_layer_skip) if text_encoder_1_output is None or text_encoder_2_output is None: - print("両方のテキストエンコーダーの出力がNoneです。入力テキストまたはトークンを確認してください。") + print("Both text encoder outputs are None. Check your input text or tokens.") text_encoder_output = torch.cat([text_encoder_1_output, text_encoder_2_output], dim=-1) diff --git a/train.ipynb b/train.ipynb index 7ab81b45..78d0716b 100644 --- a/train.ipynb +++ b/train.ipynb @@ -23,26 +23,26 @@ }, "outputs": [], "source": [ - "# 依存関係のインストール\n", + "# Install dependencies\n", "!pip -r requirements.txt\n", "\n", - "# CUDAの確認\n", + "# Check CUDA availability\n", "import torch\n", "cuda_available = torch.cuda.is_available()\n", "print(f\"CUDA is {'available' if cuda_available else 'not available'}\")\n", "\n", "if not cuda_available:\n", - " use_zluda = input(\"CUDAが利用できません。WindowsでAMD GPUを使用していますか? (y/n) \")\n", + " use_zluda = input(\"CUDA is not available. Are you using an AMD GPU on Windows? 
(y/n) \")\n", " if use_zluda.lower() == 'y':\n", - " print(\"ZLUDAのインストールを続行します\")\n", + " print(\"Proceeding with ZLUDA installation\")\n", " %run scripts/install_zluda.py\n", " else:\n", - " print(\"エラー:インストール中に問題が発生しました\")\n", + " print(\"Error: An issue occurred during installation\")\n", "else:\n", - " print(\"インストールが完了しました\")\n", + " print(\"Installation completed\")\n", "\n", - "# 注意:この環境では%pipを使用していますが、\n", - "# 通常のコマンドラインでは!pipを使用することに注意してください" + "# Note: This environment uses %pip,\n", + "# but remember to use !pip in regular command line" ] }, { @@ -59,10 +59,10 @@ }, "outputs": [], "source": [ - "# 必要なライブラリのインポート\n", + "# Import necessary libraries\n", "import os\n", "\n", - "# mgdsをリポジトリにcloneした場合\n", + "# If mgds is cloned to the repository\n", "# import sys\n", "# sys.path.append('mgds/src')\n", "\n", @@ -84,25 +84,25 @@ }, "outputs": [], "source": [ - "# TrainConfigのインスタンスを作成\n", + "# Create an instance of TrainConfig\n", "train_config = TrainConfig.default_values()\n", "with open(config_path, \"r\") as f:\n", " train_config.from_dict(json.load(f))\n", "\n", - "# userwarningを表示しない\n", + "# Suppress user warnings\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", - "# TrainConfigで読み込んだ\"debug_dir\",\"workspace_dir\",cache_dir\"が存在しなければ作成\n", + "# Create directories loaded in TrainConfig if they don't exist\n", "for dir_path in [train_config.debug_dir, train_config.workspace_dir, train_config.cache_dir]:\n", " if not os.path.exists(dir_path):\n", " os.makedirs(dir_path)\n", "\n", - "# コールバックとコマンドの設定\n", + "# Set up callbacks and commands\n", "callbacks = TrainCallbacks()\n", "commands = TrainCommands()\n", "\n", - "# トレーニングプロセスの開始\n", + "# Start the training process\n", "print(\"Destination_path: \", train_config.output_model_destination)\n", "print(\"Workspace_path: \", train_config.workspace_dir)\n", "print(\"Debug_path: \", train_config.debug_dir)\n", @@ -112,14 +112,14 @@ "trainer.start()\n", "\n", "try:\n", - " # トレーニングの実行\n", + " # Execute training\n", " trainer.train()\n", "except Exception as e:\n", - " print(f\"トレーニング中にエラーが発生しました: {e}\")\n", + " print(f\"An error occurred during training: {e}\")\n", "finally:\n", - " # トレーニング終了時の処理\n", + " # Process at the end of training\n", " trainer.end()\n", - " print(\"トレーニングが終了しました\")" + " print(\"Training has completed\")" ] } ], From 4b18e793339b34bda2f8b8e19130c92c0c854dc6 Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 3 Sep 2024 23:46:09 +0900 Subject: [PATCH 10/17] Unlock Flux Finetune. 
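This exposes the existing FINE_TUNE method for Flux models in the TopBar dropdown. The same choice can be made when building a config programmatically; a small hedged sketch follows. The enum import paths and the training_method attribute name are assumptions based on OneTrainer's layout, while model_type.is_flux() and ModelType.FLUX_DEV_1 appear elsewhere in this series.

# Hedged sketch: selecting Flux fine-tuning without going through the UI.
from modules.util.config.TrainConfig import TrainConfig      # path assumed
from modules.util.enum.ModelType import ModelType            # path assumed
from modules.util.enum.TrainingMethod import TrainingMethod  # path assumed

train_config = TrainConfig.default_values()
train_config.model_type = ModelType.FLUX_DEV_1
train_config.training_method = TrainingMethod.FINE_TUNE  # newly selectable for Flux

assert train_config.model_type.is_flux()
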
--- modules/ui/TopBar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ui/TopBar.py b/modules/ui/TopBar.py index dc7c0739..76890db5 100644 --- a/modules/ui/TopBar.py +++ b/modules/ui/TopBar.py @@ -115,6 +115,7 @@ def __create_training_method(self): ] elif self.train_config.model_type.is_flux(): values = [ + ("Fine Tune", TrainingMethod.FINE_TUNE), ("LoRA", TrainingMethod.LORA), ("Embedding", TrainingMethod.EMBEDDING), ] From 0d72d61aca655679b5008ed64f6c11e4c6454fda Mon Sep 17 00:00:00 2001 From: celll1 Date: Thu, 5 Sep 2024 01:34:51 +0900 Subject: [PATCH 11/17] Tokenizer code is moved to clip_util.py --- modules/model/FluxModel.py | 11 +- modules/model/StableDiffusionModel.py | 64 ++--------- modules/model/StableDiffusionXLModel.py | 106 +++++------------- modules/model/util/clip_util.py | 63 ++++++++--- .../modelLoader/FluxFineTuneModelLoader.py | 3 +- 5 files changed, 98 insertions(+), 149 deletions(-) diff --git a/modules/model/FluxModel.py b/modules/model/FluxModel.py index 84884582..ac7cf1b1 100644 --- a/modules/model/FluxModel.py +++ b/modules/model/FluxModel.py @@ -214,9 +214,8 @@ def encode_text( if tokens_1 is None and text is not None and self.tokenizer_1 is not None: tokenizer_output = self.tokenizer_1( text, - padding='max_length', - truncation=True, - max_length=77, + # padding='max_length', + truncation=False, return_tensors="pt", ) tokens_1 = tokenizer_output.input_ids.to(self.text_encoder_1.device) @@ -224,9 +223,9 @@ def encode_text( if tokens_2 is None and text is not None and self.tokenizer_2 is not None: tokenizer_output = self.tokenizer_2( text, - padding='max_length', + # padding='max_length', truncation=True, - max_length=77, + max_length=4096, return_tensors="pt", ) tokens_2 = tokenizer_output.input_ids.to(self.text_encoder_2.device) @@ -241,7 +240,7 @@ def encode_text( text_encoder_output=None, add_pooled_output=True, pooled_text_encoder_output=pooled_text_encoder_1_output, - use_attention_mask=False, + use_attention_mask=True, ) if pooled_text_encoder_1_output is None: pooled_text_encoder_1_output = torch.zeros( diff --git a/modules/model/StableDiffusionModel.py b/modules/model/StableDiffusionModel.py index 867633a9..c553040c 100644 --- a/modules/model/StableDiffusionModel.py +++ b/modules/model/StableDiffusionModel.py @@ -222,64 +222,24 @@ def encode_text( text_encoder_layer_skip: int = 0, text_encoder_output: Tensor | None = None, ): - chunk_length = 75 - max_embeddings_multiples = 3 - - def __process_tokens(tokens): - if tokens is None or tokens.numel() == 0: - return None - - chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] - chunk_embeddings = [] - - for chunk in chunks: - if chunk.numel() == 0: - continue - - if chunk.shape[1] < chunk_length: - padding = torch.full((chunk.shape[0], chunk_length - chunk.shape[1]), self.tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([chunk, padding], dim=1) - - bos_tokens = torch.full((chunk.shape[0], 1), self.tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) - eos_tokens = torch.full((chunk.shape[0], 1), self.tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) - - with self.autocast_context: - embedding, _ = encode_clip( - text_encoder=self.text_encoder, - tokens=chunk, - default_layer=-1, - layer_skip=text_encoder_layer_skip, - text_encoder_output=None, - add_pooled_output=False, - use_attention_mask=False, - add_layer_norm=True, - ) - - 
chunk_embeddings.append(embedding) - - if not chunk_embeddings: - return None - - if len(chunk_embeddings) > max_embeddings_multiples: - chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] - - combined_embedding = torch.cat(chunk_embeddings, dim=1) - - return combined_embedding - if tokens is None: tokenizer_output = self.tokenizer( text, - padding='max_length', + padding="max_length", truncation=False, return_tensors="pt", ) tokens = tokenizer_output.input_ids.to(self.text_encoder.device) - text_encoder_output = __process_tokens(tokens) - - if text_encoder_output is None: - print("Text encoder output is None. Check your input text or tokens.") + text_encoder_output, _ = encode_clip( + text_encoder=self.text_encoder, + tokens=tokens, + default_layer=-1, + layer_skip=text_encoder_layer_skip, + text_encoder_output=text_encoder_output, + add_pooled_output=False, + use_attention_mask=True, + add_layer_norm=True, + ) - return text_encoder_output + return text_encoder_output \ No newline at end of file diff --git a/modules/model/StableDiffusionXLModel.py b/modules/model/StableDiffusionXLModel.py index 6af4380d..573c0e69 100644 --- a/modules/model/StableDiffusionXLModel.py +++ b/modules/model/StableDiffusionXLModel.py @@ -203,95 +203,47 @@ def encode_text( text_encoder_2_output: Tensor = None, pooled_text_encoder_2_output: Tensor = None, ): - chunk_length = 75 - max_embeddings_multiples = 3 - - def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): - if tokens is None or tokens.numel() == 0: - return None, None - - chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] - chunk_embeddings = [] - pooled_outputs = [] - attention_masks = [] - - for i, chunk in enumerate(chunks): - if chunk.numel() == 0: - continue - - # Create attention mask (1 for non-masked, 0 for masked) - attention_mask = torch.ones_like(chunk, dtype=torch.bool) - - # First, add BOS and EOS tokens - bos_tokens = torch.full((chunk.shape[0], 1), tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) - eos_tokens = torch.full((chunk.shape[0], 1), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) - attention_mask = torch.cat([torch.zeros_like(bos_tokens, dtype=torch.bool) if i > 0 else torch.ones_like(bos_tokens, dtype=torch.bool), - attention_mask, - torch.zeros_like(eos_tokens, dtype=torch.bool) if i < len(chunks) - 1 else torch.ones_like(eos_tokens, dtype=torch.bool)], - dim=1) - - # Fill with padding - if chunk.shape[1] < chunk_length + 2: # +2 is for BOS and EOS - padding = torch.full((chunk.shape[0], chunk_length + 2 - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([chunk, padding], dim=1) - attention_mask = torch.cat([attention_mask, torch.zeros_like(padding, dtype=torch.bool)], dim=1) - - attention_masks.append(attention_mask) - - with self.autocast_context: - outputs = text_encoder( - chunk, - attention_mask=attention_mask, - output_hidden_states=True, - return_dict=True, - ) - embedding = outputs.hidden_states[-(2 + layer_skip)] - if hasattr(outputs, 'text_embeds'): - pooled_outputs.append(outputs.text_embeds) - - chunk_embeddings.append(embedding) - - if not chunk_embeddings: - return None, None - - if len(chunk_embeddings) > max_embeddings_multiples: - chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] - attention_masks = attention_masks[:max_embeddings_multiples] - if pooled_outputs: - pooled_outputs = 
pooled_outputs[:max_embeddings_multiples] - - combined_embedding = torch.cat(chunk_embeddings, dim=1) - # combined_attention_mask = torch.cat(attention_masks, dim=1) - pooled_output = pooled_outputs[0] if pooled_outputs else None - - return combined_embedding, pooled_output - if tokens_1 is None and text is not None: - tokens_1 = self.tokenizer_1( + tokenizer_output = self.tokenizer_1( text, padding='max_length', truncation=False, return_tensors="pt", - ).input_ids.to(self.text_encoder_1.device) + ) + tokens_1 = tokenizer_output.input_ids.to(self.text_encoder_1.device) if tokens_2 is None and text is not None: - tokens_2 = self.tokenizer_2( + tokenizer_output = self.tokenizer_2( text, padding='max_length', truncation=False, return_tensors="pt", - ).input_ids.to(self.text_encoder_2.device) - - if text_encoder_1_output is None: - text_encoder_1_output, _ = __process_tokens(tokens_1, self.tokenizer_1, self.text_encoder_1, text_encoder_1_layer_skip) + ) + tokens_2 = tokenizer_output.input_ids.to(self.text_encoder_2.device) - if text_encoder_2_output is None or pooled_text_encoder_2_output is None: - text_encoder_2_output, pooled_text_encoder_2_output = __process_tokens(tokens_2, self.tokenizer_2, self.text_encoder_2, text_encoder_2_layer_skip) + text_encoder_1_output, _ = encode_clip( + text_encoder=self.text_encoder_1, + tokens=tokens_1, + default_layer=-2, + layer_skip=text_encoder_1_layer_skip, + text_encoder_output=text_encoder_1_output, + add_pooled_output=False, + use_attention_mask=True, + add_layer_norm=False, + ) - if text_encoder_1_output is None or text_encoder_2_output is None: - print("Both text encoder outputs are None. Check your input text or tokens.") + text_encoder_2_output, pooled_text_encoder_2_output = encode_clip( + text_encoder=self.text_encoder_2, + tokens=tokens_2, + default_layer=-2, + layer_skip=text_encoder_2_layer_skip, + text_encoder_output=text_encoder_2_output, + add_pooled_output=True, + pooled_text_encoder_output=pooled_text_encoder_2_output, + use_attention_mask=True, + add_layer_norm=False, + ) - text_encoder_output = torch.cat([text_encoder_1_output, text_encoder_2_output], dim=-1) + text_encoder_output = torch.concat([text_encoder_1_output, text_encoder_2_output], dim=-1) - return text_encoder_output, pooled_text_encoder_2_output + return text_encoder_output, pooled_text_encoder_2_output \ No newline at end of file diff --git a/modules/model/util/clip_util.py b/modules/model/util/clip_util.py index 697bc0f0..c6695ba9 100644 --- a/modules/model/util/clip_util.py +++ b/modules/model/util/clip_util.py @@ -1,4 +1,5 @@ from torch import Tensor +import torch from transformers import CLIPTextModel, CLIPTextModelWithProjection @@ -16,28 +17,64 @@ def encode_clip( attention_mask: Tensor | None = None, add_layer_norm: bool = True, ) -> tuple[Tensor, Tensor]: - if (add_output and text_encoder_output is None) \ - or (add_pooled_output and pooled_text_encoder_output is None) \ - and text_encoder is not None: + chunk_length = 75 + max_embeddings_multiples = 3 - text_encoder_output = text_encoder( - tokens, - attention_mask=attention_mask if use_attention_mask else None, + if tokens is None or tokens.numel() == 0: + return None, None + + chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] + chunk_embeddings = [] if add_output else None + pooled_outputs = [] if add_pooled_output else None + + for i, chunk in enumerate(chunks): + if chunk.numel() == 0: + continue + + # Create attention mask (1 for non-masked, 0 for masked) + 
chunk_attention_mask = torch.ones_like(chunk, dtype=torch.bool) + + # First, add BOS and EOS tokens + bos_tokens = torch.full((chunk.shape[0], 1), text_encoder.config.bos_token_id, dtype=chunk.dtype, device=chunk.device) + eos_tokens = torch.full((chunk.shape[0], 1), text_encoder.config.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) + chunk_attention_mask = torch.cat([torch.zeros_like(bos_tokens, dtype=torch.bool) if i > 0 else torch.ones_like(bos_tokens, dtype=torch.bool), + chunk_attention_mask, + torch.zeros_like(eos_tokens, dtype=torch.bool) if i < len(chunks) - 1 else torch.ones_like(eos_tokens, dtype=torch.bool)], + dim=1) + + # Fill with padding + if chunk.shape[1] < chunk_length + 2: # +2 is for BOS and EOS + padding = torch.full((chunk.shape[0], chunk_length + 2 - chunk.shape[1]), text_encoder.config.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([chunk, padding], dim=1) + chunk_attention_mask = torch.cat([chunk_attention_mask, torch.zeros_like(padding, dtype=torch.bool)], dim=1) + + outputs = text_encoder( + chunk, + attention_mask=chunk_attention_mask if use_attention_mask else None, return_dict=True, output_hidden_states=True, ) + + if add_output: + embedding = outputs.hidden_states[default_layer - layer_skip] + chunk_embeddings.append(embedding) - pooled_text_encoder_output = None if add_pooled_output: if hasattr(text_encoder_output, "text_embeds"): - pooled_text_encoder_output = text_encoder_output.text_embeds + pooled_outputs.append(text_encoder_output.text_embeds) if hasattr(text_encoder_output, "pooler_output"): - pooled_text_encoder_output = text_encoder_output.pooler_output + pooled_outputs.append(text_encoder_output.pooler_output) - text_encoder_output = text_encoder_output.hidden_states[default_layer - layer_skip] if add_output else None + if chunk_embeddings is not None and len(chunk_embeddings) > max_embeddings_multiples: + chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + if pooled_outputs is not None and len(pooled_outputs) > max_embeddings_multiples: + pooled_outputs = pooled_outputs[:max_embeddings_multiples] + text_encoder_output = torch.cat(chunk_embeddings, dim=1) if chunk_embeddings is not None else None + pooled_text_encoder_output = pooled_outputs[0] if pooled_outputs else None - if add_layer_norm and text_encoder_output is not None: - final_layer_norm = text_encoder.text_model.final_layer_norm - text_encoder_output = final_layer_norm(text_encoder_output) + if add_layer_norm and text_encoder_output is not None: + final_layer_norm = text_encoder.text_model.final_layer_norm + text_encoder_output = final_layer_norm(text_encoder_output) return text_encoder_output, pooled_text_encoder_output diff --git a/modules/modelLoader/FluxFineTuneModelLoader.py b/modules/modelLoader/FluxFineTuneModelLoader.py index eb6be87b..db655270 100644 --- a/modules/modelLoader/FluxFineTuneModelLoader.py +++ b/modules/modelLoader/FluxFineTuneModelLoader.py @@ -23,7 +23,8 @@ def _default_model_spec_name( ) -> str | None: match model_type: case ModelType.FLUX_DEV_1: - return "resources/sd_model_spec/flux_dev_1.0.json" + # return "resources/sd_model_spec/flux_dev_1.0.json" + return None case _: return None From e157d7586b02ff6cf7d634690a3bcceaac13ba04 Mon Sep 17 00:00:00 2001 From: celll1 Date: Thu, 5 Sep 2024 23:01:18 +0900 Subject: [PATCH 12/17] Fix: clip_util.py. 
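With this fix, encode_clip builds the per-chunk attention mask, reads the pooled output from each chunk's outputs object, and only concatenates and layer-norms the chunk embeddings when add_output is set. A standalone sketch of calling the chunked helper on a long prompt is below; the checkpoint name is illustrative, and the keyword arguments mirror the SD 1.5 call site in StableDiffusionModel.py.

# Hedged sketch: exercising the chunked encode_clip on a prompt longer than 77 tokens.
import torch
from transformers import CLIPTextModel, CLIPTokenizer

from modules.model.util.clip_util import encode_clip

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")    # illustrative checkpoint
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")

tokens = tokenizer("a long prompt " * 60, truncation=False, return_tensors="pt").input_ids

with torch.no_grad():
    embedding, pooled = encode_clip(
        text_encoder=text_encoder,
        tokens=tokens,
        default_layer=-1,
        layer_skip=0,
        add_pooled_output=False,
        use_attention_mask=True,
        add_layer_norm=True,
    )

# chunk_length = 75 plus BOS/EOS gives 77 tokens per chunk; with
# max_embeddings_multiples = 3 the concatenated sequence is capped at 231 tokens,
# the "under 231" limit named in the first patch of this series.
print(embedding.shape)  # e.g. torch.Size([1, 231, 768]) for a three-chunk prompt
print(pooled)           # None here, because add_pooled_output=False
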
--- modules/model/FluxModel.py | 4 ++-- modules/model/util/clip_util.py | 39 +++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/modules/model/FluxModel.py b/modules/model/FluxModel.py index ac7cf1b1..42e24824 100644 --- a/modules/model/FluxModel.py +++ b/modules/model/FluxModel.py @@ -224,8 +224,8 @@ def encode_text( tokenizer_output = self.tokenizer_2( text, # padding='max_length', - truncation=True, - max_length=4096, + truncation=False, + max_length=99999999, return_tensors="pt", ) tokens_2 = tokenizer_output.input_ids.to(self.text_encoder_2.device) diff --git a/modules/model/util/clip_util.py b/modules/model/util/clip_util.py index c6695ba9..e2f5c3ac 100644 --- a/modules/model/util/clip_util.py +++ b/modules/model/util/clip_util.py @@ -24,8 +24,8 @@ def encode_clip( return None, None chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] - chunk_embeddings = [] if add_output else None - pooled_outputs = [] if add_pooled_output else None + chunk_embeddings = [] + pooled_outputs = [] for i, chunk in enumerate(chunks): if chunk.numel() == 0: @@ -59,22 +59,27 @@ def encode_clip( if add_output: embedding = outputs.hidden_states[default_layer - layer_skip] chunk_embeddings.append(embedding) - if add_pooled_output: - if hasattr(text_encoder_output, "text_embeds"): - pooled_outputs.append(text_encoder_output.text_embeds) - if hasattr(text_encoder_output, "pooler_output"): - pooled_outputs.append(text_encoder_output.pooler_output) + if hasattr(outputs, "text_embeds"): + pooled_outputs.append(outputs.text_embeds) + elif hasattr(outputs, "pooler_output"): + pooled_outputs.append(outputs.pooler_output) - if chunk_embeddings is not None and len(chunk_embeddings) > max_embeddings_multiples: - chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] - if pooled_outputs is not None and len(pooled_outputs) > max_embeddings_multiples: - pooled_outputs = pooled_outputs[:max_embeddings_multiples] - text_encoder_output = torch.cat(chunk_embeddings, dim=1) if chunk_embeddings is not None else None - pooled_text_encoder_output = pooled_outputs[0] if pooled_outputs else None + if add_output: + if chunk_embeddings and len(chunk_embeddings) > max_embeddings_multiples: + chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + text_encoder_output = torch.cat(chunk_embeddings, dim=1) + if add_layer_norm: + final_layer_norm = text_encoder.text_model.final_layer_norm + text_encoder_output = final_layer_norm(text_encoder_output) + else: + text_encoder_output = None - if add_layer_norm and text_encoder_output is not None: - final_layer_norm = text_encoder.text_model.final_layer_norm - text_encoder_output = final_layer_norm(text_encoder_output) + if add_pooled_output: + if pooled_outputs and len(pooled_outputs) > max_embeddings_multiples: + pooled_outputs = pooled_outputs[:max_embeddings_multiples] + pooled_text_encoder_output = pooled_outputs[0] if pooled_outputs else None + else: + pooled_text_encoder_output = None - return text_encoder_output, pooled_text_encoder_output + return text_encoder_output, pooled_text_encoder_output \ No newline at end of file From 795b3835e2806450d36eee1eb9609df635991003 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sun, 8 Sep 2024 19:05:00 +0900 Subject: [PATCH 13/17] fix: attention mask device. 
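torch.ones_like already inherits its input's device, and the torch.full calls in this helper pass device=chunk.device explicitly, so this change mostly documents the intent that every mask chunk stays on the text encoder's device. A quick hedged self-check of that behaviour:

# torch.ones_like inherits the input's device by default; device=chunk.device
# only makes that explicit. Vocabulary size 49408 is illustrative (CLIP).
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
chunk = torch.randint(0, 49408, (1, 75), device=device)

mask_default = torch.ones_like(chunk, dtype=torch.bool)
mask_explicit = torch.ones_like(chunk, dtype=torch.bool, device=chunk.device)

assert mask_default.device == mask_explicit.device == chunk.device
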
--- modules/model/util/clip_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/model/util/clip_util.py b/modules/model/util/clip_util.py index e2f5c3ac..1856b096 100644 --- a/modules/model/util/clip_util.py +++ b/modules/model/util/clip_util.py @@ -32,7 +32,7 @@ def encode_clip( continue # Create attention mask (1 for non-masked, 0 for masked) - chunk_attention_mask = torch.ones_like(chunk, dtype=torch.bool) + chunk_attention_mask = torch.ones_like(chunk, dtype=torch.bool, device=chunk.device) # First, add BOS and EOS tokens bos_tokens = torch.full((chunk.shape[0], 1), text_encoder.config.bos_token_id, dtype=chunk.dtype, device=chunk.device) From 72c0e1258c48c45a7a69cbccf7e239dbd4b62ad8 Mon Sep 17 00:00:00 2001 From: celll1 Date: Mon, 9 Sep 2024 17:42:36 +0900 Subject: [PATCH 14/17] Fix: Accelerate launch. --- start-ui.bat | 13 ++++++++++--- start-ui.sh | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/start-ui.bat b/start-ui.bat index 708ae2a5..b666fca9 100644 --- a/start-ui.bat +++ b/start-ui.bat @@ -11,12 +11,19 @@ goto :end :activate_venv echo activating venv %VENV_DIR% -set PYTHON="%VENV_DIR%\Scripts\python.exe" +call "%VENV_DIR%\Scripts\activate.bat" +echo venv activated: %VENV_DIR% + +set PYTHON=python if defined PROFILE (set PYTHON=%PYTHON% -m scalene --off --cpu --gpu --profile-all --no-browser) echo Using Python %PYTHON% :launch -%PYTHON% scripts\train_ui.py +accelerate launch scripts\train_ui.py +if %ERRORLEVEL% NEQ 0 ( + echo Failed to launch with accelerate. Launching with regular Python. + %PYTHON% scripts\train_ui.py +) :end -pause +pause \ No newline at end of file diff --git a/start-ui.sh b/start-ui.sh index b2029dd1..fd47d51e 100755 --- a/start-ui.sh +++ b/start-ui.sh @@ -37,16 +37,16 @@ elif [ -x "$(command -v python)" ]; then if [[ -z "$VIRTUAL_ENV" ]]; then echo "warning: No VIRTUAL_ENV set. exiting." else - python scripts/train_ui.py + accelerate launch scripts/train_ui.py || python scripts/train_ui.py fi elif [ -x "$(command -v conda)" ]; then #check for venv if conda info --envs | grep -q ${conda_env}; then - bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; python scripts/train_ui.py") + bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || python scripts/train_ui.py") else conda create -y -n $conda_env python==3.10; - bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; python scripts/train_ui.py") + bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || python scripts/train_ui.py") fi fi else From db9f2436752097cdc8ba66c3823dd8f81d3e9476 Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 10 Sep 2024 01:20:10 +0900 Subject: [PATCH 15/17] Fix: Accelerate launch 2. 
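find_unused_parameters=True is needed once the trainer can run modules whose parameters do not all receive gradients every step; without it, DDP raises an error when some parameters that require gradients do not participate in a backward pass. A minimal hedged sketch of the Accelerator construction used from this patch on:

# Sketch of the Accelerator setup introduced here, using accelerate's public API.
from accelerate import Accelerator, DistributedDataParallelKwargs

# Let DDP tolerate parameters that receive no gradient in a given step,
# at the cost of an extra traversal of the autograd graph per backward pass.
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])

print(accelerator.device)            # the device the trainer falls back to in a later patch in this series
print(accelerator.distributed_type)  # e.g. DistributedType.NO when started without accelerate launch
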
--- modules/trainer/GenericTrainer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py index 9fc5c8c7..5dcb298e 100644 --- a/modules/trainer/GenericTrainer.py +++ b/modules/trainer/GenericTrainer.py @@ -30,7 +30,7 @@ from modules.util.torch_util import torch_gc from modules.util.TrainProgress import TrainProgress -from accelerate import Accelerator +from accelerate import Accelerator, DistributedDataParallelKwargs import torch from torch import Tensor, nn @@ -63,7 +63,8 @@ class GenericTrainer(BaseTrainer): def __init__(self, config: TrainConfig, callbacks: TrainCallbacks, commands: TrainCommands): super(GenericTrainer, self).__init__(config, callbacks, commands) - self.accelerator = Accelerator() + ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) + self.accelerator = Accelerator(kwargs_handlers=[ddp_kwargs]) tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard") os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True) From 6c2c829b7ed0697b9531e5f7f7868d532e91affa Mon Sep 17 00:00:00 2001 From: celll1 Date: Fri, 13 Sep 2024 22:36:14 +0900 Subject: [PATCH 16/17] Fix: Accelerate launch 3. --- modules/trainer/GenericTrainer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py index 5dcb298e..cb4ac7c1 100644 --- a/modules/trainer/GenericTrainer.py +++ b/modules/trainer/GenericTrainer.py @@ -66,6 +66,13 @@ def __init__(self, config: TrainConfig, callbacks: TrainCallbacks, commands: Tra ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) self.accelerator = Accelerator(kwargs_handlers=[ddp_kwargs]) + if hasattr(self.accelerator, 'device') and self.accelerator.device: + print(f"Accelerator device: {self.accelerator.device.type}") + if hasattr(self.accelerator, 'distributed_type') and self.accelerator.distributed_type: + print(f"Distributed type: {self.accelerator.distributed_type}") + + print(f"if accelerator is not activated, using {torch.device(self.config.train_device)}") + tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard") os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True) self.tensorboard = SummaryWriter(os.path.join(tensorboard_log_dir, get_string_timestamp())) @@ -477,7 +484,7 @@ def __before_eval(self): self.model.optimizer.eval() def train(self): - train_device = torch.device(self.config.train_device) + train_device = self.accelerator.device if self.accelerator.device else torch.device(self.config.train_device) train_progress = self.model.train_progress From ca57444e550dbfa2d011f6e9a696ba5464039f9b Mon Sep 17 00:00:00 2001 From: celll1 Date: Mon, 4 Nov 2024 13:20:10 +0900 Subject: [PATCH 17/17] test --- start-ui.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/start-ui.sh b/start-ui.sh index fd47d51e..1263592f 100755 --- a/start-ui.sh +++ b/start-ui.sh @@ -17,7 +17,7 @@ fi if ! [ -x "$(command -v python)" ]; then echo 'error: python not installed or found!' - break + exit 1 elif [ -x "$(command -v python)" ]; then major=$(python -c 'import platform; major, minor, patch = platform.python_version_tuple(); print(major)') minor=$(python -c 'import platform; major, minor, patch = platform.python_version_tuple(); print(minor)') @@ -37,16 +37,16 @@ elif [ -x "$(command -v python)" ]; then if [[ -z "$VIRTUAL_ENV" ]]; then echo "warning: No VIRTUAL_ENV set. exiting." 
else - accelerate launch scripts/train_ui.py || python scripts/train_ui.py + accelerate launch scripts/train_ui.py || { echo "Failed to launch with accelerate. Falling back to python."; python scripts/train_ui.py; } fi elif [ -x "$(command -v conda)" ]; then #check for venv if conda info --envs | grep -q ${conda_env}; then - bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || python scripts/train_ui.py") + bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || { echo \"Failed to launch with accelerate. Falling back to python.\"; python scripts/train_ui.py; }") else conda create -y -n $conda_env python==3.10; - bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || python scripts/train_ui.py") + bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || { echo \"Failed to launch with accelerate. Falling back to python.\"; python scripts/train_ui.py; }") fi fi else