From 5b0a9d868620e5932fa17d3f96b344e46eff484b Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 27 Aug 2024 17:14:09 +0900 Subject: [PATCH 01/17] Implement of accelerate and long token (under 231). --- modules/model/StableDiffusionModel.py | 63 +++++++++++++---- modules/model/StableDiffusionXLModel.py | 94 ++++++++++++++++--------- modules/trainer/GenericTrainer.py | 13 +++- requirements-global.txt | 5 +- 4 files changed, 127 insertions(+), 48 deletions(-) diff --git a/modules/model/StableDiffusionModel.py b/modules/model/StableDiffusionModel.py index d704a778..867633a9 100644 --- a/modules/model/StableDiffusionModel.py +++ b/modules/model/StableDiffusionModel.py @@ -222,25 +222,64 @@ def encode_text( text_encoder_layer_skip: int = 0, text_encoder_output: Tensor | None = None, ): + chunk_length = 75 + max_embeddings_multiples = 3 + + def __process_tokens(tokens): + if tokens is None or tokens.numel() == 0: + return None + + chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] + chunk_embeddings = [] + + for chunk in chunks: + if chunk.numel() == 0: + continue + + if chunk.shape[1] < chunk_length: + padding = torch.full((chunk.shape[0], chunk_length - chunk.shape[1]), self.tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([chunk, padding], dim=1) + + bos_tokens = torch.full((chunk.shape[0], 1), self.tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) + eos_tokens = torch.full((chunk.shape[0], 1), self.tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) + + with self.autocast_context: + embedding, _ = encode_clip( + text_encoder=self.text_encoder, + tokens=chunk, + default_layer=-1, + layer_skip=text_encoder_layer_skip, + text_encoder_output=None, + add_pooled_output=False, + use_attention_mask=False, + add_layer_norm=True, + ) + + chunk_embeddings.append(embedding) + + if not chunk_embeddings: + return None + + if len(chunk_embeddings) > max_embeddings_multiples: + chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + + combined_embedding = torch.cat(chunk_embeddings, dim=1) + + return combined_embedding + if tokens is None: tokenizer_output = self.tokenizer( text, padding='max_length', - truncation=True, - max_length=77, + truncation=False, return_tensors="pt", ) tokens = tokenizer_output.input_ids.to(self.text_encoder.device) - text_encoder_output, _ = encode_clip( - text_encoder=self.text_encoder, - tokens=tokens, - default_layer=-1, - layer_skip=text_encoder_layer_skip, - text_encoder_output=text_encoder_output, - add_pooled_output=False, - use_attention_mask=False, - add_layer_norm=True, - ) + text_encoder_output = __process_tokens(tokens) + + if text_encoder_output is None: + print("Text encoder output is None. 
Check your input text or tokens.") return text_encoder_output diff --git a/modules/model/StableDiffusionXLModel.py b/modules/model/StableDiffusionXLModel.py index 09384f68..b44bd9c5 100644 --- a/modules/model/StableDiffusionXLModel.py +++ b/modules/model/StableDiffusionXLModel.py @@ -203,49 +203,79 @@ def encode_text( text_encoder_2_output: Tensor = None, pooled_text_encoder_2_output: Tensor = None, ): + chunk_length = 75 + max_embeddings_multiples = 3 + + def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): + if tokens is None or tokens.numel() == 0: + return None, None + + chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] + chunk_embeddings = [] + pooled_outputs = [] + + for chunk in chunks: + if chunk.numel() == 0: + continue + + if chunk.shape[1] < chunk_length: + padding = torch.full((chunk.shape[0], chunk_length - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([chunk, padding], dim=1) + + bos_tokens = torch.full((chunk.shape[0], 1), tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) + eos_tokens = torch.full((chunk.shape[0], 1), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) + + with self.autocast_context: + outputs = text_encoder( + chunk, + output_hidden_states=True, + return_dict=True, + ) + embedding = outputs.hidden_states[-(2 + layer_skip)] + if hasattr(outputs, 'text_embeds'): + pooled_outputs.append(outputs.text_embeds) + + chunk_embeddings.append(embedding) + + if not chunk_embeddings: + return None, None + + if len(chunk_embeddings) > max_embeddings_multiples: + chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + if pooled_outputs: + pooled_outputs = pooled_outputs[:max_embeddings_multiples] + + combined_embedding = torch.cat(chunk_embeddings, dim=1) + pooled_output = pooled_outputs[0] if pooled_outputs else None + + return combined_embedding, pooled_output + if tokens_1 is None and text is not None: - tokenizer_output = self.tokenizer_1( + tokens_1 = self.tokenizer_1( text, padding='max_length', - truncation=True, - max_length=77, + truncation=False, return_tensors="pt", - ) - tokens_1 = tokenizer_output.input_ids.to(self.text_encoder_1.device) + ).input_ids.to(self.text_encoder_1.device) if tokens_2 is None and text is not None: - tokenizer_output = self.tokenizer_2( + tokens_2 = self.tokenizer_2( text, padding='max_length', - truncation=True, - max_length=77, + truncation=False, return_tensors="pt", - ) - tokens_2 = tokenizer_output.input_ids.to(self.text_encoder_2.device) + ).input_ids.to(self.text_encoder_2.device) - text_encoder_1_output, _ = encode_clip( - text_encoder=self.text_encoder_1, - tokens=tokens_1, - default_layer=-2, - layer_skip=text_encoder_1_layer_skip, - text_encoder_output=text_encoder_1_output, - add_pooled_output=False, - use_attention_mask=False, - add_layer_norm=False, - ) + if text_encoder_1_output is None: + text_encoder_1_output, _ = __process_tokens(tokens_1, self.tokenizer_1, self.text_encoder_1, text_encoder_1_layer_skip) - text_encoder_2_output, pooled_text_encoder_2_output = encode_clip( - text_encoder=self.text_encoder_2, - tokens=tokens_2, - default_layer=-2, - layer_skip=text_encoder_2_layer_skip, - text_encoder_output=text_encoder_2_output, - add_pooled_output=True, - pooled_text_encoder_output=pooled_text_encoder_2_output, - use_attention_mask=False, - add_layer_norm=False, - ) + if text_encoder_2_output is None or 
pooled_text_encoder_2_output is None: + text_encoder_2_output, pooled_text_encoder_2_output = __process_tokens(tokens_2, self.tokenizer_2, self.text_encoder_2, text_encoder_2_layer_skip) + + if text_encoder_1_output is None or text_encoder_2_output is None: + print("Both text encoder outputs are None. Check your input text or tokens.") - text_encoder_output = torch.concat([text_encoder_1_output, text_encoder_2_output], dim=-1) + text_encoder_output = torch.cat([text_encoder_1_output, text_encoder_2_output], dim=-1) return text_encoder_output, pooled_text_encoder_2_output diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py index 78ebe641..5ff41e36 100644 --- a/modules/trainer/GenericTrainer.py +++ b/modules/trainer/GenericTrainer.py @@ -29,6 +29,8 @@ from modules.util.torch_util import torch_gc from modules.util.TrainProgress import TrainProgress +from accelerate import Accelerator + import torch from torch import Tensor, nn from torch.nn import Parameter @@ -61,6 +63,8 @@ class GenericTrainer(BaseTrainer): def __init__(self, config: TrainConfig, callbacks: TrainCallbacks, commands: TrainCommands): super(GenericTrainer, self).__init__(config, callbacks, commands) + self.accelerator = Accelerator() + tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard") os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True) self.tensorboard = SummaryWriter(os.path.join(tensorboard_log_dir, get_string_timestamp())) @@ -137,6 +141,11 @@ def start(self): self.data_loader = self.create_data_loader( self.model, self.model.train_progress ) + + self.model, self.data_loader = self.accelerator.prepare( + self.model, self.data_loader + ) + self.model_saver = self.create_model_saver() self.model_sampler = self.create_model_sampler(self.model) @@ -578,9 +587,9 @@ def sample_commands_fun(): loss = loss / self.config.gradient_accumulation_steps if scaler: - scaler.scale(loss).backward() + self.accelerator.backward(scaler.scale(loss)) else: - loss.backward() + self.accelerator.backward(loss) has_gradient = True accumulated_loss += loss.item() diff --git a/requirements-global.txt b/requirements-global.txt index 60f28732..0df71008 100644 --- a/requirements-global.txt +++ b/requirements-global.txt @@ -11,11 +11,12 @@ matplotlib==3.9.0 # pytorch accelerate==0.30.1 safetensors==0.4.3 -tensorboard==2.17.0 +tensorboard==2.17.1 pytorch-lightning==2.2.5 # stable diffusion --e git+https://github.com/huggingface/diffusers.git@dd4b731#egg=diffusers +# -e git+https://github.com/huggingface/diffusers.git@dd4b731#egg=diffusers +diffusers==0.30.0 transformers==4.42.3 omegaconf==2.3.0 # needed to load stable diffusion from single ckpt files invisible-watermark==0.2.0 # needed for the SDXL pipeline From 4c7052884df55871c54866f8ecd3d37fa35daf81 Mon Sep 17 00:00:00 2001 From: celll1 Date: Fri, 30 Aug 2024 21:27:03 +0900 Subject: [PATCH 02/17] implementation of attention mask --- modules/model/StableDiffusionXLModel.py | 26 ++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/modules/model/StableDiffusionXLModel.py b/modules/model/StableDiffusionXLModel.py index b44bd9c5..ba610147 100644 --- a/modules/model/StableDiffusionXLModel.py +++ b/modules/model/StableDiffusionXLModel.py @@ -213,22 +213,36 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] chunk_embeddings = [] pooled_outputs = [] + attention_masks = [] - for chunk in chunks: + for i, 
chunk in enumerate(chunks): if chunk.numel() == 0: continue - if chunk.shape[1] < chunk_length: - padding = torch.full((chunk.shape[0], chunk_length - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([chunk, padding], dim=1) + # アテンションマスクの作成(1がマスクしない、0がマスクする) + attention_mask = torch.ones_like(chunk, dtype=torch.bool) + # まず、BOSとEOSを追加 bos_tokens = torch.full((chunk.shape[0], 1), tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) eos_tokens = torch.full((chunk.shape[0], 1), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) + attention_mask = torch.cat([torch.zeros_like(bos_tokens, dtype=torch.bool) if i > 0 else torch.ones_like(bos_tokens, dtype=torch.bool), + attention_mask, + torch.zeros_like(eos_tokens, dtype=torch.bool) if i < len(chunks) - 1 else torch.ones_like(eos_tokens, dtype=torch.bool)], + dim=1) + + # パディングで埋める + if chunk.shape[1] < chunk_length + 2: # +2 はBOSとEOSのため + padding = torch.full((chunk.shape[0], chunk_length + 2 - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([chunk, padding], dim=1) + attention_mask = torch.cat([attention_mask, torch.zeros_like(padding, dtype=torch.bool)], dim=1) + + attention_masks.append(attention_mask) with self.autocast_context: outputs = text_encoder( chunk, + attention_mask=attention_mask, output_hidden_states=True, return_dict=True, ) @@ -243,10 +257,12 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): if len(chunk_embeddings) > max_embeddings_multiples: chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + attention_masks = attention_masks[:max_embeddings_multiples] if pooled_outputs: pooled_outputs = pooled_outputs[:max_embeddings_multiples] combined_embedding = torch.cat(chunk_embeddings, dim=1) + # combined_attention_mask = torch.cat(attention_masks, dim=1) pooled_output = pooled_outputs[0] if pooled_outputs else None return combined_embedding, pooled_output @@ -274,7 +290,7 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): text_encoder_2_output, pooled_text_encoder_2_output = __process_tokens(tokens_2, self.tokenizer_2, self.text_encoder_2, text_encoder_2_layer_skip) if text_encoder_1_output is None or text_encoder_2_output is None: - print("Both text encoder outputs are None. 
Check your input text or tokens.") + print("両方のテキストエンコーダーの出力がNoneです。入力テキストまたはトークンを確認してください。") text_encoder_output = torch.cat([text_encoder_1_output, text_encoder_2_output], dim=-1) From 089b8f373068e6c163701e3aa41a812b860e2bf2 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sat, 31 Aug 2024 01:19:29 +0900 Subject: [PATCH 03/17] Adam-mini --- modules/util/create.py | 17 +++++++++++++++++ modules/util/enum/Optimizer.py | 3 +++ modules/util/optimizer_util.py | 10 ++++++++++ requirements-global.txt | 1 + 4 files changed, 31 insertions(+) diff --git a/modules/util/create.py b/modules/util/create.py index ec11a8ad..faed4941 100644 --- a/modules/util/create.py +++ b/modules/util/create.py @@ -815,6 +815,23 @@ def create_optimizer( eps=optimizer_config.eps if optimizer_config.eps is not None else 1e-8, ) + # Adam-mini Optimizer + case Optimizer.ADAM_MINI: + from adam_mini import Adam_mini + optimizer = Adam_mini( + named_parameters=parameters, + lr=config.learning_rate, + betas=(optimizer_config.beta1 if optimizer_config.beta1 is not None else 0.9, + optimizer_config.beta2 if optimizer_config.beta2 is not None else 0.999), + eps=optimizer_config.eps if optimizer_config.eps is not None else 1e-8, + weight_decay=optimizer_config.weight_decay if optimizer_config.weight_decay is not None else 0.0, + # model_shardingは未実装(マルチGPUの場合はTrueにする) + model_sharding=optimizer_config.model_sharding if optimizer_config.model_sharding is not None else False, + # dim=model_config.dim, + # n_heads=model_config.n_heads, + # n_kv_heads=model_config.n_kv_heads, + ) + if state_dict is not None and optimizer is not None: if 'param_group_mapping' not in state_dict: # Old method of loading the optimizer state. This only works if the param groups did not change. diff --git a/modules/util/enum/Optimizer.py b/modules/util/enum/Optimizer.py index e21bb045..063cca6d 100644 --- a/modules/util/enum/Optimizer.py +++ b/modules/util/enum/Optimizer.py @@ -59,6 +59,9 @@ class Optimizer(Enum): TIGER = 'TIGER' AIDA = 'AIDA' + # ADAM_MINI + ADAM_MINI = 'ADAM_MINI' + @property def is_adaptive(self): return self in [ diff --git a/modules/util/optimizer_util.py b/modules/util/optimizer_util.py index 93af8e12..18896dbf 100644 --- a/modules/util/optimizer_util.py +++ b/modules/util/optimizer_util.py @@ -368,4 +368,14 @@ def init_model_parameters( "adam_debias": False, "eps": 1e-8, }, + Optimizer.ADAM_MINI: { + "beta1": 0.9, + "beta2": 0.999, + "eps": 1e-8, + "weight_decay": 0.0, + "model_sharding": False, + # "dim": None, + # "n_heads": None, + # "n_kv_heads": None, + }, } diff --git a/requirements-global.txt b/requirements-global.txt index 0df71008..21dcd816 100644 --- a/requirements-global.txt +++ b/requirements-global.txt @@ -34,6 +34,7 @@ lion-pytorch==0.1.4 # lion optimizer prodigyopt==1.0 # prodigy optimizer schedulefree==1.2.5 # schedule-free optimizers pytorch_optimizer==3.0.2 # pytorch optimizers +adam_mini # Profiling scalene==1.5.41 From 29dd300f76b9798613e150eb8e0d82bfcb06a473 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sat, 31 Aug 2024 01:32:41 +0900 Subject: [PATCH 04/17] Fix: Adam-mini --- modules/util/config/TrainConfig.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/util/config/TrainConfig.py b/modules/util/config/TrainConfig.py index d844500d..9f9b6b6d 100644 --- a/modules/util/config/TrainConfig.py +++ b/modules/util/config/TrainConfig.py @@ -87,6 +87,7 @@ class TrainOptimizerConfig(BaseConfig): r: float adanorm: bool adam_debias: bool + model_sharding: bool def __init__(self, data: list[(str, Any, type, bool)]): 
super(TrainOptimizerConfig, self).__init__(data) @@ -154,6 +155,7 @@ def default_values(): data.append(("r", None, float, True)) data.append(("adanorm", False, bool, False)) data.append(("adam_debias", False, bool, False)) + data.append(("model_sharding", False, bool, False)) return TrainOptimizerConfig(data) From 43f72c19d3fc3dc43bf04da6b16991f81a59abf7 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sat, 31 Aug 2024 01:38:14 +0900 Subject: [PATCH 05/17] Fix: Adam-mini 2 --- modules/ui/OptimizerParamsWindow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui/OptimizerParamsWindow.py b/modules/ui/OptimizerParamsWindow.py index 9f1be17b..4c2be345 100644 --- a/modules/ui/OptimizerParamsWindow.py +++ b/modules/ui/OptimizerParamsWindow.py @@ -144,7 +144,7 @@ def create_dynamic_ui( 'r': {'title': 'R', 'tooltip': 'EMA factor.', 'type': 'float'}, 'adanorm': {'title': 'AdaNorm', 'tooltip': 'Whether to use the AdaNorm variant', 'type': 'bool'}, 'adam_debias': {'title': 'Adam Debias', 'tooltip': 'Only correct the denominator to avoid inflating step sizes early in training.', 'type': 'bool'}, - + 'model_sharding': {'title': 'Model Sharding', 'tooltip': 'Whether to use model sharding for distributed training.', 'type': 'bool'}, } # @formatter:on From 87727c11ad056172e150a373747539a8a5109847 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sat, 31 Aug 2024 02:05:25 +0900 Subject: [PATCH 06/17] Fix: Adam-mini 3 --- modules/util/create.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/util/create.py b/modules/util/create.py index faed4941..43cf16bc 100644 --- a/modules/util/create.py +++ b/modules/util/create.py @@ -818,8 +818,11 @@ def create_optimizer( # Adam-mini Optimizer case Optimizer.ADAM_MINI: from adam_mini import Adam_mini + named_parameters = [(f'group_{i}.param_{j}', param) + for i, group in enumerate(parameters) + for j, param in enumerate(group['params'])] optimizer = Adam_mini( - named_parameters=parameters, + named_parameters=named_parameters, lr=config.learning_rate, betas=(optimizer_config.beta1 if optimizer_config.beta1 is not None else 0.9, optimizer_config.beta2 if optimizer_config.beta2 is not None else 0.999), From 6c224b772dc64e17dd1c3d8b297ab919b3fe825e Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 3 Sep 2024 18:01:42 +0900 Subject: [PATCH 07/17] Jupyter notebook --- modules/modelSampler/FluxSampler.py | 2 +- modules/modelSampler/PixArtAlphaSampler.py | 2 +- .../modelSampler/StableDiffusion3Sampler.py | 2 +- .../modelSampler/StableDiffusionSampler.py | 2 +- .../modelSampler/StableDiffusionXLSampler.py | 2 +- modules/modelSampler/WuerstchenSampler.py | 2 +- modules/module/BaseImageCaptionModel.py | 2 +- modules/module/BaseImageMaskModel.py | 2 +- modules/module/GenerateLossesModel.py | 2 +- modules/trainer/GenericTrainer.py | 3 +- train.ipynb | 147 ++++++++++++++++++ 11 files changed, 157 insertions(+), 11 deletions(-) create mode 100644 train.ipynb diff --git a/modules/modelSampler/FluxSampler.py b/modules/modelSampler/FluxSampler.py index 94a8eae7..9064bfff 100644 --- a/modules/modelSampler/FluxSampler.py +++ b/modules/modelSampler/FluxSampler.py @@ -15,7 +15,7 @@ import torch from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class FluxSampler(BaseModelSampler): diff --git a/modules/modelSampler/PixArtAlphaSampler.py b/modules/modelSampler/PixArtAlphaSampler.py index eda47672..9fc36633 100644 --- a/modules/modelSampler/PixArtAlphaSampler.py +++ b/modules/modelSampler/PixArtAlphaSampler.py @@ -15,7 +15,7 
@@ import torch from PIL.Image import Image -from tqdm import tqdm +from tqdm.auto import tqdm class PixArtAlphaSampler(BaseModelSampler): diff --git a/modules/modelSampler/StableDiffusion3Sampler.py b/modules/modelSampler/StableDiffusion3Sampler.py index 4c39c60e..6cd021af 100644 --- a/modules/modelSampler/StableDiffusion3Sampler.py +++ b/modules/modelSampler/StableDiffusion3Sampler.py @@ -15,7 +15,7 @@ import torch from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class StableDiffusion3Sampler(BaseModelSampler): diff --git a/modules/modelSampler/StableDiffusionSampler.py b/modules/modelSampler/StableDiffusionSampler.py index 1bc2d987..57f217fa 100644 --- a/modules/modelSampler/StableDiffusionSampler.py +++ b/modules/modelSampler/StableDiffusionSampler.py @@ -17,7 +17,7 @@ from torchvision.transforms import transforms from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class StableDiffusionSampler(BaseModelSampler): diff --git a/modules/modelSampler/StableDiffusionXLSampler.py b/modules/modelSampler/StableDiffusionXLSampler.py index cc1cd3d5..73128378 100644 --- a/modules/modelSampler/StableDiffusionXLSampler.py +++ b/modules/modelSampler/StableDiffusionXLSampler.py @@ -17,7 +17,7 @@ from torchvision.transforms import transforms from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class StableDiffusionXLSampler(BaseModelSampler): diff --git a/modules/modelSampler/WuerstchenSampler.py b/modules/modelSampler/WuerstchenSampler.py index 93280926..f5910219 100644 --- a/modules/modelSampler/WuerstchenSampler.py +++ b/modules/modelSampler/WuerstchenSampler.py @@ -14,7 +14,7 @@ import torch from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class WuerstchenSampler(BaseModelSampler): diff --git a/modules/module/BaseImageCaptionModel.py b/modules/module/BaseImageCaptionModel.py index 7fd6ac79..5a9c7312 100644 --- a/modules/module/BaseImageCaptionModel.py +++ b/modules/module/BaseImageCaptionModel.py @@ -5,7 +5,7 @@ from modules.util import path_util from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class CaptionSample: diff --git a/modules/module/BaseImageMaskModel.py b/modules/module/BaseImageMaskModel.py index 1e37a574..768f7d22 100644 --- a/modules/module/BaseImageMaskModel.py +++ b/modules/module/BaseImageMaskModel.py @@ -9,7 +9,7 @@ from torchvision.transforms import transforms from PIL import Image -from tqdm import tqdm +from tqdm.auto import tqdm class MaskSample: diff --git a/modules/module/GenerateLossesModel.py b/modules/module/GenerateLossesModel.py index a734b91d..4fceac7e 100644 --- a/modules/module/GenerateLossesModel.py +++ b/modules/module/GenerateLossesModel.py @@ -11,7 +11,7 @@ import torch -from tqdm import tqdm +from tqdm.auto import tqdm class GenerateLossesModel: diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py index d9f31eee..9fc5c8c7 100644 --- a/modules/trainer/GenericTrainer.py +++ b/modules/trainer/GenericTrainer.py @@ -40,8 +40,7 @@ from torchvision.transforms.functional import pil_to_tensor from PIL.Image import Image -from tqdm import tqdm - +from tqdm.auto import tqdm class GenericTrainer(BaseTrainer): model_loader: BaseModelLoader diff --git a/train.ipynb b/train.ipynb new file mode 100644 index 00000000..7ab81b45 --- /dev/null +++ b/train.ipynb @@ -0,0 +1,147 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_path = './config/test.json'" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-24T11:02:30.546988Z", + "iopub.status.busy": "2024-03-24T11:02:30.546730Z", + "iopub.status.idle": "2024-03-24T11:04:12.062739Z", + "shell.execute_reply": "2024-03-24T11:04:12.061431Z", + "shell.execute_reply.started": "2024-03-24T11:02:30.546962Z" + } + }, + "outputs": [], + "source": [ + "# 依存関係のインストール\n", + "!pip -r requirements.txt\n", + "\n", + "# CUDAの確認\n", + "import torch\n", + "cuda_available = torch.cuda.is_available()\n", + "print(f\"CUDA is {'available' if cuda_available else 'not available'}\")\n", + "\n", + "if not cuda_available:\n", + " use_zluda = input(\"CUDAが利用できません。WindowsでAMD GPUを使用していますか? (y/n) \")\n", + " if use_zluda.lower() == 'y':\n", + " print(\"ZLUDAのインストールを続行します\")\n", + " %run scripts/install_zluda.py\n", + " else:\n", + " print(\"エラー:インストール中に問題が発生しました\")\n", + "else:\n", + " print(\"インストールが完了しました\")\n", + "\n", + "# 注意:この環境では%pipを使用していますが、\n", + "# 通常のコマンドラインでは!pipを使用することに注意してください" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-24T11:10:36.477241Z", + "iopub.status.busy": "2024-03-24T11:10:36.476649Z", + "iopub.status.idle": "2024-03-24T11:10:42.359595Z", + "shell.execute_reply": "2024-03-24T11:10:42.359022Z", + "shell.execute_reply.started": "2024-03-24T11:10:36.477212Z" + } + }, + "outputs": [], + "source": [ + "# 必要なライブラリのインポート\n", + "import os\n", + "\n", + "# mgdsをリポジトリにcloneした場合\n", + "# import sys\n", + "# sys.path.append('mgds/src')\n", + "\n", + "import json\n", + "from modules.util.config.TrainConfig import TrainConfig\n", + "from modules.util.callbacks.TrainCallbacks import TrainCallbacks\n", + "from modules.util.commands.TrainCommands import TrainCommands\n", + "from modules.trainer.GenericTrainer import GenericTrainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-24T11:27:13.229538Z", + "iopub.status.busy": "2024-03-24T11:27:13.229338Z" + } + }, + "outputs": [], + "source": [ + "# TrainConfigのインスタンスを作成\n", + "train_config = TrainConfig.default_values()\n", + "with open(config_path, \"r\") as f:\n", + " train_config.from_dict(json.load(f))\n", + "\n", + "# userwarningを表示しない\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# TrainConfigで読み込んだ\"debug_dir\",\"workspace_dir\",cache_dir\"が存在しなければ作成\n", + "for dir_path in [train_config.debug_dir, train_config.workspace_dir, train_config.cache_dir]:\n", + " if not os.path.exists(dir_path):\n", + " os.makedirs(dir_path)\n", + "\n", + "# コールバックとコマンドの設定\n", + "callbacks = TrainCallbacks()\n", + "commands = TrainCommands()\n", + "\n", + "# トレーニングプロセスの開始\n", + "print(\"Destination_path: \", train_config.output_model_destination)\n", + "print(\"Workspace_path: \", train_config.workspace_dir)\n", + "print(\"Debug_path: \", train_config.debug_dir)\n", + "print(\"Cache_path: \", train_config.cache_dir)\n", + "\n", + "trainer = GenericTrainer(train_config, callbacks, commands)\n", + "trainer.start()\n", + "\n", + "try:\n", + " # トレーニングの実行\n", + " trainer.train()\n", + "except Exception as e:\n", + " print(f\"トレーニング中にエラーが発生しました: {e}\")\n", + "finally:\n", + " # トレーニング終了時の処理\n", + " trainer.end()\n", + " print(\"トレーニングが終了しました\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From c43c05ecfca649cf74c216b8b272ceeac8154b1d Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 3 Sep 2024 19:32:46 +0900 Subject: [PATCH 08/17] Remove log file. --- h --force origin master | 854 ---------------------------------------- 1 file changed, 854 deletions(-) delete mode 100644 h --force origin master diff --git a/h --force origin master b/h --force origin master deleted file mode 100644 index 044a277c..00000000 --- a/h --force origin master +++ /dev/null @@ -1,854 +0,0 @@ -d968558 (HEAD -> master, upstream/master) disable gradient checkpointing if OFF is selected -b727db2 fix gradient checkpointing for SC models -aa18c65 move model loading functions out of FluxModelLoader -98af3f1 deactivate int8 quantization -51a9e6f hide fine tune option for flux models -196e1c8 flux lora preset -f63b188 respect the continue_last_backup setting in the sampling tool -6847457 fix resuming from backup when training a DoRA -5da5ddb set flux guidance scale to 1.0 during training -50cf940 lazy initialization for the dora_scale parameter -d90cdf7 add a new cpu offloaded gradient checkpointing option -bac7d5e include flux as a model with multiple text encoders -f957d97 Merge branch 'refs/heads/master' into flux -4743314 fix embedding training for models with a single text encoder -e891861 add flux embedding training support -d027b4f fix saving of included metadata when running from cli -16b73e6 fix loading of unquantized flux models -2925dc8 add support for DoRA training on quantized base models -529c9fd experimental int8 quantization support. will need fixes in bnb to fully work -abfc72f add flux model saver -be4d3f3 flux lora support -b95329b initial Flux sampling and quantization support -0f66d0f add an unsqueeze call to fix SC training without text encoder training -5f19791 deactivate 300 token limit for PixArt Sigma -d931d6c move checkpointing_util.py out of stableDiffusion directory -1ee5a13 make display_name optional -81d5283 combine many common data loader functions to reduce code duplication -e847aa8 fix a call to encode_text() -64de057 reduce code duplication for embedding parameter groups -7d7a463 extract clip and t5 calls to a single function -4c0a624 split out Pixart text encoding to its own function -66e9715 split out SC text encoding to its own function -70fb48a split out SDXL text encoding to its own function -29dd5cb split out SD text encoding to its own function -87ae56d split out SD3 text encoding to its own function to reduce code duplication -abb6aa3 Merge pull request #429 from mx/ruff -96e3770 remove unused return values -955dee7 add line breaks to a long line -ce6ce9c Automated ruff fixes. -1158af5 Remove some lints that make the code less readable. -26bd9da Add missing comma to attn-mlp preset. -99a9f17 Ruff fix include order and sorting. -0412584 Add ruff config. -6187518 Revert "Merge pull request #419 from hameerabbasi/pre-commit" -caf6921 Merge pull request #419 from hameerabbasi/pre-commit -cb16854 Add `requirements-dev.txt`. -f43f6f2 Remove redundant line. -764995e some code cleanup -43fc10a Remove irrelevant stuff from `pyproject.toml`. -1b32d0d only load as backup if meta.json exists -5fec095 Propagate the train/eval state to lora modules. (#424) -e8554a9 Remove redundant `None`s. 
-48575a9 Fix the unneeded `_epoch_step`. -9fc4513 Introduce `pre-commit` and `ruff`. -ce11529 Introduce `pre-commit` and `ruff`. -f2d78c8 update mgds -ce13283 Merge pull request #423 from mx/eps -7c937d8 Remove the command switch for norm epsilon. -385b1f5 Optional epsilon for weight decompose. -33498b6 add a missing import -80ba9c3 Downgrade matplotlib to 3.9.0. -e34efac add "DoRA" to the decompose label -8ac2693 fix variable declarations in LoHaModule -5b21774 add a config option for SD3 attention masks -e59ec6e experimental attention mask for SD3 -d8d3f16 LoRA Refactor (#403) -ffa1f8a Merge pull request #409 from seniorsolt/Fix_errors_with_float16 -952a5c8 Set dtype in alphas_cumprod in debug process sd 1.5 -259215b Set dtype in __create_erode_kernel, eroded_mask and latent_mask sd 1.5 -cc76f38 Optimizers - ADABELIEF, TIGER, AIDA (#373) -9c0275d update mgds -889ddd2 Merge branch 'refs/heads/sd3_improvements' -5553e1d fix an exception when loading a backup in the sampling tool -3311aac add missing input fields -9ad1c5e revert onnxruntime to 1.18.0 -e2cc711 update pytorch and xformers -aca6afa add a timestep distribution visualization -23006f0 Merge pull request #387 from mx/a11y -1918055 add non uniform timestep sampling for SD3 -f1fb0e3 Undo the a11y changes to the masks / caption UI. -a446419 fix SD3 debug output -68046e0 Merge branch 'refs/heads/master' into sd3_improvements -474dd2c replace the vae distribution from the cache with a single sample to reduce space -d06a650 enable partial embedding training for all SD3 and SDXL training modes -fa6a08c update mgds -d7d47ea disable checkpointing during inference -4cf80e2 remove unused code -a66f705 update tensorboard to fix a protobuf version incompatibility -d0a3c6c rework SD3 embedding training to use a new train_embedding switch -0ae3129 fix SD3 embedding loading and saving -12f623e fixed SD3 loss calculation -a5a06c8 add SD3 embedding training -f78a74a fix a vae model conversion bug for SD models -c5911a4 readme update -a63bc9f fix a dtype exception when not using autocast -9102e12 add SD3 presets -4d745e5 update mgds -a33abce disable embedding training for SD3, it's not supported yet -8344cb1 don't create LoRA layers for untrained text encoders for SD3 -c8cf156 Merge branch 'refs/heads/master' into sd3 -8dc3a3d Merge pull request #370 from adriangaro/master -a93aaac Merge pull request #359 from mx/note -6cf5eb0 fix SD3 text encoder dropout when training text encoders -ff7c801 fix model config loading for sd and sdxl -a13d4cf fix embedding setup to use vpred if configured (#369) -beff1d7 better SD3 prompt dropout -36a7503 fix an exception when saving a backup from an exported config file -8add0f1 add a disable_fp16_autocast_context to SD3 T5 -e4a392a add the vae shift factor during SD3 training -be0db64 fix T5 quantization to fp16 when loading from a single safetensors file -06d5e3a add support for saving SD3 models as a single safetensors file -e4a9731 Don't create TE layers that won't be trained or used. -edc20b7 fix an indexing error -aaa7649 don't enable gradient checkpointing for unloaded text encoders -18e594a enable text encoder dropout for SD3 -fddd2f4 use uniform sampling when selecting a SD3 training timestep -4d9de10 ad options to include only some text encoders during SD3 training -14cb1fd Revert "Make only the sample settings scrollable, with a fixed width." -bd8cf23 Make only the sample settings scrollable, with a fixed width. -c2b14d5 Revert a11y fix for sample tool, it breaks stuff. 
-fbb86d6 SD3 LoRA training -fe7c4b7 add PixArt-Sigma to the readme file -51c7a84 Accessibility fixes (#340) -a56d63c fix a dtype bug -e5b00d1 support for loading single file SD3 models and more fixes -ec182e9 add model saving for SD3 -cf1246a fix the timestep selection during SD3 training -a0486b7 fix a few loss calculation bugs for SD3 -793fce7 fixes and improvements for SD3 -4c1d3b2 remove all mentions of T5TokenizerFast -e4affae initial SD3 model setup and data loader -454f46a pass add_special_tokens=True to T5 -5bb7743 sampling fixes for SD3 -9332a56 update dependencies for SD3 support -5bb2bbb StableDiffusion3Sampler. Based on StableDiffusionXLSampler. -2b01266 add SD3 model loading -bab1441 fix crop jitter -9b790cd clarify a fused_back_pass error message -3ce9746 reduce memory usage after stopping a training run -fceb76c add a save now button to the backup tab -946836c fix black SDXL sample images -839c2a6 add fused back pass support for adam -dc66fa7 add fused back pass support for adamw -d86ea35 Implement Pixart Sigma (#317) -64f6c1a Merge remote-tracking branch 'refs/remotes/origin/dependency_update' -221420a fix a grad scalar bug when using fused back pass -6ecec2d dependency updates -f914d0d remove outdated information from the quick start guide -91b1570 better support for loading and converting SDXL inpainting models from a single file -b532606 fix SDXL inpainting sampling for noise schedulers other than DDIM -c8a1549 add support for SDXL inpainting samples, and general sampling fixes -c53ec6e enable inpainting samples for SD models -f0bdfa6 Rewrite ZLUDA installer. (#276) -a5111ca fix install.bat for ZLUDA installs -9767a4e move inpainting augmentations to the concept window -13f570f Merge pull request #309 from Calamdor/patch-1 -d83a7da Update QuickStartGuide.md -8406119 update mgds to allow custom resolutions -95c2a8d Allow the user to specify a custom learning rate scheduler (#283) -e9db8d8 fix an exception when resuming from a backup using the prodigy optimizer -eb87d60 Merge pull request #302 from Calamdor/Steps-Calculation-Bug-Fix -65d08d4 Merge pull request #300 from vladmandic/patch-1 -9832f5a Update create.py -d62634e Optionally bundle joint-trained embeddings into LoRAs (#278) -bfdf06f update safety checker skip in load -2e910ce fix sampling fom the sampling tool and script -338908d fix progress reporting in the console when resuming from a backup -21023d3 fix SDXL caching -9a9328b Merge branch 'inline_bucketing' -4e32bce Merge pull request #296 from vladmandic/patch-1 -5e3cfbc add missing import os -3e5c3cf inline bucketing fixes -f9b034d Merge branch 'master' into inline_bucketing -d0d2939 clarify the clip skip settings in the UI -fd25ad3 Merge branch 'optimizer_state' -5494c97 Implement Facebook's new schedule-free optimizers. 
(#242) -80238e9 better handling of embedding optimizer states -2e28621 add additional information to the backup optimizer state to enable config changes without manually clearing the optimizer state -76c2df2 Merge pull request #277 from xirvian/master -0ea4e86 fix the yaml config loading without an internet connection -b48ed22 Add wraplength support to labels, fixing UI behavior for long concept names in concept tab -524e420 Merge pull request #269 from SirTrippsalot/master -75422b0 add a todo comment to __apply_fused_back_pass -6151f23 Fix Fused Backpass + stop training -5118af0 fixed model conversion of SDXL and stable cascade models -fa89fcb add a missing util file -dced13a add an option to force the padding mode of conv layers to circular -f3210d9 add a second concept balancing strategy, where an exact number of samples can be specified -4805e11 remove an unused config migration -ac26661 add an option to preserve embedding norms during training -e965b7a fix backup loading for additional embeddings -ece35d4 fix model loading in the sample tool -2b9b170 fix backup saving when training embeddings -b64198c Reduce default dataloader threads to 2. -ba20789 Merge pull request #247 from mx/parallel -0a78c44 update mgds -761d1dc Add caption prefix and postfix (#256) -52520c6 Merge branch 'universal_embeddings' -c14e231 Add profiling section for CPU profiles and stack dump (#259) -e7466d4 option to disable latent caching for all remaining models and training modes -42c3ed1 add an experimental option do completely disable latent caching to Stable Diffusion -ac62b48 Fix typo PipelineConfig -> PipelineState. -fed167a Merge pull request #225 from mx/came -6955092 Restrict dataloader to user-configurable thread number. -152f285 add fused back pass support to CAME -bb16722 Merge pull request #243 from mx/sgd -706367f Load a PipelineState in MGDS. -1bcf966 Fix SGD foreach option. -c959547 Vae UI update and Tooltip update (#237) -4c413f7 Merge branch 'master' into universal_embeddings -560d1d4 loading and saving fixes for model conversion -8f5d715 Fix prediction type change while training Stable Diffusion (#235) -bcd8804 disable relative_step and scale_parameter in adafactor defaults -a4647fa pixart alpha embedding training and pivotal tuning -c3bdfe1 Add ZLUDA support (#196) -4926cc7 Vendor CAME rather than just implement extensions. -9a35e7f hide the triton warning on startup -d7899d9 wuerstchen and stable cascade pivotal tuning -5aabada SDXL pivotal tuning -055fac7 Merge pull request #222 from hameerabbasi/fix-model-type -5f63653 Change default loss_weight_strength to float. -46b48e2 Fix preset training bug. -2199821 remove StableDiffusionFineTuneDataLoader -b10c569 Merge branch 'master' into universal_embeddings -07f8198 Debiased estimation loss. (#212) -fd50ee6 code cleanup and fixes for pivotal tuning -edc098a fix adafactor training with disabled stochastic rounding -d46df19 Add stochastic rounding for CAME. -85360b8 Enable the CAME optimizer. 
-ff26890 pivotal tuning for stable diffusion LoRA -fd53dd3 add a UI example to the readme -5cb551d Merge branch 'master' into universal_embeddings -a75bde0 ui bugfix for pivotal tuning -c009c30 fix adam/adamw fused and foreach implementations -245f9b6 fix loading of yml files for stable diffusion models -493a01d Merge branch 'master' into universal_embeddings -68224ce pivotal tuning fixes for stable diffusion fine tune -6c7af74 fixes for pivotal tuning model loading and sampling -7c0b7b5 Merge branch 'master' into universal_embeddings -3242026 option to disable the autocast cache for reduced VRAM usage -f51e6af refactoring of optimizer patches -9c09bad GradScaler support for fused back pass -97c3452 Merge branch 'master' into fused_back_pass -7b8e509 fix else branch in install.sh and update.sh -db6c6c8 Merge pull request #193 from hameerabbasi/moar-backends -a1e4d23 Modify `install.sh` and `update.sh` to be more generic. -1f42d5d update mgds -7c5f983 Preserve back-compat by checking for `nvcc`. -ef26ec3 Preserve back-compat by adding shim `requirements.txt`. -d9ac606 experimental fused back pass for adafactor -3c18a91 fix gradient checkpointing for stable cascade -eb19d84 Auto-detect default device. -7be0a84 Merge remote-tracking branch 'origin/master' into moar-backends -a6722bf update diffusers to support the final stable cascade format -739cb0f additional embeddings tab -9d16726 refactor stable diffusion lora loading -c86cfd9 Merge pull request #185 from mx/tensorboard -4f565ac fix a naming inconsistency -5357f9f Add required env var. -81606a8 Fix GC to not use too much memory. -03fd726 Get training working on MPS. -9f63018 Attempt to make compatible with ROCm, MPS and CUDA. -369b023 custom prefix for saves during training -70c4c06 model loading for SD pivotal tuning -43772ca update mgds -90cfc88 Remove code that was duped into BaseStableDiffusionXL. -0745001 Remove some code that got accidentally left behind in the move. -b595ae1 Have the tensorboard recording done in the model setup. -1812d46 fix bf16 sampling on SD1.5 models -740d0a5 Group both losses under the same header. -efc2193 Show the correct learning rates on tensorboard. -ebf32bb Merge branch 'master' into universal_embeddings -e36edb9 fix multiple dtype issues during caching and sampling -76873db fix stable cascade lora key mapping -d191f85 Merge pull request #171 from mx/dropout -a0520f2 move dropout setting to the LoRA tab -eae7314 Merge branch 'master' into fork/dropout -84f15e6 cleanup LoRA dropout -4cf6d97 Merge branch 'stochastic_rounding' -7363a21 add a disclaimer to the modified optimizer step functions -a729c31 only enable stochastic rounding for bf16 weights -3f3acd6 fix loading of samples and concepts when loading a packed config file -379d75f add stochastic rounding as an optimizer setting to adam, adamw and adafactor -3cab0b2 Remove dropout setting from the constructor. 
-fd78334 improve the efficiency of the stochastic copy function -91d3027 fix presets -7fcee19 dropout_pct -> dropout_probability -5a13493 Merge pull request #179 from hameerabbasi/issue-template -add26bf replace feature with enhancement label -995d08f Merge branch 'master' into stochastic_rounding -35e6eff fix a ui exception when switching to a different preset -0f40b32 fix early stopping of text encoder training -763299d Merge branch 'master' into stochastic_rounding -82c76a9 fix a model loader bug when training embeddings -05dc257 fix SD lora training -3d2d7a9 experimental stochastic rounding for adafactor and bf16 weights -f854625 remove train_*_epochs settings from presets -5a1c573 fix ui labels -2c1b1df better "stop_training_after" default settings -8b15e9a Add issue templates. -e669400 fix early stopping of model part training -939f9e5 Merge branch 'master' into model_part_config -031077a Fix with less typos... -9074726 Fix dropout in state dict (forgot everything had to be a tensor). -3e163eb Add dropout for lora training. -970e9a9 fix stable cascade attention lora keys -a90d108 model spec header for stable cascade lora and embedding files -50618b0 change the stable cascade safetensors saving logic to save stage c and the text encoder -e928d6c fix stable cascade min snr gamma for batch sizes over 1 -130b5e8 add Stable Cascade to the readme -2cf3ab1 disable prior override for wuerstchen v2 and pixart -be7c935 enable min snr gamma for stable cascade -5583cf0 Merge branch 'master' into stable_cascade -98f6cec fix DEFAULT attention processor for stable cascade -be9caec add a mapping function for stable cascade lora keys -5298aaf fix config export -ab3484a Merge pull request #155 from mx/minsnrgamma -aa631d0 change min snr gamme from int to float -08facee fix loading stable cascade backups -16f70ec LoRA and embedding training for stable cascade -c38d425 add an option to save stable cascade models in safetensors format -b17d1a6 fix stable cascade tokenizer saving -28be8be support for stable cascade stage C 1B -6b9c011 disable relative step and scale parameter in presets -3b4134d stable cascade sample and train fixes -e919e50 stable diffusion pivotal tuning tests -113572e initial stable cascade fine tuning support -ae3a969 stable cascade sampling support -e937b79 option to include training config in the output model -3215295 Add timestep information to the model_output_data. -578ec7b Merge branch 'master' into model_part_config -f56297c always call component commands after the value is changed -7d6b538 No min snr gamma for alignprop. -3da1e31 Fix training tab putting the options in the wrong place. -336339c Fix warning over is/==. -0c05e27 Support v-prediction in minimum SNR gamma calculation. -b86192f Implement minimum SNR gamma. 
-1b1817c unify model part config -7c4531d correctly handle utf-8 in CaptionUI.py -396037e #.json is not a built in preset -ebbc158 always assume built-in configs are saved in the most recent version -0b2b8d7 fix loading of built in presets -dc0c7b0 fix migration for optimizer settings -769078e fix optimizer updates when switching to a different preset -80ed56f Merge pull request #153 from Sayat1/ADAM8bit-fix -475e95e adam8bit betas fix -8aed720 fix optimizer creation -a35b407 clean up data types in BaseConfig.py -510f739 add config output to create_train_files.py -cb3910b Merge branch 'master' into config_rework -6448733 update CLI documentation -26fc46e migrations to load old config format -a7ad71c load config as unpacked config when loading from ui -ec2f4b9 change export function to generate single json file -7528e70 integrate optimizer pref settings into the main config json -e501932 rename Param to Config -4aa41b2 unify param and arg classes, remove train by args script -2ad2af1 Merge pull request #147 from mx/script -137691a Allow python and venv paths to be specified. -ed2eea1 fix grad scaling -ff8ee55 fix an exception when loading pixart models -608083a experimental fp8 support -1160505 fixed a transparency bug when using screen scaling -9560751 Merge pull request #140 from hameerabbasi/fix-prodigy -38c0d2e Prodigy is not Adafactor anymore. -5ef6c90 disable delete when pressing ctrl-d in the mask editing UI -4c5e6db fill tool for mask editing -cd6bd4b create full white mask when creating an initial mask with right click -d6168fb allow access to the main window when some tool windows are open -c3be7f0 PixArt LoRA training -6db9516 Merge pull request #133 from prog0111/step-bias -a9db641 add noise related settings to the ui for all models -3dad4bf Fix random number generation and tooltips -23494db Merge remote-tracking branch 'upstream/master' into step-bias -f1b1f80 re-enable gradient checkpointing for T5 -701d784 disable gradient checkpointing for T5, it seems to be broken -0b0d74a Add global step as parameter for timesteps -e499d89 Merge remote-tracking branch 'upstream/master' into step-bias -ea1a354 Updating for upstream changes -acdc2f5 add option to add generated captions as new line -b7eafb2 fix autocast context creation -0ca6f39 align dtype conversions of _add_noise functions -448388e fix autocast for wuerstechen training -aa4da9a fix number of scalars in tensorboard to reduce summarization -9eef5ee move noise generation of continuous schedulers -a12c063 fix fp16 noise generation on PixArt -a197257 Add min noising strength -83dfb8b Added numpy imports back in -6c99056 Split noise distribution into two parameters -2e4f9b9 Merged with upstream -ead910e fix dtype conversion in the dataloader when autocast is disabled -cf02f54 Merge branch 'pixart' -7c26783 remove vb loss option for non variational models -b6c6040 remove unused parameter from the pixart preset -0dfb1fc Fixed step bias to make its seed based on global step -1061f4a Initial step-bias implementation -8c405fb fix loading SD inpainting models from ckpt and safetensors -fe054ee fix loading of the NAI model -4e849a1 vae override for PixArt models -d3fa74e add rembg human model -88637ae print "Model converted" after converting a model -89ff874 Merge branch 'master' into pixart -c0adb0d vae override option -980596d Merge pull request #127 from aplio/feature/add-include-subfolder-toggle-and-feat-to-captioning -1b079a5 add subdir parameter to the caption ui script -ad9a9ed move subdir button to the right -98c5305 pixart 
diffusers to ckpt converter -1b54133 fix AlignProp for all models -91d27e5 feature. allow captionUI to include subdirs -d763ced use switch instead of checkbox for subdirectories -2a732cc Merge pull request #128 from aplio/feature/use-gpu-for-onnxruntime -118d40c add GPU support for RembgModel -03df5dd fix dtype exception for text encoders during caching -cad974e Merge pull request #126 from orcinus/tb-smooth-loss -54cfe65 nits. make ext check symmetrical -5dbee97 fix. sub-folders -> sub-directories -a92bbfe nits. window was bit narrow -4c94ce1 Nits. add include_subdirectories cmnt -5642ac3 feature. add include_subdirectories to batch masking -5da4125 Rename include_subfolders to include_subdirectories -f5d4013 Update WDModel to support GPU execution -e242691 Update onnxruntime version to onnxruntime-gpu -b452988 feature. enable image captioning to include subfolders -e70f9b2 Add smooth loss to tensorboard -b4e699c fix other models -73173f5 Merge pull request #124 from captin411/master -466103d bug fixes and niceties -425d38e Merge pull request #125 from aplio/bugfix/wdmodel-was-not-present-in-scripts-aka-cli-captioning -36ac910 Add WDModel to generate_captions.py -1d4d19d Set alpha for your mask brush in manual edit mode -8c86a41 enable xformers for PixArt and improve caching vram usage -54f4563 fix pixart tokenizer saving after deleting the starting file -2c6f34e fix default values in the optimizer window -4363bc2 fix relative step for AdaFactor -04b290f add prompt output to PixArt debug mode -870c613 fix PixArt text and batch handling -d102d2e PixArt Alpha fine tuning -c2be0c6 add device settings to the ui -9e61dab fix some debug mode bugs and add prompt output -abca656 rename backup folders from "step" to "backup" -acb522e add train progress to backup names -ca125a6 fix learning rate schedulers not working -46c091d Merge pull request #107 from magnusviri/add-shabang -bceffc7 Add ! 
to the start of all unix scripts -33248cc fix manual sampling when using gradient accumulation -27a6763 update the quick start guide to remove a reference to the "Latent Caching Epochs" setting -ef8c721 Merge branch 'repeats' -f506066 fix exception when loading older concept files -be1f774 concept loss weight -5f229f3 Merge pull request #98 from orcinus/sdxl_latent2rgb -93ecb7c fix formatting -95da510 Merge pull request #102 from SirTrippsalot/master -d572bcf fix bool args -bb0c2eb Merge pull request #99 from orcinus/expose_tensorboard -8149723 fix ui and arg handling -5a62e52 add shuffling when aspect ratio bucketing is disabled -27f6ccd resolution override without aspect ratio bucketing -303477d resolution override with aspect ratio bucketing -dfcf55e fixed image augmentations -cf5681f Fix arg calls -490454c UI Fixes and fixed missing close -38db43d UI code for sample logic -41833e9 Setup trainargs for new sample logic -b4f63ff Add conditional logic for samples -8d93b24 update mgds to fix concept name dependency while caching -2da2960 Add option to bind Tensorboard to all interfaces -368f2ff Add quick and dirty SDXL latent2rgb conversion -1605554 display folder name if no name is specified in concept -41166a6 switch to disable concepts -250e891 randomize seed when cloning concept -b84aee3 fix ctk exception for file and dir entries -ad0a7de repeats for Wuerstchen and fixes -dc9f262 repeats for SDXL, optimized caching -c78e4e4 fix TrainArgs argument names for exported scripts -49e3ad3 initial version of concept variations and repeats -ba5dc09 Merge pull request #95 -5dcccc5 Detail installation process for linux -f548b7e Merge pull request #74 -c6d406f Merge pull request #86 from dougbtv/dockerfile -d9ce759 use create_param_groups in all setup classes -3f45752 remove file from gitignore -d6f7b5d completed base integration of lr scaler -0744b25 Merge remote-tracking branch 'Nerogar/master' -0639eff integrating in base -2af10cb Merge remote-tracking branch 'origin/master' -5d536d6 add -e to git dependencies, trying to fix update issues -64b8323 Provides a Dockerfile and example build instructions -a0348cc update resolution tooltip -21801cc Merge pull request #75 from hameerabbasi/calc-loss -5ab53e7 multi resolution training -e78700d Make fix default kwarg value. -0fc97cd compute mask mean only once -725bf0c fix diffusion loss for masked training of inpainting models -c2983c1 small device fixes for GenerateLossesModel -1f57913 embedding refactoring -a3b5ecc slightly reduce the height of the CaptionUI window -e35ef68 Fix timestep to be torch.Tensor. -ea2f581 Small backcompat fix. -d178f57 Merge branch 'master' into calc-loss -8b9c3a4 Use deterministic timestep during image loss calculation. -abe153a fixed missing param group appends -81264f3 fix pause between epochs when no caching is needed -f5f02d4 remove kandinsky -cb7dd2b Use less VRAM. -3d00e90 Attempt at compaction -aeca2f4 fix formulae errors -c246f5f enable text encoder training for wuerstchen fine tunes -5d671a4 Cache the right epoch. -bdb882f First attempt at script that calculates loss. 
-17aa446 LR calculation code, final fixes for PR -0f3dfd5 Add missing imports -49cd2e4 Wuerstchen implementation of loss, moved declarations in diffusion -44ad32e DiffusionMixin Implementation of loss code -73224d8 Bug Fixes and Unifying arg naming -ad28579 Setup UI for loss and scaling -6ad1b4f Setup Enums for Loss and LR -a00e245 Setup Trainargs for Loss and LR scaling -2dcd1e6 fix alignprop for stable sd and sdxl -5e21895 fix an exception when converting a model -8b436ff fix a missing output value in the SDXL data loader -d5264e8 Merge branch 'master' into wuerstchen -339fe39 wuerstchen embedding training -bdfb1fa sampling tool to sample the model without training -9a79c5f fix continue from backup when fine tuning wuerstchen -ad0181d fix normalize_masked_area_loss with disabled masked training -341f72c wuerstchen presets -18e003d simplify preset definitions -089a2ba saving of wuerstchen fine tunes -1625b43 saving of wuerstchen LoRAs -14f89cc Merge branch 'master' into wuerstchen -6834aa0 wuerstchen specific model settings -4a46550 update dependencies to enable gradient checkpointing -595a408 fix embedding training -1ead531 fixed the continue from last backup function -9b722bf Merge branch 'master' into wuerstchen -455bfb9 fixed text encoder output handling for wuerstchen v2 -651aa86 fixed text encoder output norm when training SD 1.5 without text encoder -60743a4 removed align prop and masked training sections for wuerstchen -74af2b6 Merge branch 'master' into wuerstchen -28abae3 fixes for wuerstchen training -e5555a9 Merge pull request #63 from Lolzen/patch_linux -ef3080c fix native python check and also check for VENV -f769ac9 fix an exception when training SDXL embeddings with disabled text encoder 2 training -02e4818 fix an exception when trying to open optimizer settings -e876dbe rename "text encoder" settings to "text encoder 1" for SDXL in the ui -b1ed003 additional fixes after a rebase -e3ff9b7 rebase fixup -ed34545 simplify noise setup for wuerstchen -e1b1e10 initial wuerstchen-v2 support -8600242 update SDXL presets -d44e706 separate text encoder settings for SDXL -b9307ad train ui modules based on selected model -1c504cc fix some left usages of extra_model_name -5eda7d1 separation of model stage settings -60ce7e6 fix the backup now button -5d6065d remove 'break' from install.sh; these are meant for loops -ee35203 add scipy as Linux dependency -0470e19 fix sampling in png format -f1c8175 fix dataloader bugs for vae fine tuning -0a99509 fix exception when using create_train_files.py -25a1fe2 actually let native python execute the install cmd -5f1e229 remove whitespaces; add native python venv commands, fix typos -e756f5c forgot to remove echo -8ee740b add rudimentary start-ui.sh file -8ce8940 add rudimentary update.sh file -ea39c4b fix typos -e18fb6a add rudimentary install.sh file -da026c4 backup now button -e22b040 fix exception when training embeddings with text encoder training disabled -06a858b re-enable safety checks when deleting the cache directory -49480b7 update mgds -0bbadc8 big fixes for vram optimizations -4169426 further vram optimizations for training and caching -6fffe2e vram optimization during sampling -a4100e4 fix SDXL sampling when rescale noise scheduler is enabled -7dcb642 update mgds -d1ac15f Merge pull request #59 from hameerabbasi/fix-align-prop-dtype -20201d8 load HPSv2 with the correct precision -f090681 Merge pull request #58 -3a273c0 Fix HPS as well. -4519715 Fix align-prop dtype to match data. 
-860f32c Revert to CUDA 11.8 as CUDA 12.1 doesn't work yet in a lot of environments. -b6c81a6 fix KeyError: 'loss_type' -29f819a Merge pull request #56 from hameerabbasi/xformers-compat -2891735 Ensure PyTorch compatible with xformers is fetched. -d14d85f invert masks for better add and subtract support on color masks -731dd0d AlignProp support for SDXL -a2efbdd HPSv2 support for AlignProp -ea25079 remove initial images from alignProp calculations -3aa0055 initial work in progress AlignProp implementation -a59f3be add train_from_config script for training from a json config file instead of parameters -f81ae60 rename factor to alpha -962db50 Merge pull request #53 from hameerabbasi/adaschedule -dd4c3bc Pass initial_lr to AdafactorScheduler. -03e04d4 Merge remote-tracking branch 'origin/master' into adaschedule -32f72e9 Merge pull request #54 from SirTrippsalot/Optimizers -e72d1ca fixed eps tuple wrong var -3010307 Merge remote-tracking branch 'origin/master' into Optimizers -de6cc05 Merge remote-tracking branch 'origin/master' into adaschedule -036ca9e Merge pull request #51 from hameerabbasi/non-binary-mask -8a08629 Weight model by alpha everywhere. -8baff6e Fix blend mode and scale original mask by alpha. -ad83194 Add Adafactor Scheduler. -cc412b8 Add new mode instead of mixing add/subtract and blending behaviour. -76d370c Update UI. -1e6de42 Clarify intention of --alpha during mask generation. -943c9e0 Update docstrings. -a47393f show model options in mask and caption scripts -00f5ea5 Reduce memory usage. -beb474d Fix some math. -bee0da5 Enable generation of non-binary masks. -16250b9 Merge pull request #50 from SirTrippsalot/Optimizers -ecc733d Bugfix #Errors handling Bool in conditional optimizers logic -40e4de4 Merge pull request #49 from SirTrippsalot/Optimizers -2013663 Refactor or logic with if else -2c3065e Merge pull request #48 from SirTrippsalot/Optimizers -9683499 Merge remote-tracking branch 'SirTrippsalot/Optimizers' into Optimizers -8299ffc bugfix for saving on optimizers window close -fcdb289 call the command for options_adv at least once -66d8036 fix nullable bool values for cli arguments (again) -1e0b9a9 fix exception when loading from default values -b68dfc5 Merge pull request #45 from SirTrippsalot/Optimizers -24cce91 remove default values for optimizer settings -3e2e1a2 actually restore defaults when clicking on "Load Defaults" in the advanced optimizer settings -eb926ac fix inf handling for command line arguments -b2540b9 Fixed inf bug, added loading prefs/defaults to trainui -76aaeb2 Refactoring optimizer prefs -3397eae fix optimizer default issues -c4498ce use fused optimizers by default -c2e1e3e remove duplicated argument -bd8fe8a code formatting -32b1cea Support for default arguments in case of CLI with no arg provided -2f7e089 Merge remote-tracking branch 'SirTrippsalot/Optimizers' into Optimizers -455aa69 Added handling of defaults and user prefs per optimizer -613dc08 fix tuple creation -9a3586b replace _ by - in optimizer cli arguments -5f0d37c Added Parser Args -6c78329 fixed missing beta3 from dadapt_adan case -2f4e879 Restored None defaults -68b14db Remove Deprecated tuple handling -11f566a Update Lion handling -3b3f770 Refactor args and tuple handling -891fb18 Merge branch 'master' into Optimizers -3437bfd fix command line parsing -5df3f44 Add new trainargs, refactor dynamic_ui code -64d130c Added and enhanced torch optimizers -2a939fb Added more optimizer defaults, updated optimizer calls for dadapt -a4f28cb Expanded Lookup for initial testing -319c1bf 
Load Defaults Functionality -10c75d9 Updating components entry and switch to accept Override -4bdb3ed remove added print -abc185c Merge remote-tracking branch 'origin/master' into Optimizers -a46e262 Fixed bug with none handling causing TKINTER error -d5af4dc args rework -77349e4 Updated Args and UI -bbd5de0 refactor model loading classes, add model spec to all sd models -22c48d1 sdxl clip skip support -e22afc9 update tensorboard and remove explicit six deptendency -8b62f31 fix v prediction sampling -eaadeb2 force utf-8 encoding for saved text files -76537a1 update PIL to fix CVE-2023-4863 -0926c57 revert change to sample and concept file names when saving backups -1ab22cb Hotfix -- args -2a1fc3e Implement new options_adv component type -a339ff4 fixed casing on tuple -5ba6d52 Enable some disabled deatures for adafactor -77f229c Adding Arg datatypes, handling of inf, enhanced None, Tuples -892feac Optimizer Train Args -7d307db Add New/enhanced Optimizers to system -28d1ff8 update gitignore (.bak) -6a88fa2 Optimizers UI Changes -4320b5c save config when creating a backup -6f9fc34 fix window scaling issues when editing masks -898a78d tag shuffling -71a2bbf perturbation noise -10f606a rolling backup options -c3ed854 new option to create a mask by hex color values -83d76b9 option to disable gradient checkpointing -0650480 Merge pull request #42 from Janca/master -435db3f more backup loading feedback and presets -650a587 Merge pull request #1 from Nerogar/master -6b70d96 Added zero-config continue training -3d5e49d clear memory after each training run -7d0cfd4 print loss information in the console -c20e62e add kohya headers to fix version detection in the A1111 webui -385e015 temporary fix for tensorboard -b6cc3ed replace rembg dependency with custom code to reduce dependency hell -e3e833b update bits and bytes to 0.41.1 -1b50931 option to disable samples in the sampling tab -5828d15 embedding weight dtype setting in the ui -6966a0c improved dtype handling for reduced embedding vram usage -2bb867f readme update -e1bca5b improved sampling ui -d037f6a custom sampling progress bar -daa55f8 auto resize the sample image in the sample window -0b91b02 simple manual sampling -569f661 update presets to never automatically save -2366606 random seed option for sampling -17e1962 deterministic training -eefc94f fix preview of RGBA images -82a91e6 option to regularly save the model during training -6980c15 sdxl inpainting LoRA preset -a815545 initial sdxl inpainting support -8639264 button on the sample tab for immediate sampling -bf1311c configurable image format, deterministic sampling -7bcefa0 more schedulers for sampling -abbdf83 fix initial noise sigma to enable Euler scheduler -33583e1 ui fixes, sampler setting -e517fae more sampling options for SD/SDXL models -cff7a26 add current ema decay value to tensorboard -677efb5 fix lora loading with partially trained models -b57063a fix dtype issues and sdxl embeddings when text encoder training is disabled -3d61eae readme update -12a8753 wd14 tagging in caption ui -c8f13bc Merge pull request #33 from SirTrippsalot/master -946cf77 adding optimizers: adagrad, rmsprop (with 8 bit) -d2c1aeb sdxl embedding training -e6ad155 update mgds -18410dc fix lr schedulers with higher batch sizes -b96ead0 fix lr schedulers with active gradient accumulation -e0572a9 REX lr scheduler -80bb176 new "open in explorer" option in caption ui -2eab5c6 more dtype fixes -0cc6215 add weight dtypes to presets -7717a1b mixed dtype sampling -ee08fec override dtypes for model stages -0990595 
better error message when backups can not be saved -ddb27b0 fix bfloat16 conversion -0a18e5b allow output of bfloat16 models -bb53f29 update bitsandbytes -622746e fix vae fine tuning -d211707 readme update -4e9246c enable loading of embeddings without loading a base model -0187ba5 model conversion ui -8306924 improvements for LoRA training and model conversions -2f383fe reduce memory usage when using the SGD optimizer -858c58c Merge pull request #31 from allenbenz/less-ram-on-save -e6ddf16 Revert train.py -356c7c2 set the default output format to safetensors for command line scripts -68f3185 set the default output format to safetensors for all models -84e9b98 better model spec support -f5a5b13 include model spec when saving safetensors SDXL files -b36bb50 Save the model if backup_before_save is disabled. -1a8fe51 Put values changed by converted_state_dict on the cpu to avoid vram just before saving. -e2438ce manual gc every 5 minutes during training -ccbc17f rename sdxl presets from 0.9 to 1.0 -6afccb8 fixed for SDXL text encoder fine tuning -bac029e fix lora alpha scaling during training -7aea872 prevent crashes when trying to save backups -8db4e64 convert state dict to contiguous before saving model files -fba946e fix dependency errors during installation -69dc4cd remove fixed dtype when loading SD from a single safetensors file -f50eaaa remove fixed dtype when loading SDXL from a single safetensors file -060e9a8 fix TrainArgs typo -955c3e9 documentation fix -8426b91 caption UI fixes -2969a1c more options for the masking and captioning scripts -03bc3f6 captioning documentation -c23214d fix reloading of masking models -1816a25 rembg support for generating masks -3cc42d4 remove duplicated doc comments -ac44034 simple captioning and masking ui -5e22bb6 fix for text encoder SDXL LoRA weight names -dc48429 replace sd scheduler with DDIM during loading -94f20f3 fix for cache clearing on empty caches -0f4742b automatically clear the cache before starting -732a6a0 new script to create default files -8357e99 update mgds -f9664e3 Merge pull request #24 from FinFanFin/master -3bafe36 fix sdsampler bug -2c4ec62 Merge pull request #23 from FinFanFin/master -61dbb4e add support for the Prodigy optimizer -b16c13c fix tensorboard bug -ba39e2b Merge pull request #21 from FinFanFin/master -7e8105b rename optimizer enum values -b010b26 docs fix -cee296f add dadaptation and lion optimizers -57daa06 update mgds -069b9ab replace transformers rep dependency with pypi version -07b225f Merge pull request #18 from float-trip/patch-1 -4cde07f replace debug dir dialog with a dir_entry -6ff50be Use list of arguments instead of a string for Popen() -1eba125 fix exception when starting an embedding training -5f6e64d embedding training documentation -8bb0ab6 fix a parameter issue when loading ckpt models -12012f6 update diffusers to solve issues when loading ckpt models -38939e8 fix missing weight_dtype parameters when loading lora models -2ed9298 fix being unable to click the prompt source button -48a751c remove debug stacktrace messages -b9c4876 add an additional state_dict wrapper around exported ckpt files -f4138c1 fix possibly high memory consumption during caching -c9cf1f4 various SDXL fixes -195128a positional encodings for SDXL training -31c2c4d caching improvements for SDXL -cd18f92 training on different weight data types -6165de3 support for loading any SDXL model -f4f3a46 support for saving single file SDXL checkpoints -59a325c work in progress on SDXL LoRA training -10261e6 fix double . 
character when saving state dict of sd 1.5 text encoders -8b224eb fix for training on prompts from filenames -dbd75bf quick start guide -9b79e8a update mgds -3d0fb23 Merge branch 'ema' -573f5c1 sampling code cleanup -fc37ced fix for the batch size label -96571f9 EMA support for all training methods -9244bb4 kandinsky saving -65ebd2e ui tooltips -46eb760 kandinsky lora training -eb72179 support for loading kandinsky models -9956128 bitsandbytes 8 bit optimizers -e78fd28 support for nested datasets -7466cbd trim leading and trailing whitespace from paths -a5c818e xformers update -31c71cc support for stable diffusion v2 -d178840 preset cleanup -344b4ae readme update -2db57c6 typo -471cca8 custom sampling code for more control over different parts of the calculations -26321e5 refactorings and more data type/attention options -bdc80a0 refactorings and more data type/attention options -f94c224 fixed missing samples and concepts error on initial startup -8904e4e LoRA and Embedding training support in the UI -dfcf15f fixed an issue with incorrectly scaled conditioning images in the data loader -d5f4f7f clip skip support -2efbeae support for different learning rate schedulers -b48474d more image augmentation options -e42185d fixed being unable to restart training after stopping -c940c2b fixed tensorboard when running through start-ui.bat -801bfff one click scripts for installing, updating and starting the ui -0f1027d support for multiple prompts per sample, more data augmentation options, UI improvements and bug fixes -8db7fd2 update to pytorch 2.0 and bug fixes -35fe5d1 usable ui for fine tuning -f6e0c2b updated readme and docs for contributions -afb84fe ui for concept management -8179f6f more UI stuff -60888cd some initial UI work -3c7aa2b tensorboard integration for loss and sample tracking -1c941d4 discord link -c5fc803 lora training -4acd7a7 different learning rates for unet and text encoder, cache only option -42f0fc4 add a license -e6a696c readme update -73e52e7 embedding training -ab0c165 fix gradient accumulation (for real this time) -9c08419 fix gradient accumulation -977cdc6 mgds update -5325fc5 basic vae training -89e6388 add a backup, restore and continue functionality -57c57d2 update mgds -5004964 add proper support for masked training -6d52aa9 readme -bbce76a many fixes -5130394 initial commit From c41077e67c9c3e233f10297c59b0dcdc5e2456c7 Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 3 Sep 2024 19:37:31 +0900 Subject: [PATCH 09/17] Translate to English. 
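The translated notebook cells drive a training run straight from a JSON config. For reference, a minimal script-form sketch of the same flow is below; the module import paths and the config path are assumptions based on the names used in the notebook, and dependencies are assumed to be installed beforehand with pip install -r requirements.txt.

import json
import os

# Import paths assumed from OneTrainer's module layout; adjust if they differ.
from modules.trainer.GenericTrainer import GenericTrainer
from modules.util.callbacks.TrainCallbacks import TrainCallbacks
from modules.util.commands.TrainCommands import TrainCommands
from modules.util.config.TrainConfig import TrainConfig

config_path = "config/train.json"  # placeholder; any exported TrainConfig JSON

# Load the training configuration the same way the notebook cell does.
train_config = TrainConfig.default_values()
with open(config_path, "r") as f:
    train_config.from_dict(json.load(f))

# Create the directories referenced by the config if they do not exist yet.
for dir_path in [train_config.debug_dir, train_config.workspace_dir, train_config.cache_dir]:
    os.makedirs(dir_path, exist_ok=True)

trainer = GenericTrainer(train_config, TrainCallbacks(), TrainCommands())
trainer.start()
try:
    trainer.train()
finally:
    trainer.end()
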
--- modules/model/StableDiffusionXLModel.py | 10 +++---- train.ipynb | 38 ++++++++++++------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/modules/model/StableDiffusionXLModel.py b/modules/model/StableDiffusionXLModel.py index ba610147..6af4380d 100644 --- a/modules/model/StableDiffusionXLModel.py +++ b/modules/model/StableDiffusionXLModel.py @@ -219,10 +219,10 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): if chunk.numel() == 0: continue - # アテンションマスクの作成(1がマスクしない、0がマスクする) + # Create attention mask (1 for non-masked, 0 for masked) attention_mask = torch.ones_like(chunk, dtype=torch.bool) - # まず、BOSとEOSを追加 + # First, add BOS and EOS tokens bos_tokens = torch.full((chunk.shape[0], 1), tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) eos_tokens = torch.full((chunk.shape[0], 1), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) @@ -231,8 +231,8 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): torch.zeros_like(eos_tokens, dtype=torch.bool) if i < len(chunks) - 1 else torch.ones_like(eos_tokens, dtype=torch.bool)], dim=1) - # パディングで埋める - if chunk.shape[1] < chunk_length + 2: # +2 はBOSとEOSのため + # Fill with padding + if chunk.shape[1] < chunk_length + 2: # +2 is for BOS and EOS padding = torch.full((chunk.shape[0], chunk_length + 2 - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) chunk = torch.cat([chunk, padding], dim=1) attention_mask = torch.cat([attention_mask, torch.zeros_like(padding, dtype=torch.bool)], dim=1) @@ -290,7 +290,7 @@ def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): text_encoder_2_output, pooled_text_encoder_2_output = __process_tokens(tokens_2, self.tokenizer_2, self.text_encoder_2, text_encoder_2_layer_skip) if text_encoder_1_output is None or text_encoder_2_output is None: - print("両方のテキストエンコーダーの出力がNoneです。入力テキストまたはトークンを確認してください。") + print("Both text encoder outputs are None. Check your input text or tokens.") text_encoder_output = torch.cat([text_encoder_1_output, text_encoder_2_output], dim=-1) diff --git a/train.ipynb b/train.ipynb index 7ab81b45..78d0716b 100644 --- a/train.ipynb +++ b/train.ipynb @@ -23,26 +23,26 @@ }, "outputs": [], "source": [ - "# 依存関係のインストール\n", + "# Install dependencies\n", "!pip -r requirements.txt\n", "\n", - "# CUDAの確認\n", + "# Check CUDA availability\n", "import torch\n", "cuda_available = torch.cuda.is_available()\n", "print(f\"CUDA is {'available' if cuda_available else 'not available'}\")\n", "\n", "if not cuda_available:\n", - " use_zluda = input(\"CUDAが利用できません。WindowsでAMD GPUを使用していますか? (y/n) \")\n", + " use_zluda = input(\"CUDA is not available. Are you using an AMD GPU on Windows? 
(y/n) \")\n", " if use_zluda.lower() == 'y':\n", - " print(\"ZLUDAのインストールを続行します\")\n", + " print(\"Proceeding with ZLUDA installation\")\n", " %run scripts/install_zluda.py\n", " else:\n", - " print(\"エラー:インストール中に問題が発生しました\")\n", + " print(\"Error: An issue occurred during installation\")\n", "else:\n", - " print(\"インストールが完了しました\")\n", + " print(\"Installation completed\")\n", "\n", - "# 注意:この環境では%pipを使用していますが、\n", - "# 通常のコマンドラインでは!pipを使用することに注意してください" + "# Note: This environment uses %pip,\n", + "# but remember to use !pip in regular command line" ] }, { @@ -59,10 +59,10 @@ }, "outputs": [], "source": [ - "# 必要なライブラリのインポート\n", + "# Import necessary libraries\n", "import os\n", "\n", - "# mgdsをリポジトリにcloneした場合\n", + "# If mgds is cloned to the repository\n", "# import sys\n", "# sys.path.append('mgds/src')\n", "\n", @@ -84,25 +84,25 @@ }, "outputs": [], "source": [ - "# TrainConfigのインスタンスを作成\n", + "# Create an instance of TrainConfig\n", "train_config = TrainConfig.default_values()\n", "with open(config_path, \"r\") as f:\n", " train_config.from_dict(json.load(f))\n", "\n", - "# userwarningを表示しない\n", + "# Suppress user warnings\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", - "# TrainConfigで読み込んだ\"debug_dir\",\"workspace_dir\",cache_dir\"が存在しなければ作成\n", + "# Create directories loaded in TrainConfig if they don't exist\n", "for dir_path in [train_config.debug_dir, train_config.workspace_dir, train_config.cache_dir]:\n", " if not os.path.exists(dir_path):\n", " os.makedirs(dir_path)\n", "\n", - "# コールバックとコマンドの設定\n", + "# Set up callbacks and commands\n", "callbacks = TrainCallbacks()\n", "commands = TrainCommands()\n", "\n", - "# トレーニングプロセスの開始\n", + "# Start the training process\n", "print(\"Destination_path: \", train_config.output_model_destination)\n", "print(\"Workspace_path: \", train_config.workspace_dir)\n", "print(\"Debug_path: \", train_config.debug_dir)\n", @@ -112,14 +112,14 @@ "trainer.start()\n", "\n", "try:\n", - " # トレーニングの実行\n", + " # Execute training\n", " trainer.train()\n", "except Exception as e:\n", - " print(f\"トレーニング中にエラーが発生しました: {e}\")\n", + " print(f\"An error occurred during training: {e}\")\n", "finally:\n", - " # トレーニング終了時の処理\n", + " # Process at the end of training\n", " trainer.end()\n", - " print(\"トレーニングが終了しました\")" + " print(\"Training has completed\")" ] } ], From 4b18e793339b34bda2f8b8e19130c92c0c854dc6 Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 3 Sep 2024 23:46:09 +0900 Subject: [PATCH 10/17] Unlock Flux Finetune. 
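This exposes the existing FINE_TUNE method for Flux models in the TopBar dropdown. The same choice can be made when building a config programmatically; a small hedged sketch follows. The enum import paths and the training_method attribute name are assumptions based on OneTrainer's layout, while model_type.is_flux() and ModelType.FLUX_DEV_1 appear elsewhere in this series.

# Hedged sketch: selecting Flux fine-tuning without going through the UI.
from modules.util.config.TrainConfig import TrainConfig      # path assumed
from modules.util.enum.ModelType import ModelType            # path assumed
from modules.util.enum.TrainingMethod import TrainingMethod  # path assumed

train_config = TrainConfig.default_values()
train_config.model_type = ModelType.FLUX_DEV_1
train_config.training_method = TrainingMethod.FINE_TUNE  # newly selectable for Flux

assert train_config.model_type.is_flux()
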
--- modules/ui/TopBar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ui/TopBar.py b/modules/ui/TopBar.py index dc7c0739..76890db5 100644 --- a/modules/ui/TopBar.py +++ b/modules/ui/TopBar.py @@ -115,6 +115,7 @@ def __create_training_method(self): ] elif self.train_config.model_type.is_flux(): values = [ + ("Fine Tune", TrainingMethod.FINE_TUNE), ("LoRA", TrainingMethod.LORA), ("Embedding", TrainingMethod.EMBEDDING), ] From 0d72d61aca655679b5008ed64f6c11e4c6454fda Mon Sep 17 00:00:00 2001 From: celll1 Date: Thu, 5 Sep 2024 01:34:51 +0900 Subject: [PATCH 11/17] Tokenizer code is moved to clip_util.py --- modules/model/FluxModel.py | 11 +- modules/model/StableDiffusionModel.py | 64 ++--------- modules/model/StableDiffusionXLModel.py | 106 +++++------------- modules/model/util/clip_util.py | 63 ++++++++--- .../modelLoader/FluxFineTuneModelLoader.py | 3 +- 5 files changed, 98 insertions(+), 149 deletions(-) diff --git a/modules/model/FluxModel.py b/modules/model/FluxModel.py index 84884582..ac7cf1b1 100644 --- a/modules/model/FluxModel.py +++ b/modules/model/FluxModel.py @@ -214,9 +214,8 @@ def encode_text( if tokens_1 is None and text is not None and self.tokenizer_1 is not None: tokenizer_output = self.tokenizer_1( text, - padding='max_length', - truncation=True, - max_length=77, + # padding='max_length', + truncation=False, return_tensors="pt", ) tokens_1 = tokenizer_output.input_ids.to(self.text_encoder_1.device) @@ -224,9 +223,9 @@ def encode_text( if tokens_2 is None and text is not None and self.tokenizer_2 is not None: tokenizer_output = self.tokenizer_2( text, - padding='max_length', + # padding='max_length', truncation=True, - max_length=77, + max_length=4096, return_tensors="pt", ) tokens_2 = tokenizer_output.input_ids.to(self.text_encoder_2.device) @@ -241,7 +240,7 @@ def encode_text( text_encoder_output=None, add_pooled_output=True, pooled_text_encoder_output=pooled_text_encoder_1_output, - use_attention_mask=False, + use_attention_mask=True, ) if pooled_text_encoder_1_output is None: pooled_text_encoder_1_output = torch.zeros( diff --git a/modules/model/StableDiffusionModel.py b/modules/model/StableDiffusionModel.py index 867633a9..c553040c 100644 --- a/modules/model/StableDiffusionModel.py +++ b/modules/model/StableDiffusionModel.py @@ -222,64 +222,24 @@ def encode_text( text_encoder_layer_skip: int = 0, text_encoder_output: Tensor | None = None, ): - chunk_length = 75 - max_embeddings_multiples = 3 - - def __process_tokens(tokens): - if tokens is None or tokens.numel() == 0: - return None - - chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] - chunk_embeddings = [] - - for chunk in chunks: - if chunk.numel() == 0: - continue - - if chunk.shape[1] < chunk_length: - padding = torch.full((chunk.shape[0], chunk_length - chunk.shape[1]), self.tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([chunk, padding], dim=1) - - bos_tokens = torch.full((chunk.shape[0], 1), self.tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) - eos_tokens = torch.full((chunk.shape[0], 1), self.tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) - - with self.autocast_context: - embedding, _ = encode_clip( - text_encoder=self.text_encoder, - tokens=chunk, - default_layer=-1, - layer_skip=text_encoder_layer_skip, - text_encoder_output=None, - add_pooled_output=False, - use_attention_mask=False, - add_layer_norm=True, - ) - - 
chunk_embeddings.append(embedding) - - if not chunk_embeddings: - return None - - if len(chunk_embeddings) > max_embeddings_multiples: - chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] - - combined_embedding = torch.cat(chunk_embeddings, dim=1) - - return combined_embedding - if tokens is None: tokenizer_output = self.tokenizer( text, - padding='max_length', + padding="max_length", truncation=False, return_tensors="pt", ) tokens = tokenizer_output.input_ids.to(self.text_encoder.device) - text_encoder_output = __process_tokens(tokens) - - if text_encoder_output is None: - print("Text encoder output is None. Check your input text or tokens.") + text_encoder_output, _ = encode_clip( + text_encoder=self.text_encoder, + tokens=tokens, + default_layer=-1, + layer_skip=text_encoder_layer_skip, + text_encoder_output=text_encoder_output, + add_pooled_output=False, + use_attention_mask=True, + add_layer_norm=True, + ) - return text_encoder_output + return text_encoder_output \ No newline at end of file diff --git a/modules/model/StableDiffusionXLModel.py b/modules/model/StableDiffusionXLModel.py index 6af4380d..573c0e69 100644 --- a/modules/model/StableDiffusionXLModel.py +++ b/modules/model/StableDiffusionXLModel.py @@ -203,95 +203,47 @@ def encode_text( text_encoder_2_output: Tensor = None, pooled_text_encoder_2_output: Tensor = None, ): - chunk_length = 75 - max_embeddings_multiples = 3 - - def __process_tokens(tokens, tokenizer, text_encoder, layer_skip): - if tokens is None or tokens.numel() == 0: - return None, None - - chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] - chunk_embeddings = [] - pooled_outputs = [] - attention_masks = [] - - for i, chunk in enumerate(chunks): - if chunk.numel() == 0: - continue - - # Create attention mask (1 for non-masked, 0 for masked) - attention_mask = torch.ones_like(chunk, dtype=torch.bool) - - # First, add BOS and EOS tokens - bos_tokens = torch.full((chunk.shape[0], 1), tokenizer.bos_token_id, dtype=chunk.dtype, device=chunk.device) - eos_tokens = torch.full((chunk.shape[0], 1), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) - attention_mask = torch.cat([torch.zeros_like(bos_tokens, dtype=torch.bool) if i > 0 else torch.ones_like(bos_tokens, dtype=torch.bool), - attention_mask, - torch.zeros_like(eos_tokens, dtype=torch.bool) if i < len(chunks) - 1 else torch.ones_like(eos_tokens, dtype=torch.bool)], - dim=1) - - # Fill with padding - if chunk.shape[1] < chunk_length + 2: # +2 is for BOS and EOS - padding = torch.full((chunk.shape[0], chunk_length + 2 - chunk.shape[1]), tokenizer.eos_token_id, dtype=chunk.dtype, device=chunk.device) - chunk = torch.cat([chunk, padding], dim=1) - attention_mask = torch.cat([attention_mask, torch.zeros_like(padding, dtype=torch.bool)], dim=1) - - attention_masks.append(attention_mask) - - with self.autocast_context: - outputs = text_encoder( - chunk, - attention_mask=attention_mask, - output_hidden_states=True, - return_dict=True, - ) - embedding = outputs.hidden_states[-(2 + layer_skip)] - if hasattr(outputs, 'text_embeds'): - pooled_outputs.append(outputs.text_embeds) - - chunk_embeddings.append(embedding) - - if not chunk_embeddings: - return None, None - - if len(chunk_embeddings) > max_embeddings_multiples: - chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] - attention_masks = attention_masks[:max_embeddings_multiples] - if pooled_outputs: - pooled_outputs = 
pooled_outputs[:max_embeddings_multiples] - - combined_embedding = torch.cat(chunk_embeddings, dim=1) - # combined_attention_mask = torch.cat(attention_masks, dim=1) - pooled_output = pooled_outputs[0] if pooled_outputs else None - - return combined_embedding, pooled_output - if tokens_1 is None and text is not None: - tokens_1 = self.tokenizer_1( + tokenizer_output = self.tokenizer_1( text, padding='max_length', truncation=False, return_tensors="pt", - ).input_ids.to(self.text_encoder_1.device) + ) + tokens_1 = tokenizer_output.input_ids.to(self.text_encoder_1.device) if tokens_2 is None and text is not None: - tokens_2 = self.tokenizer_2( + tokenizer_output = self.tokenizer_2( text, padding='max_length', truncation=False, return_tensors="pt", - ).input_ids.to(self.text_encoder_2.device) - - if text_encoder_1_output is None: - text_encoder_1_output, _ = __process_tokens(tokens_1, self.tokenizer_1, self.text_encoder_1, text_encoder_1_layer_skip) + ) + tokens_2 = tokenizer_output.input_ids.to(self.text_encoder_2.device) - if text_encoder_2_output is None or pooled_text_encoder_2_output is None: - text_encoder_2_output, pooled_text_encoder_2_output = __process_tokens(tokens_2, self.tokenizer_2, self.text_encoder_2, text_encoder_2_layer_skip) + text_encoder_1_output, _ = encode_clip( + text_encoder=self.text_encoder_1, + tokens=tokens_1, + default_layer=-2, + layer_skip=text_encoder_1_layer_skip, + text_encoder_output=text_encoder_1_output, + add_pooled_output=False, + use_attention_mask=True, + add_layer_norm=False, + ) - if text_encoder_1_output is None or text_encoder_2_output is None: - print("Both text encoder outputs are None. Check your input text or tokens.") + text_encoder_2_output, pooled_text_encoder_2_output = encode_clip( + text_encoder=self.text_encoder_2, + tokens=tokens_2, + default_layer=-2, + layer_skip=text_encoder_2_layer_skip, + text_encoder_output=text_encoder_2_output, + add_pooled_output=True, + pooled_text_encoder_output=pooled_text_encoder_2_output, + use_attention_mask=True, + add_layer_norm=False, + ) - text_encoder_output = torch.cat([text_encoder_1_output, text_encoder_2_output], dim=-1) + text_encoder_output = torch.concat([text_encoder_1_output, text_encoder_2_output], dim=-1) - return text_encoder_output, pooled_text_encoder_2_output + return text_encoder_output, pooled_text_encoder_2_output \ No newline at end of file diff --git a/modules/model/util/clip_util.py b/modules/model/util/clip_util.py index 697bc0f0..c6695ba9 100644 --- a/modules/model/util/clip_util.py +++ b/modules/model/util/clip_util.py @@ -1,4 +1,5 @@ from torch import Tensor +import torch from transformers import CLIPTextModel, CLIPTextModelWithProjection @@ -16,28 +17,64 @@ def encode_clip( attention_mask: Tensor | None = None, add_layer_norm: bool = True, ) -> tuple[Tensor, Tensor]: - if (add_output and text_encoder_output is None) \ - or (add_pooled_output and pooled_text_encoder_output is None) \ - and text_encoder is not None: + chunk_length = 75 + max_embeddings_multiples = 3 - text_encoder_output = text_encoder( - tokens, - attention_mask=attention_mask if use_attention_mask else None, + if tokens is None or tokens.numel() == 0: + return None, None + + chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] + chunk_embeddings = [] if add_output else None + pooled_outputs = [] if add_pooled_output else None + + for i, chunk in enumerate(chunks): + if chunk.numel() == 0: + continue + + # Create attention mask (1 for non-masked, 0 for masked) + 
chunk_attention_mask = torch.ones_like(chunk, dtype=torch.bool) + + # First, add BOS and EOS tokens + bos_tokens = torch.full((chunk.shape[0], 1), text_encoder.config.bos_token_id, dtype=chunk.dtype, device=chunk.device) + eos_tokens = torch.full((chunk.shape[0], 1), text_encoder.config.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([bos_tokens, chunk, eos_tokens], dim=1) + chunk_attention_mask = torch.cat([torch.zeros_like(bos_tokens, dtype=torch.bool) if i > 0 else torch.ones_like(bos_tokens, dtype=torch.bool), + chunk_attention_mask, + torch.zeros_like(eos_tokens, dtype=torch.bool) if i < len(chunks) - 1 else torch.ones_like(eos_tokens, dtype=torch.bool)], + dim=1) + + # Fill with padding + if chunk.shape[1] < chunk_length + 2: # +2 is for BOS and EOS + padding = torch.full((chunk.shape[0], chunk_length + 2 - chunk.shape[1]), text_encoder.config.eos_token_id, dtype=chunk.dtype, device=chunk.device) + chunk = torch.cat([chunk, padding], dim=1) + chunk_attention_mask = torch.cat([chunk_attention_mask, torch.zeros_like(padding, dtype=torch.bool)], dim=1) + + outputs = text_encoder( + chunk, + attention_mask=chunk_attention_mask if use_attention_mask else None, return_dict=True, output_hidden_states=True, ) + + if add_output: + embedding = outputs.hidden_states[default_layer - layer_skip] + chunk_embeddings.append(embedding) - pooled_text_encoder_output = None if add_pooled_output: if hasattr(text_encoder_output, "text_embeds"): - pooled_text_encoder_output = text_encoder_output.text_embeds + pooled_outputs.append(text_encoder_output.text_embeds) if hasattr(text_encoder_output, "pooler_output"): - pooled_text_encoder_output = text_encoder_output.pooler_output + pooled_outputs.append(text_encoder_output.pooler_output) - text_encoder_output = text_encoder_output.hidden_states[default_layer - layer_skip] if add_output else None + if chunk_embeddings is not None and len(chunk_embeddings) > max_embeddings_multiples: + chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + if pooled_outputs is not None and len(pooled_outputs) > max_embeddings_multiples: + pooled_outputs = pooled_outputs[:max_embeddings_multiples] + text_encoder_output = torch.cat(chunk_embeddings, dim=1) if chunk_embeddings is not None else None + pooled_text_encoder_output = pooled_outputs[0] if pooled_outputs else None - if add_layer_norm and text_encoder_output is not None: - final_layer_norm = text_encoder.text_model.final_layer_norm - text_encoder_output = final_layer_norm(text_encoder_output) + if add_layer_norm and text_encoder_output is not None: + final_layer_norm = text_encoder.text_model.final_layer_norm + text_encoder_output = final_layer_norm(text_encoder_output) return text_encoder_output, pooled_text_encoder_output diff --git a/modules/modelLoader/FluxFineTuneModelLoader.py b/modules/modelLoader/FluxFineTuneModelLoader.py index eb6be87b..db655270 100644 --- a/modules/modelLoader/FluxFineTuneModelLoader.py +++ b/modules/modelLoader/FluxFineTuneModelLoader.py @@ -23,7 +23,8 @@ def _default_model_spec_name( ) -> str | None: match model_type: case ModelType.FLUX_DEV_1: - return "resources/sd_model_spec/flux_dev_1.0.json" + # return "resources/sd_model_spec/flux_dev_1.0.json" + return None case _: return None From e157d7586b02ff6cf7d634690a3bcceaac13ba04 Mon Sep 17 00:00:00 2001 From: celll1 Date: Thu, 5 Sep 2024 23:01:18 +0900 Subject: [PATCH 12/17] Fix: clip_util.py. 
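With this fix, encode_clip builds the per-chunk attention mask, reads the pooled output from each chunk's outputs object, and only concatenates and layer-norms the chunk embeddings when add_output is set. A standalone sketch of calling the chunked helper on a long prompt is below; the checkpoint name is illustrative, and the keyword arguments mirror the SD 1.5 call site in StableDiffusionModel.py.

# Hedged sketch: exercising the chunked encode_clip on a prompt longer than 77 tokens.
import torch
from transformers import CLIPTextModel, CLIPTokenizer

from modules.model.util.clip_util import encode_clip

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")    # illustrative checkpoint
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")

tokens = tokenizer("a long prompt " * 60, truncation=False, return_tensors="pt").input_ids

with torch.no_grad():
    embedding, pooled = encode_clip(
        text_encoder=text_encoder,
        tokens=tokens,
        default_layer=-1,
        layer_skip=0,
        add_pooled_output=False,
        use_attention_mask=True,
        add_layer_norm=True,
    )

# chunk_length = 75 plus BOS/EOS gives 77 tokens per chunk; with
# max_embeddings_multiples = 3 the concatenated sequence is capped at 231 tokens,
# the "under 231" limit named in the first patch of this series.
print(embedding.shape)  # e.g. torch.Size([1, 231, 768]) for a three-chunk prompt
print(pooled)           # None here, because add_pooled_output=False
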
--- modules/model/FluxModel.py | 4 ++-- modules/model/util/clip_util.py | 39 +++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/modules/model/FluxModel.py b/modules/model/FluxModel.py index ac7cf1b1..42e24824 100644 --- a/modules/model/FluxModel.py +++ b/modules/model/FluxModel.py @@ -224,8 +224,8 @@ def encode_text( tokenizer_output = self.tokenizer_2( text, # padding='max_length', - truncation=True, - max_length=4096, + truncation=False, + max_length=99999999, return_tensors="pt", ) tokens_2 = tokenizer_output.input_ids.to(self.text_encoder_2.device) diff --git a/modules/model/util/clip_util.py b/modules/model/util/clip_util.py index c6695ba9..e2f5c3ac 100644 --- a/modules/model/util/clip_util.py +++ b/modules/model/util/clip_util.py @@ -24,8 +24,8 @@ def encode_clip( return None, None chunks = [tokens[:, i:i + chunk_length] for i in range(0, tokens.shape[1], chunk_length)] - chunk_embeddings = [] if add_output else None - pooled_outputs = [] if add_pooled_output else None + chunk_embeddings = [] + pooled_outputs = [] for i, chunk in enumerate(chunks): if chunk.numel() == 0: @@ -59,22 +59,27 @@ def encode_clip( if add_output: embedding = outputs.hidden_states[default_layer - layer_skip] chunk_embeddings.append(embedding) - if add_pooled_output: - if hasattr(text_encoder_output, "text_embeds"): - pooled_outputs.append(text_encoder_output.text_embeds) - if hasattr(text_encoder_output, "pooler_output"): - pooled_outputs.append(text_encoder_output.pooler_output) + if hasattr(outputs, "text_embeds"): + pooled_outputs.append(outputs.text_embeds) + elif hasattr(outputs, "pooler_output"): + pooled_outputs.append(outputs.pooler_output) - if chunk_embeddings is not None and len(chunk_embeddings) > max_embeddings_multiples: - chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] - if pooled_outputs is not None and len(pooled_outputs) > max_embeddings_multiples: - pooled_outputs = pooled_outputs[:max_embeddings_multiples] - text_encoder_output = torch.cat(chunk_embeddings, dim=1) if chunk_embeddings is not None else None - pooled_text_encoder_output = pooled_outputs[0] if pooled_outputs else None + if add_output: + if chunk_embeddings and len(chunk_embeddings) > max_embeddings_multiples: + chunk_embeddings = chunk_embeddings[:max_embeddings_multiples] + text_encoder_output = torch.cat(chunk_embeddings, dim=1) + if add_layer_norm: + final_layer_norm = text_encoder.text_model.final_layer_norm + text_encoder_output = final_layer_norm(text_encoder_output) + else: + text_encoder_output = None - if add_layer_norm and text_encoder_output is not None: - final_layer_norm = text_encoder.text_model.final_layer_norm - text_encoder_output = final_layer_norm(text_encoder_output) + if add_pooled_output: + if pooled_outputs and len(pooled_outputs) > max_embeddings_multiples: + pooled_outputs = pooled_outputs[:max_embeddings_multiples] + pooled_text_encoder_output = pooled_outputs[0] if pooled_outputs else None + else: + pooled_text_encoder_output = None - return text_encoder_output, pooled_text_encoder_output + return text_encoder_output, pooled_text_encoder_output \ No newline at end of file From 795b3835e2806450d36eee1eb9609df635991003 Mon Sep 17 00:00:00 2001 From: celll1 Date: Sun, 8 Sep 2024 19:05:00 +0900 Subject: [PATCH 13/17] fix: attention mask device. 
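torch.ones_like already inherits its input's device, and the torch.full calls in this helper pass device=chunk.device explicitly, so this change mostly documents the intent that every mask chunk stays on the text encoder's device. A quick hedged self-check of that behaviour:

# torch.ones_like inherits the input's device by default; device=chunk.device
# only makes that explicit. Vocabulary size 49408 is illustrative (CLIP).
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
chunk = torch.randint(0, 49408, (1, 75), device=device)

mask_default = torch.ones_like(chunk, dtype=torch.bool)
mask_explicit = torch.ones_like(chunk, dtype=torch.bool, device=chunk.device)

assert mask_default.device == mask_explicit.device == chunk.device
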
--- modules/model/util/clip_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/model/util/clip_util.py b/modules/model/util/clip_util.py index e2f5c3ac..1856b096 100644 --- a/modules/model/util/clip_util.py +++ b/modules/model/util/clip_util.py @@ -32,7 +32,7 @@ def encode_clip( continue # Create attention mask (1 for non-masked, 0 for masked) - chunk_attention_mask = torch.ones_like(chunk, dtype=torch.bool) + chunk_attention_mask = torch.ones_like(chunk, dtype=torch.bool, device=chunk.device) # First, add BOS and EOS tokens bos_tokens = torch.full((chunk.shape[0], 1), text_encoder.config.bos_token_id, dtype=chunk.dtype, device=chunk.device) From 72c0e1258c48c45a7a69cbccf7e239dbd4b62ad8 Mon Sep 17 00:00:00 2001 From: celll1 Date: Mon, 9 Sep 2024 17:42:36 +0900 Subject: [PATCH 14/17] Fix: Accelerate launch. --- start-ui.bat | 13 ++++++++++--- start-ui.sh | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/start-ui.bat b/start-ui.bat index 708ae2a5..b666fca9 100644 --- a/start-ui.bat +++ b/start-ui.bat @@ -11,12 +11,19 @@ goto :end :activate_venv echo activating venv %VENV_DIR% -set PYTHON="%VENV_DIR%\Scripts\python.exe" +call "%VENV_DIR%\Scripts\activate.bat" +echo venv activated: %VENV_DIR% + +set PYTHON=python if defined PROFILE (set PYTHON=%PYTHON% -m scalene --off --cpu --gpu --profile-all --no-browser) echo Using Python %PYTHON% :launch -%PYTHON% scripts\train_ui.py +accelerate launch scripts\train_ui.py +if %ERRORLEVEL% NEQ 0 ( + echo Failed to launch with accelerate. Launching with regular Python. + %PYTHON% scripts\train_ui.py +) :end -pause +pause \ No newline at end of file diff --git a/start-ui.sh b/start-ui.sh index b2029dd1..fd47d51e 100755 --- a/start-ui.sh +++ b/start-ui.sh @@ -37,16 +37,16 @@ elif [ -x "$(command -v python)" ]; then if [[ -z "$VIRTUAL_ENV" ]]; then echo "warning: No VIRTUAL_ENV set. exiting." else - python scripts/train_ui.py + accelerate launch scripts/train_ui.py || python scripts/train_ui.py fi elif [ -x "$(command -v conda)" ]; then #check for venv if conda info --envs | grep -q ${conda_env}; then - bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; python scripts/train_ui.py") + bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || python scripts/train_ui.py") else conda create -y -n $conda_env python==3.10; - bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; python scripts/train_ui.py") + bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || python scripts/train_ui.py") fi fi else From db9f2436752097cdc8ba66c3823dd8f81d3e9476 Mon Sep 17 00:00:00 2001 From: celll1 Date: Tue, 10 Sep 2024 01:20:10 +0900 Subject: [PATCH 15/17] Fix: Accelerate launch 2. 
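find_unused_parameters=True is needed once the trainer can run modules whose parameters do not all receive gradients every step; without it, DDP raises an error when some parameters that require gradients do not participate in a backward pass. A minimal hedged sketch of the Accelerator construction used from this patch on:

# Sketch of the Accelerator setup introduced here, using accelerate's public API.
from accelerate import Accelerator, DistributedDataParallelKwargs

# Let DDP tolerate parameters that receive no gradient in a given step,
# at the cost of an extra traversal of the autograd graph per backward pass.
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])

print(accelerator.device)            # the device the trainer falls back to in a later patch in this series
print(accelerator.distributed_type)  # e.g. DistributedType.NO when started without accelerate launch
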
--- modules/trainer/GenericTrainer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py index 9fc5c8c7..5dcb298e 100644 --- a/modules/trainer/GenericTrainer.py +++ b/modules/trainer/GenericTrainer.py @@ -30,7 +30,7 @@ from modules.util.torch_util import torch_gc from modules.util.TrainProgress import TrainProgress -from accelerate import Accelerator +from accelerate import Accelerator, DistributedDataParallelKwargs import torch from torch import Tensor, nn @@ -63,7 +63,8 @@ class GenericTrainer(BaseTrainer): def __init__(self, config: TrainConfig, callbacks: TrainCallbacks, commands: TrainCommands): super(GenericTrainer, self).__init__(config, callbacks, commands) - self.accelerator = Accelerator() + ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) + self.accelerator = Accelerator(kwargs_handlers=[ddp_kwargs]) tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard") os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True) From 6c2c829b7ed0697b9531e5f7f7868d532e91affa Mon Sep 17 00:00:00 2001 From: celll1 Date: Fri, 13 Sep 2024 22:36:14 +0900 Subject: [PATCH 16/17] Fix: Accelerate launch 3. --- modules/trainer/GenericTrainer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/trainer/GenericTrainer.py b/modules/trainer/GenericTrainer.py index 5dcb298e..cb4ac7c1 100644 --- a/modules/trainer/GenericTrainer.py +++ b/modules/trainer/GenericTrainer.py @@ -66,6 +66,13 @@ def __init__(self, config: TrainConfig, callbacks: TrainCallbacks, commands: Tra ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True) self.accelerator = Accelerator(kwargs_handlers=[ddp_kwargs]) + if hasattr(self.accelerator, 'device') and self.accelerator.device: + print(f"Accelerator device: {self.accelerator.device.type}") + if hasattr(self.accelerator, 'distributed_type') and self.accelerator.distributed_type: + print(f"Distributed type: {self.accelerator.distributed_type}") + + print(f"if accelerator is not activated, using {torch.device(self.config.train_device)}") + tensorboard_log_dir = os.path.join(config.workspace_dir, "tensorboard") os.makedirs(Path(tensorboard_log_dir).absolute(), exist_ok=True) self.tensorboard = SummaryWriter(os.path.join(tensorboard_log_dir, get_string_timestamp())) @@ -477,7 +484,7 @@ def __before_eval(self): self.model.optimizer.eval() def train(self): - train_device = torch.device(self.config.train_device) + train_device = self.accelerator.device if self.accelerator.device else torch.device(self.config.train_device) train_progress = self.model.train_progress From ca57444e550dbfa2d011f6e9a696ba5464039f9b Mon Sep 17 00:00:00 2001 From: celll1 Date: Mon, 4 Nov 2024 13:20:10 +0900 Subject: [PATCH 17/17] test --- start-ui.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/start-ui.sh b/start-ui.sh index fd47d51e..1263592f 100755 --- a/start-ui.sh +++ b/start-ui.sh @@ -17,7 +17,7 @@ fi if ! [ -x "$(command -v python)" ]; then echo 'error: python not installed or found!' - break + exit 1 elif [ -x "$(command -v python)" ]; then major=$(python -c 'import platform; major, minor, patch = platform.python_version_tuple(); print(major)') minor=$(python -c 'import platform; major, minor, patch = platform.python_version_tuple(); print(minor)') @@ -37,16 +37,16 @@ elif [ -x "$(command -v python)" ]; then if [[ -z "$VIRTUAL_ENV" ]]; then echo "warning: No VIRTUAL_ENV set. exiting." 
else - accelerate launch scripts/train_ui.py || python scripts/train_ui.py + accelerate launch scripts/train_ui.py || { echo "Failed to launch with accelerate. Falling back to python."; python scripts/train_ui.py; } fi elif [ -x "$(command -v conda)" ]; then #check for venv if conda info --envs | grep -q ${conda_env}; then - bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || python scripts/train_ui.py") + bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || { echo \"Failed to launch with accelerate. Falling back to python.\"; python scripts/train_ui.py; }") else conda create -y -n $conda_env python==3.10; - bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || python scripts/train_ui.py") + bash --init-file <(echo ". \"$HOME/.bashrc\"; conda activate $conda_env; accelerate launch scripts/train_ui.py || { echo \"Failed to launch with accelerate. Falling back to python.\"; python scripts/train_ui.py; }") fi fi else