From e3be1a1758ad6372c6ff5c7700a985a79d54de82 Mon Sep 17 00:00:00 2001
From: Jaret Burkett
Date: Sun, 13 Aug 2023 13:52:38 -0600
Subject: [PATCH] Added WIP slider training colab

---
 notebooks/SliderTraining.ipynb | 340 +++++++++++++++++++++++++++++++++
 1 file changed, 340 insertions(+)
 create mode 100644 notebooks/SliderTraining.ipynb

diff --git a/notebooks/SliderTraining.ipynb b/notebooks/SliderTraining.ipynb
new file mode 100644
index 00000000..6ae8b8f0
--- /dev/null
+++ b/notebooks/SliderTraining.ipynb
@@ -0,0 +1,340 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "machine_shape": "hm",
+      "gpuType": "V100"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# AI Toolkit by Ostris\n",
+        "## Slider Training\n",
+        "\n",
+        "This is a quick colab demo for training sliders like those found on my CivitAI profile https://civitai.com/user/Ostris/models . I will work on making it more user friendly, but for now this will get you started."
+      ],
+      "metadata": {
+        "collapsed": false
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!git clone https://github.com/ostris/ai-toolkit"
+      ],
+      "metadata": {
+        "id": "BvAG0GKAh59G"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "XGZqVER_aQJW"
+      },
+      "outputs": [],
+      "source": [
+        "!cd ai-toolkit && git submodule update --init --recursive && pip install -r requirements.txt\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "import sys\n",
+        "sys.path.append('/content/ai-toolkit')\n",
+        "from toolkit.job import run_job\n",
+        "from collections import OrderedDict\n",
+        "from PIL import Image"
+      ],
+      "metadata": {
+        "collapsed": false
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Setup\n",
+        "\n",
+        "This is your config. It is documented fairly well inline. Normally you would keep it as a yaml file, but for colab a Python dict works. It will run as is without modification, but feel free to edit it as you want. An optional cell after the config shows one way to write it back out as yaml for reuse outside colab."
+      ],
+      "metadata": {
+        "id": "N8UUFzVRigbC"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from collections import OrderedDict\n",
+        "\n",
+        "job_to_run = OrderedDict({\n",
+        "    # This is the config I use for my sliders. It is solid and tested.\n",
+        "    'job': 'train',\n",
+        "    'config': {\n",
+        "        # the name will be used to create a folder in the output folder.\n",
+        "        # it will also replace any [name] token in the rest of this config\n",
+        "        'name': 'detail_slider_v1',\n",
+        "        # a folder with the name above will be created in the folder below.\n",
+        "        # it can be relative to the project root or absolute\n",
+        "        'training_folder': \"output/LoRA\",\n",
+        "        'device': 'cuda',  # cpu, cuda:0, etc\n",
+        "        # for tensorboard logging, a subfolder is made for this job\n",
+        "        'log_dir': \"output/.tensorboard\",\n",
+        "        # you can stack processes for other jobs, but that is not tested with sliders.\n",
+        "        # just use one for now\n",
+        "        'process': [\n",
+        "            {\n",
+        "                'type': 'slider',  # tells the runner to run the slider process\n",
+        "                # network is the LoRA network for a slider. I recommend leaving this as is\n",
+        "                'network': {\n",
+        "                    'type': \"lora\",\n",
+        "                    # rank / dim of the network. Bigger is not always better, especially for sliders. 8 is good\n",
+        "                    'linear': 8,  # \"rank\" or \"dim\"\n",
+        "                    'linear_alpha': 4,  # \"alpha\", do about half of rank\n",
+        "                    # 'conv': 4,  # for convolutional layers \"locon\"\n",
+        "                    # 'conv_alpha': 4,  # do about half of conv\n",
+        "                },\n",
+        "                # training config\n",
+        "                'train': {\n",
+        "                    # this is also used in sampling. Stick with ddpm unless you know what you are doing\n",
+        "                    'noise_scheduler': \"ddpm\",  # or \"lms\", \"euler_a\"\n",
+        "                    # how many steps to train. More is not always better. I rarely go over 1000\n",
+        "                    'steps': 100,\n",
+        "                    # I have had good results with 4e-4 to 1e-4 at 500 steps\n",
+        "                    'lr': 2e-4,\n",
+        "                    # enables gradient checkpointing, saves vram, leave it on\n",
+        "                    'gradient_checkpointing': True,\n",
+        "                    # train the unet. I recommend leaving this true\n",
+        "                    'train_unet': True,\n",
+        "                    # train the text encoder. I don't recommend this unless you have a special use case.\n",
+        "                    # for sliders we are adjusting the representation of the concept (unet),\n",
+        "                    # not the description of it (text encoder)\n",
+        "                    'train_text_encoder': False,\n",
+        "\n",
+        "                    # just leave this unless you know what you are doing.\n",
+        "                    # \"dadaptation\" is also supported, but set lr to 1 if you use it;\n",
+        "                    # it learns too fast and I don't recommend it\n",
+        "                    'optimizer': \"adamw\",\n",
+        "                    # only constant for now\n",
+        "                    'lr_scheduler': \"constant\",\n",
+        "                    # we denoise a random number of steps, from 1 to this number,\n",
+        "                    # while training. Just leave it\n",
+        "                    'max_denoising_steps': 40,\n",
+        "                    # works great at 1. I do 1 even with my 4090.\n",
+        "                    # higher may not work right with the newer single batch stacking code anyway\n",
+        "                    'batch_size': 1,\n",
+        "                    # bf16 works best if your GPU supports it (most modern GPUs do)\n",
+        "                    'dtype': 'bf16',  # fp32, bf16, fp16\n",
+        "                    # I don't recommend using this unless you are trying to make a darker lora. Then do 0.1 MAX.\n",
+        "                    # although, the way we train sliders is comparative, so it probably won't work anyway\n",
+        "                    'noise_offset': 0.0,\n",
+        "                },\n",
+        "\n",
+        "                # the model to train the LoRA network on\n",
+        "                'model': {\n",
+        "                    # name_or_path can be a hugging face name, a local path, or a url to a model\n",
+        "                    # on civitai, with or without a modelVersionId. Models are cached in the /model folder\n",
+        "                    # epicRealism v5\n",
+        "                    'name_or_path': \"https://civitai.com/models/25694?modelVersionId=134065\",\n",
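+        "                    # other accepted forms, for example (the paths below are hypothetical):\n",
+        "                    # 'name_or_path': \"runwayml/stable-diffusion-v1-5\",  # hugging face name\n",
+        "                    # 'name_or_path': \"/content/models/epicRealism_v5.safetensors\",  # local path\n",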
+        "                    'is_v2': False,  # for v2 models\n",
+        "                    'is_v_pred': False,  # for v-prediction models (most v2 models)\n",
+        "                    # SDXL has some issues with the dual text encoder and the way we train sliders;\n",
+        "                    # it works, but weights probably need to be higher to see the effect.\n",
+        "                    'is_xl': False,  # for SDXL models\n",
+        "                },\n",
+        "\n",
+        "                # saving config\n",
+        "                'save': {\n",
+        "                    'dtype': 'float16',  # precision to save. I recommend float16\n",
+        "                    'save_every': 50,  # save every this many steps\n",
+        "                    # this will remove step saves beyond this count.\n",
+        "                    # allows you to save more often in case of a crash without filling up your drive\n",
+        "                    'max_step_saves_to_keep': 2,\n",
+        "                },\n",
+        "\n",
+        "                # sampling config\n",
+        "                'sample': {\n",
+        "                    # must match train.noise_scheduler. It is not used here yet,\n",
+        "                    # but may be in the future and in other processes\n",
+        "                    'sampler': \"ddpm\",\n",
+        "                    # sample every this many steps\n",
+        "                    'sample_every': 20,\n",
+        "                    # image size\n",
+        "                    'width': 512,\n",
+        "                    'height': 512,\n",
+        "                    # prompts to use for sampling. Do as many as you want, but it slows down training.\n",
+        "                    # pick ones that will best represent the concept you are trying to adjust.\n",
+        "                    # some flags are allowed after the prompt:\n",
+        "                    # --m [number]  # network multiplier (LoRA weight). -3 for the negative side and 3 for\n",
+        "                    #               # the positive side are good tests. Inherits sample.network_multiplier if not set\n",
+        "                    # --n [string]  # negative prompt. Inherits sample.neg if not set\n",
+        "                    # only 75 tokens allowed currently.\n",
+        "                    # I like to do a wide positive and negative spread so I can see a good range and stop\n",
+        "                    # early if the network is breaking down\n",
+        "                    'prompts': [\n",
+        "                        \"a woman in a coffee shop, black hat, blonde hair, blue jacket --m -5\",\n",
+        "                        \"a woman in a coffee shop, black hat, blonde hair, blue jacket --m -3\",\n",
+        "                        \"a woman in a coffee shop, black hat, blonde hair, blue jacket --m 3\",\n",
+        "                        \"a woman in a coffee shop, black hat, blonde hair, blue jacket --m 5\",\n",
+        "                        \"a golden retriever sitting on a leather couch --m -5\",\n",
+        "                        \"a golden retriever sitting on a leather couch --m -3\",\n",
+        "                        \"a golden retriever sitting on a leather couch --m 3\",\n",
+        "                        \"a golden retriever sitting on a leather couch --m 5\",\n",
+        "                        \"a man with a beard and red flannel shirt, wearing vr goggles, walking into traffic --m -5\",\n",
+        "                        \"a man with a beard and red flannel shirt, wearing vr goggles, walking into traffic --m -3\",\n",
+        "                        \"a man with a beard and red flannel shirt, wearing vr goggles, walking into traffic --m 3\",\n",
+        "                        \"a man with a beard and red flannel shirt, wearing vr goggles, walking into traffic --m 5\",\n",
+        "                    ],\n",
+        "                    # negative prompt applied to all prompts above by default if they don't set one\n",
+        "                    'neg': \"cartoon, fake, drawing, illustration, cgi, animated, anime, monochrome\",\n",
+        "                    # seed for sampling. 42 is the answer for everything\n",
+        "                    'seed': 42,\n",
+        "                    # walks the seed so s1 is 42, s2 is 43, s3 is 44, etc.\n",
+        "                    # it starts over on the next sample_every, so s1 is always the seed.\n",
+        "                    # works well if you use the same prompt but want different results\n",
+        "                    'walk_seed': False,\n",
+        "                    # cfg scale (4 to 10 is good)\n",
+        "                    'guidance_scale': 7,\n",
+        "                    # sampler steps (20 to 30 is good)\n",
+        "                    'sample_steps': 20,\n",
+        "                    # default network multiplier for all prompts.\n",
+        "                    # since we are training a slider, I recommend overriding this with --m [number]\n",
+        "                    # in the prompts above to get both sides of the slider\n",
+        "                    'network_multiplier': 1.0,\n",
+        "                },\n",
+        "\n",
+        "                # logging information\n",
+        "                'logging': {\n",
+        "                    'log_every': 10,  # log every this many steps\n",
+        "                    'use_wandb': False,  # not supported yet\n",
+        "                    'verbose': False,  # probably not needed unless you are debugging\n",
+        "                },\n",
+        "\n",
+        "                # slider training config, best for last\n",
+        "                'slider': {\n",
+        "                    # resolutions to train on, as [width, height]. This is less important for sliders,\n",
+        "                    # as we are not teaching the model anything it doesn't already know,\n",
+        "                    # but it must be a size the model understands: [512, 512] for sd_v1.5,\n",
+        "                    # [768, 768] for sd_v2.1, and [1024, 1024] for sd_xl.\n",
+        "                    # you can do as many as you want here\n",
+        "                    'resolutions': [\n",
+        "                        [512, 512],\n",
+        "                        # [512, 768],\n",
+        "                        # [768, 768],\n",
+        "                    ],\n",
+        "                    # slider training uses 4 combined passes for a single round. This does them in one gradient\n",
+        "                    # step. It is highly optimized and shouldn't take any more vram than doing without it,\n",
+        "                    # since batches are broken down for gradient accumulation now. Just leave it on.\n",
+        "                    'batch_full_slide': True,\n",
+        "                    # these are the concepts to train on. You can do as many as you want here,\n",
+        "                    # but they can conflict with or outweigh each other. Other than for experimenting,\n",
+        "                    # I recommend just doing one for good results\n",
+        "                    'targets': [\n",
+        "                        # target_class is the base concept we are adjusting the representation of.\n",
+        "                        # for example, if we are adjusting the representation of a person, we would use \"person\";\n",
+        "                        # if we are adjusting the representation of a cat, we would use \"cat\". It is not\n",
+        "                        # necessarily a keyword, but what the model understands the concept to represent.\n",
+        "                        # \"person\" will affect men, women, children, etc. but will not affect cats, dogs, etc.\n",
+        "                        # it is the model's base general understanding of the concept and everything it represents.\n",
+        "                        # you can leave it blank to affect everything. In this example, we are adjusting\n",
+        "                        # detail, so we will leave it blank to affect everything\n",
+        "                        {\n",
+        "                            'target_class': \"\",\n",
+        "                            # positive is the prompt for the positive side of the slider:\n",
+        "                            # the concept that is excited and amplified in the model when we slide the slider\n",
+        "                            # to the positive side, and forgotten / inverted when we slide\n",
+        "                            # the slider to the negative side. It is generally best to include the target_class in\n",
+        "                            # the prompt. You want it to be the extreme of what you want to train on. For example,\n",
+        "                            # if you want to train on fat people, you would use \"an extremely fat, morbidly obese person\"\n",
+        "                            # as the prompt, not just \"fat person\".\n",
+        "                            # max 75 tokens for now\n",
+        "                            'positive': \"high detail, 8k, intricate, detailed, high resolution, high res, high quality\",\n",
+        "                            # negative is the prompt for the negative side of the slider and works the same as positive.\n",
+        "                            # it does not necessarily work the same as a negative prompt when generating images;\n",
+        "                            # these need to be polar opposites.\n",
+        "                            # max 75 tokens for now\n",
+        "                            'negative': \"blurry, boring, fuzzy, low detail, low resolution, low res, low quality\",\n",
+        "                            # the loss for this target is multiplied by this number.\n",
+        "                            # if you are doing more than one target, it may be good to set less important ones\n",
+        "                            # to a lower number like 0.1 so they don't outweigh the primary target\n",
+        "                            'weight': 1.0,\n",
+        "                        },\n",
+        "                    ],\n",
+        "                },\n",
+        "            },\n",
+        "        ]\n",
+        "    },\n",
+        "\n",
+        "    # you can put any information you want here, and it will be saved in the model.\n",
+        "    # the below is an example, but you could put your grocery list in it if you wanted.\n",
+        "    # it is saved in the model, so be aware of that. The software will include this\n",
+        "    # plus some other information for you automatically\n",
+        "    'meta': {\n",
+        "        # [name] gets replaced with the name above\n",
+        "        'name': \"[name]\",\n",
+        "        'version': '1.0',\n",
+        "        # 'creator': {\n",
+        "        #     'name': 'your name',\n",
+        "        #     'email': 'your@gmail.com',\n",
+        "        #     'website': 'https://your.website'\n",
+        "        # }\n",
+        "    }\n",
+        "})\n"
+      ],
+      "metadata": {
+        "id": "_t28QURYjRQO"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
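+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Optional: save the config as yaml\n",
+        "\n",
+        "As noted in the setup section, this config is normally kept as a yaml file. The sketch below writes the dict out as `slider_config.yaml` (a hypothetical filename) so you can reuse it outside colab. It assumes `pyyaml` is available, which it is in colab by default."
+      ],
+      "metadata": {
+        "collapsed": false
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import json\n",
+        "import yaml\n",
+        "\n",
+        "# round-trip through json to turn nested OrderedDicts into plain dicts,\n",
+        "# which yaml.safe_dump can serialize\n",
+        "plain_config = json.loads(json.dumps(job_to_run))\n",
+        "with open('slider_config.yaml', 'w') as f:\n",
+        "    yaml.safe_dump(plain_config, f, sort_keys=False)\n",
+        "print(open('slider_config.yaml').read()[:500])  # preview the first part of the file"
+      ],
+      "metadata": {
+        "collapsed": false
+      },
+      "execution_count": null,
+      "outputs": []
+    },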
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Run it\n",
+        "\n",
+        "The cell below does all the magic. Check your folders to the left. Items will be in output/LoRA/your_name_v1, and periodic samples are saved in the samples folder. Viewing them doesn't work great in colab yet; I'll update that soon. The optional cell after it shows one way to preview the latest samples inline."
+      ],
+      "metadata": {
+        "id": "h6F1FlM2Wb3l"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "run_job(job_to_run)\n"
+      ],
+      "metadata": {
+        "id": "HkajwI8gteOh"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
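+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Optional: preview the latest samples\n",
+        "\n",
+        "A minimal sketch for viewing sample images inline, assuming the default job name `detail_slider_v1` from the config above; change the path if you renamed the job. It simply shows the most recently written files in the samples folder."
+      ],
+      "metadata": {
+        "collapsed": false
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import glob\n",
+        "import os\n",
+        "from IPython.display import display\n",
+        "from PIL import Image\n",
+        "\n",
+        "# assumes the job name from the config above; adjust if you changed it\n",
+        "samples_dir = 'output/LoRA/detail_slider_v1/samples'\n",
+        "files = sorted(glob.glob(os.path.join(samples_dir, '*')), key=os.path.getmtime)\n",
+        "for path in files[-4:]:  # show the four most recent samples\n",
+        "    print(path)\n",
+        "    display(Image.open(path))"
+      ],
+      "metadata": {
+        "collapsed": false
+      },
+      "execution_count": null,
+      "outputs": []
+    },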
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Done\n",
+        "\n",
+        "Check your output dir and get your slider.\n"
+      ],
+      "metadata": {
+        "id": "Hblgb5uwW5SD"
+      }
+    }
+  ]
+}