diff --git a/colabs/diffusers/assets/diffusers-autolog-5.gif b/colabs/diffusers/assets/diffusers-autolog-5.gif
new file mode 100644
index 00000000..a3673e06
Binary files /dev/null and b/colabs/diffusers/assets/diffusers-autolog-5.gif differ
diff --git a/colabs/diffusers/pixart-alpha-diffusers.ipynb b/colabs/diffusers/pixart-alpha-diffusers.ipynb
index 5d79c222..443e5704 100644
--- a/colabs/diffusers/pixart-alpha-diffusers.ipynb
+++ b/colabs/diffusers/pixart-alpha-diffusers.ipynb
@@ -28,8 +28,7 @@
    },
    "outputs": [],
    "source": [
-    "!pip install diffusers transformers accelerate sentencepiece ftfy > install.log\n",
-    "!pip install git+https://github.com/wandb/wandb@feat/diffusers-autologger > install.log"
+    "!pip install diffusers transformers accelerate sentencepiece ftfy wandb > install.log"
    ]
   },
   {
diff --git a/colabs/diffusers/sdxl-diffusers.ipynb b/colabs/diffusers/sdxl-diffusers.ipynb
new file mode 100644
index 00000000..302184b8
--- /dev/null
+++ b/colabs/diffusers/sdxl-diffusers.ipynb
@@ -0,0 +1,153 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Image Generation with Stable Diffusion XL using 🤗 Diffusers\n",
+    "\n",
+    "\n",
+    "[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/wandb/examples/blob/master/colabs/diffusers/sdxl-diffusers.ipynb)\n",
+    "\n",
+    "This notebook demonstrates the following:\n",
+    "- Performing text-conditional image generation with [Stable Diffusion XL](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl) using [🤗 Diffusers](https://huggingface.co/docs/diffusers).\n",
+    "- Managing image generation experiments using [Weights & Biases](http://wandb.ai/site).\n",
+    "- Logging the prompts, generated images, and experiment configs to [Weights & Biases](http://wandb.ai/site) for visualization.\n",
+    "\n",
+    "![](./assets/diffusers-autolog-5.gif)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install diffusers transformers accelerate wandb > install.log"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "\n",
+    "import torch\n",
+    "from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline\n",
+    "\n",
+    "import wandb\n",
+    "from wandb.integration.diffusers import autolog"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "base_model_id = \"stabilityai/stable-diffusion-xl-base-1.0\" # @param [\"stabilityai/stable-diffusion-xl-base-1.0\", \"segmind/SSD-1B\", \"stabilityai/sdxl-turbo\"]\n",
+    "\n",
+    "base_pipeline = StableDiffusionXLPipeline.from_pretrained(\n",
+    "    base_model_id,\n",
+    "    torch_dtype=torch.float16,\n",
+    "    variant=\"fp16\",\n",
+    "    use_safetensors=True,\n",
+    ")\n",
+    "\n",
+    "base_pipeline.enable_model_cpu_offload()\n",
+    "\n",
+    "refiner_pipeline = StableDiffusionXLImg2ImgPipeline.from_pretrained(\n",
+    "    \"stabilityai/stable-diffusion-xl-refiner-1.0\",\n",
+    "    text_encoder_2=base_pipeline.text_encoder_2,\n",
+    "    vae=base_pipeline.vae,\n",
+    "    torch_dtype=torch.float16,\n",
+    "    use_safetensors=True,\n",
+    "    variant=\"fp16\",\n",
+    ")\n",
+    "refiner_pipeline.enable_model_cpu_offload()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wandb_project = \"sdxl\" # @param {type:\"string\"}\n",
{type:\"string\"}\n", + "\n", + "prompt_1 = \"a photograph of an evil and vile looking demon in Bengali attire eating fish. The demon has large and bloody teeth. The demon is sitting on the branches of a giant Banyan tree, dimly lit, bluish and dark color palette, realistic, 8k\" # @param {type:\"string\"}\n", + "prompt_2 = \"\" # @param {type:\"string\"}\n", + "negative_prompt_1 = \"static, frame, painting, illustration, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, deformed toes standing still, posing\" # @param {type:\"string\"}\n", + "negative_prompt_2 = \"static, frame, painting, illustration, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, deformed toes standing still, posing\" # @param {type:\"string\"}\n", + "num_inference_steps = 50 # @param {type:\"slider\", min:10, max:100, step:1}\n", + "guidance_scale = 5.0 # @param {type:\"slider\", min:0, max:10, step:0.1}\n", + "height = 1024 # @param {type:\"slider\", min:512, max:2560, step:32}\n", + "width = 1024 # @param {type:\"slider\", min:512, max:2560, step:32}\n", + "seed = None # @param {type:\"raw\"}\n", + "\n", + "\n", + "def autogenerate_seed():\n", + " max_seed = int(1024 * 1024 * 1024)\n", + " seed = random.randint(1, max_seed)\n", + " seed = -seed if seed < 0 else seed\n", + " seed = seed % max_seed\n", + " return seed\n", + "\n", + "\n", + "seed = autogenerate_seed() if seed is None else seed\n", + "\n", + "# Make the experiment reproducible by controlling randomness.\n", + "# The seed would be automatically logged to WandB.\n", + "generator_base = torch.Generator(device=\"cuda\").manual_seed(seed)\n", + "generator_refiner = torch.Generator(device=\"cuda\").manual_seed(seed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Call WandB Autolog for Diffusers. This would automatically log\n", + "# the prompts, generated images, pipeline architecture and all\n", + "# associated experiment configs to Weights & Biases, thus making your\n", + "# image generation experiments easy to reproduce, share and analyze.\n", + "autolog(init=dict(project=wandb_project))\n", + "\n", + "image = base_pipeline(\n", + " prompt=prompt_1,\n", + " prompt_2=prompt_2,\n", + " negative_prompt=negative_prompt_1,\n", + " negative_prompt_2=negative_prompt_2,\n", + " num_inference_steps=num_inference_steps,\n", + " output_type=\"latent\",\n", + " generator=generator_base,\n", + " guidance_scale=guidance_scale,\n", + ").images[0]\n", + "\n", + "image = refiner_pipeline(\n", + " prompt=prompt_1,\n", + " prompt_2=prompt_2,\n", + " negative_prompt=negative_prompt_1,\n", + " negative_prompt_2=negative_prompt_2,\n", + " image=image[None, :],\n", + " num_inference_steps=num_inference_steps,\n", + " guidance_scale=guidance_scale,\n", + " generator=generator_refiner,\n", + ").images[0]\n", + "\n", + "wandb.finish()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}