Merge pull request #127 from huggingface/main

Merge changes
Skquark · Nov 21, 2023 · 6aaa8a6 · 6aaa8a6
2 parents fe0deca + 7457aa6
commit 6aaa8a6
Show file tree

Hide file tree

Showing 163 changed files with 2,284 additions and 556 deletions.
diff --git a/.github/workflows/pr_quality.yml b/.github/workflows/pr_quality.yml
@@ -27,9 +27,8 @@ jobs:
           pip install .[quality]
       - name: Check quality
         run: |
-          black --check examples tests src utils scripts
-          ruff examples tests src utils scripts
-          doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
+          ruff check examples tests src utils scripts
+          ruff format examples tests src utils scripts --check
 
   check_repository_consistency:
     runs-on: ubuntu-latest

diff --git a/.github/workflows/pr_test_peft_backend.yml b/.github/workflows/pr_test_peft_backend.yml
@@ -20,20 +20,15 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        config:
-          - name: LoRA
-            framework: lora
-            runner: docker-cpu
-            image: diffusers/diffusers-pytorch-cpu
-            report: torch_cpu_lora
+        lib-versions: ["main", "latest"]
 
 
-    name: ${{ matrix.config.name }}
+    name: LoRA - ${{ matrix.lib-versions }}
 
-    runs-on: ${{ matrix.config.runner }}
+    runs-on: docker-cpu
 
     container:
-      image: ${{ matrix.config.image }}
+      image: diffusers/diffusers-pytorch-cpu
       options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
 
     defaults:
@@ -50,18 +45,21 @@ jobs:
       run: |
         apt-get update && apt-get install libsndfile1-dev libgl1 -y
         python -m pip install -e .[quality,test]
-        python -m pip install git+https://github.com/huggingface/accelerate.git
-        python -m pip install -U git+https://github.com/huggingface/transformers.git
-        python -m pip install -U git+https://github.com/huggingface/peft.git
+        if [ "${{ matrix.lib-versions }}" == "main" ]; then
+            python -m pip install -U git+https://github.com/huggingface/peft.git
+            python -m pip install -U git+https://github.com/huggingface/transformers.git
+            python -m pip install -U git+https://github.com/huggingface/accelerate.git
+        else
+            python -m pip install -U peft transformers accelerate
+        fi
 
     - name: Environment
       run: |
         python utils/print_env.py
 
     - name: Run fast PyTorch LoRA CPU tests with PEFT backend
-      if: ${{ matrix.config.framework == 'lora' }}
       run: |
         python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \
           -s -v \
           --make-reports=tests_${{ matrix.config.report }} \
-          tests/lora/test_lora_layers_peft.py
+          tests/lora/test_lora_layers_peft.py
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -410,7 +410,7 @@ Diffusers has grown a lot. Here is the command for it:
  $ make test
  ```
 
-🧨 Diffusers relies on `black` and `isort` to format its source code
+🧨 Diffusers relies on `ruff` and `isort` to format its source code
 consistently. After you make changes, apply automatic style corrections and code verifications
 that can't be automated in one go with:
 

diff --git a/Makefile b/Makefile
@@ -9,8 +9,8 @@ modified_only_fixup:
 	$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
 	@if test -n "$(modified_py_files)"; then \
 		echo "Checking/fixing $(modified_py_files)"; \
-		black $(modified_py_files); \
-		ruff $(modified_py_files); \
+		ruff check $(modified_py_files) --fix; \
+		ruff format $(modified_py_files);\
 	else \
 		echo "No library .py files were modified"; \
 	fi
@@ -40,23 +40,21 @@ repo-consistency:
 # this target runs checks on all files
 
 quality:
-	black --check $(check_dirs)
-	ruff $(check_dirs)
-	doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source
+	ruff check $(check_dirs) setup.py
+	ruff format --check $(check_dirs) setup.py 
 	python utils/check_doc_toc.py
 
 # Format source code automatically and check if there are any problems left that need manual fixing
 
 extra_style_checks:
 	python utils/custom_init_isort.py
-	doc-builder style src/diffusers docs/source --max_len 119 --path_to_docs docs/source
 	python utils/check_doc_toc.py --fix_and_overwrite
 
 # this target runs checks on all files and potentially modifies some of them
 
 style:
-	black $(check_dirs)
-	ruff $(check_dirs) --fix
+	ruff check $(check_dirs) setup.py --fix
+	ruff format $(check_dirs) setup.py
 	${MAKE} autogenerate_code
 	${MAKE} extra_style_checks
 

diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
@@ -19,6 +19,7 @@
     title: Train a diffusion model
   - local: tutorials/using_peft_for_inference
     title: Inference with PEFT
+  title: Tutorials
 - sections:
   - sections:
     - local: using-diffusers/loading_overview
@@ -185,13 +186,21 @@
   - sections:
     - local: api/configuration
       title: Configuration
-    - local: api/loaders
-      title: Loaders
     - local: api/logging
       title: Logging
     - local: api/outputs
       title: Outputs
     title: Main Classes
+  - sections:
+    - local: api/loaders/lora
+      title: LoRA
+    - local: api/loaders/single_file
+      title: Single files
+    - local: api/loaders/textual_inversion
+      title: Textual Inversion
+    - local: api/loaders/unet
+      title: UNet
+    title: Loaders
   - sections:
     - local: api/models/overview
       title: Overview

diff --git a/docs/source/en/api/loaders.md b/docs/source/en/api/loaders.md
diff --git a/docs/source/en/api/loaders/lora.md b/docs/source/en/api/loaders/lora.md
@@ -0,0 +1,32 @@
+<!--Copyright 2023 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# LoRA
+
+LoRA is a fast and lightweight training method that inserts and trains a significantly smaller number of parameters instead of all the model parameters. This produces a smaller file (~100 MBs) and makes it easier to quickly train a model to learn a new concept. LoRA weights are typically loaded into the UNet, text encoder or both. There are two classes for loading LoRA weights:
+
+- [`LoraLoaderMixin`] provides functions for loading and unloading, fusing and unfusing, enabling and disabling, and more functions for managing LoRA weights. This class can be used with any model.
+- [`StableDiffusionXLLoraLoaderMixin`] is a [Stable Diffusion XL (SDXL)](../../api/pipelines/stable_diffusion/stable_diffusion_xl) version of the [`LoraLoaderMixin`] class for loading and saving LoRA weights. It can only be used with the SDXL model.
+
+<Tip>
+
+To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
+
+</Tip>
+
+## LoraLoaderMixin
+
+[[autodoc]] loaders.lora.LoraLoaderMixin
+
+## StableDiffusionXLLoraLoaderMixin
+
+[[autodoc]] loaders.lora.StableDiffusionXLLoraLoaderMixin
diff --git a/docs/source/en/api/loaders/single_file.md b/docs/source/en/api/loaders/single_file.md
@@ -0,0 +1,37 @@
+<!--Copyright 2023 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Single files
+
+Diffusers supports loading pretrained pipeline (or model) weights stored in a single file, such as a `ckpt` or `safetensors` file. These single file types are typically produced from community-trained models. There are three classes for loading single file weights:
+
+- [`FromSingleFileMixin`] supports loading pretrained pipeline weights stored in a single file, which can either be a `ckpt` or `safetensors` file.
+- [`FromOriginalVAEMixin`] supports loading a pretrained [`AutoencoderKL`] from pretrained VAE weights stored in a single file, which can either be a `ckpt` or `safetensors` file.
+- [`FromOriginalControlnetMixin`] supports loading pretrained ControlNet weights stored in a single file, which can either be a `ckpt` or `safetensors` file.
+
+<Tip>
+
+To learn more about how to load single file weights, see the [Load different Stable Diffusion formats](../../using-diffusers/other-formats) loading guide.
+
+</Tip>
+
+## FromSingleFileMixin
+
+[[autodoc]] loaders.single_file.FromSingleFileMixin
+
+## FromOriginalVAEMixin
+
+[[autodoc]] loaders.single_file.FromOriginalVAEMixin
+
+## FromOriginalControlnetMixin
+
+[[autodoc]] loaders.single_file.FromOriginalControlnetMixin
diff --git a/docs/source/en/api/loaders/textual_inversion.md b/docs/source/en/api/loaders/textual_inversion.md
@@ -0,0 +1,27 @@
+<!--Copyright 2023 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Textual Inversion
+
+Textual Inversion is a training method for personalizing models by learning new text embeddings from a few example images. The file produced from training is extremely small (a few KBs) and the new embeddings can be loaded into the text encoder.
+
+[`TextualInversionLoaderMixin`] provides a function for loading Textual Inversion embeddings from Diffusers and Automatic1111 into the text encoder and loading a special token to activate the embeddings.
+
+<Tip>
+
+To learn more about how to load Textual Inversion embeddings, see the [Textual Inversion](../../using-diffusers/loading_adapters#textual-inversion) loading guide.
+
+</Tip>
+
+## TextualInversionLoaderMixin
+
+[[autodoc]] loaders.textual_inversion.TextualInversionLoaderMixin
diff --git a/docs/source/en/api/loaders/unet.md b/docs/source/en/api/loaders/unet.md
@@ -0,0 +1,27 @@
+<!--Copyright 2023 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# UNet
+
+Some training methods - like LoRA and Custom Diffusion - typically target the UNet's attention layers, but these training methods can also target other non-attention layers. Instead of training all of a model's parameters, only a subset of the parameters are trained, which is faster and more efficient. This class is useful if you're *only* loading weights into a UNet. If you need to load weights into the text encoder or a text encoder and UNet, try using the [`~loaders.LoraLoaderMixin.load_lora_weights`] function instead.
+
+The [`UNet2DConditionLoadersMixin`] class provides functions for loading and saving weights, fusing and unfusing LoRAs, disabling and enabling LoRAs, and setting and deleting adapters.
+
+<Tip>
+
+To learn more about how to load LoRA weights, see the [LoRA](../../using-diffusers/loading_adapters#lora) loading guide.
+
+</Tip>
+
+## UNet2DConditionLoadersMixin
+
+[[autodoc]] loaders.unet.UNet2DConditionLoadersMixin
diff --git a/docs/source/en/tutorials/basic_training.md b/docs/source/en/tutorials/basic_training.md
@@ -326,7 +326,8 @@ Now you can wrap all these components together in a training loop with 🤗 Acce
 
 ...             # Sample a random timestep for each image
 ...             timesteps = torch.randint(
-...                 0, noise_scheduler.config.num_train_timesteps, (bs,), device=clean_images.device
+...                 0, noise_scheduler.config.num_train_timesteps, (bs,), device=clean_images.device,
+...                 dtype=torch.int64
 ...             )
 
 ...             # Add noise to the clean images according to the noise magnitude at each timestep

diff --git a/docs/source/en/using-diffusers/custom_pipeline_overview.md b/docs/source/en/using-diffusers/custom_pipeline_overview.md
@@ -165,4 +165,25 @@ video_frames = pipeline(
     guidance_scale=9.0,
     output_type="pt"
 ).frames
+```
+
+As an additional reference example, you can refer to the repository structure of [stabilityai/japanese-stable-diffusion-xl](https://huggingface.co/stabilityai/japanese-stable-diffusion-xl/), which makes use of the `trust_remote_code` feature:
+
+```python
+
+from diffusers import DiffusionPipeline
+import torch
+
+pipeline = DiffusionPipeline.from_pretrained(
+    "stabilityai/japanese-stable-diffusion-xl", trust_remote_code=True
+)
+pipeline.to("cuda")
+
+# if using torch < 2.0
+# pipeline.enable_xformers_memory_efficient_attention()
+
+prompt = "柴犬、カラフルアート"
+
+image = pipeline(prompt=prompt).images[0]
+
 ```
diff --git a/docs/source/ko/tutorials/basic_training.md b/docs/source/ko/tutorials/basic_training.md
@@ -327,7 +327,8 @@ TensorBoard에 로깅, 그래디언트 누적 및 혼합 정밀도 학습을 쉽
 
 ...             # 각 이미지를 위한 랜덤한 타임스텝(timestep)을 샘플링합니다.
 ...             timesteps = torch.randint(
-...                 0, noise_scheduler.config.num_train_timesteps, (bs,), device=clean_images.device
+...                 0, noise_scheduler.config.num_train_timesteps, (bs,), device=clean_images.device,
+...                 dtype=torch.int64
 ...             )
 
 ...             # 각 타임스텝의 노이즈 크기에 따라 깨끗한 이미지에 노이즈를 추가합니다.