processors.py
import gc
import random
from typing import List, Any

import torch
from PIL import Image
from tqdm import tqdm

from extensions.sd_dreambooth_extension.dreambooth.utils.utils import printm
from extensions.sd_smartprocess import super_resolution
from extensions.sd_smartprocess.clipcrop import CropClip
from extensions.sd_smartprocess.interrogators.clip_interrogator import CLIPInterrogator
from extensions.sd_smartprocess.interrogators.booru_interrogator import BooruInterrogator
from modules import shared


# Base processor
class Processor:
    def __init__(self):
        printm("Model loaded.")

    # Unload models and free VRAM
    def unload(self):
        if torch.has_cuda:
            torch.cuda.empty_cache()
        gc.collect()
        printm("Model unloaded.")

    # Process a batch of images
    def process(self, images: List[Image.Image]) -> List[Any]:
        raise NotImplementedError("Subclasses must implement process().")


# CLIP Processing
class ClipProcessor(Processor):
    def __init__(
        self,
        clip_use_v2,
        clip_append_artist,
        clip_append_medium,
        clip_append_movement,
        clip_append_flavor,
        clip_append_trending,
        num_beams,
        min_clip_tokens,
        max_clip_tokens,
        max_flavors
    ):
        self.description = "Processing CLIP"
        if shared.interrogator is not None:
            shared.interrogator.unload()
        self.max_flavors = max_flavors
        shared.state.textinfo = "Loading CLIP Model..."
        self.model = CLIPInterrogator(
            clip_use_v2,
            clip_append_artist,
            clip_append_medium,
            clip_append_movement,
            clip_append_flavor,
            clip_append_trending,
            num_beams,
            min_clip_tokens,
            max_clip_tokens
        )
        super().__init__()

    def process(self, images: List[Image.Image], short: bool = False) -> List[str]:
        output = []
        shared.state.job_count = len(images)
        shared.state.textinfo = f"{self.description}..."
        for img in tqdm(images, desc=self.description):
            short_caption = self.model.interrogate(img, short=short, max_flavors=self.max_flavors)
            output.append(short_caption)
            shared.state.current_image = img
            shared.state.job_no += 1
        return output

    def unload(self):
        if self.model.clip_model:
            del self.model.clip_model
        if self.model.blip_model:
            del self.model.blip_model
        super().unload()


# Danbooru Processing
class BooruProcessor(Processor):
    def __init__(self, min_score: float):
        self.description = "Processing Danbooru"
        shared.state.textinfo = "Loading DeepDanbooru Model..."
        self.model = BooruInterrogator()
        self.min_score = min_score
        super().__init__()

    def process(self, images: List[Image.Image]) -> List[List[str]]:
        output = []
        shared.state.job_count = len(images)
        shared.state.textinfo = f"{self.description}..."
        for img in tqdm(images, desc=self.description):
            out_tags = []
            tags = self.model.interrogate(img)
            # Keep tags sorted by score, dropping anything below the threshold
            for tag in sorted(tags, key=tags.get, reverse=True):
                if tags[tag] >= self.min_score:
                    out_tags.append(tag)
            output.append(out_tags)
            shared.state.job_no += 1
        return output

    def unload(self):
        self.model.unload()
        super().unload()


# WD14 Processing
# Crop Processing
class CropProcessor(Processor):
    def __init__(self, subject_class: str, pad: bool, crop: bool):
        self.description = "Cropping"
        if crop:
            shared.state.textinfo = "Loading CROP Model..."
        self.model = CropClip() if crop else None
        self.subject_class = subject_class
        self.pad = pad
        self.crop = crop
        super().__init__()

    def process(self, images: List[Image.Image], captions: List[str] = None) -> List[Image.Image]:
        output = []
        shared.state.job_count = len(images)
        shared.state.textinfo = f"{self.description}..."
        for img, caption in tqdm(zip(images, captions), desc=self.description):
            cropped = self._process_img(img, caption)
            output.append(cropped)
            shared.state.job_no += 1
        return output

    def _process_img(self, img, short_caption):
        # Use the explicit subject class as the crop prompt when one is provided
        if self.subject_class is not None and self.subject_class != "":
            short_caption = self.subject_class
        src_ratio = img.width / img.height
        # Pad image to a square before cropping?
        if src_ratio != 1 and self.pad:
            if img.width > img.height:
                pad_width = img.width
                pad_height = img.width
            else:
                pad_width = img.height
                pad_height = img.height
            res = Image.new("RGB", (pad_width, pad_height))
            res.paste(img, box=(pad_width // 2 - img.width // 2, pad_height // 2 - img.height // 2))
            img = res
        if self.crop:
            # Do the actual CLIP-guided crop
            im_data = self.model.get_center(img, prompt=short_caption)
            crop_width = im_data[1] - im_data[0]
            center_x = im_data[0] + (crop_width / 2)
            crop_height = im_data[3] - im_data[2]
            center_y = im_data[2] + (crop_height / 2)
            crop_ratio = crop_width / crop_height
            dest_ratio = 1
            tgt_width = crop_width
            tgt_height = crop_height
            if crop_ratio != dest_ratio:
                if crop_width > crop_height:
                    tgt_height = crop_width / dest_ratio
                    tgt_width = crop_width
                else:
                    tgt_width = crop_height / dest_ratio
                    tgt_height = crop_height
                # Clamp the target size if it exceeds the source image
                if tgt_width > img.width or tgt_height > img.height:
                    if tgt_width > img.width:
                        tgt_width = img.width
                        tgt_height = tgt_width / dest_ratio
                    else:
                        tgt_height = img.height
                        tgt_width = tgt_height / dest_ratio
            tgt_height = int(tgt_height)
            tgt_width = int(tgt_width)
            left = max(center_x - (tgt_width / 2), 0)
            right = min(center_x + (tgt_width / 2), img.width)
            top = max(center_y - (tgt_height / 2), 0)
            bottom = min(center_y + (tgt_height / 2), img.height)
            img = img.crop((left, top, right, bottom))
        return img

    def unload(self):
        if self.model is not None:
            self.model.unload()
        super().unload()


# Upscale Processing
class UpscaleProcessor(Processor):
    def __init__(self):
        self.description = "Upscaling"
        shared.state.textinfo = "Loading Stable-Diffusion Upscaling Model..."
        self.sampler, self.model = super_resolution.initialize_model()
        super().__init__()

    def process(self, images: List[Image.Image], captions: List[str] = None) -> List[Image.Image]:
        output = []
        shared.state.job_count = len(images)
        shared.state.textinfo = f"{self.description}..."
        for img, caption in tqdm(zip(images, captions), desc=self.description):
            seed = int(random.randrange(2147483647))
            img = super_resolution.predict(self.sampler, img, caption, 75, 1, 10, seed, 0, 20)
            output.append(img)
            shared.state.job_no += 1
        return output

    def unload(self):
        del self.sampler
        del self.model
        super().unload()
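

# --- Usage sketch (illustrative only, not part of the extension's pipeline) ---
# A minimal example of how these processors could be chained over a folder of
# images: caption with CLIP, crop around the captioned subject, and unload each
# model when done. The folder path and the constructor arguments below are
# assumptions chosen for illustration; in practice these classes are driven by
# the extension's Smart Process UI inside the webui, where shared.state exists.
if __name__ == "__main__":
    import os

    src_dir = "input_images"  # hypothetical input folder
    images = [
        Image.open(os.path.join(src_dir, f)).convert("RGB")
        for f in sorted(os.listdir(src_dir))
        if f.lower().endswith((".png", ".jpg", ".jpeg"))
    ]

    # Caption first, so the cropper has a prompt to focus on.
    clip = ClipProcessor(
        clip_use_v2=False,
        clip_append_artist=False,
        clip_append_medium=False,
        clip_append_movement=False,
        clip_append_flavor=False,
        clip_append_trending=False,
        num_beams=1,
        min_clip_tokens=15,
        max_clip_tokens=30,
        max_flavors=4,
    )
    captions = clip.process(images, short=True)
    clip.unload()

    # Crop each image to a square centered on the captioned subject.
    cropper = CropProcessor(subject_class="", pad=True, crop=True)
    cropped = cropper.process(images, captions)
    cropper.unload()

    for image, caption in zip(cropped, captions):
        print(caption, image.size)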