You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hello, I updated the maxperf file for systems that only run on a CLI. It does not save the files to disk at the moment. While checking whether there was any problem, I noticed that it created only 6 images in the genImage function. Sorry for the code quality — I am actually a C# developer :)
`
import sys
import PIL
import os
import numpy as np
import torch
from diffusers import AutoPipelineForText2Image
from sfast.compilers.stable_diffusion_pipeline_compiler import (compile, CompilationConfig)
# GUI main-window placeholder (unused in this CLI-only variant).
mw = None
# Number of images requested from the pipeline per call.
batchSize = 10
# 20 sample prompts; presumably only the first `batchSize` are consumed per
# run — TODO confirm against dwencode/genit once their full bodies are known.
prompts = ['Evil space kitty', 'Cute dog in hat, H.R. Giger style', 'Horse wearing a tie', 'Cartoon pig', 'Donkey on Mars', 'Cute kitties baked in a cake', 'Boxing chickens on farm, Maxfield Parish style', 'Future spaceship', 'A city of the past', 'Jabba the Hut wearing jewelery',
'istanbul photo scenery', 'a nice girl with hat','a dog playing footbal','an umbrella and raining',' paper cut plane flying on a desk','a cup coffee and child toys','space ship on a lake','a knife and a fork on a table','futuristic microphone','an apple, a banana, a melon']
# xformers and Triton are suggested for achieving best performance.
# It might be slow for Triton to generate, compile and fine-tune kernels.
# NOTE(review): `config` is assigned below but its creation
# (`config = CompilationConfig.Default()`) appears later in this paste —
# the section order looks scrambled by the copy; restore the original order
# (create `config` first) before running.
try:
    import xformers
    # Use xformers memory-efficient attention when the package is present.
    config.enable_xformers = True
except ImportError:
    print('xformers not installed, skip')
# NOTE:
# When GPU VRAM is insufficient or the architecture is too old, Triton might be slow.
# Disable Triton if you encounter this problem.
try:
    import triton
    config.enable_triton = True
except ImportError:
    print('Triton not installed, skip')
# NOTE:
# CUDA Graph is suggested for small batch sizes and small resolutions to reduce CPU overhead.
# My implementation can handle dynamic shape with increased need for GPU memory.
# But when your GPU VRAM is insufficient or the image resolution is high,
# CUDA Graph could cause less efficient VRAM utilization and slow down the inference,
# especially when on Windows or WSL which has the "shared VRAM" mechanism.
# If you meet problems related to it, you should disable it.
config.enable_cuda_graph = True
if True:
    # Aggressive compile options: JIT trace + freeze, scheduler tracing,
    # CNN optimizations, and low-precision GEMM where supported.
    config.enable_jit = True
    config.enable_jit_freeze = True
    config.trace_scheduler = True
    config.enable_cnn_optimization = True
    # Dropping parameter preservation saves VRAM but prevents later weight edits.
    config.preserve_parameters = False
    config.prefer_lowp_gemm = True
# Compile the diffusers pipeline with stable-fast using the options above.
pipe = compile(pipe, config)
def genImage(output_dir, seqno, prompts, batchSize):
    """Generate one batch of images and write them to *output_dir* as PNGs.

    Parameters
    ----------
    output_dir : str
        Directory the files are written into (must already exist).
    seqno : int
        Sequence number embedded in each output file name.
    prompts : list[str]
        Prompts forwarded to the generation step.
    batchSize : int
        Number of images requested per pipeline call.

    Returns
    -------
    int
        Number of images actually produced (may be 0).
    """
    global pipe
    # Re-seed per call so successive batches produce different images.
    seed = random.randint(0, 2147483647)
    torch.manual_seed(seed)
    images = genit(0, prompts=prompts, batchSize=batchSize, nSteps=1)
    # Robustness: genit may return None (its pasted body has no return
    # statement); treat that as an empty batch instead of crashing.
    if images is None:
        images = []
    for idx, img in enumerate(images):
        img_path = os.path.join(output_dir, f'image_{seqno}_{idx}.png')
        # Fix: the save call was commented out, so nothing reached disk —
        # this is the "does not save the files" problem reported above.
        img.save(img_path)
        print(img_path)
    return len(images)
Hello, I updated the maxperf file for systems that only run on a CLI. It does not save the files to disk at the moment. While checking whether there was any problem, I noticed that it created only 6 images in the genImage function. Sorry for the code quality — I am actually a C# developer :)
`
import sys
import PIL
import os
import numpy as np
import torch
from diffusers import AutoPipelineForText2Image
from sfast.compilers.stable_diffusion_pipeline_compiler import (compile, CompilationConfig)
# Inference only: disable autograd bookkeeping globally.
torch.set_grad_enabled(False)
# Allow TF32 matmul/conv on Ampere+ GPUs: faster at slightly reduced precision.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
# GUI main-window placeholder (unused in this CLI-only variant).
mw = None
# Number of images requested from the pipeline per call.
batchSize = 10
# 20 sample prompts; presumably only the first `batchSize` are consumed per
# run — TODO confirm against dwencode/genit once their full bodies are known.
prompts = ['Evil space kitty', 'Cute dog in hat, H.R. Giger style', 'Horse wearing a tie', 'Cartoon pig', 'Donkey on Mars', 'Cute kitties baked in a cake', 'Boxing chickens on farm, Maxfield Parish style', 'Future spaceship', 'A city of the past', 'Jabba the Hut wearing jewelery',
'istanbul photo scenery', 'a nice girl with hat','a dog playing footbal','an umbrella and raining',' paper cut plane flying on a desk','a cup coffee and child toys','space ship on a lake','a knife and a fork on a table','futuristic microphone','an apple, a banana, a melon']
def dwencode(pipe, prompts, batchSize: int, nTokens: int):
    """Encode *prompts* into prompt embeddings for the pipeline.

    NOTE(review): this definition looks truncated by the paste — it only
    fetches the tokenizer and text encoder and then implicitly returns
    None, yet the caller (genit) passes its result as ``prompt_embeds``.
    Recover the full body from the original maxperf script before running.
    """
    # Components of the loaded diffusers pipeline used for text encoding.
    tokenizer = pipe.tokenizer
    text_encoder = pipe.text_encoder
# Load the sd-turbo text-to-image pipeline in fp16 and move it to the GPU.
pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sd-turbo", torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")
#pipe.unet.to(memory_format=torch.channels_last)
from diffusers import AutoencoderTiny
# Swap in the tiny autoencoder (TAESD) for much faster latent decoding.
pipe.vae = AutoencoderTiny.from_pretrained('madebyollin/taesd', torch_device='cuda', torch_dtype=torch.float16)
pipe.vae = pipe.vae.cuda()
# CLI-only run: suppress the per-image progress bar output.
pipe.set_progress_bar_config(disable=True)
if True:
    # Start from stable-fast's default compilation options; the enable_*
    # flags set elsewhere in this file build on this object.
    config = CompilationConfig.Default()
import time
import random
import torch
def genit(mode, prompts, batchSize, nSteps):
    """Run the compiled pipeline once and return the generated PIL images.

    Parameters
    ----------
    mode : int
        Unused selector, kept for interface compatibility with callers.
    prompts : list[str]
        Prompts to encode via dwencode.
    batchSize : int
        Batch size forwarded to the prompt encoder.
    nSteps : int
        Number of inference steps (1 is typical for sd-turbo).

    Returns
    -------
    list
        The PIL images produced by the pipeline.
    """
    #tm0 = time.time()
    # 9 prompt tokens — presumably tuned for speed; TODO confirm.
    pe = dwencode(pipe, prompts, batchSize, 9)
    images = pipe(
        prompt_embeds = pe,
        width=512, height=512,
        num_inference_steps = nSteps,
        guidance_scale = 1,
        output_type="pil",
        return_dict=False
    )[0]
    #print(f"time = {(1000*(time.time() - tm0)):3.1f} milliseconds")
    # Fix: the original fell off the end and implicitly returned None,
    # so callers (genImage) iterated over None and failed.
    return images
# Fix: GitHub's markdown rendering stripped the dunder underscores, turning
# the standard entry-point guard into `if name == 'main':`, which raises
# NameError at runtime. Restore the canonical guard.
if __name__ == '__main__':
    # Directory the generated PNGs are written into.
    output_dir = 'spew'
    # exist_ok avoids the check-then-create race of the original
    # `if not os.path.exists(...)` pattern.
    os.makedirs(output_dir, exist_ok=True)
`
The text was updated successfully, but these errors were encountered: