Working on linux without GUI #14

Open

aokocax opened this issue Dec 7, 2023 · 0 comments

aokocax commented Dec 7, 2023

Hello, I updated the maxperf file for systems that run on a CLI only. It does not save the files to disk at the moment. While checking whether there was any problem, I noticed that it created only 6 images in the genImage function, even though a full batch of prompts goes in. Sorry for the code, I am actually a C# developer :)
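As a quick way to see the mismatch, a one-line check inside genImage (right after the genit call) would do; this assumes the pipeline is expected to return one PIL image per prompt, and the assert is hypothetical, not part of the script:

```python
# Hypothetical sanity check for genImage: the pipeline should return
# one image per prompt, so any shortfall is reported immediately.
assert len(images) == batchSize, f"expected {batchSize} images, got {len(images)}"
```

The updated file: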

```python
import sys
import os
import time
import random

import numpy as np

import torch
from diffusers import AutoPipelineForText2Image, AutoencoderTiny
from sfast.compilers.stable_diffusion_pipeline_compiler import (compile, CompilationConfig)

torch.set_grad_enabled(False)
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

mw = None  # only referenced by the disabled debug print in dwencode
batchSize = 10
prompts = ['Evil space kitty', 'Cute dog in hat, H.R. Giger style', 'Horse wearing a tie', 'Cartoon pig', 'Donkey on Mars', 'Cute kitties baked in a cake', 'Boxing chickens on farm, Maxfield Parish style', 'Future spaceship', 'A city of the past', 'Jabba the Hut wearing jewelery',
'istanbul photo scenery', 'a nice girl with hat','a dog playing footbal','an umbrella and raining',' paper cut plane flying on a desk','a cup coffee and child toys','space ship on a lake','a knife and a fork on a table','futuristic microphone','an apple, a banana, a melon']

def dwencode(pipe, prompts, batchSize: int, nTokens: int):
    tokenizer = pipe.tokenizer
    text_encoder = pipe.text_encoder

    if nTokens < 0 or nTokens > 75:
        raise ValueError("n random tokens must be between 0 and 75")

    if nTokens > 0:
        randIIs = torch.randint(low=0, high=49405, size=(batchSize, nTokens), device='cuda')

    text_inputs = tokenizer(
        prompts,
        padding="max_length",
        max_length=tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
    ).to('cuda')

    tii = text_inputs.input_ids

    # Find the end-of-text marker, which determines the prompt length (pl)
    # in terms of user tokens.
    #pl = np.where(tii[0] == 49407)[0][0] - 1
    pl = (tii[0] == torch.tensor(49407, device='cuda')).nonzero()[0][0].item() - 1

    if nTokens > 0:
        # TODO: Efficiency
        for i in range(batchSize):
            tii[i][1+pl:1+pl+nTokens] = randIIs[i]
            tii[i][1+pl+nTokens] = 49407

    if False:  # debug: print the decoded token stream for each batch item
        for bi in range(batchSize):
            print(f"{mw.seqno:05d}-{bi:02d}: ", end='')
            for tid in tii[bi][1:1+pl+nTokens]:
                print(f"{tokenizer.decode(tid)} ", end='')
            print('')

    prompt_embeds = text_encoder(tii.to('cuda'), attention_mask=None)
    prompt_embeds = prompt_embeds[0]
    prompt_embeds = prompt_embeds.to(dtype=pipe.unet.dtype, device='cuda')

    bs_embed, seq_len, _ = prompt_embeds.shape
    prompt_embeds = prompt_embeds.repeat(1, 1, 1)
    prompt_embeds = prompt_embeds.view(bs_embed * 1, seq_len, -1)

    return prompt_embeds

pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sd-turbo", torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")
#pipe.unet.to(memory_format=torch.channels_last)

pipe.vae = AutoencoderTiny.from_pretrained('madebyollin/taesd', torch_dtype=torch.float16)
pipe.vae = pipe.vae.cuda()

pipe.set_progress_bar_config(disable=True)

if True:
    config = CompilationConfig.Default()

    # xformers and Triton are suggested for achieving best performance.
    # It might be slow for Triton to generate, compile and fine-tune kernels.
    try:
        import xformers
        config.enable_xformers = True
    except ImportError:
        print('xformers not installed, skip')
    # NOTE:
    # When GPU VRAM is insufficient or the architecture is too old, Triton might be slow.
    # Disable Triton if you encounter this problem.
    try:
        import triton
        config.enable_triton = True
    except ImportError:
        print('Triton not installed, skip')
    # NOTE:
    # CUDA Graph is suggested for small batch sizes and small resolutions to reduce CPU overhead.
    # My implementation can handle dynamic shape with increased need for GPU memory.
    # But when your GPU VRAM is insufficient or the image resolution is high,
    # CUDA Graph could cause less efficient VRAM utilization and slow down the inference,
    # especially when on Windows or WSL which has the "shared VRAM" mechanism.
    # If you meet problems related to it, you should disable it.
    config.enable_cuda_graph = True

    if True:
        config.enable_jit = True
        config.enable_jit_freeze = True
        config.trace_scheduler = True
        config.enable_cnn_optimization = True
        config.preserve_parameters = False
        config.prefer_lowp_gemm = True

    pipe = compile(pipe, config)




def genImage(output_dir, seqno, prompts, batchSize):
    global pipe
    seed = random.randint(0, 2147483647)
    torch.manual_seed(seed)

    images = genit(0, prompts=prompts, batchSize=batchSize, nSteps=1)
    for idx, img in enumerate(images):
        img_path = os.path.join(output_dir, f'image_{seqno}_{idx}.png')
        #img.save(img_path)
        print(img_path)
    return len(images)


def genit(mode, prompts, batchSize, nSteps):
    #tm0 = time.time()
    pe = dwencode(pipe, prompts, batchSize, 9)
    images = pipe(
        prompt_embeds=pe,
        width=512, height=512,
        num_inference_steps=nSteps,
        guidance_scale=1,
        output_type="pil",
        return_dict=False
    )[0]
    #print(f"time = {(1000*(time.time() - tm0)):3.1f} milliseconds")

    return images

if __name__ == '__main__':
    output_dir = 'spew'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    seqno = 0
    if len(sys.argv) == 2:
        batchSize = int(sys.argv[1])
        if batchSize > 20:
            print('Batchsize must not be greater than 20.')
            sys.exit(1)
        prompts = prompts[:batchSize]
    else:
        batchSize = 20

    start_time = time.time()
    counter = 0

    while True:
        seqno += 1
        counter += 1
        genImage(output_dir, seqno, prompts, batchSize)
        current_time = time.time()
        if current_time - start_time >= 1:
            # counter counts batches, so images/sec is counter * batchSize
            print(f"{counter} iterations in the last second.")
            start_time = current_time
            counter = 0

```
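On the saving side, here is a minimal sketch of one way to write the files without stalling the generation loop, assuming PIL images as returned above (the save_images_async helper and the worker count are illustrative, not part of the script):

```python
from concurrent.futures import ThreadPoolExecutor
import os

# Illustrative helper: offload the PNG writes to a small thread pool so
# disk I/O does not block the CUDA generation loop.
_saver = ThreadPoolExecutor(max_workers=2)

def save_images_async(images, output_dir, seqno):
    for idx, img in enumerate(images):
        img_path = os.path.join(output_dir, f'image_{seqno}_{idx}.png')
        # PIL's Image.save runs on a worker thread, off the hot path
        _saver.submit(img.save, img_path)
```

In genImage this would take the place of the commented-out img.save(img_path) call.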

@aokocax changed the title from "Linux on without GUI" to "Working on linux without GUI" on Dec 7, 2023