| | from PIL import Image |
| | import torch |
| | import numpy as np |
| |
|
| | from transformers import Qwen2_5_VLForConditionalGeneration |
| |
|
| | from diffusers import ( |
| | QwenImagePipeline, |
| | QwenImageTransformer2DModel, |
| | QwenImageInpaintPipeline, |
| | ) |
| |
|
| | from optimum.quanto import quantize, qint8, freeze |
| |
|
| |
|
# Text-to-image generation settings.
prompt = (
    "equirectangular, a woman and a man sitting at a cafe, the woman has red hair "
    "and she's wearing purple sweater with a black scarf and a white hat, the man "
    "is sitting on the other side of the table and he's wearing a white shirt with "
    "a purple scarf and red hat, both of them are sipping their coffee while in the "
    "table there's some cake slices on their respective plates, each with forks and "
    "knives at each side."
)
negative_prompt = ""
output_filename = "qwen_int8.png"
# 2:1 aspect ratio — the standard layout for equirectangular (360-degree) panoramas.
width, height = 2048, 1024
true_cfg_scale = 4.0
num_inference_steps = 25
seed = 42

# 360-degree panorama LoRA applied on top of the base model.
lora_model_id = "ProGamerGov/qwen-360-diffusion"
lora_filename = "qwen-360-diffusion-int8-bf16-v1.safetensors"

# Base model and runtime configuration.
model_id = "Qwen/Qwen-Image"
torch_dtype = torch.bfloat16
device = "cuda"

# Optional second pass: shift the panorama by 50% so the left/right wrap seam
# lands in the center of the frame, then inpaint a vertical band over it.
fix_seam = True
inpaint_strength, seam_width = 0.5, 0.10  # seam_width is a fraction of image width
| |
|
| |
|
def shift_equirect(img):
    """Shift an equirectangular image horizontally by half its width (wrap-around).

    Applying this twice restores the original image exactly, so it can be used
    to move the left/right wrap seam to the center for inpainting and then
    move it back afterwards.

    Args:
        img: PIL image (assumed RGB-like, shape H x W x C — TODO confirm mode).

    Returns:
        A new PIL image rolled by ``W // 2`` pixels along the horizontal axis.
    """
    # Roll the uint8 pixels directly. The previous implementation round-tripped
    # through float (`/ 255.0` ... `* 255` -> uint8), which truncates instead
    # of rounding and can shift pixel values down by 1 due to float precision.
    arr = np.asarray(img)
    shifted = np.roll(arr, shift=arr.shape[1] // 2, axis=1)
    return Image.fromarray(shifted)
| |
|
| |
|
def create_seam_mask(w, h, frac=0.10):
    """Create a vertical inpainting mask covering the center seam.

    Args:
        w: Mask width in pixels.
        h: Mask height in pixels.
        frac: Fraction of the width the seam band should cover.

    Returns:
        Mode "L" PIL image: white (255) over the centered seam band, black elsewhere.
    """
    mask = torch.zeros((h, w))
    seam_w = max(1, int(w * frac))
    c = w // 2
    # Span exactly seam_w columns starting at c - seam_w // 2. The previous
    # slice `c - seam_w // 2 : c + seam_w // 2` dropped one column whenever
    # seam_w was odd (and collapsed to zero width for seam_w == 1).
    left = max(0, c - seam_w // 2)
    mask[:, left:left + seam_w] = 1.0
    return Image.fromarray((mask.numpy() * 255).astype("uint8"), "L")
| |
|
| |
|
def load_pipeline(text_encoder, transformer, mode="t2i"):
    """Build a QwenImage pipeline that reuses pre-quantized submodules.

    ``mode="t2i"`` selects the text-to-image pipeline; any other value selects
    the inpainting pipeline (used for the seam-fix pass).
    """
    if mode == "t2i":
        pipeline_cls = QwenImagePipeline
    else:
        pipeline_cls = QwenImageInpaintPipeline

    pipe = pipeline_cls.from_pretrained(
        model_id,
        transformer=transformer,
        text_encoder=text_encoder,
        torch_dtype=torch_dtype,
        use_safetensors=True,
        low_cpu_mem_usage=True,
    )

    # Apply the 360-panorama LoRA, then enable memory savers:
    # CPU offload moves idle submodules off the GPU; VAE tiling decodes
    # the large panorama in chunks.
    pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_tiling()
    return pipe
| |
|
| |
|
def main():
    """Generate a 360-degree panorama, then optionally inpaint the wrap seam."""
    # Load the diffusion transformer and quantize its weights to int8
    # (optimum-quanto); freeze() makes the quantization permanent.
    transformer = QwenImageTransformer2DModel.from_pretrained(
        model_id,
        subfolder="transformer",
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
    )
    quantize(transformer, weights=qint8)
    freeze(transformer)

    # Text encoder is pinned to CPU via device_map and quantized the same way;
    # the pipeline's CPU offload manages moving it for encoding.
    text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_id,
        subfolder="text_encoder",
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        device_map={"": "cpu"},
    )
    quantize(text_encoder, weights=qint8)
    freeze(text_encoder)

    # Seeded generator for reproducible sampling across both passes.
    generator = torch.Generator(device=device).manual_seed(seed)
    pipe = load_pipeline(text_encoder, transformer, mode="t2i")

    # First pass: text-to-image generation of the full panorama.
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        true_cfg_scale=true_cfg_scale,
        generator=generator,
    ).images[0]

    image.save(output_filename)

    if fix_seam:
        # Drop the t2i pipeline before building the inpainting one to keep
        # peak memory down; the quantized submodules themselves are reused.
        del pipe
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Shift by 50% so the wrap seam sits in the middle of the frame,
        # then mask a vertical band over it for inpainting.
        shifted = shift_equirect(image)
        mask = create_seam_mask(width, height, frac=seam_width)

        # Second pass: inpaint only the masked seam band.
        pipe = load_pipeline(text_encoder, transformer, mode="i2i")
        image_fixed = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=shifted,
            mask_image=mask,
            strength=inpaint_strength,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            true_cfg_scale=true_cfg_scale,
            generator=generator,
        ).images[0]
        # Shift back to restore the panorama's original orientation.
        image_fixed = shift_equirect(image_fixed)
        image_fixed.save(output_filename.replace(".png", "_seamfix.png"))
| |
|
| |
|
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| |
|