| | from PIL import Image |
| | import torch |
| | import numpy as np |
| |
|
| | from transformers import Qwen2_5_VLForConditionalGeneration |
| |
|
| | from diffusers import ( |
| | QwenImagePipeline, |
| | QwenImageTransformer2DModel, |
| | QwenImageInpaintPipeline, |
| | ) |
| |
|
| | from optimum.quanto import quantize, qint8, freeze |
| |
|
| |
|
# Text-to-image generation settings.
prompt = (
    "equirectangular, a woman and a man sitting at a cafe, the woman has red hair "
    "and she's wearing purple sweater with a black scarf and a white hat, the man "
    "is sitting on the other side of the table and he's wearing a white shirt with "
    "a purple scarf and red hat, both of them are sipping their coffee while in the "
    "table there's some cake slices on their respective plates, each with forks and "
    "knives at each side."
)
negative_prompt = ""
output_filename = "qwen_int8.png"
# 2:1 aspect ratio — the standard layout for equirectangular (360-degree) panoramas.
width, height = 2048, 1024
true_cfg_scale = 4.0
num_inference_steps = 25
seed = 42

# 360-degree panorama LoRA applied on top of the base model.
lora_model_id = "ProGamerGov/qwen-360-diffusion"
lora_filename = "qwen-360-diffusion-int8-bf16-v1.safetensors"

# Base model and runtime configuration.
model_id = "Qwen/Qwen-Image"
torch_dtype = torch.bfloat16
device = "cuda"

# Optional second pass: shift the panorama by 50% so the left/right wrap seam
# lands in the center of the frame, then inpaint a vertical band over it.
fix_seam = True
inpaint_strength, seam_width = 0.5, 0.10  # seam_width is a fraction of image width
| |
|
| |
|
def shift_equirect(img):
    """Shift an equirectangular image horizontally by half its width (wrap-around).

    Applying this twice restores the original image exactly, so it can be used
    to move the left/right wrap seam to the center for inpainting and then
    move it back afterwards.

    Args:
        img: PIL image (assumed RGB-like, shape H x W x C — TODO confirm mode).

    Returns:
        A new PIL image rolled by ``W // 2`` pixels along the horizontal axis.
    """
    # Roll the uint8 pixels directly. The previous implementation round-tripped
    # through float (`/ 255.0` ... `* 255` -> uint8), which truncates instead
    # of rounding and can shift pixel values down by 1 due to float precision.
    arr = np.asarray(img)
    shifted = np.roll(arr, shift=arr.shape[1] // 2, axis=1)
    return Image.fromarray(shifted)
| |
|
| |
|
def create_seam_mask(w, h, frac=0.10):
    """Create a vertical inpainting mask covering the center seam.

    Args:
        w: Mask width in pixels.
        h: Mask height in pixels.
        frac: Fraction of the width the seam band should cover.

    Returns:
        Mode "L" PIL image: white (255) over the centered seam band, black elsewhere.
    """
    mask = torch.zeros((h, w))
    seam_w = max(1, int(w * frac))
    c = w // 2
    # Span exactly seam_w columns starting at c - seam_w // 2. The previous
    # slice `c - seam_w // 2 : c + seam_w // 2` dropped one column whenever
    # seam_w was odd (and collapsed to zero width for seam_w == 1).
    left = max(0, c - seam_w // 2)
    mask[:, left:left + seam_w] = 1.0
    return Image.fromarray((mask.numpy() * 255).astype("uint8"), "L")
| |
|
| |
|
def load_pipeline(text_encoder, transformer, mode="t2i"):
    """Build a QwenImage pipeline that reuses pre-quantized submodules.

    ``mode="t2i"`` selects the text-to-image pipeline; any other value selects
    the inpainting pipeline (used for the seam-fix pass).
    """
    if mode == "t2i":
        pipeline_cls = QwenImagePipeline
    else:
        pipeline_cls = QwenImageInpaintPipeline

    pipe = pipeline_cls.from_pretrained(
        model_id,
        transformer=transformer,
        text_encoder=text_encoder,
        torch_dtype=torch_dtype,
        use_safetensors=True,
        low_cpu_mem_usage=True,
    )

    # Apply the 360-panorama LoRA, then enable memory savers:
    # CPU offload moves idle submodules off the GPU; VAE tiling decodes
    # the large panorama in chunks.
    pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_tiling()
    return pipe
| |
|
| |
|
def main():
    """Generate a 360-degree panorama, then optionally inpaint the wrap seam."""
    # Load the diffusion transformer and quantize its weights to int8
    # (optimum-quanto); freeze() makes the quantization permanent.
    transformer = QwenImageTransformer2DModel.from_pretrained(
        model_id,
        subfolder="transformer",
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
    )
    quantize(transformer, weights=qint8)
    freeze(transformer)

    # Text encoder is pinned to CPU via device_map and quantized the same way;
    # the pipeline's CPU offload manages moving it for encoding.
    text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        model_id,
        subfolder="text_encoder",
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        device_map={"": "cpu"},
    )
    quantize(text_encoder, weights=qint8)
    freeze(text_encoder)

    # Seeded generator for reproducible sampling across both passes.
    generator = torch.Generator(device=device).manual_seed(seed)
    pipe = load_pipeline(text_encoder, transformer, mode="t2i")

    # First pass: text-to-image generation of the full panorama.
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        true_cfg_scale=true_cfg_scale,
        generator=generator,
    ).images[0]

    image.save(output_filename)

    if fix_seam:
        # Drop the t2i pipeline before building the inpainting one to keep
        # peak memory down; the quantized submodules themselves are reused.
        del pipe
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Shift by 50% so the wrap seam sits in the middle of the frame,
        # then mask a vertical band over it for inpainting.
        shifted = shift_equirect(image)
        mask = create_seam_mask(width, height, frac=seam_width)

        # Second pass: inpaint only the masked seam band.
        pipe = load_pipeline(text_encoder, transformer, mode="i2i")
        image_fixed = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=shifted,
            mask_image=mask,
            strength=inpaint_strength,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            true_cfg_scale=true_cfg_scale,
            generator=generator,
        ).images[0]
        # Shift back to restore the panorama's original orientation.
        image_fixed = shift_equirect(image_fixed)
        image_fixed.save(output_filename.replace(".png", "_seamfix.png"))
| |
|
| |
|
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| |
|