rahul7star committed on
Commit
ece8251
·
verified ·
1 Parent(s): 01ffa33

Create app_lora.py

Files changed (1)
  1. app_lora.py +1091 -0
app_lora.py ADDED
@@ -0,0 +1,1091 @@
+ import torch
+ import spaces
+ import gradio as gr
+ import sys
+ import platform
+ import diffusers
+ import transformers
+ import psutil
+ import os
+ import time
+ import traceback
+
+ from PIL import Image
+ from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
+ from diffusers import ZImagePipeline, AutoModel
+ from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
+
+ latent_history = []
17
+
18
+ # ============================================================
19
+ # LOGGING BUFFER
20
+ # ============================================================
21
+ LOGS = ""
22
+ def log(msg):
23
+ global LOGS
24
+ print(msg)
25
+ LOGS += msg + "\n"
26
+ return msg
27
+
28
+
29
+ # ============================================================
30
+ # SYSTEM METRICS — LIVE GPU + CPU MONITORING
31
+ # ============================================================
32
+ def log_system_stats(tag=""):
33
+ try:
34
+ log(f"\n===== 🔥 SYSTEM STATS {tag} =====")
35
+
36
+ # ============= GPU STATS =============
37
+ if torch.cuda.is_available():
38
+ allocated = torch.cuda.memory_allocated(0) / 1e9
39
+ reserved = torch.cuda.memory_reserved(0) / 1e9
40
+ total = torch.cuda.get_device_properties(0).total_memory / 1e9
41
+ free = total - allocated
42
+
43
+ log(f"💠 GPU Total : {total:.2f} GB")
44
+ log(f"💠 GPU Allocated : {allocated:.2f} GB")
45
+ log(f"💠 GPU Reserved : {reserved:.2f} GB")
46
+ log(f"💠 GPU Free : {free:.2f} GB")
47
+
48
+ # ============= CPU STATS ============
49
+ cpu = psutil.cpu_percent()
50
+ ram_used = psutil.virtual_memory().used / 1e9
51
+ ram_total = psutil.virtual_memory().total / 1e9
52
+
53
+ log(f"🧠 CPU Usage : {cpu}%")
54
+ log(f"🧠 RAM Used : {ram_used:.2f} GB / {ram_total:.2f} GB")
55
+
56
+ except Exception as e:
57
+ log(f"⚠️ Failed to log system stats: {e}")
58
+
59
+
60
+ # ============================================================
61
+ # ENVIRONMENT INFO
62
+ # ============================================================
63
+ log("===================================================")
64
+ log("🔍 Z-IMAGE-TURBO DEBUGGING + LIVE METRIC LOGGER")
65
+ log("===================================================\n")
66
+
67
+ log(f"📌 PYTHON VERSION : {sys.version.replace(chr(10),' ')}")
68
+ log(f"📌 PLATFORM : {platform.platform()}")
69
+ log(f"📌 TORCH VERSION : {torch.__version__}")
70
+ log(f"📌 TRANSFORMERS VERSION : {transformers.__version__}")
71
+ log(f"📌 DIFFUSERS VERSION : {diffusers.__version__}")
72
+ log(f"📌 CUDA AVAILABLE : {torch.cuda.is_available()}")
73
+
74
+ log_system_stats("AT STARTUP")
75
+
76
+ if not torch.cuda.is_available():
77
+ raise RuntimeError("❌ CUDA Required")
78
+
79
+ device = "cuda"
80
+ gpu_id = 0
81
+
82
+ # ============================================================
83
+ # MODEL SETTINGS
84
+ # ============================================================
85
+ model_cache = "./weights/"
86
+ model_id = "Tongyi-MAI/Z-Image-Turbo"
87
+ torch_dtype = torch.bfloat16
88
+ USE_CPU_OFFLOAD = False
89
+
90
+ log("\n===================================================")
91
+ log("🧠 MODEL CONFIGURATION")
92
+ log("===================================================")
93
+ log(f"Model ID : {model_id}")
94
+ log(f"Model Cache Directory : {model_cache}")
95
+ log(f"torch_dtype : {torch_dtype}")
96
+ log(f"USE_CPU_OFFLOAD : {USE_CPU_OFFLOAD}")
97
+
98
+ log_system_stats("BEFORE TRANSFORMER LOAD")
99
+
100
+
101
+ # ============================================================
102
+ # FUNCTION TO CONVERT LATENTS TO IMAGE
103
+ # ============================================================
104
+ def latent_to_image(latent):
+     """
+     Convert a latent tensor to a PIL image using pipe.vae
+     """
+     try:
+         with torch.no_grad():
+             img_tensor = pipe.vae.decode(latent).sample  # decode returns an output object; .sample holds the image tensor
+         img_tensor = (img_tensor / 2 + 0.5).clamp(0, 1)
+         arr = (img_tensor[0].permute(1, 2, 0).cpu().float().numpy() * 255).astype("uint8")
+         return Image.fromarray(arr)  # single image
+     except Exception as e:
+         log(f"⚠️ Failed to decode latent: {e}")
+         # fallback blank image
+         return Image.new("RGB", (latent.shape[-1]*8, latent.shape[-2]*8), color=(255, 255, 255))
117
+
118
+
119
+
120
+ # ============================================================
121
+ # SAFE TRANSFORMER INSPECTION
122
+ # ============================================================
123
+ def inspect_transformer(model, name):
124
+ log(f"\n🔍🔍 FULL TRANSFORMER DEBUG DUMP: {name}")
125
+ log("=" * 80)
126
+
127
+ try:
128
+ log(f"Model class : {model.__class__.__name__}")
129
+ log(f"DType : {getattr(model, 'dtype', 'unknown')}")
130
+ log(f"Device : {next(model.parameters()).device}")
131
+ log(f"Requires Grad? : {any(p.requires_grad for p in model.parameters())}")
132
+
133
+ # Check quantization
134
+ if hasattr(model, "is_loaded_in_4bit"):
135
+ log(f"4bit Quantization : {model.is_loaded_in_4bit}")
136
+ if hasattr(model, "is_loaded_in_8bit"):
137
+ log(f"8bit Quantization : {model.is_loaded_in_8bit}")
138
+
139
+ # Find blocks
140
+ candidates = ["transformer_blocks", "blocks", "layers", "encoder", "model"]
141
+ blocks = None
142
+ chosen_attr = None
143
+
144
+ for attr in candidates:
145
+ if hasattr(model, attr):
146
+ blocks = getattr(model, attr)
147
+ chosen_attr = attr
148
+ break
149
+
150
+ log(f"Block container attr : {chosen_attr}")
151
+
152
+ if blocks is None:
153
+ log("⚠️ No valid block container found.")
154
+ return
155
+
156
+ if not hasattr(blocks, "__len__"):
157
+ log("⚠️ Blocks exist but not iterable.")
158
+ return
159
+
160
+ total = len(blocks)
161
+ log(f"Total Blocks : {total}")
162
+ log("-" * 80)
163
+
164
+ # Inspect first N blocks
165
+ N = min(20, total)
166
+ for i in range(N):
167
+ block = blocks[i]
168
+ log(f"\n🧩 Block [{i}/{total-1}]")
169
+ log(f"Class: {block.__class__.__name__}")
170
+
171
+ # Print submodules
172
+ for n, m in block.named_children():
173
+ log(f" ├─ {n}: {m.__class__.__name__}")
174
+
175
+ # Print attention related
176
+ if hasattr(block, "attn"):
177
+ attn = block.attn
178
+ log(f" ├─ Attention: {attn.__class__.__name__}")
179
+ log(f" │ Heads : {getattr(attn, 'num_heads', 'unknown')}")
180
+ log(f" │ Dim : {getattr(attn, 'hidden_size', 'unknown')}")
181
+ log(f" │ Backend : {getattr(attn, 'attention_backend', 'unknown')}")
182
+
183
+ # Device + dtype info
184
+ try:
185
+ dev = next(block.parameters()).device
186
+ log(f" ├─ Device : {dev}")
187
+ except StopIteration:
188
+ pass
189
+
190
+ try:
191
+ dt = next(block.parameters()).dtype
192
+ log(f" ├─ DType : {dt}")
193
+ except StopIteration:
194
+ pass
195
+
196
+ log("\n🔚 END TRANSFORMER DEBUG DUMP")
197
+ log("=" * 80)
198
+
199
+ except Exception as e:
200
+ log(f"❌ ERROR IN INSPECTOR: {e}")
201
203
+
204
+ # ---------- UTILITY ----------
205
+ def pretty_header(title):
206
+ log("\n\n" + "=" * 80)
207
+ log(f"🎛️ {title}")
208
+ log("=" * 80 + "\n")
209
+
210
+
211
+ # ---------- MEMORY ----------
212
+ def get_vram(prefix=""):
213
+ try:
214
+ allocated = torch.cuda.memory_allocated() / 1024**2
215
+ reserved = torch.cuda.memory_reserved() / 1024**2
216
+ log(f"{prefix}Allocated VRAM : {allocated:.2f} MB")
217
+ log(f"{prefix}Reserved VRAM : {reserved:.2f} MB")
218
+ except Exception:
219
+ log(f"{prefix}VRAM: CUDA not available")
220
+
221
+
222
+ # ---------- MODULE INSPECT ----------
223
+ def inspect_module(name, module):
224
+ pretty_header(f"🔬 Inspecting {name}")
225
+
226
+ try:
227
+ log(f"📦 Class : {module.__class__.__name__}")
228
+ log(f"🔢 DType : {getattr(module, 'dtype', 'unknown')}")
229
+ log(f"💻 Device : {next(module.parameters()).device}")
230
+ log(f"🧮 Params : {sum(p.numel() for p in module.parameters()):,}")
231
+
232
+ # Quantization state
233
+ if hasattr(module, "is_loaded_in_4bit"):
234
+ log(f"⚙️ 4-bit QLoRA : {module.is_loaded_in_4bit}")
235
+ if hasattr(module, "is_loaded_in_8bit"):
236
+ log(f"⚙️ 8-bit load : {module.is_loaded_in_8bit}")
237
+
238
+ # Attention backend (DiT)
239
+ if hasattr(module, "set_attention_backend"):
240
+ try:
241
+ attn = getattr(module, "attention_backend", None)
242
+ log(f"🚀 Attention Backend: {attn}")
243
+ except Exception:
244
+ pass
245
+
246
+ # Search for blocks
247
+ candidates = ["transformer_blocks", "blocks", "layers", "encoder", "model"]
248
+ blocks = None
249
+ chosen_attr = None
250
+
251
+ for attr in candidates:
252
+ if hasattr(module, attr):
253
+ blocks = getattr(module, attr)
254
+ chosen_attr = attr
255
+ break
256
+
257
+ log(f"\n📚 Block Container : {chosen_attr}")
258
+
259
+ if blocks is None:
260
+ log("⚠️ No block structure found")
261
+ return
262
+
263
+ if not hasattr(blocks, "__len__"):
264
+ log("⚠️ Blocks exist but are not iterable")
265
+ return
266
+
267
+ total = len(blocks)
268
+ log(f"🔢 Total Blocks : {total}\n")
269
+
270
+ # Inspect first 15 blocks
271
+ N = min(15, total)
272
+
273
+ for i in range(N):
274
+ blk = blocks[i]
275
+ log(f"\n🧩 Block [{i}/{total-1}] — {blk.__class__.__name__}")
276
+
277
+ for n, m in blk.named_children():
278
+ log(f" ├─ {n:<15} {m.__class__.__name__}")
279
+
280
+ # Attention details
281
+ if hasattr(blk, "attn"):
282
+ a = blk.attn
283
+ log(f" ├─ Attention")
284
+ log(f" │ Heads : {getattr(a, 'num_heads', 'unknown')}")
285
+ log(f" │ Dim : {getattr(a, 'hidden_size', 'unknown')}")
286
+ log(f" │ Backend : {getattr(a, 'attention_backend', 'unknown')}")
287
+
288
+ # Device / dtype
289
+ try:
290
+ log(f" ├─ Device : {next(blk.parameters()).device}")
291
+ log(f" ├─ DType : {next(blk.parameters()).dtype}")
292
+ except StopIteration:
293
+ pass
294
+
295
+ get_vram(" ▶ ")
296
+
297
+ except Exception as e:
298
+ log(f"❌ Module inspect error: {e}")
299
+
300
+
301
+ # ---------- LORA INSPECTION ----------
302
+ def inspect_loras(pipe):
303
+ pretty_header("🧩 LoRA ADAPTERS")
304
+
305
+ try:
306
+ if not hasattr(pipe, "lora_state_dict") and not hasattr(pipe, "adapter_names"):
307
+ log("⚠️ No LoRA system detected.")
308
+ return
309
+
310
+ if hasattr(pipe, "adapter_names"):
311
+ names = pipe.adapter_names
312
+ log(f"Available Adapters: {names}")
313
+
314
+ if hasattr(pipe, "active_adapters"):
315
+ log(f"Active Adapters : {pipe.active_adapters}")
316
+
317
+ if hasattr(pipe, "lora_scale"):
318
+ log(f"LoRA Scale : {pipe.lora_scale}")
319
+
320
+ # LoRA modules
321
+ if hasattr(pipe, "transformer") and hasattr(pipe.transformer, "modules"):
322
+ for name, module in pipe.transformer.named_modules():
323
+ if "lora" in name.lower():
324
+ log(f" 🔧 LoRA Module: {name} ({module.__class__.__name__})")
325
+
326
+ except Exception as e:
327
+ log(f"❌ LoRA inspect error: {e}")
328
+
329
+
330
+ # ---------- PIPELINE INSPECTOR ----------
331
+ def debug_pipeline(pipe):
332
+ pretty_header("🚀 FULL PIPELINE DEBUGGING")
333
+
334
+ try:
335
+ log(f"Pipeline Class : {pipe.__class__.__name__}")
336
+ log(f"Attention Impl : {getattr(pipe, 'attn_implementation', 'unknown')}")
337
+ log(f"Device : {pipe.device}")
338
+ except Exception:
339
+ pass
340
+
341
+ get_vram("▶ ")
342
+
343
+ # Inspect TRANSFORMER
344
+ if hasattr(pipe, "transformer"):
345
+ inspect_module("Transformer", pipe.transformer)
346
+
347
+ # Inspect TEXT ENCODER
348
+ if hasattr(pipe, "text_encoder") and pipe.text_encoder is not None:
349
+ inspect_module("Text Encoder", pipe.text_encoder)
350
+
351
+ # Inspect UNET (if ZImage pipeline has it)
352
+ if hasattr(pipe, "unet"):
353
+ inspect_module("UNet", pipe.unet)
354
+
355
+ # LoRA adapters
356
+ inspect_loras(pipe)
357
+
358
+ pretty_header("🎉 END DEBUG REPORT")
359
+
360
+
361
+
362
+ # ============================================================
363
+ # LOAD TRANSFORMER — WITH LIVE STATS
364
+ # ============================================================
365
+ log("\n===================================================")
366
+ log("🔧 LOADING TRANSFORMER BLOCK")
367
+ log("===================================================")
368
+
369
+ log("📌 Logging memory before load:")
370
+ log_system_stats("START TRANSFORMER LOAD")
371
+
372
+ try:
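+     # NF4 4-bit quantization via bitsandbytes: weights are stored as 4-bit NF4
+     # values with double quantization while compute runs in bfloat16, which
+     # roughly quarters the transformer's load-time VRAM footprint compared to
+     # a plain bf16 load (approximate figure, not measured here).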
373
+ quant_cfg = DiffusersBitsAndBytesConfig(
374
+ load_in_4bit=True,
375
+ bnb_4bit_quant_type="nf4",
376
+ bnb_4bit_compute_dtype=torch_dtype,
377
+ bnb_4bit_use_double_quant=True,
378
+ )
379
+
380
+ transformer = AutoModel.from_pretrained(
381
+ model_id,
382
+ cache_dir=model_cache,
383
+ subfolder="transformer",
384
+ quantization_config=quant_cfg,
385
+ torch_dtype=torch_dtype,
386
+ device_map=device,
387
+ )
388
+ log("✅ Transformer loaded successfully.")
389
+
390
+ except Exception as e:
391
+ log(f"❌ Transformer load failed: {e}")
392
+ transformer = None
393
+
394
+ log_system_stats("AFTER TRANSFORMER LOAD")
395
+
396
+ if transformer:
397
+ inspect_transformer(transformer, "Transformer")
398
+
399
+
400
+ # ============================================================
401
+ # LOAD TEXT ENCODER
402
+ # ============================================================
403
+ log("\n===================================================")
404
+ log("🔧 LOADING TEXT ENCODER")
405
+ log("===================================================")
406
+
407
+ log_system_stats("START TEXT ENCODER LOAD")
408
+
409
+ try:
410
+ quant_cfg2 = TransformersBitsAndBytesConfig(
411
+ load_in_4bit=True,
412
+ bnb_4bit_quant_type="nf4",
413
+ bnb_4bit_compute_dtype=torch_dtype,
414
+ bnb_4bit_use_double_quant=True,
415
+ )
416
+
417
+ text_encoder = AutoModel.from_pretrained(
418
+ model_id,
419
+ cache_dir=model_cache,
420
+ subfolder="text_encoder",
421
+ quantization_config=quant_cfg2,
422
+ torch_dtype=torch_dtype,
423
+ device_map=device,
424
+ )
425
+ log("✅ Text encoder loaded successfully.")
426
+
427
+ except Exception as e:
428
+ log(f"❌ Text encoder load failed: {e}")
429
+ text_encoder = None
430
+
431
+ log_system_stats("AFTER TEXT ENCODER LOAD")
432
+
433
+ if text_encoder:
434
+ inspect_transformer(text_encoder, "Text Encoder")
435
+
436
+
437
+ # ============================================================
438
+ # BUILD PIPELINE
439
+ # ============================================================
440
+ log("\n===================================================")
441
+ log("🔧 BUILDING PIPELINE")
442
+ log("===================================================")
443
+
444
+ log_system_stats("START PIPELINE BUILD")
445
+
446
+ try:
447
+ pipe = ZImagePipeline.from_pretrained(
448
+ model_id,
449
+ transformer=transformer,
450
+ text_encoder=text_encoder,
451
+ torch_dtype=torch_dtype,
452
+ )
453
+
454
+ # Prefer flash attention if supported
455
+ try:
456
+ if hasattr(pipe, "transformer") and hasattr(pipe.transformer, "set_attention_backend"):
457
+ pipe.transformer.set_attention_backend("_flash_3")
458
+ log("✅ transformer.set_attention_backend('_flash_3') called")
459
+ except Exception as _e:
460
+ log(f"⚠️ set_attention_backend failed: {_e}")
461
+
462
+ # 🚫 NO default LoRA here
463
+ # 🚫 NO fuse
464
+ # 🚫 NO unload
465
+
466
+ pipe.to("cuda")
467
+ log("✅ Pipeline built successfully.")
468
+ log("Pipeline build completed.")
469
+
470
+ except Exception as e:
471
+ log(f"❌ Pipeline build failed: {e}")
472
+ log(traceback.format_exc())
473
+ pipe = None
474
+
475
+
476
+ log_system_stats("AFTER PIPELINE BUILD")
477
+
478
+
479
+ # -----------------------------
480
+ # Monkey-patch prepare_latents (safe)
481
+ # -----------------------------
482
+ if pipe is not None and hasattr(pipe, "prepare_latents"):
483
+ original_prepare_latents = pipe.prepare_latents
484
+
485
+ def logged_prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
486
+ try:
487
+ result_latents = original_prepare_latents(batch_size, num_channels_latents, height, width, dtype, device, generator, latents)
488
+ log_msg = f"🔹 prepare_latents called | shape={result_latents.shape}, dtype={result_latents.dtype}, device={result_latents.device}"
489
+ if hasattr(self, "_latents_log"):
490
+ self._latents_log.append(log_msg)
491
+ else:
492
+ self._latents_log = [log_msg]
493
+ return result_latents
494
+ except Exception as e:
495
+ log(f"⚠️ prepare_latents wrapper failed: {e}")
496
+ raise
497
+
498
+ # apply patch safely
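+     # __get__(pipe) binds the plain function to this pipe instance, so the
+     # wrapper's `self` parameter receives the pipeline object when it is called.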
499
+ try:
500
+ pipe.prepare_latents = logged_prepare_latents.__get__(pipe)
501
+ log("✅ prepare_latents monkey-patched")
502
+ except Exception as e:
503
+ log(f"⚠️ Failed to attach prepare_latents patch: {e}")
504
+ else:
505
+ log("❌ WARNING: Pipe not initialized or prepare_latents missing; skipping prepare_latents patch")
506
+
507
+
508
+ from PIL import Image
509
+ import torch
510
+
511
+ # --------------------------
512
+ # Helper: Safe latent extractor
513
+ # --------------------------
514
+ def safe_get_latents(pipe, height, width, generator, device, LOGS):
515
+ """
516
+ Safely prepare latents for any ZImagePipeline variant.
517
+ Returns latents tensor, logs issues instead of failing.
518
+ """
519
+ try:
520
+ # Determine number of channels
521
+ num_channels = 4 # default fallback
522
+ if hasattr(pipe, "unet") and hasattr(pipe.unet, "in_channels"):
523
+ num_channels = pipe.unet.in_channels
524
+ elif hasattr(pipe, "vae") and hasattr(pipe.vae, "latent_channels"):
525
+ num_channels = pipe.vae.latent_channels # some pipelines define this
526
+ LOGS.append(f"🔹 Using num_channels={num_channels} for latents")
527
+
528
+ latents = pipe.prepare_latents(
529
+ batch_size=1,
530
+ num_channels_latents=num_channels,
531
+ height=height,
532
+ width=width,
533
+ dtype=torch.float32,
534
+ device=device,
535
+ generator=generator,
536
+ )
537
+
538
+ LOGS.append(f"🔹 Latents shape: {latents.shape}, dtype: {latents.dtype}, device: {latents.device}")
539
+ return latents
540
+ except Exception as e:
541
+ LOGS.append(f"⚠️ Latent extraction failed: {e}")
542
+ # fallback: guess a safe shape
543
+ fallback_channels = 16 # try standard default for ZImage pipelines
544
+ latents = torch.randn((1, fallback_channels, height // 8, width // 8),
545
+ generator=generator, device=device)
546
+ LOGS.append(f"🔹 Using fallback random latents shape: {latents.shape}")
547
+ return latents
548
+
549
+ # --------------------------
550
+ # Main generation function (logic kept as-is)
551
+ # --------------------------
552
+ from huggingface_hub import HfApi, HfFolder
553
+ import torch
554
+ import os
555
+
556
+ HF_REPO_ID = "rahul7star/Zstudio-latent" # Model repo
557
+ HF_TOKEN = HfFolder.get_token() # Make sure you are logged in via `huggingface-cli login`
558
+
559
+ def upload_latents_to_hf(latent_dict, filename="latents.pt"):
560
+ local_path = f"/tmp/{filename}"
561
+ torch.save(latent_dict, local_path)
562
+ try:
563
+ api = HfApi()
564
+ api.upload_file(
565
+ path_or_fileobj=local_path,
566
+ path_in_repo=filename,
567
+ repo_id=HF_REPO_ID,
568
+ token=HF_TOKEN,
569
+ repo_type="model" # since this is a model repo
570
+ )
571
+ os.remove(local_path)
572
+ return f"https://huggingface.co/{HF_REPO_ID}/resolve/main/{filename}"
573
+ except Exception as e:
574
+ os.remove(local_path)
575
+ raise e
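+
+ # Assuming HF_TOKEN has write access to HF_REPO_ID, a saved latent file can later be
+ # pulled back and inspected like this (illustrative sketch, not used by this app):
+ #
+ #     from huggingface_hub import hf_hub_download
+ #     path = hf_hub_download(HF_REPO_ID, "latents.pt", repo_type="model")
+ #     saved = torch.load(path, map_location="cpu")
+ #     print(list(saved.keys()))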
576
+
577
+
578
+
579
+ import asyncio
580
+ import torch
581
+ from PIL import Image
582
+
583
+ async def async_upload_latents(latent_dict, filename, LOGS):
+     try:
+         # upload_latents_to_hf is synchronous, so run it in a worker thread
+         # (asyncio.to_thread, Python 3.9+) instead of awaiting it directly
+         hf_url = await asyncio.to_thread(upload_latents_to_hf, latent_dict, filename=filename)
+         LOGS.append(f"🔹 All preview latents uploaded: {hf_url}")
+     except Exception as e:
+         LOGS.append(f"⚠️ Failed to upload all preview latents: {e}")
589
+
590
+
591
+ # This generates a preview frame for every latent step; it is GPU-expensive and the decode sometimes fails, so keep it for later use.
592
+ @spaces.GPU
593
+ def generate_image_all_latents(prompt, height, width, steps, seed, guidance_scale=0.0):
594
+ LOGS = []
595
+ device = "cpu" # FORCE CPU
596
+ generator = torch.Generator(device).manual_seed(int(seed))
597
+
598
+ placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
599
+ latent_gallery = []
600
+ final_gallery = []
601
+
602
+ last_four_latents = [] # we only upload 4
603
+
604
+ # --------------------------------------------------
605
+ # LATENT PREVIEW GENERATION (CPU MODE)
606
+ # --------------------------------------------------
607
+ try:
608
+ latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
609
+ latents = latents.to("cpu") # keep EVERYTHING CPU
610
+
611
+ timestep_count = len(pipe.scheduler.timesteps)
612
+ preview_every = max(1, timestep_count // 10)
613
+
614
+ for i, t in enumerate(pipe.scheduler.timesteps):
615
+
616
+ # -------------- decode latent preview --------------
617
+ try:
618
+ with torch.no_grad():
619
+ latent_cpu = latents.to(pipe.vae.dtype) # match VAE dtype
620
+ decoded = pipe.vae.decode(latent_cpu).sample # [1,3,H,W]
621
+ decoded = (decoded / 2 + 0.5).clamp(0, 1)
622
+ decoded = decoded[0].permute(1,2,0).cpu().numpy()
623
+ latent_img = Image.fromarray((decoded * 255).astype("uint8"))
624
+ except Exception:
625
+ latent_img = placeholder
626
+ LOGS.append("⚠️ Latent preview decode failed.")
627
+
628
+ latent_gallery.append(latent_img)
629
+
630
+ # store last 4 latent states
631
+ if len(last_four_latents) >= 4:
632
+ last_four_latents.pop(0)
633
+ last_four_latents.append(latents.cpu().clone())
634
+
635
+ # UI preview yields
636
+ if i % preview_every == 0:
637
+ yield None, latent_gallery, LOGS
638
+
639
+ # --------------------------------------------------
640
+ # UPLOAD LAST 4 LATENTS (SYNC)
641
+ # --------------------------------------------------
642
+ try:
643
+ upload_dict = {
644
+ "last_4_latents": last_four_latents,
645
+ "prompt": prompt,
646
+ "seed": seed
647
+ }
648
+
649
+ hf_url = upload_latents_to_hf(
650
+ upload_dict,
651
+ filename=f"latents_last4_{seed}.pt"
652
+ )
653
+
654
+ LOGS.append(f"🔹 Uploaded last 4 latents: {hf_url}")
655
+
656
+ except Exception as e:
657
+ LOGS.append(f"⚠️ Failed to upload latents: {e}")
658
+
659
+ except Exception as e:
660
+ LOGS.append(f"⚠️ Latent generation failed: {e}")
661
+ latent_gallery.append(placeholder)
662
+ yield None, latent_gallery, LOGS
663
+
664
+ # --------------------------------------------------
665
+ # FINAL IMAGE - UNTOUCHED
666
+ # --------------------------------------------------
667
+ try:
668
+ output = pipe(
669
+ prompt=prompt,
670
+ height=height,
671
+ width=width,
672
+ num_inference_steps=steps,
673
+ guidance_scale=guidance_scale,
674
+ generator=generator,
675
+ )
676
+ final_img = output.images[0]
677
+ LOGS.append("✅ Standard pipeline succeeded.")
678
+
679
+ yield final_img, latent_gallery, LOGS
680
+
681
+ except Exception as e2:
682
+ LOGS.append(f"❌ Standard pipeline failed: {e2}")
683
+ yield placeholder, latent_gallery, LOGS
684
+
685
+ @spaces.GPU
686
+ def generate_image(prompt, height, width, steps, seed, guidance_scale=0.0):
687
+ LOGS = []
688
+ device = "cuda"
689
+ cpu_device = "cpu"
690
+ generator = torch.Generator(device).manual_seed(int(seed))
691
+
692
+ placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
693
+ latent_gallery = []
694
+ final_gallery = []
695
+
696
+ last_latents = [] # store last 5 preview latents on CPU
697
+
698
+ try:
699
+ # --- Initial latents ---
700
+ latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
701
+ latents = latents.float().to(cpu_device) # move to CPU
702
+
703
+ num_previews = min(10, steps)
704
+ preview_indices = torch.linspace(0, steps - 1, num_previews).long()
705
+
706
+ for i, step_idx in enumerate(preview_indices):
707
+ try:
708
+ with torch.no_grad():
709
+ # --- Z-Image Turbo-style denoise simulation ---
710
+ t = 1.0 - (i / num_previews) # linear decay [1.0 -> 0.0]
711
+ noise_scale = t ** 0.5 # reduce noise over steps (sqrt for smoother)
712
+ denoise_latent = latents * t + torch.randn_like(latents) * noise_scale
713
+
714
+ # Move to VAE device & dtype
715
+ denoise_latent = denoise_latent.to(pipe.vae.device).to(pipe.vae.dtype)
716
+
717
+ # Decode latent to image
718
+ decoded = pipe.vae.decode(denoise_latent, return_dict=False)[0]
719
+ decoded = (decoded / 2 + 0.5).clamp(0, 1)
720
+ decoded = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
721
+ decoded = (decoded * 255).round().astype("uint8")
722
+ latent_img = Image.fromarray(decoded[0])
723
+
724
+ except Exception as e:
725
+ LOGS.append(f"⚠️ Latent preview decode failed: {e}")
726
+ latent_img = placeholder
727
+
728
+ latent_gallery.append(latent_img)
729
+
730
+ # Keep last 5 latents only
731
+ last_latents.append(denoise_latent.cpu().clone())
732
+ if len(last_latents) > 5:
733
+ last_latents.pop(0)
734
+
735
+ # Show only last 5 previews in UI
736
+ yield None, latent_gallery[-5:], LOGS
737
+
738
+ # Optionally: upload last 5 latents
739
+ # latent_dict = {"latents": last_latents, "prompt": prompt, "seed": seed}
740
+ # hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_last5_{seed}.pt")
741
+ # LOGS.append(f"🔹 Last 5 latents uploaded: {hf_url}")
742
+
743
+ except Exception as e:
744
+ LOGS.append(f"⚠️ Latent generation failed: {e}")
745
+ latent_gallery.append(placeholder)
746
+ yield None, latent_gallery[-5:], LOGS
747
+
748
+ # --- Final image on GPU ---
749
+ try:
750
+ output = pipe(
751
+ prompt=prompt,
752
+ height=height,
753
+ width=width,
754
+ num_inference_steps=steps,
755
+ guidance_scale=guidance_scale,
756
+ generator=generator,
757
+ )
758
+ final_img = output.images[0]
759
+ final_gallery.append(final_img)
760
+ latent_gallery.append(final_img)
761
+ LOGS.append("✅ Standard pipeline succeeded.")
762
+ yield final_img, latent_gallery[-5:] + [final_img], LOGS # last 5 previews + final
763
+
764
+ except Exception as e2:
765
+ LOGS.append(f"❌ Standard pipeline failed: {e2}")
766
+ final_gallery.append(placeholder)
767
+ latent_gallery.append(placeholder)
768
+ yield placeholder, latent_gallery[-5:] + [placeholder], LOGS
769
+
770
+
771
+
772
+ # This is a stable version that can generate the final image plus a noise-to-latent preview.
773
+ @spaces.GPU
774
+ def generate_image_verygood_realnoise(prompt, height, width, steps, seed, guidance_scale=0.0):
775
+ LOGS = []
776
+ device = "cuda"
777
+ generator = torch.Generator(device).manual_seed(int(seed))
778
+
779
+ placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
780
+ latent_gallery = []
781
+ final_gallery = []
782
+
783
+ # --- Generate latent previews ---
784
+ try:
785
+ latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
786
+ latents = latents.float() # keep float32 until decode
787
+
788
+ num_previews = min(10, steps)
789
+ preview_steps = torch.linspace(0, 1, num_previews)
790
+
791
+ for alpha in preview_steps:
792
+ try:
793
+ with torch.no_grad():
794
+ # Simulate denoising progression like Z-Image Turbo
795
+ preview_latent = latents * alpha # simple linear progression toward the full latent
796
+
797
+ # Move to same device and dtype as VAE
798
+ preview_latent = preview_latent.to(pipe.vae.device).to(pipe.vae.dtype)
799
+
800
+ # Decode
801
+ decoded = pipe.vae.decode(preview_latent, return_dict=False)[0]
802
+
803
+ # Convert to PIL following same logic as final image
804
+ decoded = (decoded / 2 + 0.5).clamp(0, 1)
805
+ decoded = decoded.cpu().permute(0, 2, 3, 1).float().numpy()
806
+ decoded = (decoded * 255).round().astype("uint8")
807
+ latent_img = Image.fromarray(decoded[0])
808
+
809
+ except Exception as e:
810
+ LOGS.append(f"⚠️ Latent preview decode failed: {e}")
811
+ latent_img = placeholder
812
+
813
+ latent_gallery.append(latent_img)
814
+ yield None, latent_gallery, LOGS
815
+
816
+ except Exception as e:
817
+ LOGS.append(f"⚠️ Latent generation failed: {e}")
818
+ latent_gallery.append(placeholder)
819
+ yield None, latent_gallery, LOGS
820
+
821
+ # --- Final image: untouched ---
822
+ try:
823
+ output = pipe(
824
+ prompt=prompt,
825
+ height=height,
826
+ width=width,
827
+ num_inference_steps=steps,
828
+ guidance_scale=guidance_scale,
829
+ generator=generator,
830
+ )
831
+ final_img = output.images[0]
832
+ final_gallery.append(final_img)
833
+ latent_gallery.append(final_img) # fallback preview
834
+ LOGS.append("✅ Standard pipeline succeeded.")
835
+ yield final_img, latent_gallery, LOGS
836
+
837
+ except Exception as e2:
838
+ LOGS.append(f"❌ Standard pipeline failed: {e2}")
839
+ final_gallery.append(placeholder)
840
+ latent_gallery.append(placeholder)
841
+ yield placeholder, latent_gallery, LOGS
842
+
843
+
844
+
845
+
846
+ # DO NOT TOUCH: this is a stable version that can generate the final image plus a noise-to-latent preview, with latent upload to the repo.
847
+ @spaces.GPU
848
+ def generate_image_safe(prompt, height, width, steps, seed, guidance_scale=0.0):
849
+ LOGS = []
850
+ device = "cuda"
851
+ generator = torch.Generator(device).manual_seed(int(seed))
852
+
853
+ placeholder = Image.new("RGB", (width, height), color=(255, 255, 255))
854
+ latent_gallery = []
855
+ final_gallery = []
856
+
857
+ # --- Generate latent previews in a loop ---
858
+ try:
859
+ latents = safe_get_latents(pipe, height, width, generator, device, LOGS)
860
+
861
+ # Convert latents to float32 if necessary
862
+ if latents.dtype != torch.float32:
863
+ latents = latents.float()
864
+
865
+ # Loop for multiple previews before final image
866
+ num_previews = min(10, steps) # show ~10 previews
867
+ preview_steps = torch.linspace(0, 1, num_previews)
868
+
869
+ for i, alpha in enumerate(preview_steps):
870
+ try:
871
+ with torch.no_grad():
872
+ # Simple noise interpolation for preview (simulate denoising progress)
873
+ preview_latent = latents * alpha + torch.randn_like(latents) * (1 - alpha)
874
+ # Decode to PIL
875
+ latent_img_tensor = pipe.vae.decode(preview_latent).sample # [1,3,H,W]
876
+ latent_img_tensor = (latent_img_tensor / 2 + 0.5).clamp(0, 1)
877
+ latent_img_tensor = latent_img_tensor.cpu().permute(0, 2, 3, 1)[0]
878
+ latent_img = Image.fromarray((latent_img_tensor.numpy() * 255).astype('uint8'))
879
+ except Exception as e:
880
+ LOGS.append(f"⚠️ Latent preview decode failed: {e}")
881
+ latent_img = placeholder
882
+
883
+ latent_gallery.append(latent_img)
884
+ yield None, latent_gallery, LOGS # update Gradio with intermediate preview
885
+
886
+ # Save final latents to HF
887
+ latent_dict = {"latents": latents.cpu(), "prompt": prompt, "seed": seed}
888
+ try:
889
+ hf_url = upload_latents_to_hf(latent_dict, filename=f"latents_{seed}.pt")
890
+ LOGS.append(f"🔹 Latents uploaded: {hf_url}")
891
+ except Exception as e:
892
+ LOGS.append(f"⚠️ Failed to upload latents: {e}")
893
+
894
+ except Exception as e:
895
+ LOGS.append(f"⚠️ Latent generation failed: {e}")
896
+ latent_gallery.append(placeholder)
897
+ yield None, latent_gallery, LOGS
898
+
899
+ # --- Final image: untouched standard pipeline ---
900
+ try:
901
+ output = pipe(
902
+ prompt=prompt,
903
+ height=height,
904
+ width=width,
905
+ num_inference_steps=steps,
906
+ guidance_scale=guidance_scale,
907
+ generator=generator,
908
+ )
909
+ final_img = output.images[0]
910
+ final_gallery.append(final_img)
911
+ latent_gallery.append(final_img) # fallback preview if needed
912
+ LOGS.append("✅ Standard pipeline succeeded.")
913
+ yield final_img, latent_gallery, LOGS
914
+
915
+ except Exception as e2:
916
+ LOGS.append(f"❌ Standard pipeline failed: {e2}")
917
+ final_gallery.append(placeholder)
918
+ latent_gallery.append(placeholder)
919
+ yield placeholder, latent_gallery, LOGS
920
+
921
+
922
+
923
+
924
+
925
+
926
+
928
+
929
+ with gr.Blocks(title="Z-Image-Turbo") as demo:
930
+ gr.Markdown("# 🎨 Z-Image-Turbo (LoRA-enabled UI)")
931
+
932
+ # =========================
933
+ # MAIN TABS
934
+ # =========================
935
+ with gr.Tabs():
936
+
937
+ # -------- Image Tab --------
938
+ with gr.TabItem("Image & Latents"):
939
+ with gr.Row():
940
+ with gr.Column(scale=1):
941
+ prompt = gr.Textbox(
942
+ label="Prompt",
943
+ value="boat in Ocean"
944
+ )
945
+ height = gr.Slider(
946
+ 256, 2048, value=1024, step=8, label="Height"
947
+ )
948
+ width = gr.Slider(
949
+ 256, 2048, value=1024, step=8, label="Width"
950
+ )
951
+ steps = gr.Slider(
952
+ 1, 50, value=20, step=1, label="Inference Steps"
953
+ )
954
+ seed = gr.Number(
955
+ value=42, label="Seed"
956
+ )
957
+ run_btn = gr.Button("🚀 Generate Image")
958
+
959
+ with gr.Column(scale=1):
960
+ final_image = gr.Image(label="Final Image")
961
+ latent_gallery = gr.Gallery(
962
+ label="Latent Steps",
963
+ columns=4,
964
+ height=256,
965
+ preview=True,
966
+ )
967
+
968
+ # -------- Logs Tab --------
969
+ with gr.TabItem("Logs"):
970
+ logs_box = gr.Textbox(
971
+ label="Logs",
972
+ lines=25,
973
+ interactive=False
974
+ )
975
+
976
+ # =========================
977
+ # LoRA CONTROLS
978
+ # =========================
979
+ gr.Markdown("## 🧩 LoRA Controls")
980
+
981
+ with gr.Row():
982
+ lora_repo = gr.Textbox(
983
+ label="LoRA Repo (HF)",
984
+ value="rahul7star/ZImageLora",
985
+ placeholder="username/repo"
986
+ )
987
+
988
+ lora_file = gr.Dropdown(
989
+ label="LoRA file (.safetensors)",
990
+ choices=[]
991
+ )
992
+
993
+ lora_strength = gr.Slider(
994
+ 0.0, 2.0, value=1.0, step=0.05, label="LoRA strength"
995
+ )
996
+
997
+ with gr.Row():
998
+ refresh_lora_btn = gr.Button("🔄 Refresh LoRA List")
999
+ apply_lora_btn = gr.Button("✅ Apply LoRA")
1000
+ clear_lora_btn = gr.Button("❌ Clear LoRA")
1001
+
1002
+ # =========================
1003
+ # CALLBACKS
1004
+ # =========================
1005
+
1006
+
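+     # The callbacks below use list_loras_from_repo(), which is not defined anywhere
+     # in this file. A minimal sketch of such a helper, assuming the repo is readable
+     # with HF_TOKEN and that only .safetensors files should be offered in the dropdown:
+     def list_loras_from_repo(repo_name):
+         """Return the .safetensors files found in a Hugging Face repo."""
+         try:
+             files = HfApi().list_repo_files(repo_name, repo_type="model", token=HF_TOKEN)
+             return [f for f in files if f.endswith(".safetensors")]
+         except Exception as e:
+             log(f"⚠️ Could not list files in {repo_name}: {e}")
+             return []
+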
1007
+ def refresh_lora_list(repo_name):
1008
+ try:
1009
+ files = list_loras_from_repo(repo_name)
1010
+ if not files:
1011
+ log(f"⚠️ No LoRA files found in {repo_name}")
1012
+ return gr.update(choices=[], value=None)
1013
+
1014
+ log(f"📦 Found {len(files)} LoRA files in {repo_name}")
1015
+ return gr.update(choices=files, value=files[0])
1016
+
1017
+ except Exception as e:
1018
+ log(f"❌ Failed to list LoRA files: {e}")
1019
+ return gr.update(choices=[], value=None)
1020
+
1021
+ refresh_lora_btn.click(
1022
+ refresh_lora_list,
1023
+ inputs=[lora_repo],
1024
+ outputs=[lora_file]
1025
+ )
1026
+
1027
+ def apply_lora(repo_name, lora_filename, strength):
1028
+ global pipe
1029
+
1030
+ if pipe is None:
1031
+ return "❌ Pipeline not initialized"
1032
+
1033
+ if not lora_filename:
1034
+ return "⚠️ No LoRA file selected"
1035
+
1036
+ try:
1037
+ pipe.load_lora_weights(
1038
+ repo_name,
1039
+ weight_name=lora_filename,
1040
+ adapter_name="ui_lora"
1041
+ )
1042
+ pipe.set_adapters(["ui_lora"], [strength])
1043
+
1044
+ log(f"✅ Applied LoRA: {repo_name}/{lora_filename} (strength={strength})")
1045
+
1046
+ if hasattr(pipe, "peft_config"):
1047
+ log(f"🎯 Active adapters: {list(pipe.peft_config.keys())}")
1048
+
1049
+ return "LoRA applied"
1050
+
1051
+ except Exception as e:
1052
+ log(f"❌ Failed to apply LoRA: {e}")
1053
+ return f"Failed: {e}"
1054
+
1055
+ apply_lora_btn.click(
1056
+ apply_lora,
1057
+ inputs=[lora_repo, lora_file, lora_strength],
1058
+ outputs=[logs_box]
1059
+ )
1060
+
1061
+ def clear_lora():
1062
+ global pipe
1063
+ if pipe is None:
1064
+ return "❌ Pipeline not initialized"
1065
+
1066
+ try:
1067
+ pipe.set_adapters([], [])
1068
+ log("🧹 LoRA cleared")
1069
+ return "LoRA cleared"
1070
+ except Exception as e:
1071
+ log(f"❌ Failed to clear LoRA: {e}")
1072
+ return f"Failed: {e}"
1073
+
1074
+ clear_lora_btn.click(
1075
+ clear_lora,
1076
+ outputs=[logs_box]
1077
+ )
1078
+
1079
+ # =========================
1080
+ # GENERATION
1081
+ # =========================
1082
+ run_btn.click(
1083
+ generate_image,
1084
+ inputs=[prompt, height, width, steps, seed],
1085
+ outputs=[final_image, latent_gallery, logs_box]
1086
+ )
1087
+
1088
+
1089
+
1090
+
1091
+ demo.launch()
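+ # Note: generate_image is a generator, so Gradio streams the intermediate latent
+ # previews as they are yielded. On Spaces, demo.queue().launch() could be used instead
+ # if explicit request queuing is wanted (assumption: the default launch settings suffice here).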