Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -17,6 +17,16 @@ from torchao.quantization import Int8WeightOnlyConfig
 
 import aoti
 
+from diffusers import (
+    FlowMatchEulerDiscreteScheduler,
+    SASolverScheduler,
+    DEISMultistepScheduler,
+    DPMSolverMultistepInverseScheduler,
+    UniPCMultistepScheduler,
+    DPMSolverMultistepScheduler,
+    DPMSolverSinglestepScheduler,
+)
+
 
 MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
 
@@ -34,6 +44,16 @@ MAX_FRAMES_MODEL = 160
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
 
+SCHEDULER_MAP = {
+    "FlowMatchEulerDiscrete": FlowMatchEulerDiscreteScheduler,
+    "SASolver": SASolverScheduler,
+    "DEISMultistep": DEISMultistepScheduler,
+    "DPMSolverMultistepInverse": DPMSolverMultistepInverseScheduler,
+    "UniPCMultistep": UniPCMultistepScheduler,
+    "DPMSolverMultistep": DPMSolverMultistepScheduler,
+    "DPMSolverSinglestep": DPMSolverSinglestepScheduler,
+}
+
 
 pipe = WanImageToVideoPipeline.from_pretrained(
     MODEL_ID,
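Each entry maps the plain string exposed in the UI to a diffusers scheduler class, so a selection can be turned into a live scheduler by rebuilding it from the pipeline's current scheduler config. A minimal sketch of that lookup, assuming `pipe` is the WanImageToVideoPipeline constructed in the surrounding code (the chosen key is just an example):

    # Resolve a dropdown string to a scheduler class and rebuild it from the
    # pipeline's existing scheduler config.
    scheduler_cls = SCHEDULER_MAP["UniPCMultistep"]
    pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)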
@@ -52,6 +72,9 @@ pipe = WanImageToVideoPipeline.from_pretrained(
     torch_dtype=torch.bfloat16,
 ).to('cuda')
 
+original_scheduler = copy.deepcopy(pipe.scheduler.config)
+print(original_scheduler)
+
 pipe.load_lora_weights(
     "Kijai/WanVideo_comfy",
     weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
@@ -76,24 +99,17 @@ pipe.load_lora_weights(
     weight_name="livewallpaper_wan22_14b_i2v_low_model_0_1_e26.safetensors",
     adapter_name="livewallpaper"
 )
-
-default_transformer = copy.deepcopy(pipe.transformer)
-
 pipe.set_adapters(["livewallpaper"], adapter_weights=[1.])
-pipe.fuse_lora(adapter_names=["livewallpaper"], lora_scale=
+pipe.fuse_lora(adapter_names=["livewallpaper"], lora_scale=1., components=["transformer"])
 pipe.unload_lora_weights()
 
 quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
 quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
 quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
-quantize_(default_transformer, Float8DynamicActivationFloat8WeightConfig())
 
 aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
 aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
-aoti.aoti_blocks_load(default_transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
 
-static_transformer = pipe.transformer
-pipe.transformer = default_transformer
 
 default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
 default_negative_prompt = "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, 整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, 画得不好的手部, 画得不好的脸部, 畸形的, 毁容的, 形态畸形的肢体, 手指融合, 静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
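The block above follows the usual diffusers LoRA flow: load the adapter, activate it with `set_adapters`, bake it into the base weights with `fuse_lora`, then `unload_lora_weights` so that quantization and the AOTI block loading operate on plain fused weights. A minimal, self-contained sketch of that pattern; the repository and file names here are placeholders, not the ones used above:

    # Illustrative only: fuse one LoRA into a pipeline, then drop the adapter
    # bookkeeping so later passes see ordinary fused weights.
    pipe.load_lora_weights(
        "some-org/some-lora-repo",           # placeholder repo id
        weight_name="adapter.safetensors",   # placeholder file name
        adapter_name="style",
    )
    pipe.set_adapters(["style"], adapter_weights=[1.0])
    pipe.fuse_lora(adapter_names=["style"], lora_scale=1.0)
    pipe.unload_lora_weights()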
@@ -174,7 +190,9 @@ def get_inference_duration(
     guidance_scale,
     guidance_scale_2,
     current_seed,
-
+    scheduler_name,
+    flow_shift,
+    progress
 ):
     BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
     BASE_STEP_DURATION = 15
@@ -195,14 +213,22 @@ def run_inference(
     guidance_scale,
     guidance_scale_2,
     current_seed,
-
+    scheduler_name,
+    flow_shift,
     progress=gr.Progress(track_tqdm=True),
 ):
 
-
-
-
-
+    scheduler_class = SCHEDULER_MAP.get(scheduler_name)
+    if scheduler_class != pipe.scheduler._class_name or flow_shift != pipe.scheduler.config.get("flow_shift", "shift"):
+        config = copy.deepcopy(original_scheduler.config)
+        print("update scheduler")
+        if scheduler_class == FlowMatchEulerDiscreteScheduler:
+            config['shift'] = flow_shift
+        else:
+            config['flow_shift'] = flow_shift
+        pipe.scheduler = scheduler_class.from_config(config)
+
+    result = pipe(
         image=resized_image,
         last_image=processed_last_image,
         prompt=prompt,
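Note that `original_scheduler` above holds a config snapshot rather than a scheduler instance, and the guard compares a class object against `pipe.scheduler._class_name`. A minimal sketch of the intended per-request swap, assuming the startup snapshot is kept as a plain config dict under the illustrative name `original_scheduler_config`:

    # Sketch of a per-request scheduler swap. FlowMatchEulerDiscreteScheduler
    # exposes the timestep shift as `shift`; the multistep/UniPC-style solvers
    # take `flow_shift`.
    def apply_scheduler(pipe, scheduler_name, flow_shift, original_scheduler_config):
        scheduler_class = SCHEDULER_MAP[scheduler_name]
        config = dict(original_scheduler_config)
        if scheduler_class is FlowMatchEulerDiscreteScheduler:
            config["shift"] = flow_shift
        else:
            config["flow_shift"] = flow_shift
        pipe.scheduler = scheduler_class.from_config(config)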
@@ -216,9 +242,8 @@ def run_inference(
         generator=torch.Generator(device="cuda").manual_seed(current_seed),
     ).frames[0]
 
-    pipe.
-
-    return output_frames
+    pipe.scheduler = original_scheduler
+    return result
 
 
 def generate_video(
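Since the snapshot taken at startup is a config, restoring the default scheduler after the request amounts to rebuilding it from that config; a short sketch under that assumption (names illustrative):

    # Captured once at startup, before any swap:
    default_scheduler_cls = type(pipe.scheduler)
    original_scheduler_config = dict(pipe.scheduler.config)
    # ...after the request has finished, restore the default scheduler:
    pipe.scheduler = default_scheduler_cls.from_config(original_scheduler_config)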
@@ -233,7 +258,8 @@ def generate_video(
     seed=42,
     randomize_seed=False,
     quality=5,
-
+    scheduler="UniPCMultistep",
+    flow_shift=6.0,
     progress=gr.Progress(track_tqdm=True),
 ):
     """
@@ -263,8 +289,8 @@ def generate_video(
             Defaults to False.
         quality (float, optional): Video output quality. Default is 5. Uses variable bit rate.
             Highest quality is 10, lowest is 1.
-
-
+        scheduler (str, optional): The name of the scheduler to use for inference. Defaults to "UniPCMultistep".
+        flow_shift (float, optional): The flow shift value for compatible schedulers. Defaults to 6.0.
         progress (gr.Progress, optional): Gradio progress tracker. Defaults to gr.Progress(track_tqdm=True).
 
     Returns:
@@ -303,7 +329,8 @@ def generate_video(
         guidance_scale,
         guidance_scale_2,
         current_seed,
-
+        scheduler,
+        flow_shift,
         progress,
     )
 
@@ -323,7 +350,6 @@ with gr.Blocks() as demo:
         with gr.Column():
             input_image_component = gr.Image(type="pil", label="Input Image")
             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-            live_wallpaper_style_checkbox = gr.Checkbox(label="Live Wallpaper Style", value=False, interactive=True)
             duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.5, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
             steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
             quality_slider = gr.Slider(minimum=1, maximum=10, step=1, value=6, label="Video Quality")
@@ -335,6 +361,13 @@ with gr.Blocks() as demo:
             randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
             guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale - high noise stage")
             guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2 - low noise stage")
+            scheduler_dropdown = gr.Dropdown(
+                label="Scheduler",
+                choices=list(SCHEDULER_MAP.keys()),
+                value="UniPCMultistep",
+                info="Select a custom scheduler."
+            )
+            flow_shift_slider = gr.Slider(minimum=0.5, maximum=15.0, step=0.1, value=3.0, label="Flow Shift")
 
         generate_button = gr.Button("Generate Video", variant="primary")
         with gr.Column():
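The dropdown emits the selected key as a plain string and the slider a float; both reach `generate_video` positionally through the `ui_inputs` list below, so the list order has to match the function signature. A standalone sketch of that wiring; the echo callback is a stand-in, not the app's function:

    import gradio as gr

    with gr.Blocks() as sketch:
        scheduler_dd = gr.Dropdown(choices=list(SCHEDULER_MAP.keys()),
                                   value="UniPCMultistep", label="Scheduler")
        shift_sl = gr.Slider(minimum=0.5, maximum=15.0, step=0.1, value=3.0,
                             label="Flow Shift")
        out = gr.Textbox(label="Selection")
        btn = gr.Button("Echo")
        # The inputs order decides which positional argument each component
        # value becomes in the callback.
        btn.click(fn=lambda name, shift: f"{name} @ flow_shift={shift}",
                  inputs=[scheduler_dd, shift_sl], outputs=out)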
@@ -345,7 +378,7 @@ with gr.Blocks() as demo:
         input_image_component, last_image_component, prompt_input, steps_slider,
         negative_prompt_input, duration_seconds_input,
         guidance_scale_input, guidance_scale_2_input, seed_input, randomize_seed_checkbox,
-        quality_slider,
+        quality_slider, scheduler_dropdown, flow_shift_slider,
     ]
     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, file_output, seed_input])
 