video-ffmpeg

Running

App Files Files Community

Tim13ekd committed 9 days ago

Commit

882c245

verified ·

1 Parent(s): 6f097ba

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -628

app.py CHANGED Viewed

@@ -1,665 +1,103 @@
 import gradio as gr
-import spaces
-from PIL import Image
-from moviepy.editor import VideoFileClip, AudioFileClip
-import os
-from openai import OpenAI
 import subprocess
-from pathlib import Path
-import uuid
 import tempfile
-import shlex
 import shutil
-# Supported models configuration
-MODELS = {
-    "deepseek-ai/DeepSeek-V3": {
-        "base_url": "https://router.huggingface.co/sambanova/v1",
-        "env_key": "HF_TOKEN",
-        "model_name": "DeepSeek-V3-0324",
-    },
-}
-# Initialize client with first available model
-client = OpenAI(
-    base_url=next(iter(MODELS.values()))["base_url"],
-    api_key=os.environ[next(iter(MODELS.values()))["env_key"]],
-)
-allowed_medias = [
-    ".png",
-    ".jpg",
-    ".webp",
-    ".jpeg",
-    ".tiff",
-    ".bmp",
-    ".gif",
-    ".svg",
-    ".mp3",
-    ".wav",
-    ".ogg",
-    ".mp4",
-    ".avi",
-    ".mov",
-    ".mkv",
-    ".flv",
-    ".wmv",
-    ".webm",
-    ".mpg",
-    ".mpeg",
-    ".m4v",
-    ".3gp",
-    ".3g2",
-    ".3gpp",
 ]
-def get_files_infos(files):
-    results = []
-    for file in files:
-        file_path = Path(file.name)
-        info = {}
-        info["size"] = os.path.getsize(file_path)
-        # Sanitize filename by replacing spaces with underscores
-        info["name"] = file_path.name.replace(" ", "_")
-        file_extension = file_path.suffix
-        if file_extension in (".mp4", ".avi", ".mkv", ".mov"):
-            info["type"] = "video"
-            video = VideoFileClip(file.name)
-            info["duration"] = video.duration
-            info["dimensions"] = "{}x{}".format(video.size[0], video.size[1])
-            if video.audio:
-                info["type"] = "video/audio"
-                info["audio_channels"] = video.audio.nchannels
-            video.close()
-        elif file_extension in (".mp3", ".wav"):
-            info["type"] = "audio"
-            audio = AudioFileClip(file.name)
-            info["duration"] = audio.duration
-            info["audio_channels"] = audio.nchannels
-            audio.close()
-        elif file_extension in (
-            ".png",
-            ".jpg",
-            ".jpeg",
-            ".tiff",
-            ".bmp",
-            ".gif",
-            ".svg",
-        ):
-            info["type"] = "image"
-            img = Image.open(file.name)
-            info["dimensions"] = "{}x{}".format(img.size[0], img.size[1])
-        results.append(info)
-    return results
-def get_completion(
-    prompt,
-    files_info,
-    top_p,
-    temperature,
-    model_choice,
-    conversation_history=None,
-    previous_error=None,
-    previous_command=None,
-):
-    # Create table header
-    files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels |\n"
-    files_info_string += "|------|------|------------|-----------|--------|\n"
-    # Add each file as a table row
-    for file_info in files_info:
-        dimensions = file_info.get("dimensions", "-")
-        duration = (
-            f"{file_info.get('duration', '-')}s" if "duration" in file_info else "-"
-        )
-        audio = (
-            f"{file_info.get('audio_channels', '-')} channels"
-            if "audio_channels" in file_info
-            else "-"
-        )
-        files_info_string += f"| {file_info['type']} | {file_info['name']} | {dimensions} | {duration} | {audio} |\n"
-    # Build the user message with optional error feedback
-    user_content = f"""Always output the media as video/mp4 and output file with "output.mp4".
-The current assets and objective follow.
-AVAILABLE ASSETS LIST:
-{files_info_string}
-OBJECTIVE: {prompt} and output at "output.mp4"
-First, think step-by-step about what I'm asking for and reformulate it into a clear technical specification.
-Then provide the FFMPEG command that will accomplish this task."""
-    # Add error feedback if this is a retry
-    if previous_error and previous_command:
-        user_content += f"""
-IMPORTANT: This is a retry attempt. The previous command failed with the following error:
-PREVIOUS COMMAND (FAILED):
-{previous_command}
-ERROR MESSAGE:
-{previous_error}
-Please analyze the error and generate a corrected command that addresses the specific issue.
-COMMON SLIDESHOW ERROR FIXES:
-- If you see "do not match the corresponding output link" → Images have different dimensions, use scale+pad approach
-- If you see "Padded dimensions cannot be smaller than input dimensions" → Fix pad calculation or use standard resolution (1920x1080 or 1080x1920)
-- If you see "Failed to configure input pad" → Check scale and pad syntax, ensure proper filter chain
-- If you see "Invalid argument" in filters → Simplify filter_complex syntax and check parentheses
-FORMAT DETECTION KEYWORDS:
-- "vertical", "portrait", "9:16", "TikTok", "Instagram Stories", "phone" → Use 1080x1920
-- "horizontal", "landscape", "16:9", "YouTube", "TV" → Use 1920x1080 (default)
-- "square", "1:1", "Instagram post" → Use 1080x1080"""
-    user_content += "\n\nYOUR RESPONSE:"
-    # Initialize conversation with system message and first user message
-    if conversation_history is None:
-        messages = [
-            {
-                "role": "system",
-                "content": """
-You are a very experienced media engineer, controlling a UNIX terminal.
-You are an FFMPEG expert with years of experience and multiple contributions to the FFMPEG project.
-You are given:
-(1) a set of video, audio and/or image assets. Including their name, duration, dimensions and file size
-(2) the description of a new video you need to create from the list of assets
-Your objective is to generate the SIMPLEST POSSIBLE single ffmpeg command to create the requested video.
-Key requirements:
-    - First, think step-by-step about what the user is asking for and reformulate it into a clear technical specification
-    - Use the absolute minimum number of ffmpeg options needed
-    - Avoid complex filter chains or filter_complex if possible
-    - Prefer simple concatenation, scaling, and basic filters
-    - Output exactly ONE command that will be directly pasted into the terminal
-    - Never output multiple commands chained together
-    - Output the command in a single line (no line breaks or multiple lines)
-    - If the user asks for waveform visualization make sure to set the mode to `line` with and the use the full width of the video. Also concatenate the audio into a single channel.
-    - For image sequences: Use -framerate and pattern matching (like 'img%d.jpg') when possible, falling back to individual image processing with -loop 1 and appropriate filters only when necessary.
-    - When showing file operations or commands, always use explicit paths and filenames without wildcards - avoid using asterisk (*) or glob patterns. Instead, use specific numbered sequences (like %d), explicit file lists, or show the full filename.
-CRITICAL SLIDESHOW GUIDANCE:
-When creating slideshows from multiple images with different dimensions, ALWAYS follow this proven pattern:
-1. CHOOSE A STANDARD RESOLUTION: Pick 1920x1080 (1080p) as the default target resolution for slideshows, UNLESS the user explicitly requests a different format (e.g., "vertical video", "9:16 ratio", "portrait mode", "TikTok format" → use 1080x1920)
-2. USE SIMPLE SCALE+PAD APPROACH: For each image, scale to fit within the chosen resolution maintaining aspect ratio, then pad with black bars
-3. PROVEN SLIDESHOW PATTERN:
-   ```
-   ffmpeg -loop 1 -t 3 -i image1.jpg -loop 1 -t 3 -i image2.jpg -filter_complex "[0]scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2,setsar=1[v0];[1]scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2,setsar=1[v1];[v0][v1]concat=n=2:v=1:a=0" -c:v libx264 -pix_fmt yuv420p -movflags +faststart output.mp4
-   ```
-4. SLIDESHOW RULES:
-   - Use 1920x1080 as target resolution by default, adjust if user specifies format
-   - For horizontal: scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2
-   - For vertical: scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2
-   - Always add setsar=1 after padding to fix aspect ratio issues
-   - Use 3-second duration per image by default (-t 3)
-   - For 3+ images, extend the pattern: [v0][v1][v2]concat=n=3:v=1:a=0
-5. DIMENSION MISMATCH FIXES:
-   - Never try to concat images with different dimensions directly
-   - Always normalize dimensions first with scale+pad
-   - Black padding is preferable to stretching/distorting images
-6. SLIDESHOW TRANSITIONS:
-   - For fade transitions, add fade=t=in:st=0:d=0.5,fade=t=out:st=2.5:d=0.5 after setsar=1
-   - Keep transitions simple - complex transitions often fail
-   - Only add transitions if specifically requested
-7. SLIDESHOW TIMING:
-   - Default to 3 seconds per image
-   - Adjust timing based on user request (e.g., "5 seconds per image")
-   - Total duration = (number of images × seconds per image)
-Remember: Simpler is better. Only use advanced ffmpeg features if absolutely necessary for the requested output.
-""",
-            },
-            {
-                "role": "user",
-                "content": user_content,
-            },
-        ]
-    else:
-        # Use existing conversation history
-        messages = conversation_history[:]
-        # If there's a previous error, add it as a separate message exchange
-        if previous_error and previous_command:
-            # Add the failed command as assistant response
-            messages.append({
-                "role": "assistant",
-                "content": f"I'll execute this FFmpeg command:\n\n```bash\n{previous_command}\n```"
-            })
-            # Add the error as user feedback
-            messages.append({
-                "role": "user",
-                "content": f"""The command failed with the following error:
-ERROR MESSAGE:
-{previous_error}
-Please analyze the error and generate a corrected command that addresses the specific issue.
-COMMON SLIDESHOW ERROR FIXES:
-- If you see "do not match the corresponding output link" → Images have different dimensions, use scale+pad approach
-- If you see "Padded dimensions cannot be smaller than input dimensions" → Fix pad calculation or use standard resolution (1920x1080 or 1080x1920)
-- If you see "Failed to configure input pad" → Check scale and pad syntax, ensure proper filter chain
-- If you see "Invalid argument" in filters → Simplify filter_complex syntax and check parentheses
-FORMAT DETECTION KEYWORDS:
-- "vertical", "portrait", "9:16", "TikTok", "Instagram Stories", "phone" → Use 1080x1920
-- "horizontal", "landscape", "16:9", "YouTube", "TV" → Use 1920x1080 (default)
-- "square", "1:1", "Instagram post" → Use 1080x1080
-Please provide a corrected FFmpeg command."""
-            })
-        else:
-            # Add new user request to existing conversation
-            messages.append({
-                "role": "user",
-                "content": user_content,
-            })
     try:
-        # Print the complete prompt
-        print("\n=== COMPLETE PROMPT ===")
-        for msg in messages:
-            print(f"\n[{msg['role'].upper()}]:")
-            print(msg["content"])
-        print("=====================\n")
-        if model_choice not in MODELS:
-            raise ValueError(f"Model {model_choice} is not supported")
-        model_config = MODELS[model_choice]
-        client.base_url = model_config["base_url"]
-        client.api_key = os.environ[model_config["env_key"]]
-        model = model_config.get("model_name", model_choice)
-        completion = client.chat.completions.create(
-            model=model,
-            messages=messages,
-            temperature=temperature,
-            top_p=top_p,
-            max_tokens=2048,
-        )
-        content = completion.choices[0].message.content
-        print(f"\n=== RAW API RESPONSE ===\n{content}\n========================\n")
-        # Extract command from code block if present
-        import re
-        command = None
-        # Try multiple code block patterns
-        code_patterns = [
-            r"```(?:bash|sh|shell)?\n(.*?)\n```",  # Standard code blocks
-            r"```\n(.*?)\n```",  # Plain code blocks
-            r"`([^`]*ffmpeg[^`]*)`",  # Inline code with ffmpeg
-        ]
-        for pattern in code_patterns:
-            matches = re.findall(pattern, content, re.DOTALL | re.IGNORECASE)
-            for match in matches:
-                if "ffmpeg" in match.lower():
-                    command = match.strip()
-                    break
-            if command:
-                break
-        # If no code block found, try to find ffmpeg lines directly
-        if not command:
-            ffmpeg_lines = [
-                line.strip()
-                for line in content.split("\n")
-                if line.strip().lower().startswith("ffmpeg")
-            ]
-            if ffmpeg_lines:
-                command = ffmpeg_lines[0]
-        # Last resort: look for any line containing ffmpeg
-        if not command:
-            for line in content.split("\n"):
-                line = line.strip()
-                if "ffmpeg" in line.lower() and len(line) > 10:
-                    command = line
-                    break
-        if not command:
-            print(f"ERROR: No ffmpeg command found in response")
-            command = content.replace("\n", " ").strip()
-        print(f"=== EXTRACTED COMMAND ===\n{command}\n========================\n")
-        # remove output.mp4 with the actual output file path
-        command = command.replace("output.mp4", "")
-        # Add the assistant's response to conversation history
-        messages.append({
-            "role": "assistant",
-            "content": content
-        })
-        return command, messages
-    except Exception as e:
-        raise Exception("API Error")
-@spaces.GPU(duration=120)
-def execute_ffmpeg_command(args, temp_dir, output_file_path):
-    """Execute FFmpeg command with GPU acceleration"""
-    final_command = args + ["-y", output_file_path]
-    print(f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n")
-    subprocess.run(final_command, cwd=temp_dir)
-    return output_file_path
-def compose_video(
-    prompt: str,
-    files: list = None,
-    top_p: float = 0.7,
-    temperature: float = 0.1,
-    model_choice: str = "deepseek-ai/DeepSeek-V3",
-) -> str:
-    """
-    Compose videos from existing media assets using natural language instructions.
-    This tool is NOT for AI video generation. Instead, it uses AI to generate FFmpeg
-    commands that combine, edit, and transform your uploaded images, videos, and audio
-    files based on natural language descriptions.
-    Args:
-        prompt (str): Natural language instructions for video composition (e.g., "Create a slideshow with background music")
-        files (list, optional): List of media files (images, videos, audio) to use
-        top_p (float): Top-p sampling parameter for AI model (0.0-1.0, default: 0.7)
-        temperature (float): Temperature parameter for AI model creativity (0.0-5.0, default: 0.1)
-        model_choice (str): AI model to use for command generation (default: "deepseek-ai/DeepSeek-V3")
-    Returns:
-        str: Path to the generated video file
-    Example:
-        compose_video("Create a 10-second slideshow from the images with fade transitions", files=[img1, img2, img3])
-    """
-    return update(files or [], prompt, top_p, temperature, model_choice)
-def update(
-    files,
-    prompt,
-    top_p=1,
-    temperature=1,
-    model_choice="deepseek-ai/DeepSeek-V3",
-):
-    if prompt == "":
-        raise gr.Error("Please enter a prompt.")
-    files_info = get_files_infos(files)
-    # disable this if you're running the app locally or on your own server
-    for file_info in files_info:
-        if file_info["type"] == "video":
-            if file_info["duration"] > 120:
-                raise gr.Error(
-                    "Please make sure all videos are less than 2 minute long."
-                )
-        if file_info["size"] > 100000000:
-            raise gr.Error("Please make sure all files are less than 100MB in size.")
-    attempts = 0
-    command_attempts = []
-    previous_error = None
-    previous_command = None
-    conversation_history = None
-    while attempts < 2:
-        print("ATTEMPT", attempts + 1)
-        try:
-            command_string, conversation_history = get_completion(
-                prompt,
-                files_info,
-                top_p,
-                temperature,
-                model_choice,
-                conversation_history,
-                previous_error,
-                previous_command,
-            )
-            print(
-                f"""///PROMPT {prompt} \n\n/// START OF COMMAND ///:\n\n{command_string}\n\n/// END OF COMMAND ///\n\n"""
-            )
-            # split command string into list of arguments
-            args = shlex.split(command_string)
-            if args[0] != "ffmpeg":
-                raise Exception("Command does not start with ffmpeg")
-            temp_dir = tempfile.mkdtemp()
-            # copy files to temp dir with sanitized names
-            for file in files:
-                file_path = Path(file.name)
-                sanitized_name = file_path.name.replace(" ", "_")
-                shutil.copy(file_path, Path(temp_dir) / sanitized_name)
-            # test if ffmpeg command is valid dry run
-            ffmpeg_dry_run = subprocess.run(
-                args + ["-f", "null", "-"],
-                stderr=subprocess.PIPE,
-                text=True,
-                cwd=temp_dir,
-            )
-            # Extract command for display
-            command_for_display = f"ffmpeg {' '.join(args[1:])} -y output.mp4"
-            if ffmpeg_dry_run.returncode == 0:
-                print("Command is valid.")
-                # Add successful command to attempts
-                command_attempts.append(
-                    {
-                        "command": command_for_display,
-                        "status": "✅ Valid",
-                        "attempt": attempts + 1,
-                    }
-                )
-            else:
-                print("Command is not valid. Error output:")
-                print(ffmpeg_dry_run.stderr)
-                # Add failed command to attempts with error
-                command_attempts.append(
-                    {
-                        "command": command_for_display,
-                        "status": "❌ Invalid",
-                        "error": ffmpeg_dry_run.stderr,
-                        "attempt": attempts + 1,
-                    }
-                )
-                # Store error details for next retry
-                previous_error = ffmpeg_dry_run.stderr
-                previous_command = command_for_display
-                raise Exception(
-                    f"FFMPEG command validation failed: {ffmpeg_dry_run.stderr}"
-                )
-            output_file_name = f"output_{uuid.uuid4()}.mp4"
-            output_file_path = str((Path(temp_dir) / output_file_name).resolve())
-            execute_ffmpeg_command(args, temp_dir, output_file_path)
-            # Generate command display with all attempts
-            command_display = generate_command_display(command_attempts)
-            return output_file_path, gr.update(value=command_display)
-        except Exception as e:
-            attempts += 1
-            if attempts >= 2:
-                print("FROM UPDATE", e)
-                # Show all attempted commands even on final failure
-                command_display = generate_command_display(command_attempts)
-                command_display += (
-                    f"\n\n### Final Error\n❌ All attempts failed. Last error: {str(e)}"
-                )
-                return None, gr.update(value=command_display)
-def generate_command_display(command_attempts):
-    """Generate a markdown display of all command attempts"""
-    if not command_attempts:
-        return "### No commands generated"
-    display = "### Generated Commands\n\n"
-    for attempt in command_attempts:
-        display += f"**Attempt {attempt['attempt']}** {attempt['status']}\n"
-        display += f"```bash\n{attempt['command']}\n```\n"
-        if attempt["status"] == "❌ Invalid" and "error" in attempt:
-            display += f"<details>\n<summary>🔍 Error Details</summary>\n\n```\n{attempt['error']}\n```\n</details>\n\n"
-        else:
-            display += "\n"
-    return display
-# Create MCP-compatible interface
-mcp_interface = gr.Interface(
-    fn=compose_video,
-    inputs=[
-        gr.Textbox(
-            value="Create a slideshow with background music",
-            label="Video Composition Instructions",
-        ),
-        gr.File(file_count="multiple", label="Media Files", file_types=allowed_medias),
-        gr.Slider(0.0, 1.0, value=0.7, label="Top-p"),
-        gr.Slider(0.0, 5.0, value=0.1, label="Temperature"),
-        gr.Radio(
-            choices=list(MODELS.keys()), value=list(MODELS.keys())[0], label="Model"
-        ),
-    ],
-    outputs=gr.Video(label="Generated Video"),
-    title="AI Video Composer MCP Tool",
-    description="Compose videos from media assets using natural language",
-)
 with gr.Blocks() as demo:
     gr.Markdown(
-        """
-            # 🏞 AI Video Composer
-            Compose new videos from your assets using natural language. Add video, image and audio assets and let [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324) generate a new video for you (using FFMPEG).
-        """,
-        elem_id="header",
     )
     with gr.Row():
         with gr.Column():
-            user_files = gr.File(
                 file_count="multiple",
-                label="Media files",
-                file_types=allowed_medias,
             )
-            user_prompt = gr.Textbox(
-                placeholder="eg: Remove the 3 first seconds of the video",
-                label="Instructions",
                 lines=3,
             )
-            btn = gr.Button("Run")
-            with gr.Accordion("Parameters", open=False):
-                model_choice = gr.Radio(
-                    choices=list(MODELS.keys()),
-                    value=list(MODELS.keys())[0],
-                    label="Model",
-                )
-                top_p = gr.Slider(
-                    minimum=-0,
-                    maximum=1.0,
-                    value=0.7,
-                    step=0.05,
-                    interactive=True,
-                    label="Top-p (nucleus sampling)",
-                )
-                temperature = gr.Slider(
-                    minimum=-0,
-                    maximum=5.0,
-                    value=0.1,
-                    step=0.1,
-                    interactive=True,
-                    label="Temperature",
-                )
-        with gr.Column():
-            generated_video = gr.Video(
-                interactive=False, label="Generated Video", include_audio=True
-            )
-            generated_command = gr.Markdown()
-        btn.click(
-            fn=update,
-            inputs=[user_files, user_prompt, top_p, temperature, model_choice],
-            outputs=[generated_video, generated_command],
-        )
-    with gr.Row():
-        gr.Examples(
-            examples=[
-                [
-                    ["./examples/ai_talk.wav", "./examples/bg-image.png"],
-                    "Use the image as the background with a waveform visualization for the audio positioned in center of the video.",
-                    0.7,
-                    0.1,
-                    list(MODELS.keys())[0],
-                ],
-                [
-                    ["./examples/ai_talk.wav", "./examples/bg-image.png"],
-                    "Use the image as the background with a waveform visualization for the audio positioned in center of the video. Make sure the waveform has a max height of 250 pixels.",
-                    0.7,
-                    0.1,
-                    list(MODELS.keys())[0],
-                ],
-                [
-                    [
-                        "./examples/cat1.jpeg",
-                        "./examples/cat2.jpeg",
-                        "./examples/cat3.jpeg",
-                        "./examples/cat4.jpeg",
-                        "./examples/cat5.jpeg",
-                        "./examples/cat6.jpeg",
-                        "./examples/heat-wave.mp3",
-                    ],
-                    "Create a 3x2 grid of the cat images with the audio as background music. Make the video duration match the audio duration.",
-                    0.7,
-                    0.1,
-                    list(MODELS.keys())[0],
-                ],
-            ],
-            inputs=[user_files, user_prompt, top_p, temperature, model_choice],
-            outputs=[generated_video, generated_command],
-            fn=update,
-            run_on_click=True,
-            cache_examples=False,
-        )
-    with gr.Row():
-        gr.Markdown(
-            """
-            If you have idea to improve this please open a PR:
-            [![Open a Pull Request](https://huggingface.co/datasets/huggingface/badges/raw/main/open-a-pr-lg-light.svg)](https://huggingface.co/spaces/huggingface-projects/video-composer-gpt4/discussions)
-            """,
-        )
-# Launch MCP interface for tool access
-mcp_interface.queue(default_concurrency_limit=200)
-# Launch main demo
-demo.queue(default_concurrency_limit=200)
-demo.launch(show_api=False, ssr_mode=False, mcp_server=True)

 import gradio as gr
 import subprocess
 import tempfile
 import shutil
+from pathlib import Path
+import shlex
+import uuid
+import os
+# File extensions offered by the upload widget (passed to gr.File as
+# `file_types` further down): still images, audio, and video containers.
+ALLOWED_MEDIA = [
+    ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif",
+    ".mp3", ".wav", ".ogg",
+    ".mp4", ".mov", ".mkv", ".avi", ".webm"
 ]
+def _build_ffmpeg_args(command, output_path):
+    """Turn the user's command text into an argv list for ``subprocess.run``.
+
+    Args:
+        command: Raw text from the command textbox, with or without a
+            leading ``ffmpeg``.
+        output_path: Path (as ``str``) of the file the app hands back to
+            the UI.
+
+    Returns:
+        list[str]: Argument vector starting with ``"ffmpeg"`` that names
+        ``output_path`` as an output.
+
+    Raises:
+        gr.Error: If the command cannot be tokenised (e.g. unbalanced quotes).
+    """
+    try:
+        args = shlex.split(command)
+    except ValueError as err:
+        raise gr.Error(f"Ungültiger FFmpeg Command: {err}") from err
+    # Compare the first *token*, not a string prefix: "ffmpegfoo ..." must
+    # not end up being executed as a different program.
+    if not args or args[0] != "ffmpeg":
+        args.insert(0, "ffmpeg")
+    # The placeholder name "output.mp4" stands for the managed output file.
+    args = [output_path if a == "output.mp4" else a for a in args]
+    # Only the managed path counts as "an output was given". An input such as
+    # input.mp4 also ends in ".mp4" and must not suppress the append.
+    if output_path not in args:
+        args.append(output_path)
+    return args
+
+
+def run_ffmpeg(files, command):
+    """Run a user-supplied FFmpeg command against the uploaded files.
+
+    The uploads are copied into a fresh temporary directory (spaces in file
+    names become underscores) and the command is executed with that directory
+    as working directory. The literal argument ``output.mp4`` is replaced by
+    a unique output path inside the same directory.
+
+    Args:
+        files: Uploaded files; each item is either an object exposing its
+            path as ``.name`` or a plain path string.
+        command: FFmpeg command line, with or without the leading ``ffmpeg``.
+
+    Returns:
+        tuple[str, str]: Path of the generated video, and a Markdown code
+        block showing the command that was executed.
+
+    Raises:
+        gr.Error: If no file or no command was given, the command cannot be
+            parsed, FFmpeg exits with a non-zero status, or FFmpeg did not
+            produce the output file.
+    """
+    if not files:
+        raise gr.Error("Bitte lade mindestens eine Datei hoch.")
+    if not command or not command.strip():
+        raise gr.Error("Bitte gib einen FFmpeg Command ein.")
+    temp_dir = tempfile.mkdtemp()
     try:
+        # Copy the uploads into the temp directory under sanitised names.
+        for file in files:
+            src = Path(getattr(file, "name", file))
+            dst = Path(temp_dir) / src.name.replace(" ", "_")
+            shutil.copy(src, dst)
+        output_path = Path(temp_dir) / f"output_{uuid.uuid4()}.mp4"
+        args = _build_ffmpeg_args(command.strip(), str(output_path))
+        process = subprocess.run(
+            args,
+            cwd=temp_dir,
+            # ffmpeg may ask questions on stdin (e.g. before overwriting a
+            # file); give it no stdin so a request can never hang on a prompt.
+            stdin=subprocess.DEVNULL,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+        )
+        if process.returncode != 0:
+            raise gr.Error(f"FFmpeg Fehler:\n{process.stderr}")
+        # A zero exit status does not prove a file was written
+        # (e.g. "-version"), so check before handing the path to the UI.
+        if not output_path.is_file():
+            raise gr.Error("FFmpeg hat keine Ausgabedatei erzeugt.")
+        # Quote each argument so the displayed command can be pasted as-is.
+        shown = " ".join(shlex.quote(a) for a in args)
+        return str(output_path), f"```bash\n{shown}\n```"
+    except Exception:
+        # On failure nothing references the temp directory any more, so
+        # remove it. On success it must stay: the returned path lives in it.
+        shutil.rmtree(temp_dir, ignore_errors=True)
+        raise
 with gr.Blocks() as demo:
+    # Page title and a short description (user-facing text is German).
+    gr.Markdown("# 🎬 FFmpeg Command Tool")
     gr.Markdown(
+        "Lade Dateien hoch und führe **manuell eingegebene FFmpeg-Commands** aus. "
+        "Kein KI-Processing – 100 % direktes FFmpeg."
     )
+    # One row with two columns: the first holds the inputs (file upload,
+    # command text, run button), the second holds the outputs.
     with gr.Row():
         with gr.Column():
+            files = gr.File(
                 file_count="multiple",
+                label="Media Dateien",
+                file_types=ALLOWED_MEDIA,
             )
+            command = gr.Textbox(
+                label="FFmpeg Command",
+                placeholder="z.B.: -i input.mp4 -ss 00:00:05 -t 10 -c copy output.mp4",
                 lines=3,
             )
+            run_btn = gr.Button("Run FFmpeg")
+        with gr.Column():
+            video_out = gr.Video(label="Output Video")
+            command_out = gr.Markdown(label="Ausgeführter Command")
+    # Clicking the button calls run_ffmpeg(files, command); its two return
+    # values fill the video player and the Markdown panel, in that order.
+    run_btn.click(
+        fn=run_ffmpeg,
+        inputs=[files, command],
+        outputs=[video_out, command_out],
+    )
+# Set up Gradio's request queue, then start the app.
+demo.queue()
+demo.launch()