Tim13ekd commited on
Commit
882c245
·
verified ·
1 Parent(s): 6f097ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -628
app.py CHANGED
@@ -1,665 +1,103 @@
1
  import gradio as gr
2
- import spaces
3
-
4
- from PIL import Image
5
- from moviepy.editor import VideoFileClip, AudioFileClip
6
-
7
- import os
8
- from openai import OpenAI
9
  import subprocess
10
- from pathlib import Path
11
- import uuid
12
  import tempfile
13
- import shlex
14
  import shutil
 
 
 
 
15
 
16
- # Supported models configuration
17
- MODELS = {
18
- "deepseek-ai/DeepSeek-V3": {
19
- "base_url": "https://router.huggingface.co/sambanova/v1",
20
- "env_key": "HF_TOKEN",
21
- "model_name": "DeepSeek-V3-0324",
22
- },
23
- }
24
-
25
- # Initialize client with first available model
26
- client = OpenAI(
27
- base_url=next(iter(MODELS.values()))["base_url"],
28
- api_key=os.environ[next(iter(MODELS.values()))["env_key"]],
29
- )
30
-
31
- allowed_medias = [
32
- ".png",
33
- ".jpg",
34
- ".webp",
35
- ".jpeg",
36
- ".tiff",
37
- ".bmp",
38
- ".gif",
39
- ".svg",
40
- ".mp3",
41
- ".wav",
42
- ".ogg",
43
- ".mp4",
44
- ".avi",
45
- ".mov",
46
- ".mkv",
47
- ".flv",
48
- ".wmv",
49
- ".webm",
50
- ".mpg",
51
- ".mpeg",
52
- ".m4v",
53
- ".3gp",
54
- ".3g2",
55
- ".3gpp",
56
  ]
57
 
58
 
59
- def get_files_infos(files):
60
- results = []
61
- for file in files:
62
- file_path = Path(file.name)
63
- info = {}
64
- info["size"] = os.path.getsize(file_path)
65
- # Sanitize filename by replacing spaces with underscores
66
- info["name"] = file_path.name.replace(" ", "_")
67
- file_extension = file_path.suffix
68
-
69
- if file_extension in (".mp4", ".avi", ".mkv", ".mov"):
70
- info["type"] = "video"
71
- video = VideoFileClip(file.name)
72
- info["duration"] = video.duration
73
- info["dimensions"] = "{}x{}".format(video.size[0], video.size[1])
74
- if video.audio:
75
- info["type"] = "video/audio"
76
- info["audio_channels"] = video.audio.nchannels
77
- video.close()
78
- elif file_extension in (".mp3", ".wav"):
79
- info["type"] = "audio"
80
- audio = AudioFileClip(file.name)
81
- info["duration"] = audio.duration
82
- info["audio_channels"] = audio.nchannels
83
- audio.close()
84
- elif file_extension in (
85
- ".png",
86
- ".jpg",
87
- ".jpeg",
88
- ".tiff",
89
- ".bmp",
90
- ".gif",
91
- ".svg",
92
- ):
93
- info["type"] = "image"
94
- img = Image.open(file.name)
95
- info["dimensions"] = "{}x{}".format(img.size[0], img.size[1])
96
- results.append(info)
97
- return results
98
-
99
-
100
- def get_completion(
101
- prompt,
102
- files_info,
103
- top_p,
104
- temperature,
105
- model_choice,
106
- conversation_history=None,
107
- previous_error=None,
108
- previous_command=None,
109
- ):
110
- # Create table header
111
- files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels |\n"
112
- files_info_string += "|------|------|------------|-----------|--------|\n"
113
-
114
- # Add each file as a table row
115
- for file_info in files_info:
116
- dimensions = file_info.get("dimensions", "-")
117
- duration = (
118
- f"{file_info.get('duration', '-')}s" if "duration" in file_info else "-"
119
- )
120
- audio = (
121
- f"{file_info.get('audio_channels', '-')} channels"
122
- if "audio_channels" in file_info
123
- else "-"
124
- )
125
-
126
- files_info_string += f"| {file_info['type']} | {file_info['name']} | {dimensions} | {duration} | {audio} |\n"
127
-
128
- # Build the user message with optional error feedback
129
- user_content = f"""Always output the media as video/mp4 and output file with "output.mp4".
130
- The current assets and objective follow.
131
-
132
- AVAILABLE ASSETS LIST:
133
-
134
- {files_info_string}
135
-
136
- OBJECTIVE: {prompt} and output at "output.mp4"
137
-
138
- First, think step-by-step about what I'm asking for and reformulate it into a clear technical specification.
139
- Then provide the FFMPEG command that will accomplish this task."""
140
-
141
- # Add error feedback if this is a retry
142
- if previous_error and previous_command:
143
- user_content += f"""
144
-
145
- IMPORTANT: This is a retry attempt. The previous command failed with the following error:
146
-
147
- PREVIOUS COMMAND (FAILED):
148
- {previous_command}
149
-
150
- ERROR MESSAGE:
151
- {previous_error}
152
-
153
- Please analyze the error and generate a corrected command that addresses the specific issue.
154
-
155
- COMMON SLIDESHOW ERROR FIXES:
156
- - If you see "do not match the corresponding output link" → Images have different dimensions, use scale+pad approach
157
- - If you see "Padded dimensions cannot be smaller than input dimensions" → Fix pad calculation or use standard resolution (1920x1080 or 1080x1920)
158
- - If you see "Failed to configure input pad" → Check scale and pad syntax, ensure proper filter chain
159
- - If you see "Invalid argument" in filters → Simplify filter_complex syntax and check parentheses
160
-
161
- FORMAT DETECTION KEYWORDS:
162
- - "vertical", "portrait", "9:16", "TikTok", "Instagram Stories", "phone" → Use 1080x1920
163
- - "horizontal", "landscape", "16:9", "YouTube", "TV" → Use 1920x1080 (default)
164
- - "square", "1:1", "Instagram post" → Use 1080x1080"""
165
-
166
- user_content += "\n\nYOUR RESPONSE:"
167
-
168
- # Initialize conversation with system message and first user message
169
- if conversation_history is None:
170
- messages = [
171
- {
172
- "role": "system",
173
- "content": """
174
- You are a very experienced media engineer, controlling a UNIX terminal.
175
- You are an FFMPEG expert with years of experience and multiple contributions to the FFMPEG project.
176
-
177
- You are given:
178
- (1) a set of video, audio and/or image assets. Including their name, duration, dimensions and file size
179
- (2) the description of a new video you need to create from the list of assets
180
-
181
- Your objective is to generate the SIMPLEST POSSIBLE single ffmpeg command to create the requested video.
182
-
183
- Key requirements:
184
- - First, think step-by-step about what the user is asking for and reformulate it into a clear technical specification
185
- - Use the absolute minimum number of ffmpeg options needed
186
- - Avoid complex filter chains or filter_complex if possible
187
- - Prefer simple concatenation, scaling, and basic filters
188
- - Output exactly ONE command that will be directly pasted into the terminal
189
- - Never output multiple commands chained together
190
- - Output the command in a single line (no line breaks or multiple lines)
191
- - If the user asks for waveform visualization make sure to set the mode to `line` with and the use the full width of the video. Also concatenate the audio into a single channel.
192
- - For image sequences: Use -framerate and pattern matching (like 'img%d.jpg') when possible, falling back to individual image processing with -loop 1 and appropriate filters only when necessary.
193
- - When showing file operations or commands, always use explicit paths and filenames without wildcards - avoid using asterisk (*) or glob patterns. Instead, use specific numbered sequences (like %d), explicit file lists, or show the full filename.
194
-
195
- CRITICAL SLIDESHOW GUIDANCE:
196
- When creating slideshows from multiple images with different dimensions, ALWAYS follow this proven pattern:
197
-
198
- 1. CHOOSE A STANDARD RESOLUTION: Pick 1920x1080 (1080p) as the default target resolution for slideshows, UNLESS the user explicitly requests a different format (e.g., "vertical video", "9:16 ratio", "portrait mode", "TikTok format" → use 1080x1920)
199
- 2. USE SIMPLE SCALE+PAD APPROACH: For each image, scale to fit within the chosen resolution maintaining aspect ratio, then pad with black bars
200
- 3. PROVEN SLIDESHOW PATTERN:
201
- ```
202
- ffmpeg -loop 1 -t 3 -i image1.jpg -loop 1 -t 3 -i image2.jpg -filter_complex "[0]scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2,setsar=1[v0];[1]scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2,setsar=1[v1];[v0][v1]concat=n=2:v=1:a=0" -c:v libx264 -pix_fmt yuv420p -movflags +faststart output.mp4
203
- ```
204
-
205
- 4. SLIDESHOW RULES:
206
- - Use 1920x1080 as target resolution by default, adjust if user specifies format
207
- - For horizontal: scale=1920:1080:force_original_aspect_ratio=decrease,pad=1920:1080:(ow-iw)/2:(oh-ih)/2
208
- - For vertical: scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2
209
- - Always add setsar=1 after padding to fix aspect ratio issues
210
- - Use 3-second duration per image by default (-t 3)
211
- - For 3+ images, extend the pattern: [v0][v1][v2]concat=n=3:v=1:a=0
212
-
213
- 5. DIMENSION MISMATCH FIXES:
214
- - Never try to concat images with different dimensions directly
215
- - Always normalize dimensions first with scale+pad
216
- - Black padding is preferable to stretching/distorting images
217
-
218
- 6. SLIDESHOW TRANSITIONS:
219
- - For fade transitions, add fade=t=in:st=0:d=0.5,fade=t=out:st=2.5:d=0.5 after setsar=1
220
- - Keep transitions simple - complex transitions often fail
221
- - Only add transitions if specifically requested
222
-
223
- 7. SLIDESHOW TIMING:
224
- - Default to 3 seconds per image
225
- - Adjust timing based on user request (e.g., "5 seconds per image")
226
- - Total duration = (number of images × seconds per image)
227
-
228
- Remember: Simpler is better. Only use advanced ffmpeg features if absolutely necessary for the requested output.
229
- """,
230
- },
231
- {
232
- "role": "user",
233
- "content": user_content,
234
- },
235
- ]
236
- else:
237
- # Use existing conversation history
238
- messages = conversation_history[:]
239
-
240
- # If there's a previous error, add it as a separate message exchange
241
- if previous_error and previous_command:
242
- # Add the failed command as assistant response
243
- messages.append({
244
- "role": "assistant",
245
- "content": f"I'll execute this FFmpeg command:\n\n```bash\n{previous_command}\n```"
246
- })
247
-
248
- # Add the error as user feedback
249
- messages.append({
250
- "role": "user",
251
- "content": f"""The command failed with the following error:
252
-
253
- ERROR MESSAGE:
254
- {previous_error}
255
 
256
- Please analyze the error and generate a corrected command that addresses the specific issue.
 
257
 
258
- COMMON SLIDESHOW ERROR FIXES:
259
- - If you see "do not match the corresponding output link" → Images have different dimensions, use scale+pad approach
260
- - If you see "Padded dimensions cannot be smaller than input dimensions" → Fix pad calculation or use standard resolution (1920x1080 or 1080x1920)
261
- - If you see "Failed to configure input pad" → Check scale and pad syntax, ensure proper filter chain
262
- - If you see "Invalid argument" in filters → Simplify filter_complex syntax and check parentheses
263
 
264
- FORMAT DETECTION KEYWORDS:
265
- - "vertical", "portrait", "9:16", "TikTok", "Instagram Stories", "phone" → Use 1080x1920
266
- - "horizontal", "landscape", "16:9", "YouTube", "TV" → Use 1920x1080 (default)
267
- - "square", "1:1", "Instagram post" → Use 1080x1080
268
-
269
- Please provide a corrected FFmpeg command."""
270
- })
271
- else:
272
- # Add new user request to existing conversation
273
- messages.append({
274
- "role": "user",
275
- "content": user_content,
276
- })
277
  try:
278
- # Print the complete prompt
279
- print("\n=== COMPLETE PROMPT ===")
280
- for msg in messages:
281
- print(f"\n[{msg['role'].upper()}]:")
282
- print(msg["content"])
283
- print("=====================\n")
284
-
285
- if model_choice not in MODELS:
286
- raise ValueError(f"Model {model_choice} is not supported")
287
-
288
- model_config = MODELS[model_choice]
289
- client.base_url = model_config["base_url"]
290
- client.api_key = os.environ[model_config["env_key"]]
291
- model = model_config.get("model_name", model_choice)
292
-
293
- completion = client.chat.completions.create(
294
- model=model,
295
- messages=messages,
296
- temperature=temperature,
297
- top_p=top_p,
298
- max_tokens=2048,
299
- )
300
- content = completion.choices[0].message.content
301
- print(f"\n=== RAW API RESPONSE ===\n{content}\n========================\n")
302
-
303
- # Extract command from code block if present
304
- import re
305
- command = None
306
-
307
- # Try multiple code block patterns
308
- code_patterns = [
309
- r"```(?:bash|sh|shell)?\n(.*?)\n```", # Standard code blocks
310
- r"```\n(.*?)\n```", # Plain code blocks
311
- r"`([^`]*ffmpeg[^`]*)`", # Inline code with ffmpeg
312
- ]
313
-
314
- for pattern in code_patterns:
315
- matches = re.findall(pattern, content, re.DOTALL | re.IGNORECASE)
316
- for match in matches:
317
- if "ffmpeg" in match.lower():
318
- command = match.strip()
319
- break
320
- if command:
321
- break
322
-
323
- # If no code block found, try to find ffmpeg lines directly
324
- if not command:
325
- ffmpeg_lines = [
326
- line.strip()
327
- for line in content.split("\n")
328
- if line.strip().lower().startswith("ffmpeg")
329
- ]
330
- if ffmpeg_lines:
331
- command = ffmpeg_lines[0]
332
-
333
- # Last resort: look for any line containing ffmpeg
334
- if not command:
335
- for line in content.split("\n"):
336
- line = line.strip()
337
- if "ffmpeg" in line.lower() and len(line) > 10:
338
- command = line
339
- break
340
-
341
- if not command:
342
- print(f"ERROR: No ffmpeg command found in response")
343
- command = content.replace("\n", " ").strip()
344
-
345
- print(f"=== EXTRACTED COMMAND ===\n{command}\n========================\n")
346
-
347
- # remove output.mp4 with the actual output file path
348
- command = command.replace("output.mp4", "")
349
-
350
- # Add the assistant's response to conversation history
351
- messages.append({
352
- "role": "assistant",
353
- "content": content
354
- })
355
-
356
- return command, messages
357
- except Exception as e:
358
- raise Exception("API Error")
359
-
360
-
361
- @spaces.GPU(duration=120)
362
- def execute_ffmpeg_command(args, temp_dir, output_file_path):
363
- """Execute FFmpeg command with GPU acceleration"""
364
- final_command = args + ["-y", output_file_path]
365
- print(f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n")
366
- subprocess.run(final_command, cwd=temp_dir)
367
- return output_file_path
368
-
369
-
370
- def compose_video(
371
- prompt: str,
372
- files: list = None,
373
- top_p: float = 0.7,
374
- temperature: float = 0.1,
375
- model_choice: str = "deepseek-ai/DeepSeek-V3",
376
- ) -> str:
377
- """
378
- Compose videos from existing media assets using natural language instructions.
379
 
380
- This tool is NOT for AI video generation. Instead, it uses AI to generate FFmpeg
381
- commands that combine, edit, and transform your uploaded images, videos, and audio
382
- files based on natural language descriptions.
383
 
384
- Args:
385
- prompt (str): Natural language instructions for video composition (e.g., "Create a slideshow with background music")
386
- files (list, optional): List of media files (images, videos, audio) to use
387
- top_p (float): Top-p sampling parameter for AI model (0.0-1.0, default: 0.7)
388
- temperature (float): Temperature parameter for AI model creativity (0.0-5.0, default: 0.1)
389
- model_choice (str): AI model to use for command generation (default: "deepseek-ai/DeepSeek-V3")
390
 
391
- Returns:
392
- str: Path to the generated video file
393
-
394
- Example:
395
- compose_video("Create a 10-second slideshow from the images with fade transitions", files=[img1, img2, img3])
396
- """
397
- return update(files or [], prompt, top_p, temperature, model_choice)
398
-
399
-
400
- def update(
401
- files,
402
- prompt,
403
- top_p=1,
404
- temperature=1,
405
- model_choice="deepseek-ai/DeepSeek-V3",
406
- ):
407
- if prompt == "":
408
- raise gr.Error("Please enter a prompt.")
409
-
410
- files_info = get_files_infos(files)
411
- # disable this if you're running the app locally or on your own server
412
- for file_info in files_info:
413
- if file_info["type"] == "video":
414
- if file_info["duration"] > 120:
415
- raise gr.Error(
416
- "Please make sure all videos are less than 2 minute long."
417
- )
418
- if file_info["size"] > 100000000:
419
- raise gr.Error("Please make sure all files are less than 100MB in size.")
420
-
421
- attempts = 0
422
- command_attempts = []
423
- previous_error = None
424
- previous_command = None
425
- conversation_history = None
426
-
427
- while attempts < 2:
428
- print("ATTEMPT", attempts + 1)
429
- try:
430
- command_string, conversation_history = get_completion(
431
- prompt,
432
- files_info,
433
- top_p,
434
- temperature,
435
- model_choice,
436
- conversation_history,
437
- previous_error,
438
- previous_command,
439
- )
440
- print(
441
- f"""///PROMPT {prompt} \n\n/// START OF COMMAND ///:\n\n{command_string}\n\n/// END OF COMMAND ///\n\n"""
442
- )
443
-
444
- # split command string into list of arguments
445
- args = shlex.split(command_string)
446
- if args[0] != "ffmpeg":
447
- raise Exception("Command does not start with ffmpeg")
448
- temp_dir = tempfile.mkdtemp()
449
- # copy files to temp dir with sanitized names
450
- for file in files:
451
- file_path = Path(file.name)
452
- sanitized_name = file_path.name.replace(" ", "_")
453
- shutil.copy(file_path, Path(temp_dir) / sanitized_name)
454
-
455
- # test if ffmpeg command is valid dry run
456
- ffmpeg_dry_run = subprocess.run(
457
- args + ["-f", "null", "-"],
458
- stderr=subprocess.PIPE,
459
- text=True,
460
- cwd=temp_dir,
461
- )
462
 
463
- # Extract command for display
464
- command_for_display = f"ffmpeg {' '.join(args[1:])} -y output.mp4"
465
 
466
- if ffmpeg_dry_run.returncode == 0:
467
- print("Command is valid.")
468
- # Add successful command to attempts
469
- command_attempts.append(
470
- {
471
- "command": command_for_display,
472
- "status": "✅ Valid",
473
- "attempt": attempts + 1,
474
- }
475
- )
476
- else:
477
- print("Command is not valid. Error output:")
478
- print(ffmpeg_dry_run.stderr)
479
 
480
- # Add failed command to attempts with error
481
- command_attempts.append(
482
- {
483
- "command": command_for_display,
484
- "status": "❌ Invalid",
485
- "error": ffmpeg_dry_run.stderr,
486
- "attempt": attempts + 1,
487
- }
488
- )
489
 
490
- # Store error details for next retry
491
- previous_error = ffmpeg_dry_run.stderr
492
- previous_command = command_for_display
493
-
494
- raise Exception(
495
- f"FFMPEG command validation failed: {ffmpeg_dry_run.stderr}"
496
- )
497
-
498
- output_file_name = f"output_{uuid.uuid4()}.mp4"
499
- output_file_path = str((Path(temp_dir) / output_file_name).resolve())
500
- execute_ffmpeg_command(args, temp_dir, output_file_path)
501
-
502
- # Generate command display with all attempts
503
- command_display = generate_command_display(command_attempts)
504
- return output_file_path, gr.update(value=command_display)
505
-
506
- except Exception as e:
507
- attempts += 1
508
- if attempts >= 2:
509
- print("FROM UPDATE", e)
510
- # Show all attempted commands even on final failure
511
- command_display = generate_command_display(command_attempts)
512
- command_display += (
513
- f"\n\n### Final Error\n❌ All attempts failed. Last error: {str(e)}"
514
- )
515
- return None, gr.update(value=command_display)
516
-
517
-
518
- def generate_command_display(command_attempts):
519
- """Generate a markdown display of all command attempts"""
520
- if not command_attempts:
521
- return "### No commands generated"
522
-
523
- display = "### Generated Commands\n\n"
524
-
525
- for attempt in command_attempts:
526
- display += f"**Attempt {attempt['attempt']}** {attempt['status']}\n"
527
- display += f"```bash\n{attempt['command']}\n```\n"
528
 
529
- if attempt["status"] == "❌ Invalid" and "error" in attempt:
530
- display += f"<details>\n<summary>🔍 Error Details</summary>\n\n```\n{attempt['error']}\n```\n</details>\n\n"
531
- else:
532
- display += "\n"
533
 
534
- return display
535
 
 
 
536
 
537
- # Create MCP-compatible interface
538
- mcp_interface = gr.Interface(
539
- fn=compose_video,
540
- inputs=[
541
- gr.Textbox(
542
- value="Create a slideshow with background music",
543
- label="Video Composition Instructions",
544
- ),
545
- gr.File(file_count="multiple", label="Media Files", file_types=allowed_medias),
546
- gr.Slider(0.0, 1.0, value=0.7, label="Top-p"),
547
- gr.Slider(0.0, 5.0, value=0.1, label="Temperature"),
548
- gr.Radio(
549
- choices=list(MODELS.keys()), value=list(MODELS.keys())[0], label="Model"
550
- ),
551
- ],
552
- outputs=gr.Video(label="Generated Video"),
553
- title="AI Video Composer MCP Tool",
554
- description="Compose videos from media assets using natural language",
555
- )
556
 
557
  with gr.Blocks() as demo:
 
558
  gr.Markdown(
559
- """
560
- # 🏞 AI Video Composer
561
- Compose new videos from your assets using natural language. Add video, image and audio assets and let [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324) generate a new video for you (using FFMPEG).
562
- """,
563
- elem_id="header",
564
  )
 
565
  with gr.Row():
566
  with gr.Column():
567
- user_files = gr.File(
568
  file_count="multiple",
569
- label="Media files",
570
- file_types=allowed_medias,
571
  )
572
- user_prompt = gr.Textbox(
573
- placeholder="eg: Remove the 3 first seconds of the video",
574
- label="Instructions",
 
575
  lines=3,
576
  )
577
- btn = gr.Button("Run")
578
- with gr.Accordion("Parameters", open=False):
579
- model_choice = gr.Radio(
580
- choices=list(MODELS.keys()),
581
- value=list(MODELS.keys())[0],
582
- label="Model",
583
- )
584
- top_p = gr.Slider(
585
- minimum=-0,
586
- maximum=1.0,
587
- value=0.7,
588
- step=0.05,
589
- interactive=True,
590
- label="Top-p (nucleus sampling)",
591
- )
592
- temperature = gr.Slider(
593
- minimum=-0,
594
- maximum=5.0,
595
- value=0.1,
596
- step=0.1,
597
- interactive=True,
598
- label="Temperature",
599
- )
600
- with gr.Column():
601
- generated_video = gr.Video(
602
- interactive=False, label="Generated Video", include_audio=True
603
- )
604
- generated_command = gr.Markdown()
605
-
606
- btn.click(
607
- fn=update,
608
- inputs=[user_files, user_prompt, top_p, temperature, model_choice],
609
- outputs=[generated_video, generated_command],
610
- )
611
- with gr.Row():
612
- gr.Examples(
613
- examples=[
614
- [
615
- ["./examples/ai_talk.wav", "./examples/bg-image.png"],
616
- "Use the image as the background with a waveform visualization for the audio positioned in center of the video.",
617
- 0.7,
618
- 0.1,
619
- list(MODELS.keys())[0],
620
- ],
621
- [
622
- ["./examples/ai_talk.wav", "./examples/bg-image.png"],
623
- "Use the image as the background with a waveform visualization for the audio positioned in center of the video. Make sure the waveform has a max height of 250 pixels.",
624
- 0.7,
625
- 0.1,
626
- list(MODELS.keys())[0],
627
- ],
628
- [
629
- [
630
- "./examples/cat1.jpeg",
631
- "./examples/cat2.jpeg",
632
- "./examples/cat3.jpeg",
633
- "./examples/cat4.jpeg",
634
- "./examples/cat5.jpeg",
635
- "./examples/cat6.jpeg",
636
- "./examples/heat-wave.mp3",
637
- ],
638
- "Create a 3x2 grid of the cat images with the audio as background music. Make the video duration match the audio duration.",
639
- 0.7,
640
- 0.1,
641
- list(MODELS.keys())[0],
642
- ],
643
- ],
644
- inputs=[user_files, user_prompt, top_p, temperature, model_choice],
645
- outputs=[generated_video, generated_command],
646
- fn=update,
647
- run_on_click=True,
648
- cache_examples=False,
649
- )
650
 
651
- with gr.Row():
652
- gr.Markdown(
653
- """
654
- If you have idea to improve this please open a PR:
655
 
656
- [![Open a Pull Request](https://huggingface.co/datasets/huggingface/badges/raw/main/open-a-pr-lg-light.svg)](https://huggingface.co/spaces/huggingface-projects/video-composer-gpt4/discussions)
657
- """,
658
- )
659
 
660
- # Launch MCP interface for tool access
661
- mcp_interface.queue(default_concurrency_limit=200)
 
 
 
662
 
663
- # Launch main demo
664
- demo.queue(default_concurrency_limit=200)
665
- demo.launch(show_api=False, ssr_mode=False, mcp_server=True)
 
1
  import gradio as gr
 
 
 
 
 
 
 
2
  import subprocess
 
 
3
  import tempfile
 
4
  import shutil
5
+ from pathlib import Path
6
+ import shlex
7
+ import uuid
8
+ import os
9
 
10
# File extensions the uploader accepts: images, audio, and video containers.
ALLOWED_MEDIA = [
    # images
    ".png",
    ".jpg",
    ".jpeg",
    ".webp",
    ".bmp",
    ".gif",
    # audio
    ".mp3",
    ".wav",
    ".ogg",
    # video
    ".mp4",
    ".mov",
    ".mkv",
    ".avi",
    ".webm",
]
15
 
16
 
17
def run_ffmpeg(files, command):
    """Run a user-supplied FFmpeg command against the uploaded media files.

    The uploads are copied into a fresh temp directory (spaces in filenames
    replaced by underscores) and the command is executed there with
    ``shell=False``, so shell metacharacters in the command have no effect.

    Args:
        files: Gradio file objects; each exposes a ``.name`` path attribute.
        command: FFmpeg command line as text, with or without the leading
            ``ffmpeg`` word. The placeholder output name ``output.mp4`` is
            rewritten to a unique file in the temp directory.

    Returns:
        Tuple of (path to the produced video file, markdown code block
        showing the command that was actually executed).

    Raises:
        gr.Error: when no files are uploaded, the command is empty, FFmpeg
            exits non-zero, or no output file was produced.
    """
    if not files:
        raise gr.Error("Bitte lade mindestens eine Datei hoch.")

    if not command.strip():
        raise gr.Error("Bitte gib einen FFmpeg Command ein.")

    # NOTE: the temp dir is intentionally not deleted here — the returned
    # video lives inside it and Gradio still needs to serve it.
    temp_dir = tempfile.mkdtemp()

    # Copy uploads into the temp dir; spaces in names break unquoted commands.
    for file in files:
        src = Path(file.name)
        dst = Path(temp_dir) / src.name.replace(" ", "_")
        shutil.copy(src, dst)

    output_path = Path(temp_dir) / f"output_{uuid.uuid4()}.mp4"

    cmd = command.strip()
    if not cmd.startswith("ffmpeg"):
        cmd = "ffmpeg " + cmd

    args = shlex.split(cmd)

    # Map the conventional placeholder name to our unique output file.
    args = [str(output_path) if a == "output.mp4" else a for a in args]

    # BUG FIX: the old check `any(a.endswith(".mp4") for a in args)` also
    # matched *input* .mp4 files, so commands like "-i clip.mp4 -c copy"
    # never received an output file. Only the trailing argument can be the
    # output in an ffmpeg command line, so inspect just that one.
    if not args[-1].endswith(".mp4"):
        args.append(str(output_path))

    # BUG FIX: force non-interactive overwrite; without -y ffmpeg prompts for
    # confirmation on an existing output file and blocks with no stdin.
    if "-y" not in args:
        args.insert(1, "-y")

    process = subprocess.run(
        args,
        cwd=temp_dir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    if process.returncode != 0:
        raise gr.Error(f"FFmpeg Fehler:\n{process.stderr}")

    # BUG FIX: the old version always returned `output_path`, which does not
    # exist when the user named a custom output file. Resolve the file the
    # command actually wrote (relative outputs are relative to the temp dir).
    final_out = Path(args[-1])
    if not final_out.is_absolute():
        final_out = Path(temp_dir) / final_out
    if not final_out.exists():
        raise gr.Error("FFmpeg hat keine Ausgabedatei erzeugt.")

    return str(final_out), f"```bash\n{' '.join(args)}\n```"
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🎬 FFmpeg Command Tool")
    gr.Markdown(
        "Lade Dateien hoch und führe **manuell eingegebene FFmpeg-Commands** aus. "
        "Kein KI-Processing 100 % direktes FFmpeg."
    )

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            media_files = gr.File(
                file_count="multiple",
                label="Media Dateien",
                file_types=ALLOWED_MEDIA,
            )
            ffmpeg_command = gr.Textbox(
                label="FFmpeg Command",
                placeholder="z.B.: -i input.mp4 -ss 00:00:05 -t 10 -c copy output.mp4",
                lines=3,
            )
            run_button = gr.Button("Run FFmpeg")

        # Right column: results.
        with gr.Column():
            result_video = gr.Video(label="Output Video")
            executed_command = gr.Markdown(label="Ausgeführter Command")

    run_button.click(
        fn=run_ffmpeg,
        inputs=[media_files, ffmpeg_command],
        outputs=[result_video, executed_command],
    )

demo.queue()
demo.launch()