video-ffmpeg

Sleeping

App Files Files Community

Tim13ekd committed 6 days ago

Commit

7644c1e

verified ·

1 Parent(s): 377308b

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -98

app.py CHANGED Viewed

@@ -27,24 +27,19 @@ def get_font_path():
     return None # Fallback: FFmpeg soll selbst suchen (klappt manchmal nicht)
 def save_temp_audio(audio_file):
-    if isinstance(audio_file, str):
-        ext = Path(audio_file).suffix
-        if ext.lower() not in allowed_audios:
-            ext = ".mp3"
-        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
-        with open(temp_audio, "wb") as f:
-            f.write(audio_file.encode())
-        return temp_audio
-    elif hasattr(audio_file, 'name'):
         ext = Path(audio_file.name).suffix
         if ext.lower() not in allowed_audios:
             ext = ".mp3"
-        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
         audio_file.seek(0)
         with open(temp_audio, "wb") as f:
             shutil.copyfileobj(audio_file, f)
-        return temp_audio
-    return None
 def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
     """Erstellt einen FFmpeg drawtext Filter, der ein Wort mit weichen Übergängen (Alpha-Kanal) einblendet."""
@@ -62,10 +57,7 @@ def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_
     fade_out_start = end_time - WORD_FADE_DURATION
     # Alpha-Ausdruck für smooth Fade-In und Fade-Out
-    # if(lt(t, start_time), 0, ...) -> Vor Startzeit: Alpha = 0
-    # if(lt(t, fade_in_end), (t-start_time)/WORD_FADE_DURATION, ...) -> Fade-In
-    # if(lt(t, fade_out_start), 1, ...) -> Volle Deckkraft
-    # if(lt(t, end_time), (end_time-t)/WORD_FADE_DURATION, 0) -> Fade-Out
     alpha_expression = (
         f"if(lt(t,{start_time}), 0, "
         f"if(lt(t,{fade_in_end}), (t-{start_time})/{WORD_FADE_DURATION}, "
@@ -83,102 +75,80 @@ def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_
 def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
-    # Debug Print
-    print(f"DEBUG: Font Size: {font_size}, Y-Pos: {y_pos}, Duration/Word: {duration_per_word}, Fade: {fade_duration}")
     if not images:
         return None, "❌ Keine Bilder ausgewählt"
     temp_dir = tempfile.mkdtemp()
-    clips_with_text = []
     # Text in Wörter aufteilen
     words = input_text.split() if input_text else []
-    # Berechne die Gesamt-Textdauer
-    total_text_duration = len(words) * duration_per_word
     # Schriftart finden
     font_path = get_font_path()
     font_option = f":fontfile='{font_path}'" if font_path else ""
     # Audio verarbeiten
-    temp_audio_file = None
-    if audio_file:
-        temp_audio_file = save_temp_audio(audio_file)
-    # --- 1. ERSTES BILD (Sequenzieller Text) ---
-    # Dauer des ersten Clips ist mindestens so lang wie der Text
-    duration_clip_1 = max(duration_per_image, total_text_duration)
-    # **KORREKTUR für FFmpeg:** Fade-Out Startzeit in Python berechnen
-    fade_out_start_1 = duration_clip_1 - fade_duration
-    if fade_out_start_1 < 0: fade_out_start_1 = 0
-    # Generiere die sequentiellen Drawtext-Filter
-    drawtext_filters = []
-    current_time = 0.0
-    for word in words:
-        filter_str = create_timed_drawtext(word, current_time, duration_per_word, font_option, font_size, y_pos)
-        drawtext_filters.append(filter_str)
-        current_time += duration_per_word
-    # 1. Basisanpassungen
-    base_filters = (
-        "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
-        "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
-        "fps=25,format=yuv420p"
-    )
-    # 2. Fade-Filter (jetzt mit korrekter berechneter Startzeit)
-    fade_img_filter_1 = f"fade=t=in:st=0:d={fade_duration},fade=t=out:st={fade_out_start_1}:d={fade_duration}"
-    if drawtext_filters:
-        all_drawtext_filters = ",".join(drawtext_filters)
-        vf_filters_clip1 = f"{base_filters},{all_drawtext_filters},{fade_img_filter_1}"
-    else:
-        vf_filters_clip1 = f"{base_filters},{fade_img_filter_1}"
-    # Erstelle Clip 1
-    img_path_1 = Path(images[0].name)
-    clip_path_1 = Path(temp_dir) / "clip_with_text_0.mp4"
-    cmd_1 = [
-        "ffmpeg", "-y", "-loop", "1", "-i", str(img_path_1),
-        "-t", str(duration_clip_1),
-        "-vf", vf_filters_clip1,
-        str(clip_path_1)
-    ]
-    try:
-        subprocess.run(cmd_1, check=True, capture_output=True, text=True)
-        clips_with_text.append(clip_path_1)
-    except subprocess.CalledProcessError as e:
-        return None, f"❌ FFmpeg Fehler bei Bild 1 (mit Text):\n{e.stderr}"
-    # --- 2. FOLGE-BILDER (Nur Bild mit Fade) ---
-    for i in range(1, len(images)):
         img_path = Path(images[i].name)
-        clip_path = Path(temp_dir) / f"clip_{i}.mp4"
-        # **KORREKTUR für FFmpeg:** Fade-Out Startzeit in Python berechnen
-        fade_out_start_n = duration_per_image - fade_duration
-        if fade_out_start_n < 0: fade_out_start_n = 0
-        # Nur Bild-Filter mit Fade (jetzt mit korrekter berechneter Startzeit)
-        fade_img_filter = f"fade=t=in:st=0:d={fade_duration},fade=t=out:st={fade_out_start_n}:d={fade_duration}"
-        vf_filters_clip = (
             "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
             "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
-            f"fps=25,format=yuv420p,{fade_img_filter}"
         )
         cmd = [
             "ffmpeg", "-y", "-loop", "1", "-i", str(img_path),
-            "-t", str(duration_per_image),
             "-vf", vf_filters_clip,
             str(clip_path)
         ]
@@ -187,9 +157,12 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
             subprocess.run(cmd, check=True, capture_output=True, text=True)
             clips_with_text.append(clip_path)
         except subprocess.CalledProcessError as e:
-            return None, f"❌ FFmpeg Fehler bei Bild {i+1} (ohne Text):\n{e.stderr}"
-    # Zusammenfügen
     filelist_path = Path(temp_dir) / "filelist.txt"
     with open(filelist_path, "w") as f:
         for clip in clips_with_text:
@@ -205,11 +178,14 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
     ]
     try:
-        subprocess.run(cmd_concat, check=True)
     except subprocess.CalledProcessError as e:
         return None, f"❌ FFmpeg Fehler beim Zusammenfügen:\n{e.stderr}"
-    # Audio hinzufügen falls vorhanden
     if temp_audio_file:
         final_output = Path(temp_dir) / f"final_{uuid.uuid4().hex}.mp4"
         cmd_audio = [
@@ -218,12 +194,19 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
             str(final_output)
         ]
         try:
-            subprocess.run(cmd_audio, check=True)
         except subprocess.CalledProcessError as e:
             return None, f"❌ FFmpeg Fehler beim Hinzufügen von Audio:\n{e.stderr}"
         return str(final_output), "✅ Video mit Audio erstellt!"
-    return str(output_video), "✅ Video erstellt (ohne Audio)"
 # Gradio UI
 with gr.Blocks() as demo:
@@ -231,12 +214,13 @@ with gr.Blocks() as demo:
     with gr.Row():
         img_input = gr.Files(label="Bilder", file_types=allowed_medias)
-        text_input = gr.Textbox(label="Text (Wörter erscheinen nacheinander auf dem ersten Bild)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
     with gr.Row():
-        duration_image_input = gr.Number(value=3, label="Dauer pro BILD (s) [für Bild 2+ und Min-Dauer für Bild 1]")
         duration_word_input = gr.Number(value=1.0, label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]")
-        fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)") # Geändert für Klarheit
         font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
         ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)")

     return None # Fallback: FFmpeg soll selbst suchen (klappt manchmal nicht)
 def save_temp_audio(audio_file):
+    """Speichert die hochgeladene Audio-Datei in einem temporären Verzeichnis."""
+    if hasattr(audio_file, 'name'):
         ext = Path(audio_file.name).suffix
         if ext.lower() not in allowed_audios:
             ext = ".mp3"
+        temp_audio_dir = Path(tempfile.mkdtemp())
+        temp_audio = temp_audio_dir / f"input{ext}"
         audio_file.seek(0)
         with open(temp_audio, "wb") as f:
             shutil.copyfileobj(audio_file, f)
+        # Rückgabe des Verzeichnisses, das später gelöscht werden kann, und des Dateipfads
+        return temp_audio_dir, temp_audio
+    return None, None
 def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
     """Erstellt einen FFmpeg drawtext Filter, der ein Wort mit weichen Übergängen (Alpha-Kanal) einblendet."""
     fade_out_start = end_time - WORD_FADE_DURATION
     # Alpha-Ausdruck für smooth Fade-In und Fade-Out
+    # Steuert die Deckkraft basierend auf der Zeit t (relativ zum Clip-Start)
     alpha_expression = (
         f"if(lt(t,{start_time}), 0, "
         f"if(lt(t,{fade_in_end}), (t-{start_time})/{WORD_FADE_DURATION}, "
 def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
     if not images:
         return None, "❌ Keine Bilder ausgewählt"
     temp_dir = tempfile.mkdtemp()
     # Text in Wörter aufteilen
     words = input_text.split() if input_text else []
+    total_words = len(words)
+    num_images = len(images)
+    # Berechnung der gleichmäßigen Verteilung der Wörter auf die Bilder
+    base_words_per_clip = total_words // num_images
+    remainder = total_words % num_images
+    current_word_index = 0
+    clips_with_text = [] # Paths der generierten MP4-Clips
     # Schriftart finden
     font_path = get_font_path()
     font_option = f":fontfile='{font_path}'" if font_path else ""
     # Audio verarbeiten
+    audio_temp_dir, temp_audio_file = save_temp_audio(audio_file) if audio_file else (None, None)
+    # --- 1. SCHLEIFE: Erstelle jeden Clip mit seinem Textsegment ---
+    for i in range(num_images):
         img_path = Path(images[i].name)
+        clip_path = Path(temp_dir) / f"clip_with_text_{i}.mp4"
+        # 1. Bestimme das Wortsegment für diesen Clip
+        words_on_this_clip = base_words_per_clip + (1 if i < remainder else 0)
+        # Extrahieren des Segments aus der Gesamtliste der Wörter
+        word_segment = words[current_word_index : current_word_index + words_on_this_clip]
+        current_word_index += len(word_segment)
+        # 2. Berechne die Clip-Dauer
+        text_duration = len(word_segment) * duration_per_word
+        # Die Dauer ist das Maximum aus der gewünschten Bilddauer und der benötigten Textdauer
+        duration_clip = max(duration_per_image, text_duration)
+        # 3. Generiere Drawtext Filter (Startzeit ist relativ zum Clip-Start, also 0)
+        drawtext_filters = []
+        word_start_time = 0.0
+        for word in word_segment:
+            filter_str = create_timed_drawtext(word, word_start_time, duration_per_word, font_option, font_size, y_pos)
+            drawtext_filters.append(filter_str)
+            word_start_time += duration_per_word
+        # 4. Basis- und Fade-Filter
+        base_filters = (
             "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
             "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
+            "fps=25,format=yuv420p"
         )
+        fade_out_start = duration_clip - fade_duration
+        if fade_out_start < 0: fade_out_start = 0
+        fade_img_filter = f"fade=t=in:st=0:d={fade_duration},fade=t=out:st={fade_out_start}:d={fade_duration}"
+        # 5. Kombiniere alle Filter
+        if drawtext_filters:
+            all_drawtext_filters = ",".join(drawtext_filters)
+            vf_filters_clip = f"{base_filters},{all_drawtext_filters},{fade_img_filter}"
+        else:
+            # Kein Text mehr: Nur Bild mit Fade
+            vf_filters_clip = f"{base_filters},{fade_img_filter}"
+        # 6. FFmpeg Command zum Erstellen des Clips
         cmd = [
             "ffmpeg", "-y", "-loop", "1", "-i", str(img_path),
+            "-t", str(duration_clip),
             "-vf", vf_filters_clip,
             str(clip_path)
         ]
             subprocess.run(cmd, check=True, capture_output=True, text=True)
             clips_with_text.append(clip_path)
         except subprocess.CalledProcessError as e:
+            # Bereinigung bei Fehler
+            shutil.rmtree(temp_dir)
+            if audio_temp_dir: shutil.rmtree(audio_temp_dir)
+            return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{e.stderr}"
+    # --- 2. ZUSAMMENFÜGEN ---
     filelist_path = Path(temp_dir) / "filelist.txt"
     with open(filelist_path, "w") as f:
         for clip in clips_with_text:
     ]
     try:
+        subprocess.run(cmd_concat, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
+        shutil.rmtree(temp_dir)
+        if audio_temp_dir: shutil.rmtree(audio_temp_dir)
         return None, f"❌ FFmpeg Fehler beim Zusammenfügen:\n{e.stderr}"
+    # --- 3. AUDIO HINZUFÜGEN (falls vorhanden) ---
+    final_output = output_video
     if temp_audio_file:
         final_output = Path(temp_dir) / f"final_{uuid.uuid4().hex}.mp4"
         cmd_audio = [
             str(final_output)
         ]
         try:
+            subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
         except subprocess.CalledProcessError as e:
+            shutil.rmtree(temp_dir)
+            if audio_temp_dir: shutil.rmtree(audio_temp_dir)
             return None, f"❌ FFmpeg Fehler beim Hinzufügen von Audio:\n{e.stderr}"
+        # Bereinige das separate Audio-Temp-Verzeichnis
+        if audio_temp_dir: shutil.rmtree(audio_temp_dir)
         return str(final_output), "✅ Video mit Audio erstellt!"
+    # Nur Video-Pfad zurückgeben
+    return str(final_output), "✅ Video erstellt (ohne Audio)"
 # Gradio UI
 with gr.Blocks() as demo:
     with gr.Row():
         img_input = gr.Files(label="Bilder", file_types=allowed_medias)
+        # TEXT WURDE GEÄNDERT: Neue Beschreibung für Textverteilung
+        text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
     with gr.Row():
+        duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
         duration_word_input = gr.Number(value=1.0, label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]")
+        fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)")
         font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
         ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)")