video-ffmpeg

Sleeping

App Files Files Community

Tim13ekd committed 5 days ago

Commit

8d4c431

verified ·

1 Parent(s): 1bd9ab8

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -13

app.py CHANGED Viewed

@@ -58,8 +58,9 @@ def save_temp_audio(audio_file_path):
         return None, None
-def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
-    """Erstellt einen FFmpeg drawtext Filter, der ein Wort mit weichen Übergängen (Alpha-Kanal) einblendet."""
     global FFMPEG_ESCAPE_CHAR
     global WORD_FADE_DURATION
@@ -74,24 +75,62 @@ def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_
     fade_out_start = end_time - WORD_FADE_DURATION
     # Alpha-Ausdruck für smooth Fade-In und Fade-Out
-    # Steuert die Deckkraft basierend auf der Zeit t (relativ zum Clip-Start)
     alpha_expression = (
         f"if(lt(t,{start_time}), 0, "
         f"if(lt(t,{fade_in_end}), (t-{start_time})/{WORD_FADE_DURATION}, "
         f"if(lt(t,{fade_out_start}), 1, "
         f"if(lt(t,{end_time}), ({end_time}-t)/{WORD_FADE_DURATION}, 0))))"
     )
-    # Erstelle den Filterstring
     drawtext_filter = (
-        f"drawtext=text='{escaped_word}'{font_option}:fontcolor=white:fontsize={font_size}:borderw=2:bordercolor=black:"
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
-        f"alpha='{alpha_expression}'" # Steuert die Deckkraft (Smoothness)
     )
     return drawtext_filter
-def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
     if not images:
         return None, "❌ Keine Bilder ausgewählt"
@@ -115,7 +154,6 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
     font_option = f":fontfile='{font_path}'" if font_path else ""
     # Audio verarbeiten
-    # audio_file ist der Pfad-String von Gradio
     audio_temp_dir, temp_audio_file = save_temp_audio(audio_file) if audio_file else (None, None)
@@ -140,7 +178,8 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
         drawtext_filters = []
         word_start_time = 0.0
         for word in word_segment:
-            filter_str = create_timed_drawtext(word, word_start_time, duration_per_word, font_option, font_size, y_pos)
             drawtext_filters.append(filter_str)
             word_start_time += duration_per_word
@@ -232,15 +271,24 @@ with gr.Blocks() as demo:
     with gr.Row():
         img_input = gr.Files(label="Bilder", file_types=allowed_medias)
-        # TEXT WURDE GEÄNDERT: Neue Beschreibung für Textverteilung
         text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
     with gr.Row():
         duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
         duration_word_input = gr.Number(value=1.0, label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]")
         fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)")
         font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
         ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)")
     audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
     btn = gr.Button("Erstellen", variant="primary")
@@ -248,8 +296,8 @@ with gr.Blocks() as demo:
     out_video = gr.Video(label="Ergebnis")
     status = gr.Textbox(label="Status")
-    # KORREKTE REIHENFOLGE DER INPUTS:
-    # (images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file)
     btn.click(
         fn=generate_slideshow_with_audio,
         inputs=[
@@ -260,7 +308,8 @@ with gr.Blocks() as demo:
             fade_input,
             font_size_input,
             ypos_input,
-            audio_input
         ],
         outputs=[out_video, status]
     )

         return None, None
+def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos, style):
+    """Erstellt einen FFmpeg drawtext Filter, der ein Wort mit weichen Übergängen (Alpha-Kanal) einblendet,
+       basierend auf dem gewählten Stil."""
     global FFMPEG_ESCAPE_CHAR
     global WORD_FADE_DURATION
     fade_out_start = end_time - WORD_FADE_DURATION
     # Alpha-Ausdruck für smooth Fade-In und Fade-Out
     alpha_expression = (
         f"if(lt(t,{start_time}), 0, "
         f"if(lt(t,{fade_in_end}), (t-{start_time})/{WORD_FADE_DURATION}, "
         f"if(lt(t,{fade_out_start}), 1, "
         f"if(lt(t,{end_time}), ({end_time}-t)/{WORD_FADE_DURATION}, 0))))"
     )
+    # --- STYLING BASIEREND AUF AUSWAHL ---
+    params = {
+        "fontcolor": "white",
+        "borderw": 2,
+        "bordercolor": "black",
+        "box": 0,
+        "boxcolor": "",
+        "fontsize_override": font_size
+    }
+    if style == "pop":
+        # Heller, auffälliger Text
+        params["fontcolor"] = "yellow"
+        params["borderw"] = 3
+        params["fontsize_override"] = font_size * 1.1
+    elif style == "bold word":
+        # Starker Kontrast, dickerer Rand
+        params["fontcolor"] = "white"
+        params["borderw"] = 4
+        params["fontsize_override"] = font_size * 1.05
+    elif style == "badge":
+        # Text in einem leicht transparenten Kasten
+        params["fontcolor"] = "white"
+        params["borderw"] = 0
+        params["box"] = 1
+        # black@0.5 ist semi-transparentes Schwarz
+        params["boxcolor"] = "black@0.5"
+    # Default ist "modern" (params bleiben Standard)
+    # Filter-String basierend auf den dynamischen Parametern erstellen
     drawtext_filter = (
+        f"drawtext=text='{escaped_word}'{font_option}:"
+        f"fontcolor={params['fontcolor']}:"
+        f"fontsize={params['fontsize_override']}:"
+        f"borderw={params['borderw']}:"
+        f"bordercolor={params['bordercolor']}:"
+        # Füge Box-Parameter nur hinzu, wenn box=1 (Badge-Stil)
+        + (f"box={params['box']}:boxcolor={params['boxcolor']}:boxborderw=10:" if params["box"] else "") +
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
+        f"alpha='{alpha_expression}'"
     )
     return drawtext_filter
+def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file, subtitle_style):
     if not images:
         return None, "❌ Keine Bilder ausgewählt"
     font_option = f":fontfile='{font_path}'" if font_path else ""
     # Audio verarbeiten
     audio_temp_dir, temp_audio_file = save_temp_audio(audio_file) if audio_file else (None, None)
         drawtext_filters = []
         word_start_time = 0.0
         for word in word_segment:
+            # Füge den Stil-Parameter hinzu
+            filter_str = create_timed_drawtext(word, word_start_time, duration_per_word, font_option, font_size, y_pos, subtitle_style)
             drawtext_filters.append(filter_str)
             word_start_time += duration_per_word
     with gr.Row():
         img_input = gr.Files(label="Bilder", file_types=allowed_medias)
         text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
     with gr.Row():
         duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
         duration_word_input = gr.Number(value=1.0, label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]")
         fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)")
+    with gr.Row():
         font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
         ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)")
+        # NEUE EINGABE FÜR STILE
+        subtitle_style_input = gr.Dropdown(
+            ["modern", "pop", "bold word", "badge"],
+            label="Untertitel-Stil",
+            value="modern",
+            interactive=True
+        )
     audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
     btn = gr.Button("Erstellen", variant="primary")
     out_video = gr.Video(label="Ergebnis")
     status = gr.Textbox(label="Status")
+    # KORREKTE REIHENFOLGE DER INPUTS aktualisiert um 'subtitle_style_input':
+    # (images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file, subtitle_style)
     btn.click(
         fn=generate_slideshow_with_audio,
         inputs=[
             fade_input,
             font_size_input,
             ypos_input,
+            audio_input,
+            subtitle_style_input # NEUE EINGABE
         ],
         outputs=[out_video, status]
     )