video-ffmpeg

Sleeping

App Files Files Community

Tim13ekd committed 5 days ago

Commit

0ffcbef

verified ·

1 Parent(s): 4717f77

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -69

app.py CHANGED Viewed

@@ -28,21 +28,17 @@ FONT_OPTIONS = list(FONT_MAP.keys())
 def get_font_path(font_name):
     """
     Gibt den tatsächlichen, existierenden Pfad für die ausgewählte Schriftart zurück.
-    Falls der ausgewählte Pfad nicht existiert, wird ein Fallback verwendet.
     """
     requested_path = FONT_MAP.get(font_name)
-    # 1. Wenn der angefragte Pfad existiert oder None (System Default) ist, verwende ihn
     if requested_path is None or os.path.exists(requested_path):
         return requested_path
-    # 2. Fallback: Suche nach einem funktionierenden Pfad
     for name, path in FONT_MAP.items():
         if path and os.path.exists(path):
             print(f"Warnung: Ausgewählte Schriftart '{font_name}' nicht gefunden. Verwende Fallback: '{name}'")
             return path
-    # 3. Letzter Fallback: None (System Default)
     print("Warnung: Keine bevorzugten Schriftarten gefunden. Verwende FFmpeg System Standard.")
     return None
@@ -70,41 +66,40 @@ def save_temp_audio(audio_file_path):
 def create_sentence_base_filter(full_text, duration_clip, font_option, font_size, y_pos, style):
     """
-    Erstellt den FFmpeg drawtext Filter für die Basisschicht (den gesamten Satzabschnitt),
-    der für die volle Clip-Dauer sichtbar ist (stabile Positionierung).
     """
-    # Standard-Stil
     base_params = {
         "fontcolor": "white",
-        "borderw": 0, # Kein Rand
         "bordercolor": "black",
-        "box": 0, "boxcolor": "",
         "fontsize": font_size
     }
     style_lower = style.lower()
-    # SPEZIALFALL: Modern Style (graue, semi-transparente Hintergrundbox)
-    if style_lower == "modern":
-        base_params["box"] = 1
-        # Dunkelgrau (0x444444) mit 60% Transparenz (@0.6)
-        base_params["boxcolor"] = "0x444444@0.6"
-        base_params["borderw"] = 0 # Kein Text-Rand bei Hintergrundbox
-        base_params["fontsize"] = font_size
-    # SPEZIALFALL: Pop Style (schwarze Box)
-    elif style_lower == "pop":
         base_params["box"] = 1
-        base_params["boxcolor"] = "0x000000@0.6"
-        base_params["fontsize"] = font_size * 1.1
-    # Für andere Stile wird der Basistext nur als Schatten (borderw=2) gezeichnet
-    elif style_lower in ["bold", "badge", "word"]:
-        base_params["borderw"] = 2
     escaped_text = full_text.replace(':', FFMPEG_ESCAPE_CHAR + ':')
-    # Filter für den gesamten Satz, sichtbar für die gesamte Clip-Dauer
     drawtext_filter = (
         f"drawtext=text='{escaped_text}':"
         f"fontcolor={base_params['fontcolor']}:"
@@ -116,19 +111,30 @@ def create_sentence_base_filter(full_text, duration_clip, font_option, font_size
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
     )
-    # Fügt fontfile nur hinzu, wenn vorhanden
     if font_option:
-        # font_option enthält bereits 'fontfile='
         drawtext_filter += f":{font_option}"
-    drawtext_filter += f":enable='between(t, 0, {duration_clip})'"
     return drawtext_filter
 def create_highlight_word_filter(word, full_text, start_time, duration, font_option, font_size, y_pos, style):
     """
-    Erstellt den FFmpeg drawtext Filter für die Highlight-Schicht (nur das aktive Wort).
     """
     word_end_time = start_time + duration
     # Alpha-Ausdruck für smooth Fade-In und Fade-Out der HIGHLIGHT-FARBE
@@ -144,42 +150,36 @@ def create_highlight_word_filter(word, full_text, start_time, duration, font_opt
         "fontcolor": "yellow",
         "borderw": 0,
         "bordercolor": "black",
-        "box": 0, "boxcolor": "",
         "fontsize_override": font_size * 1.05 # Leicht vergrößert
     }
     style_lower = style.lower()
-    if style_lower == "modern":
-        # Modern: Gelbe Schrift, kein Rand
-        params["fontcolor"] = "yellow"
-        params["borderw"] = 0
-    elif style_lower == "bold":
-        # Bold: Gelb mit starkem Rand
         params["fontcolor"] = "yellow"
         params["borderw"] = 4
-    elif style_lower in ["badge", "word", "pop"]:
         params["fontcolor"] = "yellow"
         params["borderw"] = 0
     escaped_word = word.replace(':', FFMPEG_ESCAPE_CHAR + ':')
-    # Filter für das einzelne, hervorgehobene Wort
     drawtext_filter = (
         f"drawtext=text='{escaped_word}':"
         f"fontcolor={params['fontcolor']}:"
         f"fontsize={params['fontsize_override']}:"
         f"borderw={params['borderw']}:"
         f"bordercolor={params['bordercolor']}:"
-        + (f"box={params['box']}:boxcolor={params['boxcolor']}:boxborderw=10:" if params["box"] else "") +
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
     )
     if font_option:
-        # font_option enthält bereits 'fontfile='
         drawtext_filter += f":{font_option}"
     # Der Highlight-Filter ist nur aktiv, wenn das Wort aktiv ist (via Alpha-Expression).
@@ -206,15 +206,13 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
     current_word_index = 0
     clips_with_text = []
-    # NEU: Schriftart finden basierend auf der Auswahl
     font_path = get_font_path(selected_font)
     # Pfad für FFmpeg vorbereiten und maskieren.
     font_option = ""
     if font_path:
-        # Ersetze eventuelle Backslashes in Pfaden (obwohl unwahrscheinlich unter Linux)
         escaped_font_path = str(font_path).replace(FFMPEG_ESCAPE_CHAR, FFMPEG_ESCAPE_CHAR + FFMPEG_ESCAPE_CHAR)
-        # Behandelt das Doppelpunkt-Problem von FFmpeg in Pfaden (wichtig für Filtergraphen)
         escaped_font_path = escaped_font_path.replace(':', FFMPEG_ESCAPE_CHAR + ':')
         font_option = f"fontfile='{escaped_font_path}'"
@@ -241,25 +239,27 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
         drawtext_filters = []
         if full_text:
-            # ERSTE SCHICHT: Der gesamte Satz (als STABILE BASIS mit Kasten)
             base_filter = create_sentence_base_filter(full_text, duration_clip, font_option, font_size, y_pos, subtitle_style)
             drawtext_filters.append(base_filter)
-            # ZWEITE SCHICHT: Highlight-Layer für jedes Wort
-            word_start_time = 0.0
-            for word in word_segment:
-                highlight_filter = create_highlight_word_filter(
-                    word,
-                    full_text,
-                    word_start_time,
-                    duration_per_word,
-                    font_option,
-                    font_size,
-                    y_pos,
-                    subtitle_style
-                )
-                drawtext_filters.append(highlight_filter)
-                word_start_time += duration_per_word
         # 3. Basis- und Fade-Filter
@@ -275,7 +275,7 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
         # 4. Kombiniere alle Filter
         if drawtext_filters:
-            # Wichtig: Der Basis-Satz muss als erster Filter, die Highlights als letzte Filter stehen.
             all_drawtext_filters = ",".join(drawtext_filters)
             vf_filters_clip = f"{base_filters},{all_drawtext_filters},{fade_img_filter}"
         else:
@@ -291,11 +291,9 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
         ]
         try:
-            # Hinzufügen von stdout/stderr Erfassung für bessere Fehlerprotokollierung
             subprocess.run(cmd, check=True, capture_output=True, text=True)
             clips_with_text.append(clip_path)
         except subprocess.CalledProcessError as e:
-            # Bereinigung bei Fehler
             shutil.rmtree(temp_dir)
             if audio_temp_dir: shutil.rmtree(audio_temp_dir)
             return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{e.stderr}"
@@ -370,11 +368,11 @@ with gr.Blocks() as demo:
         font_size_input = gr.Number(value=80, label="Schriftgröße (px)", scale=1)
         ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)", scale=2)
-        # Untertitel-Stile
         subtitle_style_input = gr.Dropdown(
-            ["Modern", "Pop", "Bold", "Badge", "Word"],
             label="Untertitel-Stil",
-            value="Modern",
             interactive=True,
             scale=1
         )
@@ -397,9 +395,9 @@ with gr.Blocks() as demo:
             ypos_input,
             audio_input,
             subtitle_style_input,
-            font_select_input # NEUER Input
         ],
         outputs=[out_video, status]
     )
-demo.launch()

 def get_font_path(font_name):
     """
     Resolve the selected font name to a font file path that exists on disk.

     The name is looked up in FONT_MAP. If the lookup yields None, or a path
     that exists, that value is returned unchanged. Otherwise the first
     FONT_MAP entry whose path exists is used as a fallback and a warning is
     printed. If no entry exists on disk, a warning is printed and None is
     returned (the warning text refers to this as the FFmpeg system default).
     """
     candidate = FONT_MAP.get(font_name)

     # Only search for a replacement when a concrete path was requested
     # and that path is missing.
     if candidate is not None and not os.path.exists(candidate):
         replacement = next(
             (
                 (name, location)
                 for name, location in FONT_MAP.items()
                 if location and os.path.exists(location)
             ),
             None,
         )
         if replacement is None:
             print("Warnung: Keine bevorzugten Schriftarten gefunden. Verwende FFmpeg System Standard.")
             return None
         name, location = replacement
         print(f"Warnung: Ausgewählte Schriftart '{font_name}' nicht gefunden. Verwende Fallback: '{name}'")
         return location

     return candidate
 def create_sentence_base_filter(full_text, duration_clip, font_option, font_size, y_pos, style):
     """
+    Erstellt den FFmpeg drawtext Filter für die Basisschicht (den gesamten Satzabschnitt).
+    Dies ist entweder der gesamte Satz oder die Box für statischen Text.
     """
     base_params = {
         "fontcolor": "white",
+        "borderw": 2, # Standard Schatten/Rand
         "bordercolor": "black",
+        "box": 1, "boxcolor": "[email protected]", # Semi-transparente schwarze Box
         "fontsize": font_size
     }
     style_lower = style.lower()
+    if style_lower == "highlight":
+        # Hervorheben: Der gesamte Satz als Basis, aber nur mit leichtem Schatten
+        base_params["box"] = 0
+        base_params["borderw"] = 2
+    elif style_lower == "static":
+        # Statisch: Der gesamte Satz in einer Box, keine Animation, bleibt die ganze Zeit sichtbar
         base_params["box"] = 1
+        base_params["borderw"] = 0
+    elif style_lower == "dynamic":
+        # Dynamisch: Große Schrift, leichte Box, wird später vom Highlight überlagert
+        base_params["box"] = 1
+        base_params["boxcolor"] = "[email protected]"
+        base_params["borderw"] = 0
+        base_params["fontsize"] = font_size * 1.2
     escaped_text = full_text.replace(':', FFMPEG_ESCAPE_CHAR + ':')
+    # Filter für den gesamten Satz
     drawtext_filter = (
         f"drawtext=text='{escaped_text}':"
         f"fontcolor={base_params['fontcolor']}:"
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
     )
     if font_option:
         drawtext_filter += f":{font_option}"
+    # Der statische Stil wird sofort und für die gesamte Clip-Dauer eingeblendet
+    if style_lower == "static":
+        drawtext_filter += f":enable='between(t, 0, {duration_clip})'"
+    # Für Highlight und Dynamic brauchen wir die Basis als konstante Referenz
+    else:
+        # Bei "Highlight" und "Dynamic" ist dies der Basis-Text, der IMMER sichtbar ist.
+        drawtext_filter += f":enable='between(t, 0, {duration_clip})'"
     return drawtext_filter
 def create_highlight_word_filter(word, full_text, start_time, duration, font_option, font_size, y_pos, style):
     """
+    Erstellt den FFmpeg drawtext Filter für die Highlight-Schicht (nur das aktive Wort),
+    es sei denn, der Stil ist 'Static'.
     """
+    # Wenn statisch, wird kein Highlight benötigt
+    if style.lower() == "static":
+        return None
     word_end_time = start_time + duration
     # Alpha-Ausdruck für smooth Fade-In und Fade-Out der HIGHLIGHT-FARBE
         "fontcolor": "yellow",
         "borderw": 0,
         "bordercolor": "black",
         "fontsize_override": font_size * 1.05 # Leicht vergrößert
     }
     style_lower = style.lower()
+    if style_lower == "dynamic":
+        # Dynamisch: Schrift deutlich größer und mit Rand, zentriert.
         params["fontcolor"] = "yellow"
         params["borderw"] = 4
+        params["fontsize_override"] = font_size * 1.5
+    else: # Highlight
+        # Highlight: Gelbe Schrift, kein Rand
         params["fontcolor"] = "yellow"
         params["borderw"] = 0
     escaped_word = word.replace(':', FFMPEG_ESCAPE_CHAR + ':')
+    # Filter für das einzelne, hervorgehobene Wort (Das gesamte Wort wird gezeichnet)
     drawtext_filter = (
         f"drawtext=text='{escaped_word}':"
         f"fontcolor={params['fontcolor']}:"
         f"fontsize={params['fontsize_override']}:"
         f"borderw={params['borderw']}:"
         f"bordercolor={params['bordercolor']}:"
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
     )
     if font_option:
         drawtext_filter += f":{font_option}"
     # Der Highlight-Filter ist nur aktiv, wenn das Wort aktiv ist (via Alpha-Expression).
     current_word_index = 0
     clips_with_text = []
+    # Schriftart finden basierend auf der Auswahl
     font_path = get_font_path(selected_font)
     # Pfad für FFmpeg vorbereiten und maskieren.
     font_option = ""
     if font_path:
         escaped_font_path = str(font_path).replace(FFMPEG_ESCAPE_CHAR, FFMPEG_ESCAPE_CHAR + FFMPEG_ESCAPE_CHAR)
         escaped_font_path = escaped_font_path.replace(':', FFMPEG_ESCAPE_CHAR + ':')
         font_option = f"fontfile='{escaped_font_path}'"
         drawtext_filters = []
         if full_text:
+            # ERSTE SCHICHT: Der gesamte Satz (als STABILE BASIS oder STATISCHE BOX)
             base_filter = create_sentence_base_filter(full_text, duration_clip, font_option, font_size, y_pos, subtitle_style)
             drawtext_filters.append(base_filter)
+            # ZWEITE SCHICHT: Highlight-Layer (nur wenn nicht "Static")
+            if subtitle_style.lower() != "static":
+                word_start_time = 0.0
+                for word in word_segment:
+                    highlight_filter = create_highlight_word_filter(
+                        word,
+                        full_text,
+                        word_start_time,
+                        duration_per_word,
+                        font_option,
+                        font_size,
+                        y_pos,
+                        subtitle_style
+                    )
+                    if highlight_filter:
+                        drawtext_filters.append(highlight_filter)
+                    word_start_time += duration_per_word
         # 3. Basis- und Fade-Filter
         # 4. Kombiniere alle Filter
         if drawtext_filters:
+            # Die Reihenfolge ist wichtig: Basis zuerst, Highlights zuletzt
             all_drawtext_filters = ",".join(drawtext_filters)
             vf_filters_clip = f"{base_filters},{all_drawtext_filters},{fade_img_filter}"
         else:
         ]
         try:
             subprocess.run(cmd, check=True, capture_output=True, text=True)
             clips_with_text.append(clip_path)
         except subprocess.CalledProcessError as e:
             shutil.rmtree(temp_dir)
             if audio_temp_dir: shutil.rmtree(audio_temp_dir)
             return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{e.stderr}"
         font_size_input = gr.Number(value=80, label="Schriftgröße (px)", scale=1)
         ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)", scale=2)
+        # NEU: Reduzierte Untertitel-Stile
         subtitle_style_input = gr.Dropdown(
+            ["Highlight", "Dynamic", "Static"],
             label="Untertitel-Stil",
+            value="Highlight",
             interactive=True,
             scale=1
         )
             ypos_input,
             audio_input,
             subtitle_style_input,
+            font_select_input
         ],
         outputs=[out_video, status]
     )
+demo.launch(