Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -58,8 +58,9 @@ def save_temp_audio(audio_file_path):
|
|
| 58 |
return None, None
|
| 59 |
|
| 60 |
|
| 61 |
-
def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
|
| 62 |
-
"""Erstellt einen FFmpeg drawtext Filter, der ein Wort mit weichen Übergängen (Alpha-Kanal) einblendet."""
|
|
|
|
| 63 |
global FFMPEG_ESCAPE_CHAR
|
| 64 |
global WORD_FADE_DURATION
|
| 65 |
|
|
@@ -74,24 +75,62 @@ def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_
|
|
| 74 |
fade_out_start = end_time - WORD_FADE_DURATION
|
| 75 |
|
| 76 |
# Alpha-Ausdruck für smooth Fade-In und Fade-Out
|
| 77 |
-
# Steuert die Deckkraft basierend auf der Zeit t (relativ zum Clip-Start)
|
| 78 |
alpha_expression = (
|
| 79 |
f"if(lt(t,{start_time}), 0, "
|
| 80 |
f"if(lt(t,{fade_in_end}), (t-{start_time})/{WORD_FADE_DURATION}, "
|
| 81 |
f"if(lt(t,{fade_out_start}), 1, "
|
| 82 |
f"if(lt(t,{end_time}), ({end_time}-t)/{WORD_FADE_DURATION}, 0))))"
|
| 83 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
-
#
|
|
|
|
|
|
|
| 86 |
drawtext_filter = (
|
| 87 |
-
f"drawtext=text='{escaped_word}'{font_option}:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
|
| 89 |
-
f"alpha='{alpha_expression}'"
|
| 90 |
)
|
| 91 |
return drawtext_filter
|
| 92 |
|
| 93 |
|
| 94 |
-
def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
|
| 95 |
|
| 96 |
if not images:
|
| 97 |
return None, "❌ Keine Bilder ausgewählt"
|
|
@@ -115,7 +154,6 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
|
|
| 115 |
font_option = f":fontfile='{font_path}'" if font_path else ""
|
| 116 |
|
| 117 |
# Audio verarbeiten
|
| 118 |
-
# audio_file ist der Pfad-String von Gradio
|
| 119 |
audio_temp_dir, temp_audio_file = save_temp_audio(audio_file) if audio_file else (None, None)
|
| 120 |
|
| 121 |
|
|
@@ -140,7 +178,8 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
|
|
| 140 |
drawtext_filters = []
|
| 141 |
word_start_time = 0.0
|
| 142 |
for word in word_segment:
|
| 143 |
-
|
|
|
|
| 144 |
drawtext_filters.append(filter_str)
|
| 145 |
word_start_time += duration_per_word
|
| 146 |
|
|
@@ -232,15 +271,24 @@ with gr.Blocks() as demo:
|
|
| 232 |
|
| 233 |
with gr.Row():
|
| 234 |
img_input = gr.Files(label="Bilder", file_types=allowed_medias)
|
| 235 |
-
# TEXT WURDE GEÄNDERT: Neue Beschreibung für Textverteilung
|
| 236 |
text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
|
| 237 |
|
| 238 |
with gr.Row():
|
| 239 |
duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
|
| 240 |
duration_word_input = gr.Number(value=1.0, label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]")
|
| 241 |
fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)")
|
|
|
|
|
|
|
| 242 |
font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
|
| 243 |
ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
|
| 246 |
btn = gr.Button("Erstellen", variant="primary")
|
|
@@ -248,8 +296,8 @@ with gr.Blocks() as demo:
|
|
| 248 |
out_video = gr.Video(label="Ergebnis")
|
| 249 |
status = gr.Textbox(label="Status")
|
| 250 |
|
| 251 |
-
# KORREKTE REIHENFOLGE DER INPUTS:
|
| 252 |
-
# (images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file)
|
| 253 |
btn.click(
|
| 254 |
fn=generate_slideshow_with_audio,
|
| 255 |
inputs=[
|
|
@@ -260,7 +308,8 @@ with gr.Blocks() as demo:
|
|
| 260 |
fade_input,
|
| 261 |
font_size_input,
|
| 262 |
ypos_input,
|
| 263 |
-
audio_input
|
|
|
|
| 264 |
],
|
| 265 |
outputs=[out_video, status]
|
| 266 |
)
|
|
|
|
| 58 |
return None, None
|
| 59 |
|
| 60 |
|
| 61 |
+
def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos, style):
|
| 62 |
+
"""Erstellt einen FFmpeg drawtext Filter, der ein Wort mit weichen Übergängen (Alpha-Kanal) einblendet,
|
| 63 |
+
basierend auf dem gewählten Stil."""
|
| 64 |
global FFMPEG_ESCAPE_CHAR
|
| 65 |
global WORD_FADE_DURATION
|
| 66 |
|
|
|
|
| 75 |
fade_out_start = end_time - WORD_FADE_DURATION
|
| 76 |
|
| 77 |
# Alpha-Ausdruck für smooth Fade-In und Fade-Out
|
|
|
|
| 78 |
alpha_expression = (
|
| 79 |
f"if(lt(t,{start_time}), 0, "
|
| 80 |
f"if(lt(t,{fade_in_end}), (t-{start_time})/{WORD_FADE_DURATION}, "
|
| 81 |
f"if(lt(t,{fade_out_start}), 1, "
|
| 82 |
f"if(lt(t,{end_time}), ({end_time}-t)/{WORD_FADE_DURATION}, 0))))"
|
| 83 |
)
|
| 84 |
+
|
| 85 |
+
# --- STYLING BASIEREND AUF AUSWAHL ---
|
| 86 |
+
|
| 87 |
+
params = {
|
| 88 |
+
"fontcolor": "white",
|
| 89 |
+
"borderw": 2,
|
| 90 |
+
"bordercolor": "black",
|
| 91 |
+
"box": 0,
|
| 92 |
+
"boxcolor": "",
|
| 93 |
+
"fontsize_override": font_size
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
if style == "pop":
|
| 97 |
+
# Heller, auffälliger Text
|
| 98 |
+
params["fontcolor"] = "yellow"
|
| 99 |
+
params["borderw"] = 3
|
| 100 |
+
params["fontsize_override"] = font_size * 1.1
|
| 101 |
+
|
| 102 |
+
elif style == "bold word":
|
| 103 |
+
# Starker Kontrast, dickerer Rand
|
| 104 |
+
params["fontcolor"] = "white"
|
| 105 |
+
params["borderw"] = 4
|
| 106 |
+
params["fontsize_override"] = font_size * 1.05
|
| 107 |
+
|
| 108 |
+
elif style == "badge":
|
| 109 |
+
# Text in einem leicht transparenten Kasten
|
| 110 |
+
params["fontcolor"] = "white"
|
| 111 |
+
params["borderw"] = 0
|
| 112 |
+
params["box"] = 1
|
| 113 |
+
# black@0.5 ist semi-transparentes Schwarz
|
| 114 |
+
params["boxcolor"] = "black@0.5"
|
| 115 |
|
| 116 |
+
# Default ist "modern" (params bleiben Standard)
|
| 117 |
+
|
| 118 |
+
# Filter-String basierend auf den dynamischen Parametern erstellen
|
| 119 |
drawtext_filter = (
|
| 120 |
+
f"drawtext=text='{escaped_word}'{font_option}:"
|
| 121 |
+
f"fontcolor={params['fontcolor']}:"
|
| 122 |
+
f"fontsize={params['fontsize_override']}:"
|
| 123 |
+
f"borderw={params['borderw']}:"
|
| 124 |
+
f"bordercolor={params['bordercolor']}:"
|
| 125 |
+
# Füge Box-Parameter nur hinzu, wenn box=1 (Badge-Stil)
|
| 126 |
+
+ (f"box={params['box']}:boxcolor={params['boxcolor']}:boxborderw=10:" if params["box"] else "") +
|
| 127 |
f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
|
| 128 |
+
f"alpha='{alpha_expression}'"
|
| 129 |
)
|
| 130 |
return drawtext_filter
|
| 131 |
|
| 132 |
|
| 133 |
+
def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file, subtitle_style):
|
| 134 |
|
| 135 |
if not images:
|
| 136 |
return None, "❌ Keine Bilder ausgewählt"
|
|
|
|
| 154 |
font_option = f":fontfile='{font_path}'" if font_path else ""
|
| 155 |
|
| 156 |
# Audio verarbeiten
|
|
|
|
| 157 |
audio_temp_dir, temp_audio_file = save_temp_audio(audio_file) if audio_file else (None, None)
|
| 158 |
|
| 159 |
|
|
|
|
| 178 |
drawtext_filters = []
|
| 179 |
word_start_time = 0.0
|
| 180 |
for word in word_segment:
|
| 181 |
+
# Füge den Stil-Parameter hinzu
|
| 182 |
+
filter_str = create_timed_drawtext(word, word_start_time, duration_per_word, font_option, font_size, y_pos, subtitle_style)
|
| 183 |
drawtext_filters.append(filter_str)
|
| 184 |
word_start_time += duration_per_word
|
| 185 |
|
|
|
|
| 271 |
|
| 272 |
with gr.Row():
|
| 273 |
img_input = gr.Files(label="Bilder", file_types=allowed_medias)
|
|
|
|
| 274 |
text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
|
| 275 |
|
| 276 |
with gr.Row():
|
| 277 |
duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
|
| 278 |
duration_word_input = gr.Number(value=1.0, label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]")
|
| 279 |
fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)")
|
| 280 |
+
|
| 281 |
+
with gr.Row():
|
| 282 |
font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
|
| 283 |
ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)")
|
| 284 |
+
|
| 285 |
+
# NEUE EINGABE FÜR STILE
|
| 286 |
+
subtitle_style_input = gr.Dropdown(
|
| 287 |
+
["modern", "pop", "bold word", "badge"],
|
| 288 |
+
label="Untertitel-Stil",
|
| 289 |
+
value="modern",
|
| 290 |
+
interactive=True
|
| 291 |
+
)
|
| 292 |
|
| 293 |
audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
|
| 294 |
btn = gr.Button("Erstellen", variant="primary")
|
|
|
|
| 296 |
out_video = gr.Video(label="Ergebnis")
|
| 297 |
status = gr.Textbox(label="Status")
|
| 298 |
|
| 299 |
+
# KORREKTE REIHENFOLGE DER INPUTS aktualisiert um 'subtitle_style_input':
|
| 300 |
+
# (images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file, subtitle_style)
|
| 301 |
btn.click(
|
| 302 |
fn=generate_slideshow_with_audio,
|
| 303 |
inputs=[
|
|
|
|
| 308 |
fade_input,
|
| 309 |
font_size_input,
|
| 310 |
ypos_input,
|
| 311 |
+
audio_input,
|
| 312 |
+
subtitle_style_input # NEUE EINGABE
|
| 313 |
],
|
| 314 |
outputs=[out_video, status]
|
| 315 |
)
|