import os
import shutil
import subprocess
import tempfile
import uuid
from pathlib import Path

import gradio as gr

# Escape character used when sanitising text for FFmpeg drawtext filters.
# Kept in a module constant so f-strings never need a literal backslash.
FFMPEG_ESCAPE_CHAR = "\\"

# Accepted input file extensions.
allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]


def get_font_path():
    """Return the path of a common bold TTF font on this Linux system.

    Returns:
        str | None: the first existing font path, or None as a fallback,
        in which case FFmpeg must locate a font itself (not always reliable).
    """
    possible_fonts = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
    ]
    for font in possible_fonts:
        if os.path.exists(font):
            return font
    return None


def save_temp_audio(audio_file):
    """Copy the uploaded audio into a private temp directory.

    Args:
        audio_file: either a filesystem path (str) or a file-like object
            with a ``name`` attribute (as produced by gradio uploads).

    Returns:
        Path | None: path of the copied audio file, or None if the input
        is neither a string nor a file-like object.
    """
    if isinstance(audio_file, str):
        ext = Path(audio_file).suffix
        if ext.lower() not in allowed_audios:
            ext = ".mp3"
        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
        # BUGFIX: copy the file the path points at. The previous code did
        # ``f.write(audio_file.encode())``, i.e. it wrote the *path string*
        # itself as the audio payload, producing a broken audio file.
        shutil.copy(audio_file, temp_audio)
        return temp_audio
    if hasattr(audio_file, "name"):
        ext = Path(audio_file.name).suffix
        if ext.lower() not in allowed_audios:
            ext = ".mp3"
        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
        audio_file.seek(0)
        with open(temp_audio, "wb") as f:
            shutil.copyfileobj(audio_file, f)
        return temp_audio
    return None


def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
    """Build an FFmpeg drawtext filter that shows *word* for a time window.

    Args:
        word: the text to display.
        start_time: second at which the word appears.
        duration: how long (seconds) the word stays visible.
        font_option: pre-built ``:fontfile='...'`` fragment or "".
        font_size: font size in pixels.
        y_pos: vertical position factor (0 = top, 1 = bottom).

    Returns:
        str: a single drawtext filter expression.
    """
    # Escape characters that are special inside drawtext text values.
    # Backslash must be escaped first, then ':' and the quote character;
    # the original code only handled ':' and broke on words with ' or \.
    escaped_word = word.replace(FFMPEG_ESCAPE_CHAR, FFMPEG_ESCAPE_CHAR * 2)
    escaped_word = escaped_word.replace(":", f"{FFMPEG_ESCAPE_CHAR}:")
    escaped_word = escaped_word.replace("'", f"{FFMPEG_ESCAPE_CHAR}'")

    end_time = start_time + duration
    return (
        f"drawtext=text='{escaped_word}'{font_option}:fontcolor=white:"
        f"fontsize={font_size}:borderw=2:bordercolor=black:"
        f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
        f"enable='between(t,{start_time},{end_time})'"
    )


def _source_path(media):
    """Return a Path for a gradio upload, accepting str or file-like objects."""
    # Gradio may hand back plain path strings or objects with a .name attr,
    # depending on component version/configuration — tolerate both.
    if isinstance(media, str):
        return Path(media)
    return Path(media.name)


def generate_slideshow_with_audio(images, input_text, duration_per_word,
                                  duration_per_image, fade_duration,
                                  font_size, y_pos, audio_file):
    """Render a 1280x720 slideshow video with per-word text and optional audio.

    The first image is held at least as long as the full text sequence; each
    word of *input_text* is shown on it for *duration_per_word* seconds.
    Remaining images are shown for *duration_per_image* seconds each, all
    with fade in/out. Clips are concatenated and, if *audio_file* is given,
    muxed with it (truncated to the shorter stream).

    Returns:
        tuple[str | None, str]: (output video path or None, status message).
    """
    # Debug print of the effective parameters.
    print(f"DEBUG: Font Size: {font_size}, Y-Pos: {y_pos}, "
          f"Duration/Word: {duration_per_word}, Fade: {fade_duration}")

    if not images:
        return None, "❌ Keine Bilder ausgewählt"

    temp_dir = tempfile.mkdtemp()
    clips_with_text = []

    # Split the text into words; each word gets its own timed drawtext filter.
    words = input_text.split() if input_text else []
    total_text_duration = len(words) * duration_per_word

    # Locate a usable font (empty option string lets FFmpeg search itself).
    font_path = get_font_path()
    font_option = f":fontfile='{font_path}'" if font_path else ""

    # Stage the audio upload, if any.
    temp_audio_file = save_temp_audio(audio_file) if audio_file else None

    # --- 1. FIRST IMAGE (sequential text overlay) ---
    # The first clip lasts at least as long as the whole text sequence.
    duration_clip_1 = max(duration_per_image, total_text_duration)

    # Compute the fade-out start in Python (FFmpeg's st= needs a number,
    # and it must never be negative).
    fade_out_start_1 = max(duration_clip_1 - fade_duration, 0)

    # Build one drawtext filter per word, each enabled in its own window.
    drawtext_filters = []
    current_time = 0.0
    for word in words:
        drawtext_filters.append(
            create_timed_drawtext(word, current_time, duration_per_word,
                                  font_option, font_size, y_pos)
        )
        current_time += duration_per_word

    # Base filters: letterbox to 1280x720, fixed fps, widely-supported pixfmt.
    base_filters = (
        "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
        "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
        "fps=25,format=yuv420p"
    )

    fade_img_filter_1 = (
        f"fade=t=in:st=0:d={fade_duration},"
        f"fade=t=out:st={fade_out_start_1}:d={fade_duration}"
    )

    if drawtext_filters:
        all_drawtext_filters = ",".join(drawtext_filters)
        vf_filters_clip1 = f"{base_filters},{all_drawtext_filters},{fade_img_filter_1}"
    else:
        vf_filters_clip1 = f"{base_filters},{fade_img_filter_1}"

    img_path_1 = _source_path(images[0])
    clip_path_1 = Path(temp_dir) / "clip_with_text_0.mp4"
    cmd_1 = [
        "ffmpeg", "-y",
        "-loop", "1",
        "-i", str(img_path_1),
        "-t", str(duration_clip_1),
        "-vf", vf_filters_clip1,
        str(clip_path_1),
    ]
    try:
        subprocess.run(cmd_1, check=True, capture_output=True, text=True)
        clips_with_text.append(clip_path_1)
    except subprocess.CalledProcessError as e:
        return None, f"❌ FFmpeg Fehler bei Bild 1 (mit Text):\n{e.stderr}"

    # --- 2. FOLLOWING IMAGES (picture with fade only, no text) ---
    for i in range(1, len(images)):
        img_path = _source_path(images[i])
        clip_path = Path(temp_dir) / f"clip_{i}.mp4"

        # Same non-negative fade-out start computation as for clip 1.
        fade_out_start_n = max(duration_per_image - fade_duration, 0)
        fade_img_filter = (
            f"fade=t=in:st=0:d={fade_duration},"
            f"fade=t=out:st={fade_out_start_n}:d={fade_duration}"
        )
        vf_filters_clip = f"{base_filters},{fade_img_filter}"

        cmd = [
            "ffmpeg", "-y",
            "-loop", "1",
            "-i", str(img_path),
            "-t", str(duration_per_image),
            "-vf", vf_filters_clip,
            str(clip_path),
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
            clips_with_text.append(clip_path)
        except subprocess.CalledProcessError as e:
            return None, f"❌ FFmpeg Fehler bei Bild {i+1} (ohne Text):\n{e.stderr}"

    # --- 3. CONCATENATE all clips via the concat demuxer ---
    filelist_path = Path(temp_dir) / "filelist.txt"
    with open(filelist_path, "w") as f:
        for clip in clips_with_text:
            f.write(f"file '{clip}'\n")

    output_video = Path(temp_dir) / f"slideshow_{uuid.uuid4().hex}.mp4"
    cmd_concat = [
        "ffmpeg", "-y",
        "-f", "concat",
        "-safe", "0",
        "-i", str(filelist_path),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        str(output_video),
    ]
    try:
        # BUGFIX: capture stderr; previously e.stderr was None here and the
        # error message to the user literally contained "None".
        subprocess.run(cmd_concat, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        return None, f"❌ FFmpeg Fehler beim Zusammenfügen:\n{e.stderr}"

    # --- 4. MUX AUDIO if provided (video copied, audio re-encoded to AAC) ---
    if temp_audio_file:
        final_output = Path(temp_dir) / f"final_{uuid.uuid4().hex}.mp4"
        cmd_audio = [
            "ffmpeg", "-y",
            "-i", str(output_video),
            "-i", str(temp_audio_file),
            "-c:v", "copy",
            "-c:a", "aac",
            "-shortest",
            str(final_output),
        ]
        try:
            # BUGFIX: same missing-stderr issue as the concat step above.
            subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            return None, f"❌ FFmpeg Fehler beim Hinzufügen von Audio:\n{e.stderr}"
        return str(final_output), "✅ Video mit Audio erstellt!"

    return str(output_video), "✅ Video erstellt (ohne Audio)"


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# Slideshow Generator")
    with gr.Row():
        img_input = gr.Files(label="Bilder", file_types=allowed_medias)
        text_input = gr.Textbox(
            label="Text (Wörter erscheinen nacheinander auf dem ersten Bild)",
            lines=5,
            placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.",
        )
    with gr.Row():
        duration_image_input = gr.Number(
            value=3,
            label="Dauer pro BILD (s) [für Bild 2+ und Min-Dauer für Bild 1]",
        )
        duration_word_input = gr.Number(
            value=1.0,
            label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]",
        )
        fade_input = gr.Number(value=0.5, label="Fade Dauer (s)")
        font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
        ypos_input = gr.Slider(0.0, 1.0, value=0.9,
                               label="Y-Position (0=Oben, 1=Unten)")
    audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
    btn = gr.Button("Erstellen", variant="primary")
    out_video = gr.Video(label="Ergebnis")
    status = gr.Textbox(label="Status")

    # Input order must match the signature of generate_slideshow_with_audio:
    # (images, input_text, duration_per_word, duration_per_image,
    #  fade_duration, font_size, y_pos, audio_file)
    btn.click(
        fn=generate_slideshow_with_audio,
        inputs=[
            img_input,
            text_input,
            duration_word_input,
            duration_image_input,
            fade_input,
            font_size_input,
            ypos_input,
            audio_input,
        ],
        outputs=[out_video, status],
    )

# Guard the launch so importing this module does not start a web server.
if __name__ == "__main__":
    demo.launch()