Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import tempfile | |
| from pathlib import Path | |
| import uuid | |
| import subprocess | |
| import shutil | |
| import os | |
# Fade-in/fade-out duration applied to every single word, in seconds (e.g. 0.2)
WORD_FADE_DURATION = 0.2
# A single backslash, used to escape special characters in FFmpeg filter strings
FFMPEG_ESCAPE_CHAR = "\\"
# Accepted file extensions for the image and audio uploads
allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
def get_font_path():
    """Locate a bold sans-serif TrueType font at well-known Linux paths.

    The candidates are probed in a fixed order and the first one that
    exists on disk wins.

    Returns:
        The path of the first existing font file, or ``None`` when none of
        the candidates is present. In that case the caller omits the font
        option and leaves the lookup to FFmpeg (per the original author's
        note, this does not always work).
    """
    candidates = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
    )
    return next((path for path in candidates if os.path.exists(path)), None)
def save_temp_audio(audio_file_path):
    """Copy the uploaded audio file into a freshly created temp directory.

    Args:
        audio_file_path: Path string of the upload as delivered by Gradio.

    Returns:
        A ``(directory, file)`` pair of ``Path`` objects on success, so the
        caller can delete the directory once it is done with the file.
        ``(None, None)`` when no path was given or the copy failed.
    """
    if not audio_file_path:
        return None, None

    source = Path(audio_file_path)

    # Files with an extension outside ``allowed_audios`` are stored as .mp3.
    suffix = source.suffix
    if suffix.lower() not in allowed_audios:
        suffix = ".mp3"

    work_dir = Path(tempfile.mkdtemp())
    target = work_dir / f"input{suffix}"

    try:
        shutil.copyfile(source, target)
    except Exception as e:
        # Best effort: report the problem, drop the now useless directory
        # and signal "no audio" to the caller.
        print(f"Fehler beim Kopieren der Audiodatei: {e}")
        if work_dir.exists():
            shutil.rmtree(work_dir)
        return None, None

    return work_dir, target
def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos, fade_duration=None):
    """Build an FFmpeg ``drawtext`` filter that fades one word in and out.

    The word is drawn horizontally centred, in white with a black border,
    and its opacity is driven by an ``alpha`` expression over the clip
    time ``t``.

    Args:
        word: Text to draw. Colons, single quotes, backslashes and percent
            signs are escaped so they cannot break the filter string.
        start_time: Time in seconds (relative to the clip start) at which
            the word starts to appear.
        duration: Total time in seconds the word is shown, fades included.
        font_option: Pre-formatted ``:fontfile='...'`` fragment, or an
            empty string to let FFmpeg choose the font.
        font_size: Font size passed to ``fontsize``.
        y_pos: Vertical position as a fraction of the free height.
        fade_duration: Length of each fade in seconds. Defaults to the
            module-level ``WORD_FADE_DURATION`` when ``None``.

    Returns:
        The ``drawtext=...`` filter string.
    """
    if fade_duration is None:
        fade_duration = WORD_FADE_DURATION

    # The text passes through three parsers: the filtergraph parser (which
    # strips the surrounding single quotes), the option parser (':' is the
    # separator, '\' escapes) and drawtext's own text expansion ('%' and
    # '\' are special). Backslashes must be handled first so the escapes
    # added afterwards are not doubled again.
    escaped_word = (
        word.replace("\\", "\\\\\\\\")
        # Close the quoted section, emit an escaped quote, reopen it.
        .replace("'", "'\\\\\\''")
        .replace("%", "\\\\%")
        .replace(":", "\\:")
    )

    end_time = start_time + duration

    # Never let the two fades overlap: each one gets at most half of the
    # word's display time. Otherwise a short word would stay visible past
    # its end time and bleed into the next word.
    fade = max(0.0, min(fade_duration, duration / 2))

    if fade > 0:
        fade_in_end = start_time + fade
        fade_out_start = end_time - fade
        # Piecewise opacity: 0 before start, ramp up, hold 1, ramp down, 0.
        alpha_expression = (
            f"if(lt(t,{start_time}), 0, "
            f"if(lt(t,{fade_in_end}), (t-{start_time})/{fade}, "
            f"if(lt(t,{fade_out_start}), 1, "
            f"if(lt(t,{end_time}), ({end_time}-t)/{fade}, 0))))"
        )
    else:
        # No room for a fade: hard on/off, avoiding a division by zero.
        alpha_expression = f"if(gte(t,{start_time})*lt(t,{end_time}), 1, 0)"

    return (
        f"drawtext=text='{escaped_word}'{font_option}:fontcolor=white:fontsize={font_size}:borderw=2:bordercolor=black:"
        f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
        f"alpha='{alpha_expression}'"
    )
def _run_ffmpeg(cmd):
    """Run one FFmpeg command; return ``None`` on success, else the error text."""
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        return e.stderr
    except OSError as e:
        # E.g. the ffmpeg executable is missing or not executable.
        return str(e)
    return None


def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
    """Render a slideshow video from images, timed words and optional audio.

    The words of ``input_text`` are distributed as evenly as possible over
    the images (the first images receive the surplus words). Each image
    becomes one clip that is scaled and padded to 1280x720 at 25 fps, shows
    its words one after another and fades in and out. The clips are then
    concatenated and, if an audio file was supplied and could be copied,
    the audio track is muxed in with ``-shortest``.

    Args:
        images: Uploaded images; each item is either a path or an object
            exposing the path as ``.name``.
        input_text: Text whose whitespace-separated words are overlaid.
        duration_per_word: Seconds each word is shown.
        duration_per_image: Minimum seconds each image is shown; a clip is
            extended when its words need more time.
        fade_duration: Seconds of the image fade-in and fade-out.
        font_size: Font size of the overlaid words.
        y_pos: Vertical text position as a fraction (0 = top, 1 = bottom).
        audio_file: Path of an optional audio file, or a falsy value.

    Returns:
        ``(video_path, status_message)``; ``video_path`` is ``None`` when
        the input was invalid or an FFmpeg step failed. On success the
        video lives in a temp directory that is intentionally kept.
    """
    if not images:
        return None, "❌ Keine Bilder ausgewählt"

    # Empty number fields arrive as None; reject them before doing any work.
    numeric_inputs = (duration_per_word, duration_per_image, fade_duration, font_size, y_pos)
    if any(value is None for value in numeric_inputs):
        return None, "❌ Bitte alle Zahlenfelder ausfüllen"
    if duration_per_word < 0 or duration_per_image < 0 or fade_duration < 0:
        return None, "❌ Dauer-Werte dürfen nicht negativ sein"

    words = input_text.split() if input_text else []
    num_images = len(images)
    # Even distribution: the first `remainder` clips get one extra word.
    base_words_per_clip, remainder = divmod(len(words), num_images)

    font_path = get_font_path()
    font_option = f":fontfile='{font_path}'" if font_path else ""

    # Identical for every clip, so built once outside the loop.
    base_filters = (
        "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
        "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
        "fps=25,format=yuv420p"
    )

    temp_dir = Path(tempfile.mkdtemp())
    audio_temp_dir = None
    keep_temp_dir = False  # set only when a finished video is returned
    try:
        audio_temp_dir, temp_audio_file = save_temp_audio(audio_file) if audio_file else (None, None)

        # --- 1. Render one clip per image with its word segment ---
        clips_with_text = []
        current_word_index = 0
        for i, image in enumerate(images):
            # Accept both file-like objects (.name) and plain path strings.
            img_path = Path(getattr(image, "name", image))
            clip_path = temp_dir / f"clip_with_text_{i}.mp4"

            words_on_this_clip = base_words_per_clip + (1 if i < remainder else 0)
            word_segment = words[current_word_index:current_word_index + words_on_this_clip]
            current_word_index += len(word_segment)

            # The clip lasts long enough for both the image and its words.
            duration_clip = max(duration_per_image, len(word_segment) * duration_per_word)
            if duration_clip <= 0:
                return None, f"❌ Bild {i+1} hätte eine Dauer von 0 Sekunden"

            # Word start times are relative to the start of this clip.
            drawtext_filters = []
            word_start_time = 0.0
            for word in word_segment:
                drawtext_filters.append(
                    create_timed_drawtext(word, word_start_time, duration_per_word, font_option, font_size, y_pos)
                )
                word_start_time += duration_per_word

            fade_out_start = max(duration_clip - fade_duration, 0)
            fade_img_filter = f"fade=t=in:st=0:d={fade_duration},fade=t=out:st={fade_out_start}:d={fade_duration}"

            # The image fade comes last so the text fades with the image.
            vf_filters_clip = ",".join([base_filters, *drawtext_filters, fade_img_filter])

            cmd = [
                "ffmpeg", "-y", "-loop", "1", "-i", str(img_path),
                "-t", str(duration_clip),
                "-vf", vf_filters_clip,
                str(clip_path),
            ]
            error = _run_ffmpeg(cmd)
            if error is not None:
                return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{error}"
            clips_with_text.append(clip_path)

        # --- 2. Concatenate the clips ---
        filelist_path = temp_dir / "filelist.txt"
        with open(filelist_path, "w", encoding="utf-8") as f:
            for clip in clips_with_text:
                # A single quote inside a quoted concat entry must be
                # written as: close quote, escaped quote, reopen quote.
                quoted = str(clip).replace("'", "'\\''")
                f.write(f"file '{quoted}'\n")

        output_video = temp_dir / f"slideshow_{uuid.uuid4().hex}.mp4"
        cmd_concat = [
            "ffmpeg", "-y", "-f", "concat", "-safe", "0",
            "-i", str(filelist_path),
            "-c:v", "libx264", "-pix_fmt", "yuv420p",
            str(output_video),
        ]
        error = _run_ffmpeg(cmd_concat)
        if error is not None:
            return None, f"❌ FFmpeg Fehler beim Zusammenfügen:\n{error}"

        # --- 3. Add audio (if available) ---
        if not temp_audio_file:
            keep_temp_dir = True
            return str(output_video), "✅ Video erstellt (ohne Audio)"

        final_output = temp_dir / f"final_{uuid.uuid4().hex}.mp4"
        cmd_audio = [
            "ffmpeg", "-y", "-i", str(output_video), "-i", str(temp_audio_file),
            "-c:v", "copy", "-c:a", "aac", "-shortest",
            str(final_output),
        ]
        error = _run_ffmpeg(cmd_audio)
        if error is not None:
            return None, f"❌ FFmpeg Fehler beim Hinzufügen von Audio:\n{error}"

        keep_temp_dir = True
        return str(final_output), "✅ Video mit Audio erstellt!"
    finally:
        # The copied audio is never needed after this call; the working
        # directory is only kept when it holds the returned video.
        if audio_temp_dir:
            shutil.rmtree(audio_temp_dir, ignore_errors=True)
        if not keep_temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
# Gradio UI
# NOTE(review): the original indentation was lost; the nesting below (which
# components sit inside which Row) is reconstructed and should be confirmed.
with gr.Blocks() as demo:
    gr.Markdown("# Slideshow Generator")
    with gr.Row():
        img_input = gr.Files(label="Bilder", file_types=allowed_medias)
        # Label describes how the text is distributed across the images
        text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
    with gr.Row():
        duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
        duration_word_input = gr.Number(value=1.0, label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]")
        fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)")
        font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
        ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)")
    audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
    btn = gr.Button("Erstellen", variant="primary")
    out_video = gr.Video(label="Ergebnis")
    status = gr.Textbox(label="Status")
    # The inputs must be listed in the parameter order of the handler:
    # (images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file)
    # Note that the word duration comes before the image duration here,
    # although the widgets are created in the opposite order above.
    btn.click(
        fn=generate_slideshow_with_audio,
        inputs=[
            img_input,
            text_input,
            duration_word_input,
            duration_image_input,
            fade_input,
            font_size_input,
            ypos_input,
            audio_input
        ],
        outputs=[out_video, status]
    )
# Start the Gradio server as soon as the module is executed.
demo.launch()