import os
import shutil
import subprocess
import tempfile
import uuid
from pathlib import Path

import gradio as gr

# Escape character used when sanitising text for FFmpeg drawtext filters.
# Kept in a module constant so f-strings never need a literal backslash.
FFMPEG_ESCAPE_CHAR = "\\"

# Accepted input file extensions.
allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]


def get_font_path():
    """Return the path of a common bold TTF font on this Linux system.

    Returns:
        str | None: the first existing font path, or None as a fallback,
        in which case FFmpeg must locate a font itself (not always reliable).
    """
    possible_fonts = [
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
    ]
    for font in possible_fonts:
        if os.path.exists(font):
            return font
    return None


def save_temp_audio(audio_file):
    """Copy the uploaded audio into a private temp directory.

    Args:
        audio_file: either a filesystem path (str) or a file-like object
            with a ``name`` attribute (as produced by gradio uploads).

    Returns:
        Path | None: path of the copied audio file, or None if the input
        is neither a string nor a file-like object.
    """
    if isinstance(audio_file, str):
        ext = Path(audio_file).suffix
        if ext.lower() not in allowed_audios:
            ext = ".mp3"
        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
        # BUGFIX: copy the file the path points at. The previous code did
        # ``f.write(audio_file.encode())``, i.e. it wrote the *path string*
        # itself as the audio payload, producing a broken audio file.
        shutil.copy(audio_file, temp_audio)
        return temp_audio
    if hasattr(audio_file, "name"):
        ext = Path(audio_file.name).suffix
        if ext.lower() not in allowed_audios:
            ext = ".mp3"
        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
        audio_file.seek(0)
        with open(temp_audio, "wb") as f:
            shutil.copyfileobj(audio_file, f)
        return temp_audio
    return None


def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
    """Build an FFmpeg drawtext filter that shows *word* for a time window.

    Args:
        word: the text to display.
        start_time: second at which the word appears.
        duration: how long (seconds) the word stays visible.
        font_option: pre-built ``:fontfile='...'`` fragment or "".
        font_size: font size in pixels.
        y_pos: vertical position factor (0 = top, 1 = bottom).

    Returns:
        str: a single drawtext filter expression.
    """
    # Escape characters that are special inside drawtext text values.
    # Backslash must be escaped first, then ':' and the quote character;
    # the original code only handled ':' and broke on words with ' or \.
    escaped_word = word.replace(FFMPEG_ESCAPE_CHAR, FFMPEG_ESCAPE_CHAR * 2)
    escaped_word = escaped_word.replace(":", f"{FFMPEG_ESCAPE_CHAR}:")
    escaped_word = escaped_word.replace("'", f"{FFMPEG_ESCAPE_CHAR}'")

    end_time = start_time + duration
    return (
        f"drawtext=text='{escaped_word}'{font_option}:fontcolor=white:"
        f"fontsize={font_size}:borderw=2:bordercolor=black:"
        f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
        f"enable='between(t,{start_time},{end_time})'"
    )


def _source_path(media):
    """Return a Path for a gradio upload, accepting str or file-like objects."""
    # Gradio may hand back plain path strings or objects with a .name attr,
    # depending on component version/configuration — tolerate both.
    if isinstance(media, str):
        return Path(media)
    return Path(media.name)


def generate_slideshow_with_audio(images, input_text, duration_per_word,
                                  duration_per_image, fade_duration,
                                  font_size, y_pos, audio_file):
    """Render a 1280x720 slideshow video with per-word text and optional audio.

    The first image is held at least as long as the full text sequence; each
    word of *input_text* is shown on it for *duration_per_word* seconds.
    Remaining images are shown for *duration_per_image* seconds each, all
    with fade in/out. Clips are concatenated and, if *audio_file* is given,
    muxed with it (truncated to the shorter stream).

    Returns:
        tuple[str | None, str]: (output video path or None, status message).
    """
    # Debug print of the effective parameters.
    print(f"DEBUG: Font Size: {font_size}, Y-Pos: {y_pos}, "
          f"Duration/Word: {duration_per_word}, Fade: {fade_duration}")

    if not images:
        return None, "❌ Keine Bilder ausgewählt"

    temp_dir = tempfile.mkdtemp()
    clips_with_text = []

    # Split the text into words; each word gets its own timed drawtext filter.
    words = input_text.split() if input_text else []
    total_text_duration = len(words) * duration_per_word

    # Locate a usable font (empty option string lets FFmpeg search itself).
    font_path = get_font_path()
    font_option = f":fontfile='{font_path}'" if font_path else ""

    # Stage the audio upload, if any.
    temp_audio_file = save_temp_audio(audio_file) if audio_file else None

    # --- 1. FIRST IMAGE (sequential text overlay) ---
    # The first clip lasts at least as long as the whole text sequence.
    duration_clip_1 = max(duration_per_image, total_text_duration)

    # Compute the fade-out start in Python (FFmpeg's st= needs a number,
    # and it must never be negative).
    fade_out_start_1 = max(duration_clip_1 - fade_duration, 0)

    # Build one drawtext filter per word, each enabled in its own window.
    drawtext_filters = []
    current_time = 0.0
    for word in words:
        drawtext_filters.append(
            create_timed_drawtext(word, current_time, duration_per_word,
                                  font_option, font_size, y_pos)
        )
        current_time += duration_per_word

    # Base filters: letterbox to 1280x720, fixed fps, widely-supported pixfmt.
    base_filters = (
        "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
        "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
        "fps=25,format=yuv420p"
    )

    fade_img_filter_1 = (
        f"fade=t=in:st=0:d={fade_duration},"
        f"fade=t=out:st={fade_out_start_1}:d={fade_duration}"
    )

    if drawtext_filters:
        all_drawtext_filters = ",".join(drawtext_filters)
        vf_filters_clip1 = f"{base_filters},{all_drawtext_filters},{fade_img_filter_1}"
    else:
        vf_filters_clip1 = f"{base_filters},{fade_img_filter_1}"

    img_path_1 = _source_path(images[0])
    clip_path_1 = Path(temp_dir) / "clip_with_text_0.mp4"
    cmd_1 = [
        "ffmpeg", "-y",
        "-loop", "1",
        "-i", str(img_path_1),
        "-t", str(duration_clip_1),
        "-vf", vf_filters_clip1,
        str(clip_path_1),
    ]
    try:
        subprocess.run(cmd_1, check=True, capture_output=True, text=True)
        clips_with_text.append(clip_path_1)
    except subprocess.CalledProcessError as e:
        return None, f"❌ FFmpeg Fehler bei Bild 1 (mit Text):\n{e.stderr}"

    # --- 2. FOLLOWING IMAGES (picture with fade only, no text) ---
    for i in range(1, len(images)):
        img_path = _source_path(images[i])
        clip_path = Path(temp_dir) / f"clip_{i}.mp4"

        # Same non-negative fade-out start computation as for clip 1.
        fade_out_start_n = max(duration_per_image - fade_duration, 0)
        fade_img_filter = (
            f"fade=t=in:st=0:d={fade_duration},"
            f"fade=t=out:st={fade_out_start_n}:d={fade_duration}"
        )
        vf_filters_clip = f"{base_filters},{fade_img_filter}"

        cmd = [
            "ffmpeg", "-y",
            "-loop", "1",
            "-i", str(img_path),
            "-t", str(duration_per_image),
            "-vf", vf_filters_clip,
            str(clip_path),
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
            clips_with_text.append(clip_path)
        except subprocess.CalledProcessError as e:
            return None, f"❌ FFmpeg Fehler bei Bild {i+1} (ohne Text):\n{e.stderr}"

    # --- 3. CONCATENATE all clips via the concat demuxer ---
    filelist_path = Path(temp_dir) / "filelist.txt"
    with open(filelist_path, "w") as f:
        for clip in clips_with_text:
            f.write(f"file '{clip}'\n")

    output_video = Path(temp_dir) / f"slideshow_{uuid.uuid4().hex}.mp4"
    cmd_concat = [
        "ffmpeg", "-y",
        "-f", "concat",
        "-safe", "0",
        "-i", str(filelist_path),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        str(output_video),
    ]
    try:
        # BUGFIX: capture stderr; previously e.stderr was None here and the
        # error message to the user literally contained "None".
        subprocess.run(cmd_concat, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        return None, f"❌ FFmpeg Fehler beim Zusammenfügen:\n{e.stderr}"

    # --- 4. MUX AUDIO if provided (video copied, audio re-encoded to AAC) ---
    if temp_audio_file:
        final_output = Path(temp_dir) / f"final_{uuid.uuid4().hex}.mp4"
        cmd_audio = [
            "ffmpeg", "-y",
            "-i", str(output_video),
            "-i", str(temp_audio_file),
            "-c:v", "copy",
            "-c:a", "aac",
            "-shortest",
            str(final_output),
        ]
        try:
            # BUGFIX: same missing-stderr issue as the concat step above.
            subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            return None, f"❌ FFmpeg Fehler beim Hinzufügen von Audio:\n{e.stderr}"
        return str(final_output), "✅ Video mit Audio erstellt!"

    return str(output_video), "✅ Video erstellt (ohne Audio)"


# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# Slideshow Generator")
    with gr.Row():
        img_input = gr.Files(label="Bilder", file_types=allowed_medias)
        text_input = gr.Textbox(
            label="Text (Wörter erscheinen nacheinander auf dem ersten Bild)",
            lines=5,
            placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.",
        )
    with gr.Row():
        duration_image_input = gr.Number(
            value=3,
            label="Dauer pro BILD (s) [für Bild 2+ und Min-Dauer für Bild 1]",
        )
        duration_word_input = gr.Number(
            value=1.0,
            label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]",
        )
        fade_input = gr.Number(value=0.5, label="Fade Dauer (s)")
        font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
        ypos_input = gr.Slider(0.0, 1.0, value=0.9,
                               label="Y-Position (0=Oben, 1=Unten)")
    audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
    btn = gr.Button("Erstellen", variant="primary")
    out_video = gr.Video(label="Ergebnis")
    status = gr.Textbox(label="Status")

    # Input order must match the signature of generate_slideshow_with_audio:
    # (images, input_text, duration_per_word, duration_per_image,
    #  fade_duration, font_size, y_pos, audio_file)
    btn.click(
        fn=generate_slideshow_with_audio,
        inputs=[
            img_input,
            text_input,
            duration_word_input,
            duration_image_input,
            fade_input,
            font_size_input,
            ypos_input,
            audio_input,
        ],
        outputs=[out_video, status],
    )

# Guard the launch so importing this module does not start a web server.
if __name__ == "__main__":
    demo.launch()