video-ffmpeg

Sleeping

File size: 5,417 Bytes

e5b621e
085d5e6
1b78077
e5b621e
1b78077
fef9da1
6dc3853
0b567d9
e5b621e
500f777
e5b621e
500f777
 
fef9da1
 
 
 
b6a8e09
6dc3853
 
 
 
 
b6a8e09
6dc3853
b6a8e09
ffc2502
 
b6a8e09
ffc2502
b6a8e09
ffc2502
b6a8e09
ffc2502
 
6dc3853
fef9da1
fd9d93c
ea1c088
 
c9a86ba
 
2a9840f
1b78077
d24cfba
fd9d93c
 
 
 
 
 
 
 
 
 
 
d24cfba
1b78077
b6a8e09
1b78077
d24cfba
 
7e5bf87
 
 
 
 
0604b6a
d24cfba
b6a8e09
0604b6a
feee382
029e266
c9a86ba
2ab5e5e
7e5bf87
085ad5b
36bfe7e
 
 
 
 
c9a86ba
0604b6a
1b78077
36bfe7e
085ad5b
0604b6a
085ad5b
0604b6a
085ad5b
1b78077
36bfe7e
c9a86ba
d24cfba
1b78077
 
 
 
d24cfba
1b78077
 
 
 
 
 
ea75fd0
 
d24cfba
1b78077
085ad5b
0604b6a
085ad5b
 
e5b621e
fd9d93c
36bfe7e
 
e5b621e
fd9d93c
36bfe7e
1b78077
fd9d93c
 
 
 
500f777
ea1c088
2ab5e5e
0b567d9
 
b6a8e09
 
0b567d9
 
 
 
 
 
fd9d93c
0b567d9
 
 
fd9d93c

import gradio as gr
import tempfile
from pathlib import Path
import uuid
import subprocess
import shutil
import io  # Für NamedString-Handling
import shlex  # Für sicheres Escapen von Text

# Allowed file formats
allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]

def save_temp_audio(audio_file):
    """
    Copy an uploaded audio file into a fresh temporary directory.

    The copy is named ``input<ext>``; when the source extension is not listed
    in ``allowed_audios`` the name falls back to ``input.mp3``.

    Args:
        audio_file: Either a filesystem path (``str``, including ``str``
            subclasses that carry the path as their value, or ``Path``), or
            an object with a ``name`` attribute. Such an object is read as a
            binary stream when it has ``read``; otherwise ``name`` is treated
            as the path of the file to copy.

    Returns:
        ``Path`` of the copied file inside a new ``tempfile.mkdtemp()`` directory.

    Raises:
        ValueError: If the argument is neither a path nor an object with a
            ``name`` attribute, or if the given path is not an existing file.
    """
    if isinstance(audio_file, (str, Path)):
        # A string is a *path* to the upload, not the audio data itself, so
        # the referenced file must be copied rather than the string encoded.
        source_path = Path(audio_file)
        stream = None
    elif hasattr(audio_file, "name"):
        source_path = Path(str(audio_file.name))
        stream = audio_file if hasattr(audio_file, "read") else None
    else:
        raise ValueError("Das übergebene Audio ist kein gültiges Dateiformat oder NamedString.")

    if stream is None and not source_path.is_file():
        raise ValueError(f"Die Audiodatei wurde nicht gefunden: {source_path}")

    ext = source_path.suffix
    if ext.lower() not in allowed_audios:
        ext = ".mp3"  # default when the extension is missing or not allowed

    temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
    if stream is None:
        shutil.copyfile(source_path, temp_audio)
    else:
        # Rewind first: the stream may already have been read by the caller.
        stream.seek(0)
        with open(temp_audio, "wb") as f:
            shutil.copyfileobj(stream, f)
    return temp_audio

def _split_words_evenly(words, parts):
    """Join *words* into *parts* consecutive text chunks without dropping any word."""
    base, extra = divmod(len(words), parts)
    chunks = []
    start = 0
    for index in range(parts):
        # The first `extra` chunks take one additional word so the remainder
        # of the division is distributed instead of being discarded.
        end = start + base + (1 if index < extra else 0)
        chunks.append(" ".join(words[start:end]))
        start = end
    return chunks


def _run_ffmpeg(cmd, cwd):
    """Run an ffmpeg command in *cwd*; return None on success, else the error text."""
    try:
        subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            errors="replace",  # ffmpeg output is not guaranteed to be valid UTF-8
            stdin=subprocess.DEVNULL,  # keep ffmpeg from reading the parent's stdin
            cwd=str(cwd),
        )
    except subprocess.CalledProcessError as e:
        return e.stderr
    except OSError as e:
        # Raised when the ffmpeg executable is missing or cannot be started.
        return str(e)
    return None


def generate_slideshow_with_audio(images, input_text, duration_per_image=3, y_pos=0.5, fade_duration=0.7, font_size=60, speed=1.0):
    """
    Render one 1280x720 clip per image with an optional text overlay and
    concatenate the clips into a single MP4 using the ``ffmpeg`` executable.

    The words of ``input_text`` are distributed over the images in order; an
    image that receives no words is rendered without overlay. Despite the
    function name, no audio is added to the output.

    Args:
        images: Image paths (``str``/``Path``) or objects exposing the path
            via a ``name`` attribute.
        input_text: Text to overlay; ``None`` or empty means no overlay.
        duration_per_image: Clip length per image in seconds; must be > 0.
        y_pos: Vertical text position as a fraction of the free height,
            clamped to the range 0.0-0.9.
        fade_duration: Fade-in/fade-out time of the text in seconds. Values
            of 0 or below disable the fade; the value is capped at half the
            clip length so fade-in and fade-out cannot overlap.
        font_size: Font size passed to ffmpeg's ``drawtext`` filter.
        speed: Accepted for interface compatibility; currently not used.

    Returns:
        ``(path, status)``: the output file path as ``str`` and a success
        message, or ``(None, error_message)`` when validation or ffmpeg fails.
    """
    if not images:
        return None, "❌ Keine Bilder ausgewählt"
    if duration_per_image is None or duration_per_image <= 0:
        return None, "❌ Dauer pro Bild muss größer als 0 sein"

    y_pos = min(max(0.0, y_pos), 0.9)
    fade = min(max(0.0, fade_duration or 0.0), duration_per_image / 2)
    texts = _split_words_evenly((input_text or "").split(), len(images))

    work_dir = Path(tempfile.mkdtemp())
    clips = []

    for i, (image, text) in enumerate(zip(images, texts)):
        # Uploads may arrive as plain paths or as objects carrying the path in
        # `.name`. The path is made absolute because ffmpeg runs with
        # `work_dir` as its working directory.
        source = image if isinstance(image, (str, Path)) else image.name
        img_path = Path(source).absolute()
        clip_path = work_dir / f"clip_{i}.mp4"

        vf_filters = (
            "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
            "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
            "fps=25,format=yuv420p"
        )

        if text:
            # Hand the text over via a file instead of embedding it in the
            # filter string: shell quoting does not match ffmpeg's filter
            # escaping, so quotes, colons or backslashes in user text would
            # break the filter. `expansion=none` keeps '%' and '\' literal.
            # The file is referenced by its bare name (resolved against the
            # working directory), so the path needs no escaping either.
            text_file = work_dir / f"text_{i}.txt"
            text_file.write_text(text, encoding="utf-8")
            vf_filters += (
                f",drawtext=textfile={text_file.name}:expansion=none:"
                f"fontcolor=white:fontsize={font_size}:borderw=2:"
                f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
            )
            if fade > 0:
                # Skipped for fade == 0, which would divide by zero in the expression.
                vf_filters += (
                    f":alpha='if(lt(t,{fade}), t/{fade}, "
                    f"if(lt(t,{duration_per_image}-{fade}), 1, ({duration_per_image}-t)/{fade}))'"
                )

        cmd = [
            "ffmpeg",
            "-y",
            "-loop", "1",
            "-i", str(img_path),
            "-t", str(duration_per_image),
            "-vf", vf_filters,
            str(clip_path)
        ]
        error = _run_ffmpeg(cmd, work_dir)
        if error is not None:
            shutil.rmtree(work_dir, ignore_errors=True)
            return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{error}"

        clips.append(clip_path)

    # Join the clips with ffmpeg's concat demuxer
    filelist_path = work_dir / "filelist.txt"
    with open(filelist_path, "w", encoding="utf-8") as f:
        for clip in clips:
            f.write(f"file '{clip}'\n")

    output_file = work_dir / f"slideshow_{uuid.uuid4().hex}.mp4"
    cmd_concat = [
        "ffmpeg",
        "-y",
        "-f", "concat",
        "-safe", "0",
        "-i", str(filelist_path),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        str(output_file)
    ]
    error = _run_ffmpeg(cmd_concat, work_dir)
    if error is not None:
        shutil.rmtree(work_dir, ignore_errors=True)
        return None, f"❌ FFmpeg Concat Fehler:\n{error}"

    # Only the final video has to outlive this call; drop the intermediates.
    for leftover in work_dir.iterdir():
        if leftover != output_file:
            try:
                leftover.unlink()
            except OSError:
                # Best-effort cleanup: a leftover temp file must not turn a
                # successful render into a failure.
                pass

    return str(output_file), "✅ Slideshow mit Text erstellt"

# Gradio UI: input widgets first, then the result widgets and the trigger button.
with gr.Blocks() as demo:
    gr.Markdown("# Slideshow mit Manuellem Text")

    # Upload restricted to the extensions listed in allowed_medias.
    img_input = gr.Files(
        file_types=allowed_medias,
        label="Bilder auswählen (mehrere)",
    )
    text_input = gr.Textbox(
        lines=5,
        label="Text eingeben",
        placeholder="Gib hier den Text ein, der in den Bildern angezeigt werden soll",
    )

    # Timing controls
    duration_input = gr.Number(label="Dauer pro Bild in Sekunden", value=3, precision=1)
    fade_input = gr.Number(label="Fade Dauer in Sekunden", value=0.7, precision=1)

    # Text placement and appearance
    ypos_input = gr.Slider(
        label="Y-Position für alle Texte (0=oben, 0.5=mitte, 0.9=unten)",
        value=0.5,
        minimum=0.0,
        maximum=0.9,
        step=0.01,
    )
    font_size_input = gr.Number(label="Textgröße (px)", value=60)
    speed_input = gr.Slider(
        label="Geschwindigkeit der Texteinblendung",
        value=1.0,
        minimum=0.1,
        maximum=3.0,
    )

    # Read-only result widgets
    out_video = gr.Video(label="Generiertes Video", interactive=False)
    status = gr.Textbox(label="Status", interactive=False)

    btn = gr.Button("Video erstellen")
    # The inputs are listed in the positional parameter order of
    # generate_slideshow_with_audio; its two return values fill the outputs.
    btn.click(
        fn=generate_slideshow_with_audio,
        inputs=[
            img_input,
            text_input,
            duration_input,
            ypos_input,
            fade_input,
            font_size_input,
            speed_input,
        ],
        outputs=[out_video, status],
    )

demo.launch()