# NOTE: removed non-Python scrape residue from the hosting page
# (Spaces status header, file-size line, git blame hashes, line-number gutter).
import gradio as gr
import tempfile
from pathlib import Path
import uuid
import subprocess
import requests
import base64
import math
import shutil
import io # Für NamedString-Handling
import shlex # Für sicheres Escapen von Text
# Allowed upload formats.
# Image extensions accepted as slideshow frames.
allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
# Audio extensions accepted for the optional soundtrack.
allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
# Pollinations OpenAI-compatible chat endpoint used for audio transcription.
API_URL = "https://text.pollinations.ai/openai"
def save_temp_audio(audio_file, allowed_exts=(".mp3", ".wav", ".m4a", ".ogg")):
    """
    Copy an uploaded audio file into a fresh temporary directory, keeping a
    recognised extension so downstream tools (ffmpeg) can detect the format.

    Args:
        audio_file: Filesystem path (str) to the uploaded file, or a
            file-like object exposing a ``name`` attribute (e.g. Gradio's
            NamedString / tempfile wrapper).
        allowed_exts: Extensions considered valid; anything else falls back
            to ``.mp3``. The default mirrors the module-level
            ``allowed_audios`` list.

    Returns:
        pathlib.Path: Location of the copied temporary audio file.

    Raises:
        ValueError: If ``audio_file`` is neither a path string nor a
            file-like object with a ``name`` attribute.
    """
    if isinstance(audio_file, str):
        ext = Path(audio_file).suffix
        if ext.lower() not in allowed_exts:
            ext = ".mp3"  # default when the extension is missing/unknown
        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
        # Bug fix: the original wrote the bytes of the *path string* itself
        # (audio_file.encode()) into the temp file; copy the file's contents.
        shutil.copyfile(audio_file, temp_audio)
        return temp_audio
    elif hasattr(audio_file, 'name'):
        ext = Path(audio_file.name).suffix
        if ext.lower() not in allowed_exts:
            ext = ".mp3"
        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
        audio_file.seek(0)  # rewind in case the upload was already read
        with open(temp_audio, "wb") as f:
            shutil.copyfileobj(audio_file, f)
        return temp_audio
    else:
        raise ValueError("Das übergebene Audio ist kein gültiges Dateiformat oder NamedString.")
def convert_to_wav(audio_path):
    """Re-encode *audio_path* to a 16 kHz mono WAV file next to the source.

    Returns the path of the generated ``.wav`` file. Raises
    ``subprocess.CalledProcessError`` if ffmpeg exits with an error.
    """
    source = Path(audio_path)
    target = source.with_suffix(".wav")
    ffmpeg_cmd = [
        "ffmpeg", "-y",
        "-i", str(source),
        "-ar", "16000",  # 16 kHz sample rate
        "-ac", "1",      # downmix to mono
        str(target),
    ]
    subprocess.run(ffmpeg_cmd, check=True, capture_output=True, text=True)
    return target
def transcribe_audio(audio_file):
    """
    Transcribe an uploaded audio file via the Pollinations OpenAI-compatible API.

    The upload is copied to a temp location, converted to 16 kHz mono WAV,
    then sent base64-encoded to ``API_URL``.

    Args:
        audio_file: Path string or file-like object accepted by
            ``save_temp_audio``.

    Returns:
        tuple: ``(text, None)`` on success, ``(None, error_message)`` on
        failure. Bug fix: the original returned a bare string on success
        while returning a 2-tuple on error, so callers unpacking
        ``transcript, err = transcribe_audio(...)`` crashed on success.
    """
    temp_audio = save_temp_audio(audio_file)
    wav_file = convert_to_wav(temp_audio)
    with open(wav_file, "rb") as f:
        audio_data = base64.b64encode(f.read()).decode()
    payload = {
        "model": "openai-audio",
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": "Transcribe this audio:"},
                {"type": "input_audio", "input_audio": {"data": audio_data, "format": "wav"}}
            ]
        }]
    }
    try:
        # Timeout added so a stalled API call cannot hang the UI forever.
        response = requests.post(API_URL, json=payload, timeout=120)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"❌ API Fehler: {e}"
    try:
        text = response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError) as e:
        # Malformed/unexpected API response must not crash the caller.
        return None, f"❌ API Fehler: {e}"
    return text, None
def _escape_drawtext(text):
    """Escape *text* for use as an ffmpeg ``drawtext`` filter value.

    ffmpeg filter strings treat ``\\``, ``'``, ``:``, ``,`` and ``;`` as
    metacharacters. Shell quoting (``shlex.quote``) is wrong here because
    the command is executed without a shell.
    """
    escaped = text.replace("\\", "\\\\")
    for ch in ("'", ":", ",", ";"):
        escaped = escaped.replace(ch, "\\" + ch)
    return escaped

def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_pos=0.5, fade_duration=0.7, font_size=60, speed=1.0):
    """
    Build an MP4 slideshow from images, optionally muxing in an audio track
    whose transcript is rendered as fading captions across the images.

    Args:
        images: Sequence of image uploads (Gradio file objects or path strings).
        audio_file: Optional audio upload; when set it is transcribed and the
            transcript is distributed evenly over the images as captions.
        duration_per_image: Seconds each image is shown.
        y_pos: Vertical caption position, clamped to [0.0, 0.9].
        fade_duration: Caption fade-in/out time in seconds.
        font_size: Caption font size in pixels.
        speed: Accepted for UI compatibility.
            NOTE(review): currently unused by this function — confirm intent.

    Returns:
        tuple: ``(output_path, status_message)`` on success,
        ``(None, error_message)`` on failure.
    """
    if not images:
        return None, "❌ Keine Bilder ausgewählt"
    y_pos = min(max(0.0, y_pos), 0.9)  # keep captions on screen
    temp_dir = tempfile.mkdtemp()
    clips = []
    if audio_file:
        # transcribe_audio may return a bare string or a (text, error)
        # tuple depending on version — accept both shapes defensively.
        result = transcribe_audio(audio_file)
        if isinstance(result, tuple):
            transcript, err = result
            if err:
                return None, err
        else:
            transcript = result
        # Distribute the transcript's words evenly across the images.
        words = transcript.split()
        total_words = len(words)
        segments_per_image = math.ceil(total_words / len(images))
        texts = []
        for i in range(len(images)):
            start = i * segments_per_image
            end = min((i + 1) * segments_per_image, total_words)
            texts.append(" ".join(words[start:end]))
        temp_audio_file = save_temp_audio(audio_file)
    else:
        texts = [""] * len(images)
        temp_audio_file = None
    # Render one short clip per image (letterboxed 1280x720, 25 fps).
    for i, img_path in enumerate(images):
        # Gradio may deliver file objects (with .name) or plain path strings.
        img_path = Path(getattr(img_path, "name", img_path))
        clip_path = Path(temp_dir) / f"clip_{i}.mp4"
        text = texts[i] if i < len(texts) else ""
        vf_filters = (
            "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
            "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
            "fps=25,format=yuv420p"
        )
        if text:
            # Bug fix: the original used shlex.quote (shell quoting), but the
            # command runs with shell=False; ffmpeg needs filter escaping.
            safe_text = _escape_drawtext(text)
            drawtext_filter = (
                f",drawtext=text={safe_text}:fontcolor=white:fontsize={font_size}:borderw=2:"
                f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
                f"alpha='if(lt(t,{fade_duration}), t/{fade_duration}, if(lt(t,{duration_per_image}-{fade_duration}), 1, ({duration_per_image}-t)/{fade_duration}))'"
            )
            vf_filters += drawtext_filter
        cmd = [
            "ffmpeg",
            "-y",
            "-loop", "1",
            "-i", str(img_path),
            "-t", str(duration_per_image),
            "-vf", vf_filters,
            str(clip_path)
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{e.stderr}"
        clips.append(clip_path)
    # Concatenate the per-image clips into a single video via concat demuxer.
    filelist_path = Path(temp_dir) / "filelist.txt"
    with open(filelist_path, "w") as f:
        for clip in clips:
            f.write(f"file '{clip}'\n")
    output_file = Path(temp_dir) / f"slideshow_{uuid.uuid4().hex}.mp4"
    cmd_concat = [
        "ffmpeg",
        "-y",
        "-f", "concat",
        "-safe", "0",
        "-i", str(filelist_path),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        str(output_file)
    ]
    try:
        subprocess.run(cmd_concat, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        return None, f"❌ FFmpeg Concat Fehler:\n{e.stderr}"
    # Mux in the soundtrack (video stream copied, audio re-encoded to AAC,
    # output trimmed to the shorter of the two streams).
    if temp_audio_file:
        final_output = Path(temp_dir) / f"slideshow_audio_{uuid.uuid4().hex}.mp4"
        cmd_audio = [
            "ffmpeg",
            "-y",
            "-i", str(output_file),
            "-i", str(temp_audio_file),
            "-c:v", "copy",
            "-c:a", "aac",
            "-shortest",
            str(final_output)
        ]
        try:
            subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
            return str(final_output), "✅ Slideshow mit Audio und automatischen Untertiteln erstellt"
        except subprocess.CalledProcessError as e:
            return None, f"❌ FFmpeg Audio Merge Fehler:\n{e.stderr}"
    return str(output_file), "✅ Slideshow erstellt (ohne Audio)"
# --- Gradio UI wiring -------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Slideshow mit Audio & automatischen Untertiteln")
    # Multiple image files form the slideshow frames.
    img_input = gr.Files(label="Bilder auswählen (mehrere)", file_types=allowed_medias)
    # Optional soundtrack; when present it is transcribed into captions.
    audio_input = gr.File(
        label="Audio hinzufügen (MP3, WAV, M4A, OGG ... optional)",
        file_types=allowed_audios
    )
    duration_input = gr.Number(value=3, label="Dauer pro Bild in Sekunden", precision=1)
    fade_input = gr.Number(value=0.7, label="Fade Dauer in Sekunden", precision=1)
    ypos_input = gr.Slider(minimum=0.0, maximum=0.9, step=0.01, value=0.5, label="Y-Position für alle Texte (0=oben, 0.5=mitte, 0.9=unten)")
    font_size_input = gr.Number(value=60, label="Textgröße (px)")
    # NOTE(review): the speed value is forwarded below but not used inside
    # generate_slideshow_with_audio — confirm intended behaviour.
    speed_input = gr.Slider(minimum=0.1, maximum=3.0, value=1.0, label="Geschwindigkeit der Texteinblendung")
    out_video = gr.Video(interactive=False, label="Generiertes Video")
    status = gr.Textbox(interactive=False, label="Status")
    btn = gr.Button("Video erstellen")
    # The inputs list maps positionally onto generate_slideshow_with_audio's
    # parameters: (images, audio_file, duration_per_image, y_pos,
    # fade_duration, font_size, speed).
    btn.click(
        fn=generate_slideshow_with_audio,
        inputs=[img_input, audio_input, duration_input, ypos_input, fade_input, font_size_input, speed_input],
        outputs=[out_video, status]
    )
demo.launch()