Tim13ekd committed on
Commit fd9d93c · verified · 1 Parent(s): b6a8e09

Update app.py

Files changed (1):
  app.py  +20 -78
app.py CHANGED
@@ -3,9 +3,6 @@ import tempfile
 from pathlib import Path
 import uuid
 import subprocess
-import requests
-import base64
-import math
 import shutil
 import io # Für NamedString-Handling
 import shlex # Für sicheres Escapen von Text
@@ -14,8 +11,6 @@ import shlex # Für sicheres Escapen von Text
 allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
 allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
 
-API_URL = "https://text.pollinations.ai/openai"
-
 def save_temp_audio(audio_file):
     """
     Speichert die hochgeladene Datei sicher mit korrekter Endung in einem temporären Verzeichnis.
@@ -40,38 +35,7 @@ def save_temp_audio(audio_file):
     else:
         raise ValueError("Das übergebene Audio ist kein gültiges Dateiformat oder NamedString.")
 
-def convert_to_wav(audio_path):
-    wav_path = Path(audio_path).with_suffix(".wav")
-    cmd = ["ffmpeg", "-y", "-i", str(audio_path), "-ar", "16000", "-ac", "1", str(wav_path)]
-    subprocess.run(cmd, check=True, capture_output=True, text=True)
-    return wav_path
-
-def transcribe_audio(audio_file):
-    temp_audio = save_temp_audio(audio_file)
-    wav_file = convert_to_wav(temp_audio)
-    with open(wav_file, "rb") as f:
-        audio_data = base64.b64encode(f.read()).decode()
-    payload = {
-        "model": "openai-audio",
-        "messages": [{
-            "role": "user",
-            "content": [
-                {"type": "text", "text": "Transcribe this audio:"},
-                {"type": "input_audio", "input_audio": {"data": audio_data, "format": "wav"}}
-            ]
-        }]
-    }
-    try:
-        response = requests.post(API_URL, json=payload)
-        response.raise_for_status()
-    except requests.RequestException as e:
-        return None, f"❌ API Fehler: {e}"
-
-    result = response.json()
-    text = result['choices'][0]['message']['content']
-    return text
-
-def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_pos=0.5, fade_duration=0.7, font_size=60, speed=1.0):
+def generate_slideshow_with_audio(images, input_text, duration_per_image=3, y_pos=0.5, fade_duration=0.7, font_size=60, speed=1.0):
     if not images:
         return None, "❌ Keine Bilder ausgewählt"
 
@@ -79,22 +43,17 @@ def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_po
     temp_dir = tempfile.mkdtemp()
     clips = []
 
-    if audio_file:
-        transcript, err = transcribe_audio(audio_file)
-        if err:
-            return None, err
-        words = transcript.split()
-        total_words = len(words)
-        segments_per_image = math.ceil(total_words / len(images))
-        texts = []
-        for i in range(len(images)):
-            start = i * segments_per_image
-            end = min((i + 1) * segments_per_image, total_words)
-            texts.append(" ".join(words[start:end]))
-        temp_audio_file = save_temp_audio(audio_file)
-    else:
-        texts = [""] * len(images)
-        temp_audio_file = None
+    # Text in Segmente aufteilen
+    words = input_text.split()
+    total_words = len(words)
+    segments_per_image = max(1, total_words // len(images)) # Sicherstellen, dass mindestens 1 Segment pro Bild
+    texts = []
+    for i in range(len(images)):
+        start = i * segments_per_image
+        end = min((i + 1) * segments_per_image, total_words)
+        texts.append(" ".join(words[start:end]))
+
+    temp_audio_file = None # Wir gehen davon aus, dass das Audio optional ist.
 
     for i, img_path in enumerate(images):
         img_path = Path(img_path.name) # Gradio liefert temporäre Dateipfade
@@ -154,34 +113,17 @@ def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_po
     except subprocess.CalledProcessError as e:
         return None, f"❌ FFmpeg Concat Fehler:\n{e.stderr}"
 
-    if temp_audio_file:
-        final_output = Path(temp_dir) / f"slideshow_audio_{uuid.uuid4().hex}.mp4"
-        cmd_audio = [
-            "ffmpeg",
-            "-y",
-            "-i", str(output_file),
-            "-i", str(temp_audio_file),
-            "-c:v", "copy",
-            "-c:a", "aac",
-            "-shortest",
-            str(final_output)
-        ]
-        try:
-            subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
-            return str(final_output), "✅ Slideshow mit Audio und automatischen Untertiteln erstellt"
-        except subprocess.CalledProcessError as e:
-            return None, f"❌ FFmpeg Audio Merge Fehler:\n{e.stderr}"
-
-    return str(output_file), "✅ Slideshow erstellt (ohne Audio)"
+    return str(output_file), "✅ Slideshow mit Text erstellt"
 
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("# Slideshow mit Audio & automatischen Untertiteln")
+    gr.Markdown("# Slideshow mit Manuellem Text")
 
     img_input = gr.Files(label="Bilder auswählen (mehrere)", file_types=allowed_medias)
-    audio_input = gr.File(
-        label="Audio hinzufügen (MP3, WAV, M4A, OGG ... optional)",
-        file_types=allowed_audios
+    text_input = gr.Textbox(
+        label="Text eingeben",
+        placeholder="Gib hier den Text ein, der in den Bildern angezeigt werden soll",
+        lines=5
     )
     duration_input = gr.Number(value=3, label="Dauer pro Bild in Sekunden", precision=1)
     fade_input = gr.Number(value=0.7, label="Fade Dauer in Sekunden", precision=1)
@@ -195,8 +137,8 @@ with gr.Blocks() as demo:
     btn = gr.Button("Video erstellen")
     btn.click(
         fn=generate_slideshow_with_audio,
-        inputs=[img_input, audio_input, duration_input, ypos_input, fade_input, font_size_input, speed_input],
+        inputs=[img_input, text_input, duration_input, ypos_input, fade_input, font_size_input, speed_input],
         outputs=[out_video, status]
     )
 
-demo.launch()
+demo.launch()
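
Note on the new caption logic: the commit removes the Pollinations transcription call and instead splits the text entered in the Textbox evenly across the selected images. Below is a minimal standalone sketch of that segmentation step; the helper name split_text_over_images and the example sentence are illustrative only, while the arithmetic mirrors the new code in generate_slideshow_with_audio.

def split_text_over_images(input_text: str, n_images: int) -> list[str]:
    """Divide the input text into one caption per image, as app.py now does."""
    words = input_text.split()
    total_words = len(words)
    # Floor division, clamped so each image gets a window of at least one word.
    segments_per_image = max(1, total_words // n_images)
    texts = []
    for i in range(n_images):
        start = i * segments_per_image
        end = min((i + 1) * segments_per_image, total_words)
        texts.append(" ".join(words[start:end]))
    return texts

# 10 words over 3 images -> 3 words per caption; the 10th word is dropped,
# since 10 // 3 == 3 (the removed audio path used math.ceil and kept it).
print(split_text_over_images("eins zwei drei vier fünf sechs sieben acht neun zehn", 3))
# ['eins zwei drei', 'vier fünf sechs', 'sieben acht neun']

With an empty Textbox the split yields an empty caption for every image, matching the old no-audio branch (texts = [""] * len(images)).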