Tim13ekd committed
Commit 7644c1e · verified · 1 Parent(s): 377308b

Update app.py

Files changed (1)
  1. app.py +82 -98
app.py CHANGED
@@ -27,24 +27,19 @@ def get_font_path():
     return None  # Fallback: let FFmpeg search for a font itself (sometimes fails)
 
 def save_temp_audio(audio_file):
-    if isinstance(audio_file, str):
-        ext = Path(audio_file).suffix
-        if ext.lower() not in allowed_audios:
-            ext = ".mp3"
-        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
-        with open(temp_audio, "wb") as f:
-            f.write(audio_file.encode())
-        return temp_audio
-    elif hasattr(audio_file, 'name'):
+    """Saves the uploaded audio file to a temporary directory."""
+    if hasattr(audio_file, 'name'):
         ext = Path(audio_file.name).suffix
         if ext.lower() not in allowed_audios:
             ext = ".mp3"
-        temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
+        temp_audio_dir = Path(tempfile.mkdtemp())
+        temp_audio = temp_audio_dir / f"input{ext}"
         audio_file.seek(0)
         with open(temp_audio, "wb") as f:
             shutil.copyfileobj(audio_file, f)
-        return temp_audio
-    return None
+        # Return the directory (so it can be deleted later) and the file path
+        return temp_audio_dir, temp_audio
+    return None, None
 
 def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
     """Creates an FFmpeg drawtext filter that fades a word in and out smoothly (alpha channel)."""
@@ -62,10 +57,7 @@ def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
     fade_out_start = end_time - WORD_FADE_DURATION
 
     # Alpha expression for a smooth fade-in and fade-out
-    # if(lt(t, start_time), 0, ...) -> before the start time: alpha = 0
-    # if(lt(t, fade_in_end), (t-start_time)/WORD_FADE_DURATION, ...) -> fade-in
-    # if(lt(t, fade_out_start), 1, ...) -> full opacity
-    # if(lt(t, end_time), (end_time-t)/WORD_FADE_DURATION, 0) -> fade-out
+    # Controls the opacity based on the time t (relative to the clip start)
     alpha_expression = (
         f"if(lt(t,{start_time}), 0, "
         f"if(lt(t,{fade_in_end}), (t-{start_time})/{WORD_FADE_DURATION}, "
@@ -83,102 +75,80 @@ def create_timed_drawtext(word, start_time, duration, font_option, font_size, y_pos):
 
 
 def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
-    # Debug print
-    print(f"DEBUG: Font Size: {font_size}, Y-Pos: {y_pos}, Duration/Word: {duration_per_word}, Fade: {fade_duration}")
-
+
     if not images:
         return None, "❌ Keine Bilder ausgewählt"
 
     temp_dir = tempfile.mkdtemp()
-    clips_with_text = []
-
+
     # Split the text into words
     words = input_text.split() if input_text else []
+    total_words = len(words)
+    num_images = len(images)
 
-    # Compute the total text duration
-    total_text_duration = len(words) * duration_per_word
+    # Distribute the words evenly across the images
+    base_words_per_clip = total_words // num_images
+    remainder = total_words % num_images
 
+    current_word_index = 0
+    clips_with_text = []  # paths of the generated MP4 clips
+
     # Find a font
     font_path = get_font_path()
     font_option = f":fontfile='{font_path}'" if font_path else ""
 
     # Process audio
-    temp_audio_file = None
-    if audio_file:
-        temp_audio_file = save_temp_audio(audio_file)
+    audio_temp_dir, temp_audio_file = save_temp_audio(audio_file) if audio_file else (None, None)
 
-    # --- 1. FIRST IMAGE (sequential text) ---
-
-    # The first clip lasts at least as long as the text
-    duration_clip_1 = max(duration_per_image, total_text_duration)
-
-    # **FIX for FFmpeg:** compute the fade-out start time in Python
-    fade_out_start_1 = duration_clip_1 - fade_duration
-    if fade_out_start_1 < 0: fade_out_start_1 = 0
-
-
-    # Generate the sequential drawtext filters
-    drawtext_filters = []
-    current_time = 0.0
-    for word in words:
-        filter_str = create_timed_drawtext(word, current_time, duration_per_word, font_option, font_size, y_pos)
-        drawtext_filters.append(filter_str)
-        current_time += duration_per_word
-
-    # 1. Base adjustments
-    base_filters = (
-        "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
-        "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
-        "fps=25,format=yuv420p"
-    )
-
-    # 2. Fade filter (now with the correctly computed start time)
-    fade_img_filter_1 = f"fade=t=in:st=0:d={fade_duration},fade=t=out:st={fade_out_start_1}:d={fade_duration}"
 
-
-    if drawtext_filters:
-        all_drawtext_filters = ",".join(drawtext_filters)
-        vf_filters_clip1 = f"{base_filters},{all_drawtext_filters},{fade_img_filter_1}"
-    else:
-        vf_filters_clip1 = f"{base_filters},{fade_img_filter_1}"
-
-    # Create clip 1
-    img_path_1 = Path(images[0].name)
-    clip_path_1 = Path(temp_dir) / "clip_with_text_0.mp4"
-
-    cmd_1 = [
-        "ffmpeg", "-y", "-loop", "1", "-i", str(img_path_1),
-        "-t", str(duration_clip_1),
-        "-vf", vf_filters_clip1,
-        str(clip_path_1)
-    ]
-
-    try:
-        subprocess.run(cmd_1, check=True, capture_output=True, text=True)
-        clips_with_text.append(clip_path_1)
-    except subprocess.CalledProcessError as e:
-        return None, f"❌ FFmpeg Fehler bei Bild 1 (mit Text):\n{e.stderr}"
-
-    # --- 2. FOLLOWING IMAGES (image with fade only) ---
-    for i in range(1, len(images)):
+    # --- 1. LOOP: create each clip with its text segment ---
+    for i in range(num_images):
         img_path = Path(images[i].name)
-        clip_path = Path(temp_dir) / f"clip_{i}.mp4"
-
-        # **FIX for FFmpeg:** compute the fade-out start time in Python
-        fade_out_start_n = duration_per_image - fade_duration
-        if fade_out_start_n < 0: fade_out_start_n = 0
+        clip_path = Path(temp_dir) / f"clip_with_text_{i}.mp4"
 
-        # Image-only filter with fade (now with the correctly computed start time)
-        fade_img_filter = f"fade=t=in:st=0:d={fade_duration},fade=t=out:st={fade_out_start_n}:d={fade_duration}"
-        vf_filters_clip = (
+        # 1. Determine the word segment for this clip
+        words_on_this_clip = base_words_per_clip + (1 if i < remainder else 0)
+
+        # Extract the segment from the full word list
+        word_segment = words[current_word_index : current_word_index + words_on_this_clip]
+        current_word_index += len(word_segment)
+
+        # 2. Compute the clip duration
+        text_duration = len(word_segment) * duration_per_word
+        # The duration is the maximum of the desired image duration and the required text duration
+        duration_clip = max(duration_per_image, text_duration)
+
+        # 3. Generate the drawtext filters (start times are relative to the clip start, i.e. 0)
+        drawtext_filters = []
+        word_start_time = 0.0
+        for word in word_segment:
+            filter_str = create_timed_drawtext(word, word_start_time, duration_per_word, font_option, font_size, y_pos)
+            drawtext_filters.append(filter_str)
+            word_start_time += duration_per_word
+
+        # 4. Base and fade filters
+        base_filters = (
             "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
             "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
-            f"fps=25,format=yuv420p,{fade_img_filter}"
+            "fps=25,format=yuv420p"
         )
 
+        fade_out_start = duration_clip - fade_duration
+        if fade_out_start < 0: fade_out_start = 0
+        fade_img_filter = f"fade=t=in:st=0:d={fade_duration},fade=t=out:st={fade_out_start}:d={fade_duration}"
+
+        # 5. Combine all filters
+        if drawtext_filters:
+            all_drawtext_filters = ",".join(drawtext_filters)
+            vf_filters_clip = f"{base_filters},{all_drawtext_filters},{fade_img_filter}"
+        else:
+            # No text left: image with fade only
+            vf_filters_clip = f"{base_filters},{fade_img_filter}"
+
+        # 6. FFmpeg command to create the clip
         cmd = [
             "ffmpeg", "-y", "-loop", "1", "-i", str(img_path),
-            "-t", str(duration_per_image),
+            "-t", str(duration_clip),
             "-vf", vf_filters_clip,
             str(clip_path)
         ]
@@ -187,9 +157,12 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
             subprocess.run(cmd, check=True, capture_output=True, text=True)
             clips_with_text.append(clip_path)
         except subprocess.CalledProcessError as e:
-            return None, f"❌ FFmpeg Fehler bei Bild {i+1} (ohne Text):\n{e.stderr}"
+            # Clean up on error
+            shutil.rmtree(temp_dir)
+            if audio_temp_dir: shutil.rmtree(audio_temp_dir)
+            return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{e.stderr}"
 
-    # Concatenate
+    # --- 2. CONCATENATE ---
     filelist_path = Path(temp_dir) / "filelist.txt"
     with open(filelist_path, "w") as f:
         for clip in clips_with_text:
@@ -205,11 +178,14 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
     ]
 
     try:
-        subprocess.run(cmd_concat, check=True)
+        subprocess.run(cmd_concat, check=True, capture_output=True, text=True)
     except subprocess.CalledProcessError as e:
+        shutil.rmtree(temp_dir)
+        if audio_temp_dir: shutil.rmtree(audio_temp_dir)
        return None, f"❌ FFmpeg Fehler beim Zusammenfügen:\n{e.stderr}"
 
-    # Add audio if available
+    # --- 3. ADD AUDIO (if available) ---
+    final_output = output_video
     if temp_audio_file:
         final_output = Path(temp_dir) / f"final_{uuid.uuid4().hex}.mp4"
         cmd_audio = [
@@ -218,12 +194,19 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
             str(final_output)
         ]
         try:
-            subprocess.run(cmd_audio, check=True)
+            subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
         except subprocess.CalledProcessError as e:
+            shutil.rmtree(temp_dir)
+            if audio_temp_dir: shutil.rmtree(audio_temp_dir)
            return None, f"❌ FFmpeg Fehler beim Hinzufügen von Audio:\n{e.stderr}"
+
+        # Clean up the separate temporary audio directory
+        if audio_temp_dir: shutil.rmtree(audio_temp_dir)
+
         return str(final_output), "✅ Video mit Audio erstellt!"
 
-    return str(output_video), "✅ Video erstellt (ohne Audio)"
+    # Return only the video path
+    return str(final_output), "✅ Video erstellt (ohne Audio)"
 
 # Gradio UI
 with gr.Blocks() as demo:
@@ -231,12 +214,13 @@ with gr.Blocks() as demo:
 
     with gr.Row():
         img_input = gr.Files(label="Bilder", file_types=allowed_medias)
-        text_input = gr.Textbox(label="Text (Wörter erscheinen nacheinander auf dem ersten Bild)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
+        # LABEL CHANGED: new description for the text distribution
+        text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort wird für 'Dauer pro Wort' angezeigt.")
 
     with gr.Row():
-        duration_image_input = gr.Number(value=3, label="Dauer pro BILD (s) [für Bild 2+ und Min-Dauer für Bild 1]")
+        duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
         duration_word_input = gr.Number(value=1.0, label="Dauer pro WORT (s) [bestimmt Geschwindigkeit der Text-Anzeige]")
-        fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)")  # Changed for clarity
+        fade_input = gr.Number(value=0.5, label="Bild-Fade Dauer (s)")
         font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
         ypos_input = gr.Slider(0.0, 1.0, value=0.9, label="Y-Position (0=Oben, 1=Unten)")
 
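The main behavioural change in this commit is that the caption words are no longer shown only on the first image but are split evenly across all images. A minimal standalone sketch of that segmentation step, with variable names mirroring the diff; the sample sentence and the image count below are made up for illustration:

# Sketch of the word-distribution step from generate_slideshow_with_audio.
# The sample text and number of images are illustrative only.
words = "Each word appears on one of the slides in order".split()
num_images = 3

total_words = len(words)
base_words_per_clip = total_words // num_images   # every clip gets at least this many words
remainder = total_words % num_images              # the first `remainder` clips get one extra word

current_word_index = 0
for i in range(num_images):
    words_on_this_clip = base_words_per_clip + (1 if i < remainder else 0)
    word_segment = words[current_word_index:current_word_index + words_on_this_clip]
    current_word_index += len(word_segment)
    print(i, word_segment)
# 0 ['Each', 'word', 'appears', 'on']
# 1 ['one', 'of', 'the']
# 2 ['slides', 'in', 'order']

The segments cover every word exactly once; a clip whose segment comes out empty simply takes the image-only fade branch, and each clip lasts max(duration_per_image, len(word_segment) * duration_per_word).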
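For reference, the alpha expression assembled in create_timed_drawtext is a piecewise function of the clip time t. Its tail is cut off in this diff, so the version below is reconstructed from the comments removed in the second hunk, and both the WORD_FADE_DURATION value and the fade_in_end definition (set elsewhere in app.py) are assumptions:

# Reconstructed sketch of the alpha expression for a single word.
# WORD_FADE_DURATION = 0.2 and fade_in_end = start_time + WORD_FADE_DURATION are assumed values.
WORD_FADE_DURATION = 0.2
start_time, duration = 0.0, 1.0               # first word, shown for one second
end_time = start_time + duration
fade_in_end = start_time + WORD_FADE_DURATION
fade_out_start = end_time - WORD_FADE_DURATION

alpha_expression = (
    f"if(lt(t,{start_time}), 0, "                                       # before the word: invisible
    f"if(lt(t,{fade_in_end}), (t-{start_time})/{WORD_FADE_DURATION}, "  # linear fade-in
    f"if(lt(t,{fade_out_start}), 1, "                                   # full opacity
    f"if(lt(t,{end_time}), ({end_time}-t)/{WORD_FADE_DURATION}, 0))))"  # linear fade-out, then invisible
)
print(alpha_expression)
# if(lt(t,0.0), 0, if(lt(t,0.2), (t-0.0)/0.2, if(lt(t,0.8), 1, if(lt(t,1.0), (1.0-t)/0.2, 0))))

Because the new loop restarts word_start_time at 0.0 for every clip, the expression only ever refers to times relative to the clip that contains the word.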