Tim13ekd committed on
Commit
fef9da1
·
verified ·
1 Parent(s): 500f777

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -8
app.py CHANGED
@@ -6,6 +6,7 @@ import subprocess
6
  import requests
7
  import base64
8
  import math
 
9
 
10
  # Erlaubte Dateiformate
11
  allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
@@ -13,14 +14,28 @@ allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
13
 
14
  API_URL = "https://text.pollinations.ai/openai"
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def convert_to_wav(audio_path):
17
  wav_path = Path(audio_path).with_suffix(".wav")
18
  cmd = ["ffmpeg", "-y", "-i", str(audio_path), "-ar", "16000", "-ac", "1", str(wav_path)]
19
- subprocess.run(cmd, check=True)
20
  return wav_path
21
 
22
- def transcribe_audio(audio_path):
23
- wav_file = convert_to_wav(audio_path)
 
24
  with open(wav_file, "rb") as f:
25
  audio_data = base64.b64encode(f.read()).decode()
26
  payload = {
@@ -49,7 +64,7 @@ def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_po
49
 
50
  # Transkription, falls Audio vorhanden
51
  if audio_file:
52
- transcript = transcribe_audio(audio_file.name)
53
  words = transcript.split()
54
  total_words = len(words)
55
  segments_per_image = math.ceil(total_words / len(images))
@@ -58,9 +73,12 @@ def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_po
58
  start = i * segments_per_image
59
  end = min((i + 1) * segments_per_image, total_words)
60
  texts.append(" ".join(words[start:end]))
 
61
  else:
62
  texts = [""] * len(images)
 
63
 
 
64
  for i, img_path in enumerate(images):
65
  clip_path = Path(temp_dir) / f"clip_{i}.mp4"
66
  text = texts[i] if i < len(texts) else ""
@@ -72,7 +90,7 @@ def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_po
72
  )
73
 
74
  if text:
75
- # Escape problematische Zeichen
76
  safe_text = text.replace(":", "\\:").replace("'", "\\'").replace(",", "\\,")
77
  drawtext_filter = (
78
  f",drawtext=text='{safe_text}':fontcolor=white:fontsize={font_size}:borderw=2:"
@@ -120,13 +138,13 @@ def generate_slideshow_with_audio(images, audio_file, duration_per_image=3, y_po
120
  return None, f"❌ FFmpeg Concat Fehler:\n{e.stderr}"
121
 
122
  # Audio hinzufügen, falls vorhanden
123
- if audio_file:
124
  final_output = Path(temp_dir) / f"slideshow_audio_{uuid.uuid4().hex}.mp4"
125
  cmd_audio = [
126
  "ffmpeg",
127
  "-y",
128
  "-i", str(output_file),
129
- "-i", audio_file.name,
130
  "-c:v", "copy",
131
  "-c:a", "aac",
132
  "-shortest",
@@ -163,4 +181,4 @@ with gr.Blocks() as demo:
163
  outputs=[out_video, status]
164
  )
165
 
166
- demo.launch()
 
6
  import requests
7
  import base64
8
  import math
9
+ import shutil
10
 
11
  # Erlaubte Dateiformate
12
  allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
 
14
 
15
  API_URL = "https://text.pollinations.ai/openai"
16
 
17
def save_temp_audio(audio_file):
    """
    Copy an uploaded audio file into a fresh temporary directory.

    The copy is named ``input<ext>``, where ``<ext>`` is the upload's own
    extension when it is listed in ``allowed_audios`` (compared
    case-insensitively) and ``.mp3`` otherwise.

    Args:
        audio_file: An open binary file object exposing ``name``. As a
            fallback, a plain path, or an object that is closed / not
            readable and only carries the path in ``name``, is copied
            from disk instead.

    Returns:
        Path of the copy. Each call creates a new directory with
        ``tempfile.mkdtemp()``; this function never removes it.
    """
    source_name = str(getattr(audio_file, "name", audio_file))
    ext = Path(source_name).suffix
    if ext.lower() not in allowed_audios:
        ext = ".mp3"  # default when the extension is missing or not allowed
    temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"

    # Only stream from the object when it really is an open, readable file;
    # otherwise seek()/read() would raise, so copy from the path on disk.
    is_open_stream = hasattr(audio_file, "read") and not getattr(
        audio_file, "closed", False
    )
    if is_open_stream:
        audio_file.seek(0)  # rewind in case the upload was already read
        with open(temp_audio, "wb") as f:
            shutil.copyfileobj(audio_file, f)
    else:
        shutil.copyfile(source_name, temp_audio)
    return temp_audio
29
+
30
  def convert_to_wav(audio_path):
31
  wav_path = Path(audio_path).with_suffix(".wav")
32
  cmd = ["ffmpeg", "-y", "-i", str(audio_path), "-ar", "16000", "-ac", "1", str(wav_path)]
33
+ subprocess.run(cmd, check=True, capture_output=True, text=True)
34
  return wav_path
35
 
36
+ def transcribe_audio(audio_file):
37
+ temp_audio = save_temp_audio(audio_file)
38
+ wav_file = convert_to_wav(temp_audio)
39
  with open(wav_file, "rb") as f:
40
  audio_data = base64.b64encode(f.read()).decode()
41
  payload = {
 
64
 
65
  # Transkription, falls Audio vorhanden
66
  if audio_file:
67
+ transcript = transcribe_audio(audio_file)
68
  words = transcript.split()
69
  total_words = len(words)
70
  segments_per_image = math.ceil(total_words / len(images))
 
73
  start = i * segments_per_image
74
  end = min((i + 1) * segments_per_image, total_words)
75
  texts.append(" ".join(words[start:end]))
76
+ temp_audio_file = save_temp_audio(audio_file)
77
  else:
78
  texts = [""] * len(images)
79
+ temp_audio_file = None
80
 
81
+ # Einzelne Clips erstellen
82
  for i, img_path in enumerate(images):
83
  clip_path = Path(temp_dir) / f"clip_{i}.mp4"
84
  text = texts[i] if i < len(texts) else ""
 
90
  )
91
 
92
  if text:
93
+ # Escape problematischer Zeichen für FFmpeg
94
  safe_text = text.replace(":", "\\:").replace("'", "\\'").replace(",", "\\,")
95
  drawtext_filter = (
96
  f",drawtext=text='{safe_text}':fontcolor=white:fontsize={font_size}:borderw=2:"
 
138
  return None, f"❌ FFmpeg Concat Fehler:\n{e.stderr}"
139
 
140
  # Audio hinzufügen, falls vorhanden
141
+ if temp_audio_file:
142
  final_output = Path(temp_dir) / f"slideshow_audio_{uuid.uuid4().hex}.mp4"
143
  cmd_audio = [
144
  "ffmpeg",
145
  "-y",
146
  "-i", str(output_file),
147
+ "-i", str(temp_audio_file),
148
  "-c:v", "copy",
149
  "-c:a", "aac",
150
  "-shortest",
 
181
  outputs=[out_video, status]
182
  )
183
 
184
+ demo.launch()