Nymbo committed on
Commit
cf27b9a
·
verified ·
1 Parent(s): 729de8c

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +8 -8
  2. app.py +482 -0
  3. packages.txt +1 -0
  4. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- title: Groq Playground Master
3
- emoji: 🐢
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 4.37.2
8
  app_file: app.py
9
- pinned: false
10
- license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Groq Playground w/ Whisper
3
+ emoji: 🐇
4
+ colorFrom: gray
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.27.0
8
  app_file: app.py
9
+ pinned: true
10
+ license: other
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import random
4
+ import numpy as np
5
+ import json
6
+ from datetime import timedelta
7
+ import tempfile
8
+ import gradio as gr
9
+ from groq import Groq
10
+
11
+ client = Groq(api_key=os.environ.get("Groq_Api_Key"))
12
+
13
+
14
+ # llms
15
+
16
+ MAX_SEED = np.iinfo(np.int32).max
17
+
18
def update_max_tokens(model):
    """Return a Gradio update that sets the Max Tokens slider ceiling for *model*.

    Fix: the original had no fallback branch, so an unlisted model name made
    the function return None and the slider received no update at all. Any
    unknown model now falls back to the conservative 8192 ceiling.
    """
    if model == "mixtral-8x7b-32768":
        return gr.update(maximum=32768)
    # llama3-70b-8192, llama3-8b-8192, gemma-7b-it, gemma2-9b-it — and any
    # future/unknown model — get the 8k context ceiling.
    return gr.update(maximum=8192)
23
+
24
def create_history_messages(history):
    """Convert Gradio chat history into an ordered list of API messages.

    Args:
        history: list of (user_message, assistant_message) pairs.

    Returns:
        Messages interleaved in conversation order
        (user, assistant, user, assistant, ...).

    Fix: the original appended ALL user messages first and then ALL
    assistant messages, scrambling the turn order the chat-completions
    API relies on for context.
    """
    messages = []
    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})
    return messages
28
+
29
def generate_response(prompt, history, model, temperature, max_tokens, top_p, seed):
    """Stream a chat completion from Groq, yielding the cumulative response text.

    Args:
        prompt: the new user message.
        history: prior (user, assistant) pairs from the ChatInterface.
        model / temperature / max_tokens / top_p: forwarded to the API.
        seed: generation seed; 0 means "pick a random seed".

    Yields:
        The response text accumulated so far (Gradio re-renders each yield).

    Fixes: removed the leftover debug ``print(messages)`` (it dumped the
    entire conversation to the server logs on every request) and the dead
    ``return response`` after the yield loop (ignored in a generator).
    """
    messages = create_history_messages(history)
    messages.append({"role": "user", "content": prompt})

    # Seed 0 is the UI's "random" sentinel: replace it with a concrete seed.
    if seed == 0:
        seed = random.randint(1, MAX_SEED)

    stream = client.chat.completions.create(
        messages=messages,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        seed=seed,
        stop=None,
        stream=True,
    )

    response = ""
    for chunk in stream:
        delta_content = chunk.choices[0].delta.content
        # The final chunk carries a None delta; skip it.
        if delta_content is not None:
            response += delta_content
            yield response
56
+
57
+ # speech to text
58
+
59
# Upload formats accepted by the speech-to-text endpoints.
ALLOWED_FILE_EXTENSIONS = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
# Hard upload cap; larger files are downsampled first (see check_file).
MAX_FILE_SIZE_MB = 25

# Display name -> ISO-639-1-style code passed to the transcription API's
# `language` parameter. NOTE(review): "jw" is kept verbatim (presumably
# Javanese, whose modern code is "jv") — confirm against the Whisper
# language list before changing it.
LANGUAGE_CODES = {
    "English": "en",
    "Chinese": "zh",
    "German": "de",
    "Spanish": "es",
    "Russian": "ru",
    "Korean": "ko",
    "French": "fr",
    "Japanese": "ja",
    "Portuguese": "pt",
    "Turkish": "tr",
    "Polish": "pl",
    "Catalan": "ca",
    "Dutch": "nl",
    "Arabic": "ar",
    "Swedish": "sv",
    "Italian": "it",
    "Indonesian": "id",
    "Hindi": "hi",
    "Finnish": "fi",
    "Vietnamese": "vi",
    "Hebrew": "he",
    "Ukrainian": "uk",
    "Greek": "el",
    "Malay": "ms",
    "Czech": "cs",
    "Romanian": "ro",
    "Danish": "da",
    "Hungarian": "hu",
    "Tamil": "ta",
    "Norwegian": "no",
    "Thai": "th",
    "Urdu": "ur",
    "Croatian": "hr",
    "Bulgarian": "bg",
    "Lithuanian": "lt",
    "Latin": "la",
    "Māori": "mi",
    "Malayalam": "ml",
    "Welsh": "cy",
    "Slovak": "sk",
    "Telugu": "te",
    "Persian": "fa",
    "Latvian": "lv",
    "Bengali": "bn",
    "Serbian": "sr",
    "Azerbaijani": "az",
    "Slovenian": "sl",
    "Kannada": "kn",
    "Estonian": "et",
    "Macedonian": "mk",
    "Breton": "br",
    "Basque": "eu",
    "Icelandic": "is",
    "Armenian": "hy",
    "Nepali": "ne",
    "Mongolian": "mn",
    "Bosnian": "bs",
    "Kazakh": "kk",
    "Albanian": "sq",
    "Swahili": "sw",
    "Galician": "gl",
    "Marathi": "mr",
    "Panjabi": "pa",
    "Sinhala": "si",
    "Khmer": "km",
    "Shona": "sn",
    "Yoruba": "yo",
    "Somali": "so",
    "Afrikaans": "af",
    "Occitan": "oc",
    "Georgian": "ka",
    "Belarusian": "be",
    "Tajik": "tg",
    "Sindhi": "sd",
    "Gujarati": "gu",
    "Amharic": "am",
    "Yiddish": "yi",
    "Lao": "lo",
    "Uzbek": "uz",
    "Faroese": "fo",
    "Haitian": "ht",
    "Pashto": "ps",
    "Turkmen": "tk",
    "Norwegian Nynorsk": "nn",
    "Maltese": "mt",
    "Sanskrit": "sa",
    "Luxembourgish": "lb",
    "Burmese": "my",
    "Tibetan": "bo",
    "Tagalog": "tl",
    "Malagasy": "mg",
    "Assamese": "as",
    "Tatar": "tt",
    "Hawaiian": "haw",
    "Lingala": "ln",
    "Hausa": "ha",
    "Bashkir": "ba",
    "jw": "jw",
    "Sundanese": "su",
}
163
+
164
def check_file(audio_file_path):
    """Validate an uploaded audio file, downsampling it if it is too large.

    Checks the extension against ALLOWED_FILE_EXTENSIONS and the size against
    MAX_FILE_SIZE_MB; oversized files are re-encoded to 16 kHz mono WAV with
    ffmpeg.

    Returns:
        (path, error) — exactly one of the two is None. ``path`` is either
        the original file or the downsampled copy; ``error`` is a gr.Error
        describing why the file was rejected.

    Fixes: added ``-y`` so a stale ``*_downsampled.wav`` from a previous run
    cannot make ffmpeg block on its interactive overwrite prompt, and changed
    the stream map from ``0:a:`` to ``0:a`` (the trailing colon is not a
    valid ffmpeg stream specifier).
    """
    if not audio_file_path:
        return None, gr.Error("Please upload an audio file.")

    file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
    file_extension = audio_file_path.split(".")[-1].lower()

    if file_extension not in ALLOWED_FILE_EXTENSIONS:
        return (
            None,
            gr.Error(
                f"Invalid file type (.{file_extension}). Allowed types: {', '.join(ALLOWED_FILE_EXTENSIONS)}"
            ),
        )

    if file_size_mb > MAX_FILE_SIZE_MB:
        gr.Warning(
            f"File size too large ({file_size_mb:.2f} MB). Attempting to downsample to 16kHz. Maximum allowed: {MAX_FILE_SIZE_MB} MB"
        )

        output_file_path = os.path.splitext(audio_file_path)[0] + "_downsampled.wav"
        try:
            subprocess.run(
                [
                    "ffmpeg",
                    "-y",            # overwrite a stale output instead of prompting
                    "-i",
                    audio_file_path,
                    "-ar",
                    "16000",         # 16 kHz is sufficient for speech models
                    "-ac",
                    "1",             # mono
                    "-map",
                    "0:a",           # select the audio stream(s) of input 0
                    output_file_path,
                ],
                check=True,
            )

            # The downsample may still not be enough for very long recordings.
            downsampled_size_mb = os.path.getsize(output_file_path) / (1024 * 1024)
            if downsampled_size_mb > MAX_FILE_SIZE_MB:
                return (
                    None,
                    gr.Error(
                        f"File size still too large after downsampling ({downsampled_size_mb:.2f} MB). Maximum allowed: {MAX_FILE_SIZE_MB} MB"
                    ),
                )

            return output_file_path, None
        except subprocess.CalledProcessError as e:
            return None, gr.Error(f"Error during downsampling: {e}")
    return audio_file_path, None
217
+
218
+
219
def transcribe_audio(audio_file_path, prompt, language, auto_detect_language, model):
    """Transcribe an uploaded audio file to text via the Groq Whisper endpoint.

    Returns the transcription text, or a gr.Error from check_file when the
    upload is rejected.
    """
    # Validate (and possibly downsample) before spending an API call.
    processed_path, error = check_file(audio_file_path)
    if error:
        return error

    with open(processed_path, "rb") as audio:
        payload = audio.read()

    result = client.audio.transcriptions.create(
        file=(os.path.basename(processed_path), payload),
        model=model,
        prompt=prompt,
        response_format="text",
        # None lets the model auto-detect; otherwise pass the chosen code.
        language=None if auto_detect_language else language,
        temperature=0.0,
    )
    # NOTE(review): assumes the SDK returns an object with a .text attribute
    # for response_format="text" — confirm against the installed groq version.
    return result.text
237
+
238
+
239
def translate_audio(audio_file_path, prompt, model):
    """Translate an uploaded audio file to English text via Groq Whisper.

    Returns the translated text, or a gr.Error from check_file when the
    upload is rejected.
    """
    # Validate (and possibly downsample) before spending an API call.
    processed_path, error = check_file(audio_file_path)
    if error:
        return error

    with open(processed_path, "rb") as audio:
        payload = audio.read()

    result = client.audio.translations.create(
        file=(os.path.basename(processed_path), payload),
        model=model,
        prompt=prompt,
        response_format="text",
        temperature=0.0,
    )
    # NOTE(review): assumes the SDK returns an object with a .text attribute
    # for response_format="text" — confirm against the installed groq version.
    return result.text
256
+
257
+
258
+ # subtitles maker
259
+
260
def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Fix: the original truncated ``seconds`` to int *before* computing the
    fractional part, so the millisecond field was always 000.
    """
    # Capture the fraction first, while `seconds` is still a float.
    milliseconds = int((seconds % 1) * 1000)
    total_seconds = int(seconds)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)

    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
267
+
268
def json_to_srt(transcription_json):
    """Render Whisper verbose-json segments as SRT cue text.

    Each segment dict must provide 'id', 'start', 'end' and 'text'; SRT cue
    numbers are 1-based, hence the ``+ 1``.
    """
    return "\n".join(
        f"{segment['id'] + 1}\n"
        f"{format_time(segment['start'])} --> {format_time(segment['end'])}\n"
        f"{segment['text']}\n"
        for segment in transcription_json
    )
280
+
281
+
282
def generate_subtitles(audio_file_path, prompt, language, auto_detect_language, model):
    """Transcribe audio and build an SRT file; for video inputs, also burn the
    subtitles into a copy of the video with ffmpeg.

    Returns a ``(srt_path, video_path, error)`` triple where unused slots are
    None; ``error`` is either a gr.Error from check_file or an error string.
    """
    # Check and process the file first
    processed_path, error_message = check_file(audio_file_path)

    if error_message:
        return None, None, error_message

    with open(processed_path, "rb") as file:
        # verbose_json is required to get per-segment timestamps.
        transcription_json_response = client.audio.transcriptions.create(
            file=(os.path.basename(processed_path), file.read()),
            model=model,
            prompt=prompt,
            response_format="verbose_json",
            language=None if auto_detect_language else language,
            temperature=0.0,
        )

    # Directly access the segments attribute
    transcription_json = transcription_json_response.segments

    try:
        srt_content = json_to_srt(transcription_json)
    except ValueError as e:
        return None, None, f"Error creating SRT file: {e}"

    # delete=False: the file must outlive this function so Gradio can serve it.
    # NOTE(review): nothing ever removes these temp files — consider cleanup.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".srt", delete=False) as temp_srt_file:
        temp_srt_path = temp_srt_file.name
        temp_srt_file.write(srt_content)

    # Only these two extensions are treated as video containers.
    if audio_file_path.lower().endswith((".mp4", ".webm")):
        try:
            # e.g. clip.mp4 -> clip_with_subs.mp4 (same extension/container).
            output_file_path = audio_file_path.replace(os.path.splitext(audio_file_path)[1], "_with_subs" + os.path.splitext(audio_file_path)[1])
            subprocess.run(
                [
                    "ffmpeg",
                    "-i",
                    audio_file_path,
                    # Burn the SRT into the video stream via the subtitles filter.
                    # NOTE(review): the path is interpolated unquoted — special
                    # characters in the temp path could break the filter string.
                    "-vf",
                    f"subtitles={temp_srt_path}",
                    output_file_path,
                ],
                check=True,
            )
            return temp_srt_path, output_file_path, None
        except subprocess.CalledProcessError as e:
            return None, None, f"Error during subtitle addition: {e}"

    # Audio-only input: return just the SRT file.
    return temp_srt_path, None, None
330
+
331
# UI wiring: three tools behind top-level tabs (LLM chat, speech-to-text,
# subtitle maker). All callbacks are defined above.
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    with gr.Tabs():
        with gr.TabItem("LLMs"):
            with gr.Row():
                with gr.Column(scale=1, min_width=250):
                    model = gr.Dropdown(
                        choices=[
                            "llama3-70b-8192",
                            "llama3-8b-8192",
                            "mixtral-8x7b-32768",
                            "gemma-7b-it",
                            "gemma2-9b-it",
                        ],
                        value="llama3-70b-8192",
                        label="Model",
                    )
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        step=0.01,
                        value=0.5,
                        label="Temperature",
                        info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative.",
                    )
                    max_tokens = gr.Slider(
                        minimum=1,
                        maximum=8192,
                        step=1,
                        value=4096,
                        label="Max Tokens",
                        info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b.",
                    )
                    top_p = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        step=0.01,
                        value=0.5,
                        label="Top P",
                        info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p.",
                    )
                    seed = gr.Number(
                        precision=0, value=0, label="Seed", info="A starting point to initiate generation, use 0 for random"
                    )
                    # Keep the Max Tokens ceiling in sync with the selected model.
                    # Fix: registered exactly once — the original wired this same
                    # event a second time after the ChatInterface, so the callback
                    # fired twice per dropdown change.
                    model.change(update_max_tokens, inputs=[model], outputs=max_tokens)
                with gr.Column(scale=1, min_width=400):
                    chatbot = gr.ChatInterface(
                        fn=generate_response,
                        chatbot=None,
                        additional_inputs=[
                            model,
                            temperature,
                            max_tokens,
                            top_p,
                            seed,
                        ],
                    )
        with gr.TabItem("Speech To Text"):
            with gr.Tabs():
                with gr.TabItem("Transcription"):
                    gr.Markdown("Transcript audio from files to text!")
                    with gr.Row():
                        audio_input = gr.File(
                            type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
                        )
                        model_choice_transcribe = gr.Dropdown(
                            choices=["whisper-large-v3"],  # Only include 'whisper-large-v3'
                            value="whisper-large-v3",
                            label="Model",
                        )
                    with gr.Row():
                        transcribe_prompt = gr.Textbox(
                            label="Prompt (Optional)",
                            info="Specify any context or spelling corrections.",
                        )
                        with gr.Column():
                            # Dropdown shows the language name, submits the code.
                            language = gr.Dropdown(
                                choices=[(lang, code) for lang, code in LANGUAGE_CODES.items()],
                                value="en",
                                label="Language",
                            )
                            auto_detect_language = gr.Checkbox(label="Auto Detect Language")
                    transcribe_button = gr.Button("Transcribe")
                    transcription_output = gr.Textbox(label="Transcription")
                    transcribe_button.click(
                        transcribe_audio,
                        inputs=[audio_input, transcribe_prompt, language, auto_detect_language, model_choice_transcribe],
                        outputs=transcription_output,
                    )
                with gr.TabItem("Translation"):
                    gr.Markdown("Transcript audio from files and translate them to English text!")
                    with gr.Row():
                        audio_input_translate = gr.File(
                            type="filepath", label="Upload File containing Audio", file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS]
                        )
                        model_choice_translate = gr.Dropdown(
                            choices=["whisper-large-v3"],  # Only include 'whisper-large-v3'
                            value="whisper-large-v3",
                            label="Model",
                        )
                    with gr.Row():
                        translate_prompt = gr.Textbox(
                            label="Prompt (Optional)",
                            info="Specify any context or spelling corrections.",
                        )
                    translate_button = gr.Button("Translate")
                    translation_output = gr.Textbox(label="Translation")
                    translate_button.click(
                        translate_audio,
                        inputs=[audio_input_translate, translate_prompt, model_choice_translate],
                        outputs=translation_output,
                    )
                with gr.TabItem("Subtitle Maker"):
                    with gr.Row():
                        audio_input_subtitles = gr.File(
                            label="Upload Audio/Video",
                            file_types=[f".{ext}" for ext in ALLOWED_FILE_EXTENSIONS],
                        )
                        model_choice_subtitles = gr.Dropdown(
                            choices=["whisper-large-v3"],  # Only include 'whisper-large-v3'
                            value="whisper-large-v3",
                            label="Model",
                        )
                    transcribe_prompt_subtitles = gr.Textbox(
                        label="Prompt (Optional)",
                        info="Specify any context or spelling corrections.",
                    )
                    with gr.Row():
                        language_subtitles = gr.Dropdown(
                            choices=[(lang, code) for lang, code in LANGUAGE_CODES.items()],
                            value="en",
                            label="Language",
                        )
                        auto_detect_language_subtitles = gr.Checkbox(
                            label="Auto Detect Language"
                        )
                    transcribe_button_subtitles = gr.Button("Generate Subtitles")
                    srt_output = gr.File(label="SRT Output File")
                    video_output = gr.File(label="Output Video with Subtitles")
                    transcribe_button_subtitles.click(
                        generate_subtitles,
                        inputs=[
                            audio_input_subtitles,
                            transcribe_prompt_subtitles,
                            language_subtitles,
                            auto_detect_language_subtitles,
                            model_choice_subtitles,
                        ],
                        # Third output is an unrendered Textbox acting purely as a
                        # sink for the function's error-string return slot.
                        outputs=[srt_output, video_output, gr.Textbox(label="Error")],
                    )

demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ numpy
2
+ gradio
3
+ groq