AutoSubGenV

Sleeping

App Files Files Community

Athspi committed on Mar 11

Commit

43fec16

verified ·

1 Parent(s): 818e336

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -95

app.py CHANGED Viewed

@@ -12,11 +12,9 @@ logging.getLogger("moviepy").setLevel(logging.ERROR)
 # Configure Gemini API
 genai.configure(api_key=os.environ["GEMINI_API_KEY"])
-# Create the Gemini model
 model = genai.GenerativeModel("gemini-2.0-flash-exp")
-# Enhanced language support
 SUPPORTED_LANGUAGES = [
     "Auto Detect", "English", "Spanish", "French", "German", "Italian",
     "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
@@ -24,82 +22,100 @@ SUPPORTED_LANGUAGES = [
 ]
 # Magic Prompts
-TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Analyze this audio and generate precise subtitles with accurate timestamps following these rules:
-1. Identify natural speech segments (3-7 words)
-2. Include exact start/end times in [HH:MM:SS.ms] format
-3. Add speaker identification when multiple voices
-4. Preserve emotional tone and punctuation
-5. Format exactly like:
 [00:00:05.250 -> 00:00:08.100]
-Hello world! This is an example.
-[00:00:08.500 -> 00:00:10.200]
-Second subtitle line.
-Return ONLY the subtitles with timestamps, no explanations."""
-TRANSLATION_PROMPT = """You are a certified translator. Translate these subtitles to {target_language} following these rules:
-1. Keep timestamps EXACTLY as original
-2. Match subtitle length to original timing
-3. Preserve names/technical terms
-4. Use natural colloquial speech
-5. Maintain line breaks and formatting
-ORIGINAL SUBTITLES:
 {subtitles}
-TRANSLATED {target_language} SUBTITLES:"""
 def extract_audio(video_path):
-    """Extract high-quality audio from video"""
     video = VideoFileClip(video_path)
-    audio_path = os.path.join(tempfile.gettempdir(), "high_quality_audio.wav")
     video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
     return audio_path
-def parse_timestamp(timestamp_str):
-    """Convert timestamp string to seconds"""
-    h, m, s = map(float, timestamp_str.split(':'))
-    return h * 3600 + m * 60 + s
 def gemini_transcribe(audio_path):
-    """Get timestamped transcription from Gemini"""
     with open(audio_path, "rb") as f:
         audio_data = f.read()
     response = model.generate_content(
-        contents=[TRANSCRIPTION_PROMPT,
-                 {'mime_type': 'audio/wav', 'data': audio_data}]
     )
     return response.text
-def create_srt(subtitles_text):
-    """Convert Gemini's raw output to SRT format"""
-    entries = re.split(r'\n{2,}', subtitles_text.strip())
-    srt_output = []
-    for idx, entry in enumerate(entries, 1):
-        time_match = re.match(r'\[(.*?) -> (.*?)\]', entry)
-        if not time_match:
-            continue
-        start_time = parse_timestamp(time_match.group(1))
-        end_time = parse_timestamp(time_match.group(2))
-        text = entry.split(']', 1)[1].strip()
-        srt_output.append(
-            f"{idx}\n"
-            f"{timedelta(seconds=start_time)} --> {timedelta(seconds=end_time)}\n"
-            f"{text}\n"
-        )
-    return "".join(srt_output)
 def translate_subtitles(subtitles, target_lang):
-    """Translate subtitles while preserving timing"""
     prompt = TRANSLATION_PROMPT.format(
         target_language=target_lang,
         subtitles=subtitles
@@ -108,41 +124,37 @@ def translate_subtitles(subtitles, target_lang):
     return response.text
 def process_video(video_path, source_lang, target_lang):
-    """Full processing pipeline"""
-    # Audio extraction
-    audio_path = extract_audio(video_path)
-    # Transcription
-    raw_transcription = gemini_transcribe(audio_path)
-    srt_original = create_srt(raw_transcription)
-    # Save original
-    original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
-    with open(original_srt, "w") as f:
-        f.write(srt_original)
-    # Translation
-    translated_srt = None
-    if target_lang != "None":
-        translated_text = translate_subtitles(srt_original, target_lang)
-        translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
-        with open(translated_srt, "w") as f:
-            f.write(translated_text)
-    # Cleanup
-    os.remove(audio_path)
-    return original_srt, translated_srt
 # Gradio Interface
-with gr.Blocks(theme=gr.themes.Default(spacing_size="sm")) as app:
-    gr.Markdown("# 🎬 Professional Subtitle Studio")
-    gr.Markdown("Generate broadcast-quality subtitles with perfect timing")
     with gr.Row():
         with gr.Column():
-            video_input = gr.Video(label="Upload Video", sources=["upload"])
-            lang_row = gr.Row()
             source_lang = gr.Dropdown(
                 label="Source Language",
                 choices=SUPPORTED_LANGUAGES,
@@ -153,17 +165,11 @@ with gr.Blocks(theme=gr.themes.Default(spacing_size="sm")) as app:
                 choices=["None"] + SUPPORTED_LANGUAGES[1:],
                 value="None"
             )
-            process_btn = gr.Button("Generate Subtitles", variant="primary")
-        with gr.Column():
-            original_sub = gr.File(label="Original Subtitles")
-            translated_sub = gr.File(label="Translated Subtitles")
-            preview_area = gr.HTML("""
-                <div style='border: 2px dashed #666; padding: 20px; border-radius: 8px;'>
-                    <h3 style='margin-top: 0;'>Subtitle Preview</h3>
-                    <div id='preview-content' style='height: 300px; overflow-y: auto;'></div>
-                </div>
-            """)
     process_btn.click(
         process_video,

 # Configure Gemini API
 genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 model = genai.GenerativeModel("gemini-2.0-flash-exp")
+# Supported languages
 SUPPORTED_LANGUAGES = [
     "Auto Detect", "English", "Spanish", "French", "German", "Italian",
     "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
 ]
 # Magic Prompts
+TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:
+1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
+2. Each subtitle 3-7 words
+3. Include speaker changes
+4. Preserve emotional tone
+5. Format example:
 [00:00:05.250 -> 00:00:08.100]
+Example subtitle text
+Return ONLY subtitles with timestamps."""
+TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:
+1. Keep timestamps identical
+2. Match text length to timing
+3. Preserve technical terms
+4. Use natural speech patterns
+ORIGINAL:
 {subtitles}
+TRANSLATED:"""
def parse_timestamp(timestamp_str):
    """Convert a subtitle timestamp string to a number of seconds.

    Accepts ``HH:MM:SS.sss`` and ``MM:SS.sss``; the fractional part may use
    either ``.`` or ``,`` as separator. Surrounding square brackets and
    whitespace are ignored.

    Args:
        timestamp_str: Timestamp text, e.g. ``"[00:01:02.500]"``.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If the string does not have two or three ``:``-separated
            fields, or a field is not numeric.
    """
    clean_ts = timestamp_str.strip("[] \t\r\n").replace(',', '.')
    parts = clean_ts.split(':')
    if len(parts) == 3:  # HH:MM:SS.ss
        hours, minutes, seconds_part = parts
    elif len(parts) == 2:  # MM:SS.ss
        hours = "0"
        minutes, seconds_part = parts
    else:
        raise ValueError(f"Invalid timestamp: {timestamp_str}")
    return float(hours) * 3600 + float(minutes) * 60 + float(seconds_part)


# One timing line: optional brackets, "->" or "-->" between two timestamps,
# each with an optional hour field and an optional fractional part.
_SRT_TIMING_RE = re.compile(
    r'\[?\s*((?:\d+:)?\d+:\d+(?:[.,]\d+)?)\s*-{1,2}>\s*'
    r'((?:\d+:)?\d+:\d+(?:[.,]\d+)?)\s*\]?'
)


def _format_srt_time(seconds):
    """Render a number of seconds as an SRT timestamp (``HH:MM:SS,mmm``)."""
    # Work in integer milliseconds so the output never shows float noise.
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def create_srt(subtitles_text):
    """Convert timestamped subtitle text into SRT format.

    The input is split into entries on blank lines. Each entry must contain a
    timing line such as ``[00:00:05.250 -> 00:00:08.100]`` (brackets optional,
    ``-->`` also accepted, so already-formatted SRT text can be re-parsed);
    whatever follows the timing within the entry is the cue text. Entries
    without a recognisable timing line are skipped.

    Args:
        subtitles_text: Raw subtitle text with one timing line per entry.

    Returns:
        The SRT document as a string; cues are numbered consecutively from 1
        and separated by a blank line. Empty string if no entry is usable.
    """
    entries = re.split(r'\n{2,}', subtitles_text.strip())
    srt_output = []
    for idx, entry in enumerate(entries, 1):
        time_match = _SRT_TIMING_RE.search(entry)
        if not time_match:
            continue
        try:
            start_time = parse_timestamp(time_match.group(1))
            end_time = parse_timestamp(time_match.group(2))
        except ValueError as e:
            print(f"Skipping invalid entry {idx}: {str(e)}")
            continue
        # Take the text after the timing match instead of splitting on "]",
        # so unbracketed timings and text containing "]" both work.
        text = entry[time_match.end():].strip()
        # Number by emitted cue, not by input entry, so skipped entries do
        # not leave gaps in the SRT sequence.
        cue_number = len(srt_output) + 1
        srt_output.append(
            f"{cue_number}\n"
            f"{_format_srt_time(start_time)} --> {_format_srt_time(end_time)}\n"
            f"{text}\n"
        )
    return "\n".join(srt_output)
 def extract_audio(video_path):
+    """High-quality audio extraction"""
     video = VideoFileClip(video_path)
+    audio_path = os.path.join(tempfile.gettempdir(), "hq_audio.wav")
     video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
     return audio_path
 def gemini_transcribe(audio_path):
+    """Audio transcription with Gemini"""
     with open(audio_path, "rb") as f:
         audio_data = f.read()
     response = model.generate_content(
+        [TRANSCRIPTION_PROMPT, {"mime_type": "audio/wav", "data": audio_data}]
     )
     return response.text
 def translate_subtitles(subtitles, target_lang):
+    """Context-aware translation"""
     prompt = TRANSLATION_PROMPT.format(
         target_language=target_lang,
         subtitles=subtitles
     return response.text
 def process_video(video_path, source_lang, target_lang):
+    """Complete processing pipeline"""
+    try:
+        audio_path = extract_audio(video_path)
+        raw_transcription = gemini_transcribe(audio_path)
+        srt_original = create_srt(raw_transcription)
+        original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
+        with open(original_srt, "w") as f:
+            f.write(srt_original)
+        translated_srt = None
+        if target_lang != "None":
+            translated_text = translate_subtitles(srt_original, target_lang)
+            translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
+            with open(translated_srt, "w") as f:
+                f.write(create_srt(translated_text))  # Re-parse translated text
+        os.remove(audio_path)
+        return original_srt, translated_srt
+    except Exception as e:
+        print(f"Processing error: {str(e)}")
+        return None, None
 # Gradio Interface
+with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
+    gr.Markdown("# 🎬 Professional Subtitle Generator")
     with gr.Row():
+        video_input = gr.Video(label="Upload Video", sources=["upload"])
         with gr.Column():
             source_lang = gr.Dropdown(
                 label="Source Language",
                 choices=SUPPORTED_LANGUAGES,
                 choices=["None"] + SUPPORTED_LANGUAGES[1:],
                 value="None"
             )
+            process_btn = gr.Button("Generate", variant="primary")
+    with gr.Row():
+        original_sub = gr.File(label="Original Subtitles")
+        translated_sub = gr.File(label="Translated Subtitles")
     process_btn.click(
         process_video,