# app.py - Fully standalone Kabyle TTS with working downloads

import gradio as gr
from transformers import VitsModel, AutoTokenizer
import torch
import scipy.io.wavfile as wavfile
import numpy as np
import os
from datetime import datetime
from pydub import AudioSegment
import pdfplumber

# One-time startup work: the checkpoint and its tokenizer are loaded at import
# so that every request reuses the same in-memory objects.
_CHECKPOINT = "facebook/mms-tts-kab"
print(f"Loading {_CHECKPOINT}...")
model = VitsModel.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

# Use the GPU when torch reports one as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)

# Sample rate the model config declares for its output waveform.
sampling_rate = model.config.sampling_rate
print(f"Running on device: {device}")

# Generated audio is written into this folder; it may already exist.
os.makedirs("outputs", exist_ok=True)

def convert_wav_to_mp3(wav_path, mp3_path):
    """Transcode the WAV file at ``wav_path`` into an MP3 at ``mp3_path``.

    This is best-effort: any failure while loading or exporting is printed
    and reported through the return value instead of being raised.

    Returns:
        ``mp3_path`` when the export succeeded, ``None`` otherwise.
    """
    try:
        AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3")
    except Exception as err:
        print(f"Failed to convert WAV to MP3: {err}")
        return None
    return mp3_path

def synthesize(text, speed):
    """Synthesize speech for ``text`` and save it as a WAV file under ``outputs/``.

    The waveform comes from the module-level ``model``/``tokenizer``. Speed is
    applied by scaling the sample rate written to the WAV header (no
    resampling is performed), so a value other than 1.0 changes playback
    speed and pitch together.

    Args:
        text: Text to speak; surrounding whitespace is ignored.
        speed: Multiplier applied to the model's sample rate; must be > 0.

    Returns:
        Path of the WAV file that was written.

    Raises:
        ValueError: If ``text`` is ``None`` or blank, or ``speed`` is not
            a positive number.
    """
    # Treat None like an empty string so callers get the same ValueError
    # instead of an AttributeError from ``None.strip()``.
    cleaned = text.strip() if text else ""
    if not cleaned:
        raise ValueError("Text is empty!")

    # A zero or negative multiplier would yield an unusable WAV sample rate.
    if speed is None or speed <= 0:
        raise ValueError("Speed must be a positive number.")

    # Tokenize
    inputs = tokenizer(cleaned, return_tensors="pt").to(device)

    # Generate waveform
    with torch.no_grad():
        waveform = model(**inputs).waveform.cpu().numpy().squeeze()

    # Adjust rate for speed
    adjusted_rate = int(sampling_rate * speed)

    # Save WAV. Microseconds are part of the name so that two requests handled
    # within the same second do not overwrite each other's file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    wav_filepath = os.path.join("outputs", f"kabyle_{timestamp}.wav")
    wavfile.write(wav_filepath, adjusted_rate, np.asarray(waveform, dtype=np.float32))

    return wav_filepath

def _read_uploaded_text(uploaded_file):
    """Return the text content of an uploaded ``.txt`` or ``.pdf`` file.

    Raises:
        ValueError: If the extension is unsupported or the file cannot be read.
    """
    # Accept both a plain path string and an object exposing the path as
    # ``.name`` (the form the upload handler has relied on so far).
    path = getattr(uploaded_file, "name", uploaded_file)
    extension = os.path.splitext(path)[1].lower()

    # Checked outside the try below so this message is not re-wrapped as a
    # misleading "Could not read file: ..." error.
    if extension not in (".txt", ".pdf"):
        raise ValueError("Unsupported file type. Please upload a .txt or .pdf file.")

    try:
        if extension == ".txt":
            with open(path, "r", encoding="utf-8") as handle:
                return handle.read()
        with pdfplumber.open(path) as pdf:
            # extract_text() can come back empty/None for a page without
            # extractable text; skip those pages so the join cannot fail.
            page_texts = (page.extract_text() for page in pdf.pages)
            return " ".join(chunk for chunk in page_texts if chunk)
    except Exception as e:
        raise ValueError(f"Could not read file: {e}") from e


def process_input(input_type, typed_text, uploaded_file, speed):
    """Turn typed or uploaded text into speech and build the UI outputs.

    Args:
        input_type: Selected radio label; "📝 Type Text" uses ``typed_text``,
            any other value uses ``uploaded_file``.
        typed_text: Text entered in the textbox (may be ``None`` or empty).
        uploaded_file: Uploaded ``.txt``/``.pdf`` file, or ``None``.
        speed: Speed multiplier forwarded to ``synthesize``.

    Returns:
        A 3-tuple of components for the audio player, the WAV download and
        the MP3 download. The MP3 download is hidden and left empty when the
        conversion failed.

    Raises:
        ValueError: If no usable input text could be obtained.
        RuntimeError: If synthesis itself fails.
    """
    # Choose source
    if input_type == "📝 Type Text":
        final_text = typed_text or ""
    else:
        if uploaded_file is None:
            raise ValueError("Please upload a file.")
        final_text = _read_uploaded_text(uploaded_file)

    final_text = final_text.strip()
    if not final_text:
        raise ValueError("Input text is empty.")

    # Truncate long texts. No marker is appended: everything in the string is
    # sent to the model, so a "[truncated]" suffix would be read out loud.
    max_chars = 1000
    if len(final_text) > max_chars:
        print(f"Input truncated from {len(final_text)} to {max_chars} characters.")
        final_text = final_text[:max_chars]

    # Generate audio
    try:
        wav_path = synthesize(final_text, speed)
    except Exception as e:
        raise RuntimeError(f"Synthesis failed: {str(e)}") from e

    # Convert WAV to MP3. splitext swaps only the extension, whereas
    # str.replace would rewrite every ".wav" occurrence in the path.
    mp3_path = os.path.splitext(wav_path)[0] + ".mp3"
    mp3_ready = convert_wav_to_mp3(wav_path, mp3_path) is not None

    # Returned components update the three wired outputs. The MP3 slot is only
    # shown when the file actually exists, so a failed conversion does not
    # break the otherwise successful WAV result.
    return (
        gr.Audio(value=wav_path, label="Generated Speech", autoplay=False),
        gr.File(value=wav_path, visible=True),
        gr.File(value=mp3_path if mp3_ready else None, visible=mp3_ready),
    )


with gr.Blocks(title="🗣️ Kabyle TTS") as demo:
    # Page heading and short description.
    gr.Markdown("# 🎵 Kabyle Text-to-Speech")
    gr.Markdown("Convert text to speech using Meta's MMS-TTS model for Kabyle.")

    with gr.Row():
        # Left column: everything the user provides.
        with gr.Column():
            input_type = gr.Radio(
                choices=["📝 Type Text", "📎 Upload File"],
                value="📝 Type Text",
                label="Input Method",
            )

            # Only one of the two inputs below is visible at a time; the
            # textbox is the one shown initially.
            typed_text = gr.Textbox(
                label="Enter Text",
                placeholder="Example: Azul fell-ay! Kaci tazmamt.",
                lines=6,
                visible=True,
            )
            uploaded_file = gr.File(
                label="Upload .txt or .pdf",
                file_types=['.txt', '.pdf'],
                visible=False,
            )

            speed = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speech Speed",
            )

            btn = gr.Button("🔊 Generate Speech", variant="primary")

        # Right column: playback plus the download slots.
        with gr.Column():
            output_audio = gr.Audio(label="Generated Speech", autoplay=False)

            gr.Markdown("### 💾 Download Audio")

            # Download components start hidden; the click handler's return
            # value makes them visible.
            file_wav = gr.File(
                label="WAV Download",
                file_types=['.wav'],
                visible=False,
            )
            file_mp3 = gr.File(
                label="MP3 Download",
                file_types=['.mp3'],
                visible=False,
            )

    def toggle_inputs(choice):
        """Show the textbox for typed input and the uploader for file input."""
        show_textbox = choice == "📝 Type Text"
        show_uploader = choice == "📎 Upload File"
        return gr.update(visible=show_textbox), gr.update(visible=show_uploader)

    def hide_downloads():
        """Return updates that hide both download components."""
        return gr.update(visible=False), gr.update(visible=False)

    # Swap the visible input widget whenever the radio selection changes.
    input_type.change(
        fn=toggle_inputs,
        inputs=input_type,
        outputs=[typed_text, uploaded_file],
    )

    # Main event: run the pipeline and fill the player and both downloads.
    btn.click(
        fn=process_input,
        inputs=[input_type, typed_text, uploaded_file, speed],
        outputs=[output_audio, file_wav, file_mp3],
        queue=True,
    )

    # Hide the download components again as soon as either input changes.
    typed_text.change(fn=hide_downloads, inputs=[], outputs=[file_wav, file_mp3])
    uploaded_file.change(fn=hide_downloads, inputs=[], outputs=[file_wav, file_mp3])


    # Footer crediting the model used.
    gr.HTML("""
    <hr style="margin:20px 0; border-top:1px solid #ddd;">
    <p style="text-align:center; color:#666;">
        Powered by 
        <a href="https://huggingface.co/facebook/mms-tts-kab" target="_blank">facebook/mms-tts-kab</a>
    </p>
    """)


# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {"server_port": 7860, "debug": True}
    demo.launch(**launch_options)