import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# 1. Load a model that fits in the free tier
# 'flan-t5-large' is powerful but small enough for the free CPU tier
model_name = 'google/flan-t5-large'
print("Loading model... this may take a minute.")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Use GPU if available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# 2. Define the generation function
def generate_text(task_prefix, input_text):
    # Dynamic prompt construction: prepend the task prefix to the user's text
    prompt = f"{task_prefix}: {input_text}"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate output with beam search; no_repeat_ngram_size curbs repetition
    output_ids = model.generate(
        **inputs,
        max_length=64,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=2
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# 3. Create the web interface
# The dropdown choices are the literal task prefixes the model understands
task_choices = [
    "summarize",
    "translate French to English",
    "paraphrase",
    "generate question",
    "sst2 sentence"
]

demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Dropdown(choices=task_choices, label="Select Task", value="summarize"),
        gr.Textbox(label="Input Text", placeholder="Enter your text here...")
    ],
    outputs=gr.Textbox(label="AI Output"),
    title="Multi-Task AI Generator",
    description="Select a task and enter text. Powered by Google Flan-T5."
)

# 4. Launch
demo.launch(ssr_mode=False)
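
# Optional smoke test (a minimal sketch, not part of the app itself; the
# example inputs below are illustrative). To check the model from the command
# line without starting the web server, comment out demo.launch() above and
# uncomment these lines:
#
# print(generate_text("summarize",
#                     "Gradio is an open-source Python library that makes it "
#                     "easy to wrap a machine learning model in a web UI."))
# print(generate_text("translate French to English",
#                     "Bonjour, comment allez-vous ?"))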