import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# 1. Load a model that fits in the free tier
# 'google/flan-t5-large' (~780M parameters) is powerful but small enough for the free CPU tier
model_name = 'google/flan-t5-large'
print("Loading model... this may take a minute.")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Use GPU if available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
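# Note: from_pretrained already returns the model in eval mode; calling eval()
# explicitly just documents that this script is inference-only.
model.eval()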
# 2. Define the generation function
def generate_text(task_prefix, input_text):
    # Build the prompt by prepending the selected task prefix
    prompt = f"{task_prefix}: {input_text}"
    # Truncate overly long inputs so they fit the tokenizer's context window
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    # Beam search with light anti-repetition; max_length caps the output at 64 tokens
    output_ids = model.generate(
        **inputs,
        max_length=64,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=2
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
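# Optional local smoke test (uncomment to verify generation before launching the UI):
# print(generate_text("summarize", "Gradio lets you wrap any Python function in a web UI."))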
# 3. Create the Web Interface
# The dropdown choices double as the task prefixes prepended to the prompt
task_choices = [
    "summarize",
    "translate French to English",
    "paraphrase",
    "generate question",
    "sst2 sentence"  # original T5 prefix for SST-2 sentiment classification
]
demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Dropdown(choices=task_choices, label="Select Task", value="summarize"),
        gr.Textbox(label="Input Text", placeholder="Enter your text here...")
    ],
    outputs=gr.Textbox(label="AI Output"),
    title="Multi-Task AI Generator",
    description="Select a task and enter text. Powered by Google Flan-T5."
)
# 4. Launch
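# ssr_mode=False disables Gradio's server-side rendering, which can misbehave on some hosts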
demo.launch(ssr_mode=False)