import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# 1. Load a model that fits in the free tier
# 'flan-t5-large' is powerful but small enough for the free CPU tier
model_name = 'google/flan-t5-large'
print("Loading model... this may take a minute.")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Use GPU if available, otherwise CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# 2. Define the generation function
def generate_text(task_prefix, input_text):
    # Dynamic prompt construction: prepend the task prefix to the user's text
    prompt = f"{task_prefix}: {input_text}"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate output with beam search; no_repeat_ngram_size curbs repetition
    output_ids = model.generate(
        **inputs,
        max_length=64,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=2
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# 3. Create the web interface
# The dropdown choices are the literal task prefixes the model understands
task_choices = [
    "summarize",
    "translate French to English",
    "paraphrase",
    "generate question",
    "sst2 sentence"
]

demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Dropdown(choices=task_choices, label="Select Task", value="summarize"),
        gr.Textbox(label="Input Text", placeholder="Enter your text here...")
    ],
    outputs=gr.Textbox(label="AI Output"),
    title="Multi-Task AI Generator",
    description="Select a task and enter text. Powered by Google Flan-T5."
)

# 4. Launch
demo.launch(ssr_mode=False)
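
# Optional smoke test (a minimal sketch, not part of the app itself; the
# example inputs below are illustrative). To check the model from the command
# line without starting the web server, comment out demo.launch() above and
# uncomment these lines:
#
# print(generate_text("summarize",
#                     "Gradio is an open-source Python library that makes it "
#                     "easy to wrap a machine learning model in a web UI."))
# print(generate_text("translate French to English",
#                     "Bonjour, comment allez-vous ?"))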