Spaces:

rxpbtn21
/

t5-small-lora-summarizer

Sleeping

File size: 1,453 Bytes

10ba581
 
 
 
 
 
dcb3064
 
 
10ba581
0af3397
10ba581
 
dcb3064
 
0af3397
10ba581
 
 
dcb3064
 
 
 
4008ae5
dcb3064
 
 
 
0af3397
 
10ba581
 
 
 
 
0af3397


import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel
import torch

# Pick the compute device: GPU when CUDA is available, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The tokenizer is fetched from the same repository as the adapter.
tokenizer = AutoTokenizer.from_pretrained("rxpbtn21/t5-small-lora-summarizer")

# Load the plain t5-small checkpoint, place it on the chosen device, and
# only then wrap it with the LoRA adapter.
base_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
base_model = base_model.to(device)
model = PeftModel.from_pretrained(base_model, "rxpbtn21/t5-small-lora-summarizer")

# The app only runs generation, so switch the model to evaluation mode.
model.eval()

def summarize(text):
    """Summarize *text* with the module-level tokenizer and model.

    Args:
        text: Input text from the UI. The tokenizer truncates anything
            beyond 512 tokens.

    Returns:
        The decoded summary string. An empty string is returned when
        *text* is ``None``, empty, or whitespace-only. If tokenization or
        generation raises, the error is printed and a human-readable
        error message is returned instead of propagating the exception.
    """
    # Nothing to summarize: return early instead of running beam search on
    # an input that carries no content. Non-string values fall through to
    # the try block so they are reported like any other failure.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    try:
        inputs = tokenizer(text, max_length=512, truncation=True, return_tensors="pt")
        # Tensors must live on the same device as the model.
        input_ids = inputs["input_ids"].to(device)
        # Pass the mask explicitly rather than letting generate() infer it.
        attention_mask = inputs["attention_mask"].to(device)
        with torch.no_grad():
            outputs = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                num_beams=4,
                max_new_tokens=128,
                early_stopping=True,
            )
        # Only the first returned sequence is decoded.
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: log the error and return an informative message
        # rather than letting the exception propagate to the caller.
        print(f"Error during summarization: {e}")
        return f"An error occurred during summarization. Please check the Space logs for details. Error: {e}"

# Build the UI: one text input and one text output, wired to summarize().
iface = gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="text",
    title="LoRA Fine-tuned T5-small Summarizer",
)

# Start serving the interface, with the share flag explicitly off.
iface.launch(share=False)