import gradio as gr
import transformers

# Load the fine-tuned model from its quantized GGUF export;
# transformers dequantizes the GGUF weights when loading.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "baldassarre/llama-3.2-peft-Q4_K_M-GGUF",
    dtype="float16",
    gguf_file="llama-3.2-peft-q4_k_m.gguf",
)

# Load the tokenizer from the original base model.
tokenizer = transformers.AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")


def generate(prompt, max_new_tokens):
    # Wrap the user prompt in the Llama 3.2 chat template.
    inputs = tokenizer.apply_chat_template(
        [
            {"role": "user", "content": prompt},
        ],
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    )
    # Sample with temperature and min-p filtering; do_sample=True is required,
    # otherwise the warpers have no effect under greedy decoding.
    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        logits_processor=[
            transformers.TemperatureLogitsWarper(temperature=1.5),
            transformers.MinPLogitsWarper(min_p=0.1),
        ],
    )
    # Keep only the assistant turn: drop the echoed prompt and the end-of-turn token.
    txt = tokenizer.decode(outputs[0])
    txt = txt.split("<|start_header_id|>assistant<|end_header_id|>\n\n", maxsplit=1)[1]
    txt = txt.split("<|eot_id|>", maxsplit=1)[0]
    return txt


demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(placeholder="What is ...", label="Question"),
        gr.Slider(10, 500, value=100, label="Max new tokens"),
    ],
    outputs=gr.Textbox(label="Answer", interactive=False),
    title="PEFT Model",
    examples=[
        ["What are the ingredients for pizza?", 200],
        ["Which city is famous for tortellini?", 20],
        ["Who invented the radio?", 20],
        ["Explain how to load a dishwasher.", 400],
    ],
)

demo.launch()
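If this script is deployed as the app.py of a Gradio Space, the Space also needs its Python dependencies declared; gradio itself is installed by the Space's SDK, so it does not have to be listed. A minimal requirements.txt might look like the sketch below (the exact package list is an assumption, but transformers needs the gguf package to read GGUF checkpoints and torch to hold the model weights):

    transformers
    torch
    gguf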