"""Streaming image-description demo for the Eagle2-1B vision-language model."""

import threading

import torch
from PIL import Image
import requests
from transformers import (
    AutoModel,
    AutoProcessor,
    AutoTokenizer,
    TextIteratorStreamer,
)

# Single source of truth for the checkpoint location (was repeated verbatim
# in three separate from_pretrained calls).
MODEL_PATH = "/home/zhidingy/workspace/eagle-next/internvl_chat/work_dirs/release/test/Eagle2-1B"

# trust_remote_code is required because Eagle2 ships custom modeling code.
# NOTE(review): only enable trust_remote_code for checkpoints you trust.
model = AutoModel.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True,
    attn_implementation='flash_attention_2',  # requires CUDA + flash-attn build
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True, use_fast=True)
processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True, use_fast=True)
# Left-pad batched prompts so generated tokens continue directly after each
# prompt during decoding.
processor.tokenizer.padding_side = "left"
# Chat request: one user turn carrying an image URL plus a text instruction.
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": "https://www.ilankelman.org/stopsigns/australia.jpg",
            },
            {"type": "text", "text": "Describe this image."},
        ],
    }
]

# Render the conversation into prompt text (with the generation prompt
# appended), pull out the image/video payloads, then tokenize everything
# into a single padded batch of tensors.
prompt = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = processor.process_vision_info(messages)
inputs = processor(
    text=[prompt],
    images=image_inputs,
    videos=video_inputs,
    return_tensors="pt",
    padding=True,
)

# Move both the encoded batch and the model weights onto the GPU.
inputs = inputs.to("cuda")
model = model.to("cuda")
# Stream decoded text as it is generated: skip the echoed prompt and any
# special tokens so only the model's reply is printed.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=1024,
    do_sample=True,  # sampled decoding with nucleus filtering
    top_p=0.95,
    temperature=0.8,
)

# generate() blocks until decoding finishes, so run it on a worker thread
# and consume the streamer incrementally on the main thread.
thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

for new_text in streamer:
    print(new_text, end="", flush=True)

# Fix: join the worker thread. Without this the script can exit while
# generate() is still finalizing, and any exception raised inside the
# thread is silently dropped instead of surfacing here.
thread.join()