Tiny dummy models (collection)
Randomly initialized tiny models for debugging/testing purposes (176 items).
How to use yujiepan/qvq-preview-tiny-random with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("image-text-to-text", model="yujiepan/qvq-preview-tiny-random")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText
processor = AutoProcessor.from_pretrained("yujiepan/qvq-preview-tiny-random")
model = AutoModelForImageTextToText.from_pretrained("yujiepan/qvq-preview-tiny-random")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
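Since the weights are random, the generated text will be gibberish; the model only exists to exercise code paths quickly. As a quick sanity check, you can confirm the checkpoint really is tiny by inspecting its config. A minimal sketch; the expected values come from the creation script further down this card:

# Verify the tiny dimensions before debugging against this model
from transformers import AutoConfig

config = AutoConfig.from_pretrained("yujiepan/qvq-preview-tiny-random")
print(config.hidden_size, config.num_hidden_layers)  # expected: 16 2
print(config.vision_config.depth)                    # expected: 2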
How to use yujiepan/qvq-preview-tiny-random with vLLM:
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "yujiepan/qvq-preview-tiny-random"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
-H "Content-Type: application/json" \
--data '{
"model": "yujiepan/qvq-preview-tiny-random",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this image in one sentence."
},
{
"type": "image_url",
"image_url": {
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
}
}
]
}
]
  }'
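The same endpoint can be called from Python instead of curl. A minimal sketch, assuming the openai package is installed and the vLLM server above is running; expect gibberish output since the weights are random:

# Query the OpenAI-compatible vLLM server from Python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
response = client.chat.completions.create(
    model="yujiepan/qvq-preview-tiny-random",
    messages=[{"role": "user", "content": "Say hi."}],
    max_tokens=16,
)
print(response.choices[0].message.content)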
How to use yujiepan/qvq-preview-tiny-random with SGLang:
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "yujiepan/qvq-preview-tiny-random" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
-H "Content-Type: application/json" \
--data '{
"model": "yujiepan/qvq-preview-tiny-random",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this image in one sentence."
},
{
"type": "image_url",
"image_url": {
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
}
}
]
}
]
  }'

# Alternatively, run the SGLang server in Docker:
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "yujiepan/qvq-preview-tiny-random" \
    --host 0.0.0.0 \
    --port 30000
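Before sending requests, you may want to wait until the server has finished loading. A minimal sketch, assuming the requests package; it polls the /v1/models endpoint of the OpenAI-compatible API the server exposes:

# Poll until the SGLang server accepts connections
import time

import requests

for _ in range(60):
    try:
        r = requests.get("http://localhost:30000/v1/models", timeout=2)
        if r.ok:
            print(r.json())
            break
    except requests.ConnectionError:
        time.sleep(1)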
# Call the server using curl (OpenAI-compatible API) in the same way as shown above.

How to use yujiepan/qvq-preview-tiny-random with Docker Model Runner:
docker model run hf.co/yujiepan/qvq-preview-tiny-random
This model is for debugging. It is randomly initialized with the config from Qwen/QVQ-72B-Preview but with much smaller dimensions.
Code used to create this model:
import os

import torch
from transformers import (AutoConfig, AutoProcessor, GenerationConfig,
                          enable_full_determinism)
from transformers.models.qwen2_vl import Qwen2VLForConditionalGeneration
model_id = "Qwen/QVQ-72B-Preview"
repo_id = "yujiepan/qvq-preview-tiny-random"
save_path = f"/tmp/{repo_id}"
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
# Shrink the language model
config.hidden_size = 16
config.intermediate_size = 32
config.num_attention_heads = 2
config.num_hidden_layers = 2
config.num_key_value_heads = 1
# Shrink the vision tower
config.vision_config.embed_dim = 16
config.vision_config.num_heads = 2
config.vision_config.hidden_size = 16
config.vision_config.depth = 2
config.rope_scaling['mrope_section'] = [1, 1, 2]  # sum must be head_dim // 2 = 4 here
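# Illustrative sanity check (not in the original script): in Qwen2-VL, the
# M-RoPE sections together must cover half the attention head dimension.
head_dim = config.hidden_size // config.num_attention_heads  # 16 // 2 = 8
assert sum(config.rope_scaling['mrope_section']) == head_dim // 2  # 1 + 1 + 2 == 4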
enable_full_determinism(42)  # fixed seed so the random init is reproducible
model = Qwen2VLForConditionalGeneration(config=config)
model = model.to(torch.bfloat16).cuda().eval()
# Reuse the original model's generation config (special tokens, sampling defaults)
model.generation_config = GenerationConfig.from_pretrained(
    model_id, trust_remote_code=True,
)
processor = AutoProcessor.from_pretrained(model_id)
model.save_pretrained(save_path)
processor.save_pretrained(save_path)
os.system(f"ls -alh {save_path}")
def try_inference(model_id):
    # Smoke-test the saved checkpoint: load it back and run one generation.
    torch.use_deterministic_algorithms(False)  # generation does not need full determinism
    from qwen_vl_utils import process_vision_info
    from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_id, device_map="cuda"
    )
    processor = AutoProcessor.from_pretrained(model_id)
    messages = [
        {
            "role": "system",
            "content": [
                {"type": "text", "text": "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."}
            ],
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/QVQ/demo.png",
                },
                {"type": "text", "text": "What value should be filled in the blank space?"},
            ],
        },
    ]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to("cuda")
    generated_ids = model.generate(**inputs, max_new_tokens=32)
    output_text = processor.batch_decode(
        generated_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False
    )
    print(output_text)

try_inference(save_path)
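Once the smoke test passes, the checkpoint in save_path can be pushed to the Hub. A hypothetical follow-up (not part of the original script), assuming huggingface_hub is installed and you are logged in:

# Upload the tiny checkpoint to the Hub (hypothetical follow-up)
from huggingface_hub import HfApi

api = HfApi()
api.create_repo(repo_id, exist_ok=True)
api.upload_folder(repo_id=repo_id, folder_path=save_path)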
Base model: Qwen/Qwen2-VL-72B