Robotics
Transformers
Safetensors
English
glm4v
image-text-to-text
computer-vision
spatial-reasoning
vision-language-model
multi-modal
fine-tuned
Instructions to use hany01rye/TIGeR with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use hany01rye/TIGeR with Transformers:
# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

processor = AutoProcessor.from_pretrained("hany01rye/TIGeR")
model = AutoModelForMultimodalLM.from_pretrained("hany01rye/TIGeR")
- Notebooks
- Google Colab
- Kaggle
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| TIGeR Model Usage Examples | |
| Demonstrates how to use the fine-tuned spatial reasoning model | |
| """ | |
| import os | |
| import sys | |
| import json | |
| import re | |
| from typing import List, Dict, Any, Optional | |
| from llamafactory.chat.chat_model import ChatModel | |
def load_model(config_file: str):
    """Construct a LLaMA-Factory ``ChatModel`` for the given config file.

    ``ChatModel()`` is called with no arguments, so the config path is
    supplied by temporarily replacing ``sys.argv`` with
    ``[<program name>, config_file]`` (presumably picked up by the
    library's own argument parsing). The previous ``sys.argv`` is put back
    whether or not construction succeeds.

    Args:
        config_file: Path of the inference config to expose as the sole
            command-line argument.

    Returns:
        The constructed ``ChatModel``, or ``None`` if anything raised while
        loading. The error is printed, not propagated.
    """
    print(f"Loading model with config: {config_file}")
    try:
        saved_argv = list(sys.argv)
        sys.argv = [saved_argv[0], config_file]
        try:
            model = ChatModel()
        finally:
            # Undo the argv swap no matter how construction ended.
            sys.argv = saved_argv
        print("β Model loaded successfully!")
        return model
    except Exception as err:
        # Deliberate catch-all: callers treat ``None`` as "loading failed".
        print(f"β Model loading failed: {err}")
        return None
def single_inference_demo(chat_model, image_paths=None, question=None):
    """Ask the model one question about the given image(s) and print the answer.

    Args:
        chat_model: Object exposing ``chat(messages, images=...)`` that
            returns an iterable of replies. Each reply is expected to carry
            a ``response_text`` string; replies without one are rendered
            with ``str()`` instead.
        image_paths: A single image path or a sequence of paths handed to
            ``chat`` as ``images``. Defaults to a one-element placeholder
            list that must be replaced with a real path.
        question: Prompt sent as the single user message. Defaults to the
            built-in "which point is closer to the camera" question.

    Returns:
        None. Progress, the answer, and any error are printed to stdout.
    """
    print("\n" + "="*50)
    print("Single Image Inference Demo")
    print("="*50)

    if image_paths is None:
        # Placeholder - replace with your actual image path.
        image_paths = ["/path/to/your/image.jpg"]
    elif isinstance(image_paths, str):
        # Accept a bare path as shorthand for a one-element list.
        image_paths = [image_paths]
    else:
        image_paths = list(image_paths)

    if question is None:
        # Default question, using the same format as TIGeR.
        question = "Two points are circled on the image, labeled by A and B beside each circle. Which point is closer to the camera? Select from the following choices.\n(A) A is closer\n(B) B is closer"

    try:
        shown_paths = ", ".join(str(path) for path in image_paths)
        print(f"π· Loading image: {shown_paths}")
        print(f"π Question: {question}")

        # Messages in the format expected by ChatModel.
        messages = [{"role": "user", "content": question}]
        replies = chat_model.chat(messages, images=image_paths)

        answers = []
        for reply in replies:
            text = getattr(reply, "response_text", None)
            # Fall back to str() so one odd reply cannot make the join
            # below fail and discard the whole answer.
            answers.append(text if isinstance(text, str) else str(reply))

        response_text = "\n".join(answers)
        print(f"π‘ Answer: {response_text}")
    except FileNotFoundError:
        print("β Image file not found, please provide correct image path")
    except Exception as e:
        # Demo boundary: report the failure instead of crashing the script.
        print(f"β Error occurred during processing: {e}")
def main():
    """Run the demo: load the model, then perform one inference.

    Prints a banner, loads the model from the config file named below
    (a relative path), and stops with a hint when loading returned ``None``.
    """
    rule = "="*60
    print("π TIGeR Model Usage Examples")
    print(rule)

    # Config file is referenced by bare file name.
    chat_model = load_model("glm4v_tisr_full_inference.yaml")
    if chat_model is not None:
        single_inference_demo(chat_model)
        print("\n" + rule)
        print("β Demo completed!")
        print(rule)
        return

    print("β Failed to load model. Please check the config file path.")


if __name__ == "__main__":
    main()