hany01rye
/

TIGeR

image-text-to-text

computer-vision

spatial-reasoning

vision-language-model

Model card Files Files and versions

TIGeR / example_usage.py

hany01rye's picture

Upload folder using huggingface_hub

27320e1 verified 8 months ago

history blame contribute delete

3.07 kB

	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	"""
	TIGeR Model Usage Examples
	Demonstrates how to use the fine-tuned spatial reasoning model
	"""

	import os
	import sys
	import json
	import re
	from typing import List, Dict, Any, Optional
	from llamafactory.chat.chat_model import ChatModel

	def load_model(config_file: str) -> Optional["ChatModel"]:
	    """Load the model through LLaMA-Factory's ``ChatModel``.

	    ``ChatModel`` is constructed without arguments, so the config path is
	    supplied by simulating command-line arguments: ``sys.argv`` is temporarily
	    replaced with ``[sys.argv[0], config_file]`` while the constructor runs and
	    is always restored afterwards, whether or not loading succeeds.

	    Args:
	        config_file: Config file path placed on the simulated command line.

	    Returns:
	        The constructed ``ChatModel``, or ``None`` when construction raised an
	        exception (the error is printed rather than re-raised).
	    """
	    print(f"Loading model with config: {config_file}")

	    # Snapshot outside the try block so the finally clause can always restore it.
	    original_argv = sys.argv.copy()
	    try:
	        # Simulate command line arguments
	        sys.argv = [sys.argv[0], config_file]
	        chat_model = ChatModel()
	    except Exception as e:
	        # Demo boundary: report any loading problem and signal it with None.
	        print(f"❌ Model loading failed: {e}")
	        return None
	    else:
	        print("✅ Model loaded successfully!")
	        return chat_model
	    finally:
	        # Restore original command line arguments
	        sys.argv = original_argv

	def single_inference_demo(
	    chat_model,
	    image_paths: Optional[List[str]] = None,
	    question: Optional[str] = None,
	) -> None:
	    """Run one image + question inference and print the model's answer.

	    Args:
	        chat_model: Object exposing ``chat(messages, images=...)`` that returns
	            an iterable of responses. Each response's ``response_text``
	            attribute is used when present; otherwise ``str(response)`` is.
	        image_paths: Image paths passed to ``chat`` as ``images``. When
	            ``None``, a placeholder path is used that must be replaced with a
	            real image path.
	        question: Prompt sent as the single user message. When ``None``, the
	            default TIGeR-format depth-comparison question is used.

	    Returns:
	        None. The answer (or an error message) is printed; exceptions raised
	        during inference are reported, not propagated.
	    """
	    print("\n" + "="*50)
	    print("Single Image Inference Demo")
	    print("="*50)

	    if image_paths is None:
	        # Image path - replace with your actual image path
	        image_paths = ["/path/to/your/image.jpg"]

	    if question is None:
	        # Question - using the same format as TIGeR
	        question = (
	            "Two points are circled on the image, labeled by A and B beside each circle. "
	            "Which point is closer to the camera? Select from the following choices."
	            "\n(A) A is closer\n(B) B is closer"
	        )

	    try:
	        shown_paths = ", ".join(image_paths)
	        print(f"📷 Loading image: {shown_paths}")
	        print(f"🔍 Question: {question}")

	        # Prepare messages in the format expected by ChatModel
	        messages = [{"role": "user", "content": question}]

	        # Get model response
	        responses = chat_model.chat(messages, images=image_paths)

	        # Fall back to the string form only for responses lacking response_text,
	        # instead of hiding every possible error behind a broad except.
	        answer_parts = [
	            resp.response_text if hasattr(resp, "response_text") else str(resp)
	            for resp in responses
	        ]

	        response_text = "\n".join(answer_parts)
	        print(f"💡 Answer: {response_text}")

	    except FileNotFoundError:
	        print("❌ Image file not found, please provide correct image path")
	    except Exception as e:
	        # Demo boundary: report the failure instead of crashing the script.
	        print(f"❌ Error occurred during processing: {e}")

	def main():
	    """Run the usage demo: load the model, then run the single-image example.

	    The config file name is first looked up relative to the current working
	    directory. If it is not there, the copy located next to this script is
	    used when one exists. If the model cannot be loaded, a hint is printed
	    and the function returns without running the demo.
	    """
	    print("🚀 TIGeR Model Usage Examples")
	    print("="*60)

	    # Configuration file path - using the config file in the same directory
	    config_name = "glm4v_tisr_full_inference.yaml"
	    config_file = config_name
	    if not os.path.isfile(config_file):
	        # Running from another working directory: fall back to the config
	        # sitting beside this script. __file__ may be absent (e.g. pasted
	        # into an interactive session), hence the guarded lookup.
	        script_path = globals().get("__file__")
	        if script_path:
	            script_dir = os.path.dirname(os.path.abspath(script_path))
	            candidate = os.path.join(script_dir, config_name)
	            if os.path.isfile(candidate):
	                config_file = candidate

	    # Load model
	    chat_model = load_model(config_file)
	    if chat_model is None:
	        print("❌ Failed to load model. Please check the config file path.")
	        return

	    # Run single inference demo
	    single_inference_demo(chat_model)

	    print("\n" + "="*60)
	    print("✅ Demo completed!")
	    print("="*60)

	# Run the demo only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()