gary-boon and Claude Opus 4.5 committed
Commit · 3e80769
1 Parent(s): 2860768
Use mistral_common for proper Devstral prompt formatting
- Add mistral_common>=1.5.0 dependency
- Update prompt_formatter to use MistralTokenizer for Devstral
- Add recommended_temperature to model configs (0.15 for Devstral)
- Remove hardcoded <s> from manual format (tokenizer adds BOS)
The mistral_common library provides the correct chat template
encoding that Devstral expects, which should fix the garbage
token output.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <[email protected]>
- backend/model_config.py +7 -3
- backend/model_service.py +4 -4
- backend/prompt_formatter.py +69 -11
- requirements.txt +1 -0
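
For reference, a minimal sketch of the encode-then-decode round trip the new formatter path performs with mistral_common (mirroring the helper added in backend/prompt_formatter.py below); the system and user messages here are illustrative placeholders, not the service's real inputs:

```python
# Sketch only: mirrors _try_mistral_common_format from backend/prompt_formatter.py.
# The messages are placeholders for illustration.
from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

tokenizer = MistralTokenizer.from_hf_hub("mistralai/Devstral-Small-2507")
request = ChatCompletionRequest(messages=[
    SystemMessage(content="You are an expert Python programmer."),
    UserMessage(content="def fibonacci(n):"),
])
tokenized = tokenizer.encode_chat_completion(request)
prompt_text = tokenizer.decode(tokenized.tokens)  # formatted string handed to the HF tokenizer
```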
backend/model_config.py
CHANGED
```diff
@@ -26,6 +26,7 @@ class ModelConfig(TypedDict):
     uses_chat_template: bool  # Whether model expects instruction format
     prompt_style: str  # "completion" | "instruction" - how to format prompts
     system_prompt: Optional[str]  # Default system prompt for instruction models
+    recommended_temperature: float  # Recommended temperature for generation
 
 
 # Supported models registry
@@ -47,7 +48,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "fp16",  # fp16 for GPU, fp32 for CPU
         "uses_chat_template": False,  # Base model, raw completion
         "prompt_style": "completion",  # Raw text continuation
-        "system_prompt": None  # Base models don't use system prompts
+        "system_prompt": None,  # Base models don't use system prompts
+        "recommended_temperature": 0.7  # Standard for code completion
     },
     "code-llama-7b": {
         "hf_path": "codellama/CodeLlama-7b-hf",
@@ -66,7 +68,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "fp16",
         "uses_chat_template": False,  # Base model, raw completion
         "prompt_style": "completion",  # Raw text continuation
-        "system_prompt": None  # Base models don't use system prompts
+        "system_prompt": None,  # Base models don't use system prompts
+        "recommended_temperature": 0.7  # Standard for code completion
     },
     "devstral-small": {
         "hf_path": "mistralai/Devstral-Small-2507",
@@ -85,7 +88,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "bf16",  # Devstral requires bfloat16
         "uses_chat_template": True,  # Instruction-tuned, requires chat format
         "prompt_style": "instruction",  # Requires system + user messages
-        "system_prompt": "You are an expert Python programmer. Continue the code provided by the user. Output only valid Python code, no explanations or markdown."
+        "system_prompt": "You are an expert Python programmer. Continue the code provided by the user. Output only valid Python code, no explanations or markdown.",
+        "recommended_temperature": 0.15  # Devstral recommended temperature
     }
 }
 
```
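
Since recommended_temperature is just another key on each registry entry, consumers can read it with a plain dict lookup; a minimal sketch (the import path and the 0.7 fallback are assumptions for illustration):

```python
# Hypothetical lookup; the import path and fallback default are assumptions.
from backend.model_config import SUPPORTED_MODELS

config = SUPPORTED_MODELS["devstral-small"]
temperature = config.get("recommended_temperature", 0.7)
print(temperature)  # 0.15 for Devstral per the registry above
```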
backend/model_service.py
CHANGED
```diff
@@ -1509,10 +1509,10 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: bool
     if prompt_style == "instruction":
         logger.info(f"Formatted prompt preview: {formatted_prompt[:200]}...")
 
-        # Use temperature
-        if
-            temperature =
-        logger.info(f"Using temperature={temperature}
+        # Use model's recommended temperature for instruction models
+        if model_config and "recommended_temperature" in model_config:
+            temperature = model_config["recommended_temperature"]
+            logger.info(f"Using model recommended temperature={temperature}")
 
     # Tokenize and prepare
     inputs = manager.tokenizer(formatted_prompt, return_tensors="pt").to(manager.device)
```
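
The resolved temperature presumably feeds the sampling call further down; a hedged sketch using standard transformers generate() arguments (manager.model, the token limit, and the decode step are assumptions, not the service's exact code):

```python
# Sketch with standard transformers generation kwargs; `manager`, `inputs`,
# and `temperature` are as in the hunk above, the rest is assumed.
outputs = manager.model.generate(
    **inputs,
    max_new_tokens=256,          # illustrative limit
    do_sample=temperature > 0,   # greedy decoding if temperature is 0
    temperature=temperature,
)
completion = manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
```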
backend/prompt_formatter.py
CHANGED
```diff
@@ -5,7 +5,52 @@ Handles formatting prompts appropriately for different model types:
 - Instruction models: System prompt + user message with chat template
 """
 
-from typing import Dict, Optional, Any
+from typing import Dict, Optional, Any, List
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def _try_mistral_common_format(messages: List[Dict[str, str]], model_name: str) -> Optional[str]:
+    """
+    Try to use mistral_common for proper Mistral/Devstral chat formatting.
+    Returns None if mistral_common is not available or fails.
+    """
+    try:
+        from mistral_common.protocol.instruct.messages import (
+            SystemMessage, UserMessage
+        )
+        from mistral_common.protocol.instruct.request import ChatCompletionRequest
+        from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+
+        # Load the tokenizer from HF hub
+        tokenizer = MistralTokenizer.from_hf_hub(model_name)
+
+        # Build messages
+        mistral_messages = []
+        for msg in messages:
+            if msg["role"] == "system":
+                mistral_messages.append(SystemMessage(content=msg["content"]))
+            elif msg["role"] == "user":
+                mistral_messages.append(UserMessage(content=msg["content"]))
+
+        # Encode to get token IDs
+        request = ChatCompletionRequest(messages=mistral_messages)
+        tokenized = tokenizer.encode_chat_completion(request)
+
+        # Decode back to text for use with HF tokenizer
+        # This gives us the properly formatted prompt string
+        decoded = tokenizer.decode(tokenized.tokens)
+
+        logger.info(f"Used mistral_common format for {model_name}")
+        return decoded
+
+    except ImportError:
+        logger.warning("mistral_common not available, using fallback format")
+        return None
+    except Exception as e:
+        logger.warning(f"mistral_common formatting failed: {e}, using fallback")
+        return None
 
 
 class PromptFormatter:
@@ -17,9 +62,9 @@ class PromptFormatter:
     - Model treats it as text to continue
 
     Instruction models (Devstral, instruct variants):
-    -
-    -
-    -
+    - Use mistral_common for Mistral/Devstral models
+    - Fallback to tokenizer's chat_template if available
+    - Final fallback to manual Mistral format
     """
 
     def format(
@@ -64,8 +109,10 @@ class PromptFormatter:
         """
         Format prompt for instruction-tuned models.
 
-
-
+        Priority:
+        1. mistral_common for Mistral/Devstral models
+        2. Tokenizer's native chat_template
+        3. Manual Mistral format fallback
         """
         # Get system prompt (override > model default > generic fallback)
         system_prompt = system_prompt_override or model_config.get("system_prompt")
@@ -78,7 +125,15 @@ class PromptFormatter:
             {"role": "user", "content": prompt}
         ]
 
-        #
+        # For Mistral/Devstral models, try mistral_common first
+        architecture = model_config.get("architecture", "")
+        hf_path = model_config.get("hf_path", "")
+        if architecture == "mistral" or "mistral" in hf_path.lower():
+            formatted = _try_mistral_common_format(messages, hf_path)
+            if formatted:
+                return formatted
+
+        # Try tokenizer's native chat template
         if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template is not None:
             try:
                 formatted = tokenizer.apply_chat_template(
@@ -86,21 +141,24 @@ class PromptFormatter:
                     tokenize=False,
                     add_generation_prompt=True
                 )
+                logger.info("Used HF tokenizer chat_template")
                 return formatted
             except Exception as e:
-
-                print(f"Warning: chat_template failed, using manual format: {e}")
+                logger.warning(f"chat_template failed: {e}, using manual format")
 
         # Fallback: Manual Mistral/Llama format
+        # Note: Don't include <s> as the tokenizer adds it during tokenization
         return self._manual_mistral_format(prompt, system_prompt)
 
     def _manual_mistral_format(self, prompt: str, system_prompt: str) -> str:
         """
         Manual Mistral instruction format as fallback.
 
-        Format:
+        Format: [INST] {system}\n\n{user} [/INST]
+        Note: <s> is NOT included as the tokenizer adds BOS automatically.
         """
-
+        logger.info("Using manual Mistral instruction format")
+        return f"[INST] {system_prompt}\n\n{prompt} [/INST]"
 
 
 # Singleton instance for convenience
```
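
For clarity, the string the final manual fallback now returns, with no leading <s> since the HF tokenizer adds BOS during tokenization (values below are illustrative):

```python
# Illustrative values; mirrors the new _manual_mistral_format return value.
system_prompt = "You are an expert Python programmer."
prompt = "def add(a, b):"
formatted = f"[INST] {system_prompt}\n\n{prompt} [/INST]"
# -> "[INST] You are an expert Python programmer.\n\ndef add(a, b): [/INST]"
# The old version prefixed a hardcoded <s>, which duplicated BOS once the
# HF tokenizer encoded the string.
```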
requirements.txt
CHANGED
```diff
@@ -10,6 +10,7 @@ pydantic==2.5.0
 torch>=2.3.0
 transformers>=4.44.0
 accelerate>=0.30.0
+mistral_common>=1.5.0  # Required for Devstral chat template formatting
 
 # Utilities
 numpy==1.24.3
```
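
A quick sanity check (not part of this commit) that the new dependency actually resolves in the Space's environment, matching the import-or-fallback behavior in prompt_formatter.py:

```python
# Hypothetical check; the formatter falls back to chat_template / manual
# formatting when this import fails.
try:
    from mistral_common.tokens.tokenizers.mistral import MistralTokenizer  # noqa: F401
    print("mistral_common available")
except ImportError as exc:
    print(f"mistral_common missing, formatter will fall back: {exc}")
```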