gary-boon Claude Opus 4.5 committed
Commit 3e80769 · 1 parent: 2860768

Use mistral_common for proper Devstral prompt formatting


- Add mistral_common>=1.5.0 dependency
- Update prompt_formatter to use MistralTokenizer for Devstral
- Add recommended_temperature to model configs (0.15 for Devstral)
- Remove hardcoded <s> from manual format (tokenizer adds BOS)

The mistral_common library provides the correct chat template
encoding that Devstral expects, which should fix the garbage
token output.
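For reference, a minimal sketch of the encoding path this relies on; the model id matches the Devstral config below, while the message contents are only illustrative:

```python
from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

# Load Devstral's own tokenizer definition from the Hugging Face Hub
tokenizer = MistralTokenizer.from_hf_hub("mistralai/Devstral-Small-2507")

# Encode a system + user exchange with the official chat template
tokenized = tokenizer.encode_chat_completion(ChatCompletionRequest(messages=[
    SystemMessage(content="You are an expert Python programmer."),
    UserMessage(content="def fibonacci(n):"),
]))

# tokenized.tokens holds the exact IDs Devstral expects; decoding them gives
# the formatted prompt string that the new formatter hands to the HF tokenizer
print(tokenizer.decode(tokenized.tokens))
```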

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>

backend/model_config.py CHANGED
@@ -26,6 +26,7 @@ class ModelConfig(TypedDict):
     uses_chat_template: bool # Whether model expects instruction format
     prompt_style: str # "completion" | "instruction" - how to format prompts
     system_prompt: Optional[str] # Default system prompt for instruction models
+    recommended_temperature: float # Recommended temperature for generation


 # Supported models registry
@@ -47,7 +48,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "fp16", # fp16 for GPU, fp32 for CPU
         "uses_chat_template": False, # Base model, raw completion
         "prompt_style": "completion", # Raw text continuation
-        "system_prompt": None # Base models don't use system prompts
+        "system_prompt": None, # Base models don't use system prompts
+        "recommended_temperature": 0.7 # Standard for code completion
     },
     "code-llama-7b": {
         "hf_path": "codellama/CodeLlama-7b-hf",
@@ -66,7 +68,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "fp16",
         "uses_chat_template": False, # Base model, raw completion
         "prompt_style": "completion", # Raw text continuation
-        "system_prompt": None # Base models don't use system prompts
+        "system_prompt": None, # Base models don't use system prompts
+        "recommended_temperature": 0.7 # Standard for code completion
     },
     "devstral-small": {
         "hf_path": "mistralai/Devstral-Small-2507",
@@ -85,7 +88,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "bf16", # Devstral requires bfloat16
         "uses_chat_template": True, # Instruction-tuned, requires chat format
         "prompt_style": "instruction", # Requires system + user messages
-        "system_prompt": "You are an expert Python programmer. Continue the code provided by the user. Output only valid Python code, no explanations or markdown."
+        "system_prompt": "You are an expert Python programmer. Continue the code provided by the user. Output only valid Python code, no explanations or markdown.",
+        "recommended_temperature": 0.15 # Devstral recommended temperature
     }
 }

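A minimal sketch of how a caller can pick up the new field; the import path assumes the backend package layout implied by the file paths in this commit:

```python
from backend.model_config import SUPPORTED_MODELS

config = SUPPORTED_MODELS["devstral-small"]

# Fall back to a conservative default if an entry ever omits the new field
temperature = config.get("recommended_temperature", 0.7)
assert temperature == 0.15  # Devstral's recommended sampling temperature
```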
 
backend/model_service.py CHANGED
@@ -1509,10 +1509,10 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
     if prompt_style == "instruction":
         logger.info(f"Formatted prompt preview: {formatted_prompt[:200]}...")

-    # Use temperature=0 for instruct models (fully deterministic code)
-    if prompt_style == "instruction":
-        temperature = 0.0
-        logger.info(f"Using temperature={temperature} for deterministic instruct model output")
+    # Use model's recommended temperature for instruction models
+    if model_config and "recommended_temperature" in model_config:
+        temperature = model_config["recommended_temperature"]
+        logger.info(f"Using model recommended temperature={temperature}")

     # Tokenize and prepare
     inputs = manager.tokenizer(formatted_prompt, return_tensors="pt").to(manager.device)
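Downstream, the resolved temperature feeds the usual transformers generation call. A hedged sketch, where `manager.model` and `max_new_tokens=256` are assumptions rather than code from this diff:

```python
# Greedy decoding when temperature is 0; otherwise sample with the
# model's recommended temperature (0.15 for Devstral, 0.7 for base models)
do_sample = temperature > 0
gen_kwargs = {"max_new_tokens": 256, "do_sample": do_sample}
if do_sample:
    gen_kwargs["temperature"] = temperature

outputs = manager.model.generate(**inputs, **gen_kwargs)
completion = manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
```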
backend/prompt_formatter.py CHANGED
@@ -5,7 +5,52 @@ Handles formatting prompts appropriately for different model types:
 - Instruction models: System prompt + user message with chat template
 """

-from typing import Dict, Optional, Any
+from typing import Dict, Optional, Any, List
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def _try_mistral_common_format(messages: List[Dict[str, str]], model_name: str) -> Optional[str]:
+    """
+    Try to use mistral_common for proper Mistral/Devstral chat formatting.
+    Returns None if mistral_common is not available or fails.
+    """
+    try:
+        from mistral_common.protocol.instruct.messages import (
+            SystemMessage, UserMessage
+        )
+        from mistral_common.protocol.instruct.request import ChatCompletionRequest
+        from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+
+        # Load the tokenizer from HF hub
+        tokenizer = MistralTokenizer.from_hf_hub(model_name)
+
+        # Build messages
+        mistral_messages = []
+        for msg in messages:
+            if msg["role"] == "system":
+                mistral_messages.append(SystemMessage(content=msg["content"]))
+            elif msg["role"] == "user":
+                mistral_messages.append(UserMessage(content=msg["content"]))
+
+        # Encode to get token IDs
+        request = ChatCompletionRequest(messages=mistral_messages)
+        tokenized = tokenizer.encode_chat_completion(request)
+
+        # Decode back to text for use with HF tokenizer
+        # This gives us the properly formatted prompt string
+        decoded = tokenizer.decode(tokenized.tokens)
+
+        logger.info(f"Used mistral_common format for {model_name}")
+        return decoded
+
+    except ImportError:
+        logger.warning("mistral_common not available, using fallback format")
+        return None
+    except Exception as e:
+        logger.warning(f"mistral_common formatting failed: {e}, using fallback")
+        return None


 class PromptFormatter:
@@ -17,9 +62,9 @@ class PromptFormatter:
     - Model treats it as text to continue

     Instruction models (Devstral, instruct variants):
-    - Wrap with system prompt + user message
-    - Use tokenizer's chat_template if available
-    - Fallback to manual Mistral format
+    - Use mistral_common for Mistral/Devstral models
+    - Fallback to tokenizer's chat_template if available
+    - Final fallback to manual Mistral format
     """

     def format(
@@ -64,8 +109,10 @@ class PromptFormatter:
         """
         Format prompt for instruction-tuned models.

-        Uses the tokenizer's chat_template if available,
-        otherwise falls back to manual Mistral format.
+        Priority:
+        1. mistral_common for Mistral/Devstral models
+        2. Tokenizer's native chat_template
+        3. Manual Mistral format fallback
         """
         # Get system prompt (override > model default > generic fallback)
         system_prompt = system_prompt_override or model_config.get("system_prompt")
@@ -78,7 +125,15 @@ class PromptFormatter:
             {"role": "user", "content": prompt}
         ]

-        # Try tokenizer's native chat template first
+        # For Mistral/Devstral models, try mistral_common first
+        architecture = model_config.get("architecture", "")
+        hf_path = model_config.get("hf_path", "")
+        if architecture == "mistral" or "mistral" in hf_path.lower():
+            formatted = _try_mistral_common_format(messages, hf_path)
+            if formatted:
+                return formatted
+
+        # Try tokenizer's native chat template
         if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template is not None:
             try:
                 formatted = tokenizer.apply_chat_template(
@@ -86,21 +141,24 @@ class PromptFormatter:
                     tokenize=False,
                     add_generation_prompt=True
                 )
+                logger.info("Used HF tokenizer chat_template")
                 return formatted
             except Exception as e:
-                # Fall through to manual format if template fails
-                print(f"Warning: chat_template failed, using manual format: {e}")
+                logger.warning(f"chat_template failed: {e}, using manual format")

         # Fallback: Manual Mistral/Llama format
+        # Note: Don't include <s> as the tokenizer adds it during tokenization
         return self._manual_mistral_format(prompt, system_prompt)

     def _manual_mistral_format(self, prompt: str, system_prompt: str) -> str:
         """
         Manual Mistral instruction format as fallback.

-        Format: <s>[INST] {system}\n\n{user} [/INST]
+        Format: [INST] {system}\n\n{user} [/INST]
+        Note: <s> is NOT included as the tokenizer adds BOS automatically.
         """
-        return f"<s>[INST] {system_prompt}\n\n{prompt} [/INST]"
+        logger.info("Using manual Mistral instruction format")
+        return f"[INST] {system_prompt}\n\n{prompt} [/INST]"


 # Singleton instance for convenience
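To see why the hardcoded `<s>` was dropped from the manual fallback, a quick check of BOS handling; this assumes the model ships an HF-compatible tokenizer, as the fallback path above already does:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Devstral-Small-2507")

manual = "[INST] You are an expert Python programmer.\n\ndef fibonacci(n): [/INST]"
ids = tok(manual, return_tensors="pt").input_ids[0]

# The HF tokenizer prepends BOS (<s>) on its own; keeping a literal "<s>"
# in the string would have produced a doubled BOS at the start of the sequence
print(tok.convert_ids_to_tokens(ids[:3].tolist()))
```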
requirements.txt CHANGED
@@ -10,6 +10,7 @@ pydantic==2.5.0
 torch>=2.3.0
 transformers>=4.44.0
 accelerate>=0.30.0
+mistral_common>=1.5.0 # Required for Devstral chat template formatting

 # Utilities
 numpy==1.24.3