gary-boon and Claude Opus 4.5 committed
Commit · 3e80769
1 Parent(s): 2860768
Use mistral_common for proper Devstral prompt formatting
- Add mistral_common>=1.5.0 dependency
- Update prompt_formatter to use MistralTokenizer for Devstral
- Add recommended_temperature to model configs (0.15 for Devstral)
- Remove hardcoded <s> from manual format (tokenizer adds BOS)
The mistral_common library provides the correct chat template
encoding that Devstral expects, which should fix the garbage
token output.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <[email protected]>
- backend/model_config.py +7 -3
- backend/model_service.py +4 -4
- backend/prompt_formatter.py +69 -11
- requirements.txt +1 -0
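
For reference, a minimal sketch of the encode-then-decode round trip the new formatter path performs with mistral_common (mirroring the helper added in backend/prompt_formatter.py below); the system and user messages here are illustrative placeholders, not the service's real inputs:

```python
# Sketch only: mirrors _try_mistral_common_format from backend/prompt_formatter.py.
# The messages are placeholders for illustration.
from mistral_common.protocol.instruct.messages import SystemMessage, UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

tokenizer = MistralTokenizer.from_hf_hub("mistralai/Devstral-Small-2507")
request = ChatCompletionRequest(messages=[
    SystemMessage(content="You are an expert Python programmer."),
    UserMessage(content="def fibonacci(n):"),
])
tokenized = tokenizer.encode_chat_completion(request)
prompt_text = tokenizer.decode(tokenized.tokens)  # formatted string handed to the HF tokenizer
```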
backend/model_config.py
CHANGED
```diff
@@ -26,6 +26,7 @@ class ModelConfig(TypedDict):
     uses_chat_template: bool  # Whether model expects instruction format
     prompt_style: str  # "completion" | "instruction" - how to format prompts
     system_prompt: Optional[str]  # Default system prompt for instruction models
+    recommended_temperature: float  # Recommended temperature for generation
 
 
 # Supported models registry
@@ -47,7 +48,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "fp16",  # fp16 for GPU, fp32 for CPU
         "uses_chat_template": False,  # Base model, raw completion
         "prompt_style": "completion",  # Raw text continuation
-        "system_prompt": None  # Base models don't use system prompts
+        "system_prompt": None,  # Base models don't use system prompts
+        "recommended_temperature": 0.7  # Standard for code completion
     },
     "code-llama-7b": {
         "hf_path": "codellama/CodeLlama-7b-hf",
@@ -66,7 +68,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "fp16",
         "uses_chat_template": False,  # Base model, raw completion
         "prompt_style": "completion",  # Raw text continuation
-        "system_prompt": None  # Base models don't use system prompts
+        "system_prompt": None,  # Base models don't use system prompts
+        "recommended_temperature": 0.7  # Standard for code completion
     },
     "devstral-small": {
         "hf_path": "mistralai/Devstral-Small-2507",
@@ -85,7 +88,8 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "bf16",  # Devstral requires bfloat16
         "uses_chat_template": True,  # Instruction-tuned, requires chat format
         "prompt_style": "instruction",  # Requires system + user messages
-        "system_prompt": "You are an expert Python programmer. Continue the code provided by the user. Output only valid Python code, no explanations or markdown."
+        "system_prompt": "You are an expert Python programmer. Continue the code provided by the user. Output only valid Python code, no explanations or markdown.",
+        "recommended_temperature": 0.15  # Devstral recommended temperature
     }
 }
 
```
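
Since recommended_temperature is just another key on each registry entry, consumers can read it with a plain dict lookup; a minimal sketch (the import path and the 0.7 fallback are assumptions for illustration):

```python
# Hypothetical lookup; the import path and fallback default are assumptions.
from backend.model_config import SUPPORTED_MODELS

config = SUPPORTED_MODELS["devstral-small"]
temperature = config.get("recommended_temperature", 0.7)
print(temperature)  # 0.15 for Devstral per the registry above
```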
backend/model_service.py
CHANGED
```diff
@@ -1509,10 +1509,10 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: bool
     if prompt_style == "instruction":
         logger.info(f"Formatted prompt preview: {formatted_prompt[:200]}...")
 
-        # Use temperature
-        if
-            temperature =
-        logger.info(f"Using temperature={temperature}
+        # Use model's recommended temperature for instruction models
+        if model_config and "recommended_temperature" in model_config:
+            temperature = model_config["recommended_temperature"]
+            logger.info(f"Using model recommended temperature={temperature}")
 
     # Tokenize and prepare
     inputs = manager.tokenizer(formatted_prompt, return_tensors="pt").to(manager.device)
```
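
The resolved temperature presumably feeds the sampling call further down; a hedged sketch using standard transformers generate() arguments (manager.model, the token limit, and the decode step are assumptions, not the service's exact code):

```python
# Sketch with standard transformers generation kwargs; `manager`, `inputs`,
# and `temperature` are as in the hunk above, the rest is assumed.
outputs = manager.model.generate(
    **inputs,
    max_new_tokens=256,          # illustrative limit
    do_sample=temperature > 0,   # greedy decoding if temperature is 0
    temperature=temperature,
)
completion = manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
```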
backend/prompt_formatter.py
CHANGED
```diff
@@ -5,7 +5,52 @@ Handles formatting prompts appropriately for different model types:
 - Instruction models: System prompt + user message with chat template
 """
 
-from typing import Dict, Optional, Any
+from typing import Dict, Optional, Any, List
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def _try_mistral_common_format(messages: List[Dict[str, str]], model_name: str) -> Optional[str]:
+    """
+    Try to use mistral_common for proper Mistral/Devstral chat formatting.
+    Returns None if mistral_common is not available or fails.
+    """
+    try:
+        from mistral_common.protocol.instruct.messages import (
+            SystemMessage, UserMessage
+        )
+        from mistral_common.protocol.instruct.request import ChatCompletionRequest
+        from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+
+        # Load the tokenizer from HF hub
+        tokenizer = MistralTokenizer.from_hf_hub(model_name)
+
+        # Build messages
+        mistral_messages = []
+        for msg in messages:
+            if msg["role"] == "system":
+                mistral_messages.append(SystemMessage(content=msg["content"]))
+            elif msg["role"] == "user":
+                mistral_messages.append(UserMessage(content=msg["content"]))
+
+        # Encode to get token IDs
+        request = ChatCompletionRequest(messages=mistral_messages)
+        tokenized = tokenizer.encode_chat_completion(request)
+
+        # Decode back to text for use with HF tokenizer
+        # This gives us the properly formatted prompt string
+        decoded = tokenizer.decode(tokenized.tokens)
+
+        logger.info(f"Used mistral_common format for {model_name}")
+        return decoded
+
+    except ImportError:
+        logger.warning("mistral_common not available, using fallback format")
+        return None
+    except Exception as e:
+        logger.warning(f"mistral_common formatting failed: {e}, using fallback")
+        return None
 
 
 class PromptFormatter:
@@ -17,9 +62,9 @@ class PromptFormatter:
     - Model treats it as text to continue
 
     Instruction models (Devstral, instruct variants):
-    -
-    -
-    -
+    - Use mistral_common for Mistral/Devstral models
+    - Fallback to tokenizer's chat_template if available
+    - Final fallback to manual Mistral format
     """
 
     def format(
@@ -64,8 +109,10 @@ class PromptFormatter:
         """
         Format prompt for instruction-tuned models.
 
-
-
+        Priority:
+        1. mistral_common for Mistral/Devstral models
+        2. Tokenizer's native chat_template
+        3. Manual Mistral format fallback
         """
         # Get system prompt (override > model default > generic fallback)
         system_prompt = system_prompt_override or model_config.get("system_prompt")
@@ -78,7 +125,15 @@ class PromptFormatter:
             {"role": "user", "content": prompt}
         ]
 
-        #
+        # For Mistral/Devstral models, try mistral_common first
+        architecture = model_config.get("architecture", "")
+        hf_path = model_config.get("hf_path", "")
+        if architecture == "mistral" or "mistral" in hf_path.lower():
+            formatted = _try_mistral_common_format(messages, hf_path)
+            if formatted:
+                return formatted
+
+        # Try tokenizer's native chat template
         if hasattr(tokenizer, 'chat_template') and tokenizer.chat_template is not None:
             try:
                 formatted = tokenizer.apply_chat_template(
@@ -86,21 +141,24 @@ class PromptFormatter:
                     tokenize=False,
                     add_generation_prompt=True
                 )
+                logger.info("Used HF tokenizer chat_template")
                 return formatted
             except Exception as e:
-
-                print(f"Warning: chat_template failed, using manual format: {e}")
+                logger.warning(f"chat_template failed: {e}, using manual format")
 
         # Fallback: Manual Mistral/Llama format
+        # Note: Don't include <s> as the tokenizer adds it during tokenization
         return self._manual_mistral_format(prompt, system_prompt)
 
     def _manual_mistral_format(self, prompt: str, system_prompt: str) -> str:
         """
         Manual Mistral instruction format as fallback.
 
-        Format:
+        Format: [INST] {system}\n\n{user} [/INST]
+        Note: <s> is NOT included as the tokenizer adds BOS automatically.
         """
-
+        logger.info("Using manual Mistral instruction format")
+        return f"[INST] {system_prompt}\n\n{prompt} [/INST]"
 
 
 # Singleton instance for convenience
```
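
For clarity, the string the final manual fallback now returns, with no leading <s> since the HF tokenizer adds BOS during tokenization (values below are illustrative):

```python
# Illustrative values; mirrors the new _manual_mistral_format return value.
system_prompt = "You are an expert Python programmer."
prompt = "def add(a, b):"
formatted = f"[INST] {system_prompt}\n\n{prompt} [/INST]"
# -> "[INST] You are an expert Python programmer.\n\ndef add(a, b): [/INST]"
# The old version prefixed a hardcoded <s>, which duplicated BOS once the
# HF tokenizer encoded the string.
```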
requirements.txt
CHANGED
```diff
@@ -10,6 +10,7 @@ pydantic==2.5.0
 torch>=2.3.0
 transformers>=4.44.0
 accelerate>=0.30.0
+mistral_common>=1.5.0  # Required for Devstral chat template formatting
 
 # Utilities
 numpy==1.24.3
```
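
A quick sanity check (not part of this commit) that the new dependency actually resolves in the Space's environment, matching the import-or-fallback behavior in prompt_formatter.py:

```python
# Hypothetical check; the formatter falls back to chat_template / manual
# formatting when this import fails.
try:
    from mistral_common.tokens.tokenizers.mistral import MistralTokenizer  # noqa: F401
    print("mistral_common available")
except ImportError as exc:
    print(f"mistral_common missing, formatter will fall back: {exc}")
```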