gary-boon
Claude Opus 4.5
committed
Commit · c6f4cc5
1 Parent(s): e20ccaf
Add tokenSections boundaries and update system prompt
- Return tokenSections in research endpoint response with boundaries
for system prompt, user prompt, and output sections
- Estimate system prompt boundary for Devstral using MistralTokenizer
- Update default system prompt to handle both code completion and
instruction-style prompts
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <[email protected]>
- backend/model_config.py +1 -1
- backend/model_service.py +49 -0
- backend/prompt_formatter.py +1 -1
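For orientation, the tokenSections field added by this commit is a map of three contiguous, half-open [start, end) token ranges covering the system prompt, the user prompt, and the generated output. Below is a minimal sketch of the shape a caller might see in the research endpoint response; the key names match the diff further down, but the numbers and text snippets are invented for illustration.

# Illustrative only -- values are made up; key names follow the diff below.
token_sections_example = {
    "systemPrompt": {"start": 0,  "end": 14, "text": "You are an expert Python programmer. ...", "tokenCount": 14},
    "userPrompt":   {"start": 14, "end": 53, "text": "def fibonacci(n):",                        "tokenCount": 39},
    "output":       {"start": 53, "end": 90, "text": "    if n < 2:\n        return n\n    ...", "tokenCount": 37},
}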
backend/model_config.py
CHANGED
@@ -88,7 +88,7 @@ SUPPORTED_MODELS: Dict[str, ModelConfig] = {
         "recommended_dtype": "bf16",  # Devstral requires bfloat16
         "uses_chat_template": True,  # Instruction-tuned, requires chat format
         "prompt_style": "instruction",  # Requires system + user messages
-        "system_prompt": "You are an expert Python programmer.
+        "system_prompt": "You are an expert Python programmer. If given partial code, continue it. If given a description or request, write the appropriate implementation. Output only valid Python code, no explanations or markdown.",
         "recommended_temperature": 0.15  # Devstral recommended temperature
     }
 }
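Because this entry sets prompt_style to "instruction" and uses_chat_template to True, the system_prompt above is sent as a separate system message rather than prepended to the code. A rough sketch of how a config entry like this could be turned into a chat-formatted prompt, assuming a Hugging Face transformers tokenizer with apply_chat_template; the helper function itself is hypothetical and not part of this repository:

# Hypothetical helper, not from this repo: shows how the config keys above
# ("system_prompt", "uses_chat_template") are typically consumed with a
# transformers tokenizer that exposes apply_chat_template.
def build_instruction_prompt(model_config: dict, user_prompt: str, tokenizer) -> str:
    messages = [
        {"role": "system", "content": model_config["system_prompt"]},
        {"role": "user", "content": user_prompt},
    ]
    # tokenize=False returns the formatted string; add_generation_prompt=True
    # appends the assistant header so generation starts in the right place.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )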
backend/model_service.py
CHANGED
@@ -1801,6 +1801,54 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
 
     generation_time = time.time() - start_time
 
+    # Calculate token section boundaries for UI display
+    total_tokens = prompt_length + len(generated_token_ids)
+    system_prompt_text = system_prompt_override or (model_config.get("system_prompt") if model_config else None)
+
+    # For instruction models, estimate where system prompt ends
+    # This is approximate due to control tokens in chat templates
+    system_prompt_end = 0
+    if prompt_style == "instruction" and system_prompt_text:
+        if manager.model_id == "devstral-small" and manager.mistral_tokenizer is not None:
+            # For Devstral, try encoding with empty system to estimate boundary
+            try:
+                no_system_tokens = manager.mistral_tokenizer.encode_chat("", prompt)
+                system_prompt_end = prompt_length - len(no_system_tokens)
+                # Ensure non-negative and within bounds
+                system_prompt_end = max(0, min(system_prompt_end, prompt_length))
+                logger.info(f"Estimated system prompt boundary: {system_prompt_end} tokens")
+            except Exception as e:
+                logger.warning(f"Could not estimate system prompt boundary: {e}")
+                system_prompt_end = 0
+        else:
+            # For other instruction models, rough estimate based on character ratio
+            # This is very approximate but provides some visual separation
+            total_chars = len(system_prompt_text or "") + len(prompt)
+            if total_chars > 0:
+                system_ratio = len(system_prompt_text or "") / total_chars
+                system_prompt_end = int(prompt_length * system_ratio)
+
+    token_sections = {
+        "systemPrompt": {
+            "start": 0,
+            "end": system_prompt_end,
+            "text": system_prompt_text,
+            "tokenCount": system_prompt_end
+        },
+        "userPrompt": {
+            "start": system_prompt_end,
+            "end": prompt_length,
+            "text": prompt,
+            "tokenCount": prompt_length - system_prompt_end
+        },
+        "output": {
+            "start": prompt_length,
+            "end": total_tokens,
+            "text": "".join(generated_tokens),
+            "tokenCount": len(generated_token_ids)
+        }
+    }
+
     # Build response
     response = {
         "prompt": prompt,
@@ -1808,6 +1856,7 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
                             for tid, t in zip(prompt_token_ids, prompt_tokens)],
         "generatedTokens": [{"text": t, "idx": tid, "bytes": len(t.encode('utf-8')), "type": "generated"}
                             for tid, t in zip(generated_token_ids, generated_tokens)],
+        "tokenSections": token_sections,  # Section boundaries for UI coloring
         "tokenAlternatives": token_alternatives_by_step,  # Top-k alternatives for each token
         "layersDataByStep": layer_data_by_token,  # Layer data for ALL generation steps
         "layersData": layer_data_by_token[-1] if layer_data_by_token else [],  # Keep for backward compatibility
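To make the fallback branch above concrete: when no tokenizer-based estimate is available, the boundary is guessed proportionally from character counts. Here is that arithmetic restated as a standalone function with a worked example; the function name is mine, but the logic mirrors the added lines.

def estimate_boundary_by_char_ratio(system_prompt_text: str, user_prompt: str,
                                    prompt_length: int) -> int:
    """Approximate where the system prompt ends, mirroring the fallback branch above."""
    total_chars = len(system_prompt_text or "") + len(user_prompt)
    if total_chars == 0:
        return 0
    system_ratio = len(system_prompt_text or "") / total_chars
    return int(prompt_length * system_ratio)

# Worked example (invented numbers): a 120-character system prompt plus a
# 60-character user prompt tokenized to 90 prompt tokens gives
# int(90 * 120 / 180) = 60, so tokens 0-59 are labelled "systemPrompt".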
backend/prompt_formatter.py
CHANGED
@@ -74,7 +74,7 @@ class PromptFormatter:
         # Get system prompt (override > model default > generic fallback)
         system_prompt = system_prompt_override or model_config.get("system_prompt")
         if not system_prompt:
-            system_prompt = "You are a helpful coding assistant.
+            system_prompt = "You are a helpful coding assistant. If given partial code, continue it. If given a description or request, write the appropriate implementation."
 
         # Build messages list
         messages = [
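Finally, a purely illustrative sketch of how a consumer of the research endpoint might use the new tokenSections ranges to label each token for display. The "generatedTokens" and "tokenSections" keys appear in the diff above; "promptTokens" is an assumed name for the prompt-side list, and nothing here is part of this commit.

# Illustrative consumer code, not part of this commit.
def label_tokens(response: dict) -> list:
    """Pair each token's text with the section name its index falls into."""
    sections = response["tokenSections"]

    def section_for_index(idx: int) -> str:
        # Ranges are half-open [start, end), so each index falls in exactly one section.
        for name, bounds in sections.items():
            if bounds["start"] <= idx < bounds["end"]:
                return name
        return "unknown"

    # "promptTokens" is an assumed key; only "generatedTokens" is visible in the diff.
    all_tokens = response.get("promptTokens", []) + response["generatedTokens"]
    return [(tok["text"], section_for_index(i)) for i, tok in enumerate(all_tokens)]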